Friday, October 16, 2020

Pandas Merging Dataframe using SQL query



This is an excellent solution to merge two data frames 

import pandas as pd

import sqlite3

from datetime import datetime



#We'll use firelynx's tables:

presidents = pd.DataFrame({"name": ["Bush", "Obama", "Trump"],

                           "president_id":[43, 44, 45]})

terms = pd.DataFrame({'start_date': pd.date_range('2001-01-20', periods=5, freq='48M'),

                      'end_date': pd.date_range('2005-01-21', periods=5, freq='48M'),

                      'president_id': [43, 43, 44, 44, 45]})

war_declarations = pd.DataFrame({"date": [datetime(2001, 9, 14), datetime(2003, 3, 3)],

                                 "name": ["War in Afghanistan", "Iraq War"]})

#Make the db in memory

conn = sqlite3.connect(':memory:')

#write the tables

terms.to_sql('terms', conn, index=False)

presidents.to_sql('presidents', conn, index=False)

war_declarations.to_sql('wars', conn, index=False)


qry = '''

    select  

        start_date PresTermStart,

        end_date PresTermEnd,

        wars.date WarStart,

        presidents.name Pres

    from

        terms join wars on

        date between start_date and end_date join presidents on

        terms.president_id = presidents.president_id

    '''

df = pd.read_sql_query(qry, conn)



References:

https://stackoverflow.com/questions/30627968/merge-pandas-dataframes-where-one-value-is-between-two-others



No comments:

Post a Comment