Monday, 22 July 2013

Sqlite3 and Python: How to join two database tables created in Python using Sqlite3 module

I am assuming that readers of this post has working knowledge of python and sqlite3 module, if not I have added detailed comments where necessary.
In brief what it does is it takes two database tables and joins them together, including any duplicate columns only once.
For example: If first database table has columns (Id, Date, time, col1, col2, col3) and second database table has columns (Id, Data, time, col4, col5, col6) then output of function join_database() will be a new database table containing columns(Id, Date, Time, col1, col2, col3, col4, col5, col6).
You are always welcome to download the source code from here.


 import sqlite3 as lite  
 import sys  
   
 def join_database(database1Name, database2Name, database3Name):  
  ''' Join two database 'database1Name' and 'database2Name' into database 'database3Name' '''  
    
  con1 = lite.connect(database1Name)      # Connect to Database1  
  con2 = lite.connect(database2Name)      # Connect to Database2  
  con3 = lite.connect(database3Name)      # Connect to Database3  
   
  con1.row_factory = lite.Row         # For reading column names from Database1  
  con2.row_factory = lite.Row         # For reading column names from Database2  
   
  con1_new = lite.connect(database1Name)    # Create new connection with Database1 for reading data  
  con2_new = lite.connect(database2Name)    # Create new connection with Database2 for reading data  
   
  with con1, con2, con3, con1_new, con2_new:  
   cur1 = con1.cursor()            # Get cursor from connection 1 (con1)  
   cur2 = con2.cursor()            # Get cursor from connection 2 (con2)  
   cur3 = con3.cursor()            # Get cursor from connection 3 (con2)  
    
   cur1_new = con1_new.cursor()        # Get connection from connection 1_new (con1_new)  
   cur2_new = con2_new.cursor()        # Get connection from connection 2_new (con2_new)  
     
   cur1.execute("SELECT * FROM your_table_name")  # Select all column names from table your_table_name in Database1 (Why column names see line no. 11)  
   cur2.execute("SELECT * FROM your_table_name")  # Select all column names from table your_table_name in Database2 (Why column names see line no. 12)  
    
   row_1 = cur1.fetchone()      # Fetch column names from database1 table  
   row_2 = cur2.fetchone()      # Fetch column names from database2 table  
    
   columnNames = row_1.keys()    # Read column names from Database1  
     
   for item in row_2.keys():     # Append column names from Database1 with Database2, including duplicate column names only once (See description)  
    if item not in row_1.keys():  
     columnNames.append(item)  
    
   cur3.execute("CREATE TABLE IF NOT EXISTS your_table_name(Id INTEGER PRIMARY KEY, Date TEXT(10), Time TEXT(8))")  # You can change this line according to your needs  
     
   for col in columnNames[3:]:  
    cur3.execute("ALTER TABLE your_table_name ADD {} DOUBLE".format(col)) # You can change the data type(here it is DOUBLE) according to your needs  
    
   cur1_new.execute("SELECT * FROM your_table_name")  # Select complete data at once from table your_table_name in database1  
   cur2_new.execute("SELECT * FROM your_table_name")  # Select complete data at once from table your_table_name in database2  
    
   rows1 = cur1_new.fetchall()   # Read all rows from Database1  
   rows2 = cur2_new.fetchall()   # Read all rows from Database2  
    
   rows1 = [r[1:] for r in rows1]  # Except first row in rows1 read all rows  
   rows2 = [r[3:] for r in rows2]  # Except first three rows in rows2 read all rows  
    
   rows = [r1 + r2 for r1, r2 in zip(rows1, rows2)] #Combine each rows in rows1 and rows2 element wise  
    
   ques = []            # Use by sqlite for inserting into table  
     
   ques = ["?"]*len(columnNames[1:])  # Generate list [?, ?, ?, ?,........till length equals length of columnNames[1:]  
   ques = ",".join(ques)        # Generate string "?,?,?,?,?........"  
     
   columnNames = ",".join(columnNames[1:]) # Generate string "col1, col2, col3............"  
      
   for item in rows:        # Insert combined data into new Database3  
    cur3.execute("INSERT INTO your_table_name({0}) VALUES ({1})".format(columnNames, ques), item)  
      
 if __name__ == '__main__':  
  pass  

No comments:

Post a Comment