Data Ingestion & Inspection – 1

Back To Index

Aim : Create a table inside SQLITE database from CSV data using Euler toolkit.

0_data_inspection_1
In [4]:
###################################################
#  Filename : 0_data_inspection_1                 #
#  Purpose : To demonstrate data ingestion and    #
#   basic sniffing using Euler toolkit            #
#            1. Create working database           # 
#            2. Run simple SQL queries            #
#  Author : Niel S.                               #
#  (c) The English Tea Company LLC                #
###################################################

#Following lines ad Euler into the search path
import sys
sys.path.append('C:\\Users\\singa72\\Desktop\\')
from Euler import Euler as Eu

#G L O B A L  V A R I A B L E S
#- - - - - - - - - - - - - - - -  - - 
data_folder = 'C:\\Users\\singa72\\Desktop\\Tutorial2\\bank-additional\\bank-additional\\'
data_work   = data_folder+'data_work.db'


#M A I N  F U N C T I O N
#- - - - - - - - - - - - - - - - - - -
def main():
    conn = Eu.connection(data_work)
    try: 
        #Call data_ingestion to create a working database.
        #data_ingestion()
        
        #Check the column names
        sql = '''
        SELECT sql FROM sqlite_master
        WHERE tbl_name = 'bank' AND type = 'table'
        ''' 
        Eu.run(sql,conn)
        
        #A look at the column: 'marital'
        sql = '''
        select distinct marital from bank
        '''
        Eu.run(sql,conn)
        
        #Count how many entries for each value of marital
        sql = '''
        select marital, count(*) n_people from bank
        group by marital
        '''
        Eu.run(sql,conn)
        
        #Order the results by descending order of people
        sql = '''
        select marital,count(*) n_people from bank
        group by marital order by n_people desc
        '''
        Eu.run(sql,conn)
        
    except Exception as err:
        Eu.print_error(err)
    finally:
        conn.close()


def data_ingestion():
    '''
    This function reads CSV file
    and loads it into a SQLITE 
    database in Table form.
    '''
    try:
        csvfile = data_folder+'bank_dataset.csv'
        pcsv = Eu.pycsv_reader(csvfile)
        #Dump into the database. Specify the tablename,sqlite filename etc
        pcsv.to_database(tabName='bank',database=data_work,verbose=False)
      
    except Exception as err:
        Eu.print_error(err)
    finally:
        pcsv.close()

if __name__ == '__main__':
    main()
===
sql
===
CREATE TABLE bank ( 
age       VARCHAR,
 job       VARCHAR,
 marital       VARCHAR,
 education       VARCHAR,
 deflt       VARCHAR,
 housing       VARCHAR,
 loan       VARCHAR,
 contact       VARCHAR,
 month       VARCHAR,
 day_of_week       VARCHAR,
 duration       VARCHAR,
 campaign       VARCHAR,
 pdays       VARCHAR,
 previous       VARCHAR,
 poutcome       VARCHAR,
 emp_var_rate       VARCHAR,
 cons_price_idx       VARCHAR,
 cons_conf_idx       VARCHAR,
 euribor_m       VARCHAR,
 nr_employed       VARCHAR,
 y       VARCHAR
 )
===

=======
marital
=======
married
single
divorced
unknown
=======

==========
y,n_people
==========
no,36548
yes,4640
==========

================
marital,n_people
================
married,24928
single,11568
divorced,4612
unknown,80
================

Back To Index