Data Inspection -3

Aim: Create a generic function for variable inspection using the Euler toolkit.

Back To Index

0_data_inspection_3
In [1]:
###################################################
#  Filename : 0_data_inspection_3                 #
#  Purpose : To demonstrate data inspection       #
#   using Euler toolkit                           #
#            1. Create generic function for       # 
#            informal variable inspection         #
#  Author : Niel S.                               #
#  (c) The English Tea Company LLC                #
###################################################

#Following lines add Euler into the search path
#NOTE: the path must be appended before 'import Euler', otherwise the
#toolkit is only found if it is already on the search path.
import sys
sys.path.append('C:\\Users\\singa72\\Desktop\\Euler\\')
import Euler as Eu

#Folder holding the tutorial data (absolute, machine-specific path)
data_folder = 'C:\\Users\\singa72\\Desktop\\Tutorial2\\bank-additional\\bank-additional\\'
#Database file handed to Eu.connection() in main()
data_work   = data_folder+'data_work.db'

#M A I N   F U N C T I O N
def main():
    '''
    Entry point of the demonstration.
    Opens a connection to the working database (data_work), shows the
    definition stored in sqlite_master for the 'bank' table and then
    runs the variable inspections. Any error raised while inspecting is
    handed to Eu.print_error; the connection is closed in every case.
    '''
    conn = Eu.connection(data_work)
    try:
        #Show the SQL that was used to create the 'bank' table
        schema_query = '''
        SELECT sql FROM sqlite_master
        WHERE tbl_name = 'bank' AND type = 'table'
        '''
        Eu.run(schema_query, conn)

        #Variable-specific inspections: marital first, then job
        for inspect in (inspect_marital, inspect_job):
            inspect(conn)

        #Same inspection of 'job' through the generic function
        inspect_categorical_var('job', conn)

    except Exception as err:
        Eu.print_error(err)
    finally:
        conn.close()

def inspect_marital(conn):
    '''
    Informal inspection of the 'marital' variable of the bank table.
    Runs two queries through Eu.run, in this order:
     1. the distinct values of 'marital'
     2. the number of people per value, largest group first
    Parameters:
     conn : Connection to sqlite database.
    Errors are not propagated; they are passed to Eu.print_error.
    '''
    queries = (
        #A look at the distinct values of 'marital'
        '''
        select distinct marital from bank
        ''',
        #Order the results by descending order of people
        '''
        select marital,count(*) n_people from bank
        group by marital order by n_people desc
        ''',
    )
    try:
        for query in queries:
            Eu.run(query, conn)
    except Exception as err:
        Eu.print_error(err)

        
def inspect_job(conn):
    '''
    Informal inspection of the 'job' variable of the bank table.
    Runs two queries through Eu.run, in this order:
     1. the distinct values of 'job'
     2. the number of people per value, largest group first
    Parameters:
     conn : Connection to sqlite database.
    Errors are not propagated; they are passed to Eu.print_error.
    '''
    queries = (
        #A look at the distinct values of 'job'
        '''
        select distinct job from bank
        ''',
        #Order the results by descending order of people
        '''
        select job,count(*) n_people from bank
        group by job order by n_people desc
        ''',
    )
    try:
        for query in queries:
            Eu.run(query, conn)
    except Exception as err:
        Eu.print_error(err)
    
    
def inspect_categorical_var(varName,conn,tableName='bank'):
    '''
    Informal Inspection of Variables
    Builds a "count of rows per value" query for the given variable,
    prints the generated SQL and runs it through Eu.run.
    Parameters:
     varName   : Variable to be inspected.
     conn      : Connection to sqlite database.
     tableName : Table that holds the variable. Defaults to 'bank',
                 which keeps existing two-argument calls unchanged.
    Errors are not propagated; they are passed to Eu.print_error.
    NOTE: varName and tableName are inserted into the SQL as plain text
    (names cannot be bound as query parameters), so only pass names
    that come from a trusted source.
    '''
    try:
        sql = '''
        select ?varN?, count(*) n_people from ?tblN?
        group by ?varN?
        '''
        #Fill in the table first: with the default this yields exactly
        #the original template before the variable name is substituted.
        sql = sql.replace('?tblN?',tableName)
        sql = sql.replace('?varN?',varName)
        print (sql)
        Eu.run(sql,conn)
    except Exception as err:
        Eu.print_error(err)
        

if __name__ == '__main__':
    main()
************************************************
*                    EULER                     *
*    A SQLITE POWERED DATA SCIENCE TOOLKIT     *
*          SINGH.AP79@GMAIL.NOSPAM.COM         *
************************************************

===
sql
===
CREATE TABLE bank ( 
age       VARCHAR,
 job       VARCHAR,
 marital       VARCHAR,
 education       VARCHAR,
 deflt       VARCHAR,
 housing       VARCHAR,
 loan       VARCHAR,
 contact       VARCHAR,
 month       VARCHAR,
 day_of_week       VARCHAR,
 duration       VARCHAR,
 campaign       VARCHAR,
 pdays       VARCHAR,
 previous       VARCHAR,
 poutcome       VARCHAR,
 emp_var_rate       VARCHAR,
 cons_price_idx       VARCHAR,
 cons_conf_idx       VARCHAR,
 euribor_m       VARCHAR,
 nr_employed       VARCHAR,
 y       VARCHAR
 )
===

=======
marital
=======
married
single
divorced
unknown
=======

================
marital,n_people
================
married,24928
single,11568
divorced,4612
unknown,80
================

===
job
===
housemaid
services
admin.
blue-collar
technician
retired
management
unemployed
self-employed
unknown
entrepreneur
student
===

============
job,n_people
============
admin.,10422
blue-collar,9254
technician,6743
services,3969
management,2924
retired,1720
entrepreneur,1456
self-employed,1421
housemaid,1060
unemployed,1014
student,875
unknown,330
============


        select job, count(*) n_people from bank
        group by job
        
============
job,n_people
============
admin.,10422
blue-collar,9254
entrepreneur,1456
housemaid,1060
management,2924
retired,1720
self-employed,1421
services,3969
student,875
technician,6743
unemployed,1014
unknown,330
============

Back To Index