Data Inspection-6

Back To Index

Aim: Use a generic function to inspect continuous variables.

0_data_inspection_6
In [2]:
###################################################
#  Filename : 0_data_inspection_6                 #
#  Purpose : To demonstrate data inspection       #
#   using Euler toolkit                           #
#            1. Use generic function for          # 
#            continuous variable inspection       #
#  Author : Niel S.                               #
#  (c) The English Tea Company LLC                #
###################################################

# The following lines add the Euler toolkit to the module search path
import sys
sys.path.append('C:\\Users\\singa72\\Desktop\\Euler\\')
import Euler as Eu

# Folder holding the tutorial data, and the work database file inside it.
# data_folder ends with a path separator, so the file name is appended as-is.
data_folder = 'C:\\Users\\singa72\\Desktop\\Tutorial2\\bank-additional\\bank-additional\\'
data_work = ''.join((data_folder, 'data_work.db'))



#M A I N   F U N C T I O N  
def main():
    """Inspect each continuous variable of the work database in turn.

    Opens a connection to ``data_work`` and calls
    ``inspect_continuous_var`` once per variable name. Exceptions raised
    during inspection are reported through ``Eu.print_error``; the
    connection is closed whether or not an error occurred.
    """
    continuous_vars = ('age', 'duration')

    conn = Eu.connection(data_work)
    try:
        for name in continuous_vars:
            inspect_continuous_var(varName=name, conn=conn)

        # Exercise: find the other continuous variables (use the Data
        # Dictionary on the Synopsis page).
        # Exercise: call the inspection on those variables as well.
    except Exception as err:
        Eu.print_error(err)
    finally:
        conn.close()

def inspect_continuous_var(varName,conn):
    """Print min / mean / max of one column of the ``bank`` table.

    Two queries are run through ``Eu.run``: the first over the whole
    table, the second grouped by the ``y`` column (with a row count per
    group). The mean is rounded to 0 decimal places by the SQL itself.

    Args:
        varName: Column name; substituted textually for every ``?var?``
            placeholder in the SQL templates.
        conn: Connection object handed unchanged to ``Eu.run``.

    Exceptions raised while building or running a query are passed to
    ``Eu.print_error`` instead of propagating.
    """
    # The column is multiplied by 1. in every aggregate -- presumably to
    # force numeric (floating-point) treatment of the values.
    overall_template = '''
        select min(?var?*1.) min_?var?, 
        round(avg(?var?*1.),0) mean_?var?, 
        max(?var?*1.) max_?var?
        from  bank
        '''
    by_class_template = '''
        select y,count(*) n_people, 
        min(?var?*1.) min_?var?, 
        round(avg(?var?*1.),0) mean_?var?, 
        max(?var?*1.) max_?var?
        from  bank
        group by y
        '''
    # (SQL template, text appended to the printed heading)
    reports = (
        (overall_template, ''),
        (by_class_template, ', grouped by y'),
    )

    try:
        for template, heading_suffix in reports:
            query = template.replace('?var?',varName)
            print ('Statistics for: '+varName+heading_suffix)
            Eu.run(query,conn)
    except Exception as err:
        Eu.print_error(err)

# Run the inspection only when this file is executed as a script.
if __name__ == '__main__':
    main()
Statistics for: age
========================
min_age,mean_age,max_age
========================
17.0,40.0,98.0
========================

Statistics for: age, grouped by y
===================================
y,n_people,min_age,mean_age,max_age
===================================
no,36548,17.0,40.0,95.0
yes,4640,17.0,41.0,98.0
===================================

Statistics for: duration
=======================================
min_duration,mean_duration,max_duration
=======================================
0.0,258.0,4918.0
=======================================

Statistics for: duration, grouped by y
==================================================
y,n_people,min_duration,mean_duration,max_duration
==================================================
no,36548,0.0,221.0,4918.0
yes,4640,37.0,553.0,4199.0
==================================================

Back To Index