# Data Inspection-6

Back To Index

Aim: Use a generic function to inspect continuous variables.

0_data_inspection_6
In [2]:
###################################################
#  Filename : 0_data_inspection_6                 #
#  Purpose : To demonstrate data inspection       #
#   using Euler toolkit                           #
#            1. Use generic function for          #
#            continuous variable inspection       #
#  Author : Niel S.                               #
#  (c) The English Tea Company LLC                #
###################################################

#Following lines add Euler into the search path
# NOTE(review): the appended directory is a hard-coded, user-specific Windows
# path; it has to be adjusted before this runs on any other machine.
import sys
sys.path.append('C:\\Users\\singa72\\Desktop\\Euler\\')
import Euler as Eu

# Path to data_work.db; main() hands this value to Eu.connection().
# NOTE(review): data_folder is not defined anywhere in the visible code --
# presumably it comes from an earlier notebook cell or session state; confirm
# before running this as a standalone script.
data_work   = data_folder+'data_work.db'

#M A I N   F U N C T I O N
def main():
    """Inspect the continuous variables of the working database.

    Opens a connection with ``Eu.connection(data_work)`` and calls
    ``inspect_continuous_var`` once for 'age' and once for 'duration'.
    Any exception raised while inspecting is passed to ``Eu.print_error``
    instead of propagating, and the connection is closed in every case.

    The connection is opened before the ``try`` block, so a failure to
    connect is not routed through ``Eu.print_error``; it propagates to
    the caller.
    """
    conn = Eu.connection(data_work)
    try:
        # Same order as the tutorial output: age first, then duration.
        for column in ('age', 'duration'):
            inspect_continuous_var(varName=column, conn=conn)

        #Exercise : Find other continuous variables (Use Data Dictionary on Synopsis Page).
        #Exercise : Call inspection on those variables.
    except Exception as err:
        Eu.print_error(err)
    finally:
        # Release the connection whether or not the inspection succeeded.
        conn.close()

def inspect_continuous_var(varName,conn):
    """Print min / mean / max statistics for one column of table ``bank``.

    Two queries are built and handed to ``Eu.run`` together with ``conn``:
    first the statistics over the whole table, then the same statistics
    (plus a row count) grouped by column ``y``. A heading line is printed
    before each query is run.

    Args:
        varName: Column name. It is substituted textually for every
            ``?var?`` marker in the SQL text (it is not a bound
            parameter), so it must be a trusted, valid column name.
        conn: Connection object forwarded unchanged to ``Eu.run``.

    Any exception raised while building, announcing or running a query is
    passed to ``Eu.print_error`` rather than propagated; a failure in the
    first query therefore also skips the second one.
    """
    marker = '?var?'

    # The SQL text is kept flush-left so the literals stay exactly as they
    # were. The column is multiplied by 1. in every aggregate -- presumably
    # to force floating-point arithmetic; confirm against the table schema.
    # The mean is rounded to 0 decimal places.

    #Min, Max, Mean
    overall_template = '''
select min(?var?*1.) min_?var?,
round(avg(?var?*1.),0) mean_?var?,
max(?var?*1.) max_?var?
from  bank
'''

    #Min, Max, Mean by class type
    by_class_template = '''
select y,count(*) n_people,
min(?var?*1.) min_?var?,
round(avg(?var?*1.),0) mean_?var?,
max(?var?*1.) max_?var?
from  bank
group by y
'''

    # (SQL template, heading suffix) pairs, in the order they are reported.
    reports = (
        (overall_template, ''),
        (by_class_template, ', grouped by y'),
    )

    try:
        for template, suffix in reports:
            query = template.replace(marker, varName)
            print('Statistics for: ' + varName + suffix)
            Eu.run(query, conn)
    except Exception as err:
        Eu.print_error(err)

# Run the inspection only when this file is executed directly, not on import.
if __name__ == '__main__':
    main()

Statistics for: age
========================
min_age,mean_age,max_age
========================
17.0,40.0,98.0
========================

Statistics for: age, grouped by y
===================================
y,n_people,min_age,mean_age,max_age
===================================
no,36548,17.0,40.0,95.0
yes,4640,17.0,41.0,98.0
===================================

Statistics for: duration
=======================================
min_duration,mean_duration,max_duration
=======================================
0.0,258.0,4918.0
=======================================

Statistics for: duration, grouped by y
==================================================
y,n_people,min_duration,mean_duration,max_duration
==================================================
no,36548,0.0,221.0,4918.0
yes,4640,37.0,553.0,4199.0
==================================================



Back To Index