# Data Inspection-5

Back To Index

Aim : Develop a generic function for inspecting continuous variables.

0_data_inspection_5
In [1]:
###################################################
#  Filename : 0_data_inspection_5                 #
#  Purpose : To demonstrate data inspection       #
#   using Euler toolkit                           #
#            1. Create generic function for       #
#            continuous variable inspection       #
#  Author : Niel S.                               #
#  (c) The English Tea Company LLC                #
###################################################

# The following lines add the Euler toolkit to the module search path
import sys
sys.path.append('C:\\Users\\singa72\\Desktop\\Euler\\')
import Euler as Eu

# Path of the working database file that main() opens via Eu.connection().
# NOTE(review): data_folder is not defined anywhere in the visible source --
# presumably it was set earlier in the notebook session; confirm, otherwise
# this line raises NameError when the file is run as a standalone script.
data_work   = data_folder+'data_work.db'

#M A I N   F U N C T I O N
def main():
    """Step through a hand-written inspection of the 'age' column of 'bank'.

    Opens a connection to the working database with ``Eu.connection``,
    prepares a series of progressively richer queries and executes the one
    step that is currently enabled (the first ten ``age`` values) through
    ``Eu.run``. The remaining steps are kept as disabled hints so they can
    be switched on one at a time.

    Any exception is handed to ``Eu.print_error`` and the connection is
    closed in every case.
    """
    conn = Eu.connection(data_work)
    try:
        # Step 0 (disabled): the CREATE statement stored for table 'bank'.
        schema_sql = '''
SELECT sql FROM sqlite_master
WHERE tbl_name = 'bank' AND type = 'table'
'''
        # To enable: Eu.run(schema_sql, conn)

        # Step 1 (enabled): peek at the first ten values of 'age'.
        age_sample_sql = '''
select age*1. from bank limit 10
'''
        Eu.run(age_sample_sql, conn)

        # Step 2 (disabled): average age over the whole sample.
        mean_age_sql = '''
select avg(1.*age) mean_age  from bank
'''
        # To enable: Eu.run(mean_age_sql, conn)

        # Step 3 (disabled): minimum, rounded mean and maximum age.
        age_summary_sql = '''
select min(age*1.) min_age,
round(avg(age*1.),0) mean_age,
max(age*1.) max_age
from  bank
'''
        # To enable: Eu.run(age_summary_sql, conn)

        # Step 4 (disabled): the same summary, split by the class column 'y'.
        age_summary_by_class_sql = '''
select y,count(*) n_people,
min(age*1.) min_age,
round(avg(age*1.),0) mean_age,
max(age*1.) max_age
from  bank
group by y
'''
        # To enable: Eu.run(age_summary_by_class_sql, conn)

        # Exercise: once inspect_continuous_var is fixed, enable these calls.
        # inspect_continuous_var(varName='duration',conn=conn)
        # inspect_continuous_var(varName='age',conn=conn)

    except Exception as exc:
        Eu.print_error(exc)
    finally:
        conn.close()

def inspect_continuous_var(varName,conn):
    """Print summary statistics for one continuous column of table 'bank'.

    Two queries are executed through ``Eu.run``:

    1. minimum, rounded mean and maximum of the column over the whole table;
    2. the same three statistics plus a row count, grouped by column ``y``.

    The result columns are named ``min_<varName>``, ``mean_<varName>`` and
    ``max_<varName>`` (so ``varName='age'`` yields ``min_age`` etc.).

    Args:
        varName: Name of the column to inspect, e.g. ``'age'`` or
            ``'duration'``. Must be a non-empty string without ``]``.
        conn: Open database connection, as returned by ``Eu.connection``.

    Errors (including an invalid ``varName``) are not raised to the caller;
    they are reported through ``Eu.print_error``, matching ``main``.
    """
    try:
        if not isinstance(varName, str) or not varName.strip():
            raise ValueError('varName must be a non-empty column name')
        if ']' in varName:
            # ']' would terminate the bracket quoting used below.
            raise ValueError("varName must not contain ']': " + varName)

        # A column name cannot be bound as a query parameter, so it is
        # embedded as a bracket-quoted SQLite identifier. This keeps names
        # containing dots or spaces working and prevents the name from being
        # interpreted as SQL. Brackets are used rather than double quotes
        # because SQLite may silently treat an unknown double-quoted name as
        # a string literal instead of reporting a missing column.
        column = '[' + varName + ']'
        stats_columns = (
            'min({col}*1.) [min_{name}],\n'
            'round(avg({col}*1.),0) [mean_{name}],\n'
            'max({col}*1.) [max_{name}]\n'
        ).format(col=column, name=varName)

        #Min, Max, Mean
        sql = '\nselect ' + stats_columns + 'from  bank\n'
        print ('Statistics for: '+varName)
        Eu.run(sql,conn)

        #Min, Max, Mean by class type
        sql = (
            '\nselect y,count(*) n_people,\n'
            + stats_columns
            + 'from  bank\n'
            + 'group by y\n'
        )
        print ('Statistics for: '+varName+ ', grouped by y')
        Eu.run(sql,conn)

    except Exception as err:
        Eu.print_error(err)

# Run the walkthrough only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

************************************************
*                    EULER                     *
*    A SQLITE POWERED DATA SCIENCE TOOLKIT     *
*          SINGH.AP79@GMAIL.NOSPAM.COM         *
************************************************

======
age*1.
======
56.0
57.0
37.0
40.0
56.0
45.0
59.0
41.0
24.0
25.0
======



Back To Index