Data Preparation 1

Back To Index

2_data_preparation_1
In [1]:
###################################################
#  Filename : 2_data_preparation_1                #
#  Purpose : To demonstrate how to construct      #
#   a coding dictionary for a discrete variable.  # 
#   We use likelihood encoding.                   #
#  Author : Niel S.                               #
#  (c) The English Tea Company LLC                #
###################################################
import sys
sys.path.append('C:\\Users\\singa72\\Desktop\\Euler\\')

import Euler as Eu
from matplotlib import pyplot as plt

data_folder = 'C:\\Users\\singa72\\Desktop\\Tutorial2\\bank-additional\\bank-additional\\'
data_work   = data_folder+'data_work.db'

#M A I N   F U N C T I O N 
def main():
    #Make a coding dictionary for age;
    conn = Eu.connection(data_work)

    #Analyse following example
    sql = '''
    SELECT
    marital,
    sum( CASE WHEN Y = 'yes' THEN 1 ELSE 0 END) n_ppl_pass,
    count(*) n_ppl_all
    from bank
    group by marital
    '''
    
    Eu.run(sql,conn=conn)
    
    #Analyse this extension
    
    sql = '''
    SELECT
    marital,
    CASE WHEN n_ppl_all IS 0 THEN 0
    ELSE round(n_ppl_pass/n_ppl_all,4) END code
    FROM
        (
    SELECT
    marital,
    sum( CASE WHEN Y = 'yes' THEN 1. ELSE 0 END) n_ppl_pass,
    count(*) n_ppl_all
    from bank
    group by marital
    ) as T1
    '''
    
    Eu.run(sql,conn=conn)
    
    
    #Uncomment follwing to create a 
    # persistent coding dictionary
    """
    sql = '''
    DROP TABLE if exists marital_coding;
    CREATE TABLE marital_coding AS
    SELECT
    marital,
    CASE WHEN n_ppl_all IS 0 THEN 0
    ELSE round(n_ppl_pass/n_ppl_all,4) END code
    FROM
        (
    SELECT
    marital,
    sum( CASE WHEN Y = 'yes' THEN 1. ELSE 0 END) n_ppl_pass,
    count(*) n_ppl_all
    from bank
    group by marital
    ) as T1
    '''

    Eu.execute(sql,conn=conn)
    

    Eu.run('select * from marital_coding',conn)

    """

def probability_encoding_marital(conn):
    '''
    Exercise: Fix this function and call from main.
    '''
    try:
        sql = '''
        DROP TABLE if exists marital_coding;
        CREATE TABLE marital_coding AS
        
        '''

        Eu.execute(sql,conn=conn)

    except Exception as err:
        Eu.print_error(err)    
    
    
if __name__ == '__main__':
    main()
************************************************
*                    EULER                     *
*    A SQLITE POWERED DATA SCIENCE TOOLKIT     *
*          SINGH.AP79@GMAIL.NOSPAM.COM         *
************************************************

============================
marital,n_ppl_pass,n_ppl_all
============================
divorced,476,4612
married,2532,24928
single,1620,11568
unknown,12,80
============================

============
marital,code
============
divorced,0.1032
married,0.1016
single,0.14
unknown,0.15
============

Back To Index