# Data Preparation-2

Back To Index

2_data_preparation_2
In [1]:
###################################################
#  Filename : 2_data_preparation_2                #
#  Purpose : To demonstrate how to construct      #
#   a coding dictionary for a discrete variable.  #
#   We use likelihood encoding.                   #
#  Author : Niel S.                               #
#  (c) The English Tea Company LLC                #
###################################################
import sys
sys.path.append('C:\\Users\\singa72\\Desktop\\Euler\\')

import Euler as Eu
from matplotlib import pyplot as plt

# Folder that holds the tutorial's data files.
data_folder = 'C:\\Users\\singa72\\Desktop\\Tutorial2\\bank-additional\\bank-additional\\'
# Full path of the working database file that main() opens via Eu.connection.
data_work = ''.join((data_folder, 'data_work.db'))

#M A I N   F U N C T I O N
def main():
    """Build the marital coding table and query it back.

    Opens a connection to the work database (``data_work``), calls
    ``probability_encoding_marital`` to (re)create the ``marital_coding``
    table, then runs ``select *`` against that table through ``Eu.run``.

    Any exception raised along the way is handed to ``Eu.print_error``
    instead of propagating, and the connection is closed in every case.
    """
    conn = Eu.connection(data_work)
    try:
        # Make a coding dictionary for the marital variable.
        probability_encoding_marital(conn)
        Eu.run('select * from marital_coding', conn)

        # Exercise: Fix the function and uncomment following call
        # probability_encoding_categorical(varName,conn)
    except Exception as exc:
        Eu.print_error(exc)
    finally:
        # Release the database handle whether or not the steps above failed.
        conn.close()

def probability_encoding_marital(conn):
    """Create the ``marital_coding`` lookup table (likelihood encoding).

    The SQL drops any existing ``marital_coding`` table and rebuilds it
    from ``bank`` with one row per distinct ``marital`` value.  The
    ``code`` column is the number of rows with ``Y = 'yes'`` divided by
    the total number of rows for that value, rounded to 4 decimals
    (0 when the group count is 0).

    Parameters
    ----------
    conn
        Open database connection, passed straight to ``Eu.execute``.

    Notes
    -----
    Exceptions are not propagated: they are reported through
    ``Eu.print_error`` and the function then returns normally.
    """
    # The 'yes' branch yields 1. (a float literal), presumably so that the
    # n_ppl_pass/n_ppl_all ratio is not an integer division -- confirm
    # against the SQL engine's arithmetic rules.
    query = '''
DROP TABLE if exists marital_coding;
CREATE TABLE marital_coding AS
SELECT
marital,
CASE WHEN n_ppl_all IS 0 THEN 0
ELSE round(n_ppl_pass/n_ppl_all,4) END code
FROM
(
SELECT
marital,
sum( CASE WHEN Y = 'yes' THEN 1. ELSE 0 END) n_ppl_pass,
count(*) n_ppl_all
from bank
group by marital
) as T1
'''

    try:
        Eu.execute(query, conn=conn)
    except Exception as exc:
        Eu.print_error(exc)

def probability_encoding_categorical(varName, conn):
    """Exercise stub: likelihood encoding for an arbitrary variable.

    Intended to generalize the SQL used in
    ``probability_encoding_marital`` so that an encoding table can be
    created for any of the variables, not only ``marital``.

    Parameters
    ----------
    varName
        Name of the variable to encode (currently unused).
    conn
        Database connection (currently unused).

    Returns
    -------
    None
        The function is not implemented yet and does nothing.
    """
    return None

# Run the demonstration only when this file is executed as a script.
if __name__ == '__main__':
    main()

************************************************
*                    EULER                     *
*    A SQLITE POWERED DATA SCIENCE TOOLKIT     *
*          SINGH.AP79@GMAIL.NOSPAM.COM         *
************************************************

DROP TABLE if exists marital_coding

CREATE TABLE marital_coding AS
SELECT
marital,
CASE WHEN n_ppl_all IS 0 THEN 0
ELSE round(n_ppl_pass/n_ppl_all,4) END code
FROM
(
SELECT
marital,
sum( CASE WHEN Y = 'yes' THEN 1. ELSE 0 END) n_ppl_pass,
count(*) n_ppl_all
from bank
group by marital
) as T1

============
marital,code
============
divorced,0.1032
married,0.1016
single,0.14
unknown,0.15
============



Back To Index