Data Preparation 5

Back To Index

2_data_preparation_5
In [ ]:
###################################################
#  Filename : 2_data_preparation_5                #
#  Purpose : To prepare a modeling ready          # 
#   dataset, by applying coding from              #
#   code dictionaries.                            #
#  Author : Niel S.                               #
#  (c) The English Tea Company LLC                #
################################################### 
def main():
    conn = Eu.connection(data_work)
  
    try:
        #A list (tupple actually) of variables for probability encoding.
        vvarsL = ('marital','job','education', 'deflt', 
        'housing','contact','campaign','month', 'loan',
        'day_of_week','poutcome','previous',
        'age','duration','pdays','emp_var_rate',
        'cons_price_idx','cons_conf_idx',
        'euribor_m','nr_employed','y')
        
        vvars = ',\n'.join(vvarsL)
        
        #Just see what happened up there.
        print (vvars)
        
        #Remove following statement to demonstrate what comes below.
        return
    
        #Create a copy of the original table.
        # If you wish to drop some of the columns from 
        # modeling, now is the time.
        sql = '''
        DROP TABLE IF EXISTS bank_coded;
        CREATE TABLE bank_coded as

        select
        ?variable_list?
        from bank
        '''.replace('?variable_list?',vvars)
        
        
        print (sql)
        
        Eu.execute(sql,conn)
        
        #Remove following statement to demonstrate what comes below.
        return
        
        #Now replace actual values by codes.
        ##Note there are better/faster ways of doing 
        ###following but this one is the easiest to 
        ####understand.
        sql = '''
        UPDATE bank_coded 
        SET ?var? = (select code
        from ?var?_coding  
        where bank_coded.?var?=?var?_coding.?var?) 
        '''
        
        for v in vvarsL:
            sqli = sql.replace('?var?',v)
            Eu.execute(sqli,conn)
            Eu.run('select * from bank_coded limit 10',conn=conn)
        

        
    except Exception as err:
        Eu.print_error(err)
    finally:
        conn.close()




if __name__ == '__main__':
    main()

Back To Index