python-featuretools新しいフィーチャーを生成

13735 ワード

import featuretools as ft

# Build the 'clients' EntitySet in one chained expression: every
# entity_from_dataframe call operates on the value returned by the previous
# call, which is what repeated `es = es.entity_from_dataframe(...)` does.
es = (
    ft.EntitySet(id='clients')
    # Tables whose index is an existing column of the dataframe.
    .entity_from_dataframe(
        entity_id='app',
        dataframe=app,
        index='SK_ID_CURR',
        variable_types=app_types,
    )
    .entity_from_dataframe(
        entity_id='bureau',
        dataframe=bureau,
        index='SK_ID_BUREAU',
    )
    .entity_from_dataframe(
        entity_id='previous',
        dataframe=previous,
        index='SK_ID_PREV',
        variable_types=previous_types,
    )
    # Tables registered with make_index=True and a dedicated index name.
    .entity_from_dataframe(
        entity_id='bureau_balance',
        dataframe=bureau_balance,
        make_index=True,
        index='bureaubalance_index',
    )
    .entity_from_dataframe(
        entity_id='cash',
        dataframe=cash,
        make_index=True,
        index='cash_index',
    )
    .entity_from_dataframe(
        entity_id='installments',
        dataframe=installments,
        make_index=True,
        index='installments_index',
    )
    .entity_from_dataframe(
        entity_id='credit',
        dataframe=credit,
        make_index=True,
        index='credit_index',
    )
)
# Declare the links between entities. Each Relationship takes two variables
# that share a key column: the first from the entity indexed by that key,
# the second from the entity that refers to it.

# app <- bureau <- bureau_balance
r_app_bureau = ft.Relationship(
    es['app']['SK_ID_CURR'],
    es['bureau']['SK_ID_CURR'],
)
r_bureau_balance = ft.Relationship(
    es['bureau']['SK_ID_BUREAU'],
    es['bureau_balance']['SK_ID_BUREAU'],
)

# app <- previous <- {cash, installments, credit}
r_app_previous = ft.Relationship(
    es['app']['SK_ID_CURR'],
    es['previous']['SK_ID_CURR'],
)
r_previous_cash = ft.Relationship(
    es['previous']['SK_ID_PREV'],
    es['cash']['SK_ID_PREV'],
)
r_previous_installments = ft.Relationship(
    es['previous']['SK_ID_PREV'],
    es['installments']['SK_ID_PREV'],
)
r_previous_credit = ft.Relationship(
    es['previous']['SK_ID_PREV'],
    es['credit']['SK_ID_PREV'],
)

# Register all six relationships on the EntitySet in a single call.
es = es.add_relationships([
    r_app_bureau,
    r_bureau_balance,
    r_app_previous,
    r_previous_cash,
    r_previous_installments,
    r_previous_credit,
])
# Listing of the primitives featuretools provides. Kept for inspection only;
# the dfs call below does not read it.
primitives = ft.list_primitives()

# Primitives selected for this run, passed to dfs by name.
agg_primitives = [
    "sum",
    "max",
    "min",
    "mean",
    "count",
    "percent_true",
    "num_unique",
    "mode",
]
trans_primitives = [
    'percentile',
    'and',
    'diff',
]

# Deep feature synthesis targeting the 'app' entity. features_only=True is
# passed, so the result bound to feature_names is expected to be feature
# definitions rather than a computed feature matrix.
feature_names = ft.dfs(
    entityset=es,
    target_entity='app',
    agg_primitives=agg_primitives,
    trans_primitives=trans_primitives,
    max_depth=3,
    features_only=True,
    n_jobs=1,
    verbose=1,
)
# Persist the feature definitions returned by ft.dfs. That result is bound to
# `feature_names`; the earlier code passed `features`, a name that is not
# assigned anywhere in the visible script and would raise NameError.
ft.save_features(feature_names, '../input/features.txt')

# Load saved feature definitions back into memory.
# NOTE(review): the save above writes to '../input/features.txt' while this
# reads from external_path + '/input/features.txt'; `external_path` is defined
# outside the visible code -- confirm both resolve to the same file.
feature_defs = ft.load_features(external_path + '/input/features.txt')