From 1f2ff96e6480a62089fcac35154a956c218ed678 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Simon?= Date: Tue, 5 May 2015 21:55:13 -0400 Subject: Clean data module and generalize use of hdf5. --- config/dest_simple_mlp_2_cs.py | 6 ++++-- config/dest_simple_mlp_2_cswdt.py | 6 ++++-- config/dest_simple_mlp_2_noembed.py | 2 ++ config/dest_simple_mlp_tgtcls_0_cs.py | 8 +++++--- config/dest_simple_mlp_tgtcls_1_cs.py | 8 +++++--- config/dest_simple_mlp_tgtcls_1_cswdt.py | 8 +++++--- config/dest_simple_mlp_tgtcls_1_cswdtx.py | 8 +++++--- 7 files changed, 30 insertions(+), 16 deletions(-) (limited to 'config') diff --git a/config/dest_simple_mlp_2_cs.py b/config/dest_simple_mlp_2_cs.py index 2cec78d..0dd2704 100644 --- a/config/dest_simple_mlp_2_cs.py +++ b/config/dest_simple_mlp_2_cs.py @@ -8,8 +8,8 @@ n_end_pts = 5 n_valid = 1000 dim_embeddings = [ - ('origin_call', data.n_train_clients+1, 10), - ('origin_stand', data.n_stands+1, 10) + ('origin_call', data.origin_call_train_size, 10), + ('origin_stand', data.stands_size, 10) ] dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings) @@ -19,3 +19,5 @@ dim_output = 2 learning_rate = 0.0001 momentum = 0.99 batch_size = 32 + +valid_set = 'cuts/test_times_0' diff --git a/config/dest_simple_mlp_2_cswdt.py b/config/dest_simple_mlp_2_cswdt.py index f6ddf34..1011488 100644 --- a/config/dest_simple_mlp_2_cswdt.py +++ b/config/dest_simple_mlp_2_cswdt.py @@ -8,8 +8,8 @@ n_end_pts = 5 n_valid = 1000 dim_embeddings = [ - ('origin_call', data.n_train_clients+1, 10), - ('origin_stand', data.n_stands+1, 10), + ('origin_call', data.origin_call_train_size, 10), + ('origin_stand', data.stands_size, 10), ('week_of_year', 52, 10), ('day_of_week', 7, 10), ('qhour_of_day', 24 * 4, 10), @@ -23,3 +23,5 @@ dim_output = 2 learning_rate = 0.0001 momentum = 0.99 batch_size = 32 + +valid_set = 'cuts/test_times_0' diff --git a/config/dest_simple_mlp_2_noembed.py b/config/dest_simple_mlp_2_noembed.py index 3832146..3cddcb9 100644 --- a/config/dest_simple_mlp_2_noembed.py +++ b/config/dest_simple_mlp_2_noembed.py @@ -16,3 +16,5 @@ dim_output = 2 learning_rate = 0.0001 momentum = 0.99 batch_size = 32 + +valid_set = 'cuts/test_times_0' diff --git a/config/dest_simple_mlp_tgtcls_0_cs.py b/config/dest_simple_mlp_tgtcls_0_cs.py index a8a5a0e..031cd12 100644 --- a/config/dest_simple_mlp_tgtcls_0_cs.py +++ b/config/dest_simple_mlp_tgtcls_0_cs.py @@ -9,11 +9,11 @@ n_end_pts = 5 n_valid = 1000 -with open(data.DATA_PATH + "/arrival-clusters.pkl") as f: tgtcls = cPickle.load(f) +with open("%s/arrival-clusters.pkl" % data.path) as f: tgtcls = cPickle.load(f) dim_embeddings = [ - ('origin_call', data.n_train_clients+1, 10), - ('origin_stand', data.n_stands+1, 10) + ('origin_call', data.origin_call_train_size, 10), + ('origin_stand', data.stands_size, 10) ] dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings) @@ -23,3 +23,5 @@ dim_output = tgtcls.shape[0] learning_rate = 0.0001 momentum = 0.99 batch_size = 32 + +valid_set = 'cuts/test_times_0' diff --git a/config/dest_simple_mlp_tgtcls_1_cs.py b/config/dest_simple_mlp_tgtcls_1_cs.py index 8136f10..48d9fa0 100644 --- a/config/dest_simple_mlp_tgtcls_1_cs.py +++ b/config/dest_simple_mlp_tgtcls_1_cs.py @@ -9,11 +9,11 @@ n_end_pts = 5 n_valid = 1000 -with open(data.DATA_PATH + "/arrival-clusters.pkl") as f: tgtcls = cPickle.load(f) +with open("%s/arrival-clusters.pkl" % data.path) as f: tgtcls = cPickle.load(f) dim_embeddings = [ - ('origin_call', data.n_train_clients+1, 10), - ('origin_stand', data.n_stands+1, 10) + ('origin_call', data.origin_call_train_size, 10), + ('origin_stand', data.stands_size, 10) ] dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings) @@ -23,3 +23,5 @@ dim_output = tgtcls.shape[0] learning_rate = 0.0001 momentum = 0.99 batch_size = 32 + +valid_set = 'cuts/test_times_0' diff --git a/config/dest_simple_mlp_tgtcls_1_cswdt.py b/config/dest_simple_mlp_tgtcls_1_cswdt.py index af7b2a3..6aa2a03 100644 --- a/config/dest_simple_mlp_tgtcls_1_cswdt.py +++ b/config/dest_simple_mlp_tgtcls_1_cswdt.py @@ -9,11 +9,11 @@ n_end_pts = 5 n_valid = 1000 -with open(data.DATA_PATH + "/arrival-clusters.pkl") as f: tgtcls = cPickle.load(f) +with open("%s/arrival-clusters.pkl" % data.path) as f: tgtcls = cPickle.load(f) dim_embeddings = [ - ('origin_call', data.n_train_clients+1, 10), - ('origin_stand', data.n_stands+1, 10), + ('origin_call', data.origin_call_train_size, 10), + ('origin_stand', data.stands_size, 10), ('week_of_year', 52, 10), ('day_of_week', 7, 10), ('qhour_of_day', 24 * 4, 10), @@ -27,3 +27,5 @@ dim_output = tgtcls.shape[0] learning_rate = 0.0001 momentum = 0.99 batch_size = 32 + +valid_set = 'cuts/test_times_0' diff --git a/config/dest_simple_mlp_tgtcls_1_cswdtx.py b/config/dest_simple_mlp_tgtcls_1_cswdtx.py index b9832df..7918242 100644 --- a/config/dest_simple_mlp_tgtcls_1_cswdtx.py +++ b/config/dest_simple_mlp_tgtcls_1_cswdtx.py @@ -9,11 +9,11 @@ n_end_pts = 5 n_valid = 1000 -with open(data.DATA_PATH + "/arrival-clusters.pkl") as f: tgtcls = cPickle.load(f) +with open("%s/arrival-clusters.pkl" % data.path) as f: tgtcls = cPickle.load(f) dim_embeddings = [ - ('origin_call', data.n_train_clients+1, 10), - ('origin_stand', data.n_stands+1, 10), + ('origin_call', data.origin_call_train_size, 10), + ('origin_stand', data.stands_size, 10), ('week_of_year', 52, 10), ('day_of_week', 7, 10), ('qhour_of_day', 24 * 4, 10), @@ -28,3 +28,5 @@ dim_output = tgtcls.shape[0] learning_rate = 0.0001 momentum = 0.99 batch_size = 32 + +valid_set = 'cuts/test_times_0' -- cgit v1.2.3