From 88cdc3f8047a05bc5971eaa915ca6626f89a3e78 Mon Sep 17 00:00:00 2001
From: AdeB
Date: Wed, 24 Jun 2015 15:12:15 -0400
Subject: New configs. training step rule out of train.py

---
 config/dest_simple_mlp_emb_only.py | 22 +++++++++---------
 config/memory_network_adeb.py      | 46 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 11 deletions(-)
 create mode 100644 config/memory_network_adeb.py

(limited to 'config')

diff --git a/config/dest_simple_mlp_emb_only.py b/config/dest_simple_mlp_emb_only.py
index e5c91b8..76acdfa 100644
--- a/config/dest_simple_mlp_emb_only.py
+++ b/config/dest_simple_mlp_emb_only.py
@@ -6,26 +6,26 @@ from model.mlp_emb import Model, Stream
 
 use_cuts_for_training = True
 dim_embeddings = [
-    ('origin_call', data.origin_call_train_size, 10),
-    ('origin_stand', data.stands_size, 10),
-    ('week_of_year', 52, 10),
-    ('day_of_week', 7, 10),
+    # ('origin_call', data.origin_call_train_size, 100),
+    # ('origin_stand', data.stands_size, 100),
+    # ('week_of_year', 52, 100),
+    # ('day_of_week', 7, 100),
     ('qhour_of_day', 24 * 4, 10),
-    ('day_type', 3, 10),
+    ('day_type', 3, 1),
 ]
 
 dim_input = sum(x for (_, _, x) in dim_embeddings)
-dim_hidden = [200, 100]
+dim_hidden = [10, 10]
 output_mode = "destination"
 dim_output = 2
 
-embed_weights_init = IsotropicGaussian(0.001)
+embed_weights_init = IsotropicGaussian(0.01)
 mlp_weights_init = IsotropicGaussian(0.01)
-mlp_biases_init = Constant(0.001)
+mlp_biases_init = IsotropicGaussian(0.001)
 
-learning_rate = 0.0001
-momentum = 0.99
-batch_size = 32
+learning_rate = 0.001
+momentum = 0.9
+batch_size = 100
 
 valid_set = 'cuts/test_times_0'
 max_splits = 100
diff --git a/config/memory_network_adeb.py b/config/memory_network_adeb.py
new file mode 100644
index 0000000..1d7dc5d
--- /dev/null
+++ b/config/memory_network_adeb.py
@@ -0,0 +1,46 @@
+from blocks.initialization import IsotropicGaussian, Constant
+from blocks.algorithms import AdaDelta, CompositeRule, GradientDescent, RemoveNotFinite, StepRule, Momentum
+
+import data
+from model.memory_network import Model, Stream
+
+
+n_begin_end_pts = 5 # how many points we consider at the beginning and end of the known trajectory
+
+dim_embeddings = [
+    ('origin_call', data.origin_call_train_size, 10),
+    ('origin_stand', data.stands_size, 10),
+    ('week_of_year', 52, 10),
+    ('day_of_week', 7, 10),
+    ('qhour_of_day', 24 * 4, 10),
+    ('day_type', 3, 10),
+]
+
+
+class MLPConfig(object):
+    __slots__ = ('dim_input', 'dim_hidden', 'dim_output', 'weights_init', 'biases_init')
+
+prefix_encoder = MLPConfig()
+prefix_encoder.dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings)
+prefix_encoder.dim_hidden = [100, 100]
+prefix_encoder.weights_init = IsotropicGaussian(0.001)
+prefix_encoder.biases_init = Constant(0.0001)
+
+candidate_encoder = MLPConfig()
+candidate_encoder.dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings)
+candidate_encoder.dim_hidden = [100, 100]
+candidate_encoder.weights_init = IsotropicGaussian(0.001)
+candidate_encoder.biases_init = Constant(0.0001)
+
+
+embed_weights_init = IsotropicGaussian(0.001)
+
+step_rule = Momentum(learning_rate=0.001, momentum=0.9)
+batch_size = 32
+
+valid_set = 'cuts/test_times_0'
+max_splits = 1
+num_cuts = 1000
+
+train_candidate_size = 1000
+valid_candidate_size = 10000
-- 
cgit v1.2.3