diff options
author | Alex Auvolat <alex.auvolat@ens.fr> | 2015-07-25 16:21:08 -0400 |
---|---|---|
committer | Alex Auvolat <alex.auvolat@ens.fr> | 2015-07-25 16:21:08 -0400 |
commit | 8876175ad46d43e1a387b11ad398defac588b901 (patch) | |
tree | d498a41c6745d6300dcf858b6f546988696f5c2f | |
parent | 4b0c1a83bc8028983eeb4c4bda19e05954cb6ac2 (diff) | |
download | taxi-8876175ad46d43e1a387b11ad398defac588b901.tar.gz taxi-8876175ad46d43e1a387b11ad398defac588b901.zip |
Add memory_net_bidir_mom
-rw-r--r-- | config/bidirectional_tgtcls_1_momentum.py | 4 | ||||
-rw-r--r-- | config/memory_network_bidir_momentum.py | 58 |
2 files changed, 60 insertions, 2 deletions
```diff
diff --git a/config/bidirectional_tgtcls_1_momentum.py b/config/bidirectional_tgtcls_1_momentum.py
index b286e0f..65ad021 100644
--- a/config/bidirectional_tgtcls_1_momentum.py
+++ b/config/bidirectional_tgtcls_1_momentum.py
@@ -27,12 +27,12 @@ embed_weights_init = IsotropicGaussian(0.01)
 weights_init = IsotropicGaussian(0.1)
 biases_init = Constant(0.01)
-batch_size = 100
+batch_size = 300
 batch_sort_size = 20
 max_splits = 100
 # monitor_freq = 10000 # temporary, for finding good learning rate
-step_rule= Momentum(learning_rate=0.1, momentum=0.9)
+step_rule= Momentum(learning_rate=0.01, momentum=0.9)
diff --git a/config/memory_network_bidir_momentum.py b/config/memory_network_bidir_momentum.py
new file mode 100644
index 0000000..e5863ae
--- /dev/null
+++ b/config/memory_network_bidir_momentum.py
@@ -0,0 +1,58 @@
+from blocks.initialization import IsotropicGaussian, Constant
+from blocks.algorithms import Momentum
+
+from blocks.bricks import Tanh
+
+import data
+from model.memory_network_bidir import Model, Stream
+
+
+dim_embeddings = [
+    ('origin_call', data.origin_call_train_size, 10),
+    ('origin_stand', data.stands_size, 10),
+    ('week_of_year', 52, 10),
+    ('day_of_week', 7, 10),
+    ('qhour_of_day', 24 * 4, 10),
+    ('day_type', 3, 10),
+]
+
+embed_weights_init = IsotropicGaussian(0.001)
+
+
+class RNNConfig(object):
+    __slots__ = ('rec_state_dim', 'dim_embeddings', 'embed_weights_init',
+                 'dim_hidden', 'weights_init', 'biases_init')
+
+prefix_encoder = RNNConfig()
+prefix_encoder.dim_embeddings = dim_embeddings
+prefix_encoder.embed_weights_init = embed_weights_init
+prefix_encoder.rec_state_dim = 100
+prefix_encoder.dim_hidden = [100, 100]
+prefix_encoder.weights_init = IsotropicGaussian(0.01)
+prefix_encoder.biases_init = Constant(0.001)
+
+candidate_encoder = RNNConfig()
+candidate_encoder.dim_embeddings = dim_embeddings
+candidate_encoder.embed_weights_init = embed_weights_init
+candidate_encoder.rec_state_dim = 100
+candidate_encoder.dim_hidden = [100, 100]
+candidate_encoder.weights_init = IsotropicGaussian(0.01)
+candidate_encoder.biases_init = Constant(0.001)
+
+representation_size = 100
+representation_activation = Tanh
+
+normalize_representation = True
+
+
+batch_size = 32
+batch_sort_size = 20
+
+max_splits = 100
+num_cuts = 1000
+
+train_candidate_size = 1000
+valid_candidate_size = 1000
+test_candidate_size = 1000
+
+step_rule = Momentum(learning_rate=0.01, momentum=0.9)
```