Unify parameters for joint_simple_tgtcls_111_cswdtx_bigger{,_dropout}

author: Alex Auvolat <alex.auvolat@ens.fr> 2015-07-02 11:15:37 -0400
committer: Alex Auvolat <alex.auvolat@ens.fr> 2015-07-02 11:18:45 -0400
commit: 3f3ab2bfe3ebfa266d433012be1c89c722d63352 (patch)
tree: 589915018911ec364dccb4b897ab108913be464f /config
parent: 32b078f28add3d22529e55aeac6674d924e9b510 (diff)
download: taxi-3f3ab2bfe3ebfa266d433012be1c89c722d63352.tar.gz taxi-3f3ab2bfe3ebfa266d433012be1c89c722d63352.zip
3 files changed, 8 insertions, 9 deletions
diff --git a/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger.py b/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger.py
index 93ff5c7..8e991a1 100644
--- a/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger.py
+++ b/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger.py
@@ -29,14 +29,14 @@ dim_embeddings = [
 
 # Common network part
 dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings)
-dim_hidden = [1000]
+dim_hidden = [5000]
 
 # Destination prediction part
-dim_hidden_dest = [400]
+dim_hidden_dest = [1000]
 dim_output_dest = dest_tgtcls.shape[0]
 
 # Time prediction part
-dim_hidden_time = [400]
+dim_hidden_time = [500]
 dim_output_time = len(time_tgtcls)
 
 # Cost ratio between distance cost and time cost
@@ -46,8 +46,7 @@ embed_weights_init = IsotropicGaussian(0.01)
 mlp_weights_init = IsotropicGaussian(0.1)
 mlp_biases_init = Constant(0.01)
 
-learning_rate = 0.000001
-momentum = 0.99
+# use adadelta, so no learning_rate or momentum
 batch_size = 200
 
 valid_set = 'cuts/test_times_0'
diff --git a/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py b/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py
index b047d7c..e1bd840 100644
--- a/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py
+++ b/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py
@@ -46,11 +46,10 @@ embed_weights_init = IsotropicGaussian(0.01)
 mlp_weights_init = IsotropicGaussian(0.1)
 mlp_biases_init = Constant(0.01)
 
-# apply_dropout = True
-# dropout_p = 0.5
+apply_dropout = True
+dropout_p = 0.5
 
-learning_rate = 0.001
-momentum = 0.9
+# use adadelta, so no learning_rate or momentum
 batch_size = 200
 
 valid_set = 'cuts/test_times_0'
diff --git a/config/memory_network_1.py b/config/memory_network_1.py
index 68e23bd..813c9d2 100644
--- a/config/memory_network_1.py
+++ b/config/memory_network_1.py
@@ -31,6 +31,7 @@ candidate_encoder.dim_hidden = [100, 100, 100]
 candidate_encoder.weights_init = IsotropicGaussian(0.01)
 candidate_encoder.biases_init = Constant(0.001)
 
+normalize_representation = True
 
 embed_weights_init = IsotropicGaussian(0.001)
author	Alex Auvolat <alex.auvolat@ens.fr>	2015-07-02 11:15:37 -0400
committer	Alex Auvolat <alex.auvolat@ens.fr>	2015-07-02 11:18:45 -0400
commit	3f3ab2bfe3ebfa266d433012be1c89c722d63352 (patch)
tree	589915018911ec364dccb4b897ab108913be464f /config
parent	32b078f28add3d22529e55aeac6674d924e9b510 (diff)
download	taxi-3f3ab2bfe3ebfa266d433012be1c89c722d63352.tar.gz taxi-3f3ab2bfe3ebfa266d433012be1c89c722d63352.zip