aboutsummaryrefslogtreecommitdiff
path: root/model.py
diff options
context:
space:
mode:
authorAlex Auvolat <alex.auvolat@ens.fr>2015-04-27 19:17:29 -0400
committerAlex Auvolat <alex.auvolat@ens.fr>2015-04-27 19:17:29 -0400
commit902a8dcb40b3da9492093edd5bda356240f29eb0 (patch)
tree7fa12de23a33f8e39fc2538077c995244999e4cd /model.py
parentcdd050295c3c6df780bdc65088959d908b2cf2a5 (diff)
downloadtaxi-902a8dcb40b3da9492093edd5bda356240f29eb0.tar.gz
taxi-902a8dcb40b3da9492093edd5bda356240f29eb0.zip
Add normalization
Diffstat (limited to 'model.py')
-rw-r--r--model.py24
1 files changed, 17 insertions, 7 deletions
diff --git a/model.py b/model.py
index c3ada3e..e104990 100644
--- a/model.py
+++ b/model.py
@@ -21,7 +21,7 @@ from fuel.transformers import Batch
from fuel.streams import DataStream
from fuel.schemes import ConstantScheme
-from blocks.algorithms import GradientDescent, Scale, AdaDelta
+from blocks.algorithms import GradientDescent, Scale, AdaDelta, Momentum
from blocks.graph import ComputationGraph
from blocks.main_loop import MainLoop
from blocks.extensions import Printing
@@ -45,13 +45,22 @@ n_end_pts = 5
dim_embed = 50
dim_hidden = 200
-learning_rate = 0.01
+learning_rate = 0.002
+momentum = 0.9
batch_size = 32
def main():
# The input and the targets
x_firstk = tensor.matrix('first_k')
+ n = x_firstk.shape[0]
+ x_firstk = (x_firstk.reshape((n, n_begin_end_pts, 2)) - data.porto_center[None, None, :]) / data.data_std[None, None, :]
+ x_firstk = x_firstk.reshape((n, 2 * n_begin_end_pts))
+
x_lastk = tensor.matrix('last_k')
+ n = x_lastk.shape[0]
+ x_lastk = (x_lastk.reshape((n, n_begin_end_pts, 2)) - data.porto_center[None, None, :]) / data.data_std[None, None, :]
+ x_lastk = x_lastk.reshape((n, 2 * n_begin_end_pts))
+
x_client = tensor.lvector('origin_call')
x_stand = tensor.lvector('origin_stand')
y = tensor.matrix('destination')
@@ -75,9 +84,10 @@ def main():
# hidden = theano.printing.Print("hidden")(hidden)
outputs = output_layer.apply(hidden)
+ # Normalize & Center
+ outputs = data.data_std * outputs + data.porto_center
+
# Calculate the cost
- # cost = (outputs - y).norm(2, axis=1).mean()
- # outputs = numpy.array([[ -8.621953, 41.162142]], dtype='float32') + 0 * outputs
cost = (outputs - y).norm(2, axis=1).mean()
cost.name = 'cost'
hcost = hdist.hdist(outputs, y).mean()
@@ -88,7 +98,7 @@ def main():
stand_embed_table.weights_init = IsotropicGaussian(0.001)
hidden_layer.weights_init = IsotropicGaussian(0.01)
hidden_layer.biases_init = Constant(0.001)
- output_layer.weights_init = IsotropicGaussian(0.001)
+ output_layer.weights_init = IsotropicGaussian(0.01)
output_layer.biases_init = Constant(0.001)
client_embed_table.initialize()
@@ -119,8 +129,8 @@ def main():
params = VariableFilter(bricks=[Linear])(cg.parameters)
algorithm = GradientDescent(
cost=cost,
- step_rule=AdaDelta(decay_rate=0.5),
- # step_rule=Scale(learning_rate=learning_rate),
+ # step_rule=AdaDelta(decay_rate=0.5),
+ step_rule=Momentum(learning_rate=learning_rate, momentum=momentum),
params=params)
extensions=[DataStreamMonitoring([cost, hcost], valid_stream,