2 files changed, 21 insertions, 7 deletions
diff --git a/data.py b/data.py
index d2c4f77..7708863 100644
--- a/data.py
+++ b/data.py
@@ -1,6 +1,7 @@
 import ast, csv
 import socket
 import fuel
+import numpy
 from enum import Enum
 from fuel.datasets import Dataset
 from fuel.streams import DataStream
@@ -13,6 +14,9 @@ else:
 
 client_ids = {int(x): y+1 for y, x in enumerate(open(DATA_PATH+"/client_ids.txt"))}
 
+porto_center = numpy.array([[ -8.61612, 41.1573]], dtype='float32')
+data_std = numpy.sqrt(numpy.array([[ 0.00333233, 0.00549598]], dtype='float32'))
+
 class CallType(Enum):
     CENTRAL = 0
     STAND = 1
diff --git a/model.py b/model.py
index c3ada3e..e104990 100644
--- a/model.py
+++ b/model.py
@@ -21,7 +21,7 @@ from fuel.transformers import Batch
 from fuel.streams import DataStream
 from fuel.schemes import ConstantScheme
 
-from blocks.algorithms import GradientDescent, Scale, AdaDelta
+from blocks.algorithms import GradientDescent, Scale, AdaDelta, Momentum
 from blocks.graph import ComputationGraph
 from blocks.main_loop import MainLoop
 from blocks.extensions import Printing
@@ -45,13 +45,22 @@ n_end_pts = 5
 dim_embed = 50
 dim_hidden = 200
 
-learning_rate = 0.01
+learning_rate = 0.002
+momentum = 0.9
 batch_size = 32
 
 def main():
     # The input and the targets
     x_firstk = tensor.matrix('first_k')
+    n = x_firstk.shape[0]
+    x_firstk = (x_firstk.reshape((n, n_begin_end_pts, 2)) - data.porto_center[None, None, :]) / data.data_std[None, None, :]
+    x_firstk = x_firstk.reshape((n, 2 * n_begin_end_pts))
+
     x_lastk = tensor.matrix('last_k')
+    n = x_lastk.shape[0]
+    x_lastk = (x_lastk.reshape((n, n_begin_end_pts, 2)) - data.porto_center[None, None, :]) / data.data_std[None, None, :]
+    x_lastk = x_lastk.reshape((n, 2 * n_begin_end_pts))
+
     x_client = tensor.lvector('origin_call')
     x_stand = tensor.lvector('origin_stand')
     y = tensor.matrix('destination')
@@ -75,9 +84,10 @@ def main():
     # hidden = theano.printing.Print("hidden")(hidden)
     outputs = output_layer.apply(hidden)
 
+    # De-normalize: undo the input scaling (multiply by data_std, add porto_center) before comparing with y
+    outputs = data.data_std * outputs + data.porto_center
+
     # Calculate the cost
-    # cost = (outputs - y).norm(2, axis=1).mean()
-    # outputs = numpy.array([[ -8.621953, 41.162142]], dtype='float32') + 0 * outputs
     cost = (outputs - y).norm(2, axis=1).mean()
     cost.name = 'cost'
     hcost = hdist.hdist(outputs, y).mean()
@@ -88,7 +98,7 @@ def main():
     stand_embed_table.weights_init = IsotropicGaussian(0.001)
     hidden_layer.weights_init = IsotropicGaussian(0.01)
     hidden_layer.biases_init = Constant(0.001)
-    output_layer.weights_init = IsotropicGaussian(0.001)
+    output_layer.weights_init = IsotropicGaussian(0.01)
     output_layer.biases_init = Constant(0.001)
 
     client_embed_table.initialize()
@@ -119,8 +129,8 @@ def main():
     params = VariableFilter(bricks=[Linear])(cg.parameters)
     algorithm = GradientDescent(
         cost=cost,
-        step_rule=AdaDelta(decay_rate=0.5),
-        # step_rule=Scale(learning_rate=learning_rate),
+        # step_rule=AdaDelta(decay_rate=0.5),
+        step_rule=Momentum(learning_rate=learning_rate, momentum=momentum),
         params=params)
 
     extensions=[DataStreamMonitoring([cost, hcost], valid_stream,