author    Alex Auvolat <alex.auvolat@ens.fr>    2015-04-27 18:27:58 -0400
committer Alex Auvolat <alex.auvolat@ens.fr>    2015-04-27 18:27:58 -0400
commit    cdd050295c3c6df780bdc65088959d908b2cf2a5 (patch)
tree      c271a6a559ba8b4a2c5e1e61fea7729f9313d265 /model.py
parent    9a60f6c4e39c09187710608a9e225b6024b34364 (diff)
A reasonable neural network, only it doesn't work.
Diffstat (limited to 'model.py')
-rw-r--r--  model.py  28
1 file changed, 19 insertions(+), 9 deletions(-)
diff --git a/model.py b/model.py
index 0d47710..c3ada3e 100644
--- a/model.py
+++ b/model.py
@@ -5,10 +5,13 @@ from argparse import ArgumentParser

 import numpy

 import theano
+from theano import printing
 from theano import tensor
 from theano.ifelse import ifelse

-from blocks.bricks import MLP, Rectifier, Linear
+from blocks.filter import VariableFilter
+
+from blocks.bricks import MLP, Rectifier, Linear, Sigmoid, Identity
 from blocks.bricks.lookup import LookupTable
 from blocks.initialization import IsotropicGaussian, Constant
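
The LookupTable brick kept in the imports above is what maps the integer client
and stand ids to dense embedding vectors further down in this commit.
Conceptually an embedding lookup is just row indexing into a trainable weight
matrix; a minimal numpy sketch (the table size and ids here are illustrative,
not the values this model uses):

    import numpy

    rng = numpy.random.RandomState(0)
    W = rng.normal(0, 0.001, size=(1000, 50))  # 1000 ids, dim_embed = 50 (assumed)
    ids = numpy.array([3, 17, 42])             # a mini-batch of integer ids
    embeddings = W[ids]                        # shape (3, 50), one row per id
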
@@ -18,7 +21,7 @@ from fuel.transformers import Batch
 from fuel.streams import DataStream
 from fuel.schemes import ConstantScheme

-from blocks.algorithms import GradientDescent, Scale
+from blocks.algorithms import GradientDescent, Scale, AdaDelta
 from blocks.graph import ComputationGraph
 from blocks.main_loop import MainLoop
 from blocks.extensions import Printing
@@ -43,7 +46,7 @@ dim_embed = 50
 dim_hidden = 200
 learning_rate = 0.01
-batch_size = 64
+batch_size = 32


 def main():
     # The input and the targets
@@ -65,25 +68,31 @@ def main():
     client_embed = client_embed_table.apply(x_client).flatten(ndim=2)
     stand_embed = stand_embed_table.apply(x_stand).flatten(ndim=2)
     inputs = tensor.concatenate([x_firstk, x_lastk,
-                                 client_embed, stand_embed],
+                                 client_embed.zeros_like(), stand_embed.zeros_like()],
                                 axis=1)
+    # inputs = theano.printing.Print("inputs")(inputs)
     hidden = hidden_layer.apply(inputs)
+    # hidden = theano.printing.Print("hidden")(hidden)
     outputs = output_layer.apply(hidden)

     # Calculate the cost
     # cost = (outputs - y).norm(2, axis=1).mean()
     # outputs = numpy.array([[ -8.621953, 41.162142]], dtype='float32') + 0 * outputs
-    cost = hdist.hdist(outputs, y).mean()
+    cost = (outputs - y).norm(2, axis=1).mean()
     cost.name = 'cost'
+    hcost = hdist.hdist(outputs, y).mean()
+    hcost.name = 'hcost'

     # Initialization
     client_embed_table.weights_init = IsotropicGaussian(0.001)
+    stand_embed_table.weights_init = IsotropicGaussian(0.001)
     hidden_layer.weights_init = IsotropicGaussian(0.01)
     hidden_layer.biases_init = Constant(0.001)
     output_layer.weights_init = IsotropicGaussian(0.001)
     output_layer.biases_init = Constant(0.001)

     client_embed_table.initialize()
+    stand_embed_table.initialize()
     hidden_layer.initialize()
     output_layer.initialize()
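
This hunk demotes hdist.hdist from training cost to a monitored quantity
(hcost) and trains on the plain L2 norm between predicted and true coordinates
instead. The hdist module is not shown in this diff; assuming it computes the
great-circle (haversine) distance between GPS points, a minimal numpy sketch of
that formula (the (longitude, latitude) column order is inferred from the
hard-coded Porto coordinates in the comment above):

    import numpy

    def haversine_km(pred, target, radius=6371.0):
        # pred, target: (batch, 2) arrays of (longitude, latitude) in degrees
        lon1, lat1 = numpy.radians(pred[:, 0]), numpy.radians(pred[:, 1])
        lon2, lat2 = numpy.radians(target[:, 0]), numpy.radians(target[:, 1])
        a = (numpy.sin((lat2 - lat1) / 2) ** 2
             + numpy.cos(lat1) * numpy.cos(lat2) * numpy.sin((lon2 - lon1) / 2) ** 2)
        return 2 * radius * numpy.arcsin(numpy.sqrt(a))  # distance in km

At city scale the haversine distance is nearly proportional to the L2 norm on
raw coordinates, which is presumably why the simpler, smoother norm is used for
the gradient while the true metric is only monitored.
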
@@ -107,13 +116,14 @@ def main():

     # Training
     cg = ComputationGraph(cost)
+    params = VariableFilter(bricks=[Linear])(cg.parameters)
     algorithm = GradientDescent(
         cost=cost,
-        # step_rule=AdaDelta(decay_rate=0.5),
-        step_rule=Scale(learning_rate=learning_rate),
-        params=cg.parameters)
+        step_rule=AdaDelta(decay_rate=0.5),
+        # step_rule=Scale(learning_rate=learning_rate),
+        params=params)

-    extensions=[DataStreamMonitoring([cost], valid_stream,
+    extensions=[DataStreamMonitoring([cost, hcost], valid_stream,
                                      prefix='valid',
                                      every_n_batches=1000),
                 Printing(every_n_batches=1000),
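
Two training changes land in this last hunk: VariableFilter(bricks=[Linear])
restricts the trained parameters to those of the Linear bricks, which appears
to leave the lookup-table embeddings frozen (consistent with the zeroed
embedding inputs earlier in the diff), and the step rule switches from a
fixed-learning-rate Scale to AdaDelta. For reference, a minimal numpy sketch of
the AdaDelta update from Zeiler (2012), where decay_rate corresponds to rho;
whether Blocks places the epsilon exactly this way is not shown here:

    import numpy

    def adadelta_step(param, grad, acc_g2, acc_dx2, rho=0.5, eps=1e-6):
        # Decaying average of squared gradients
        acc_g2 = rho * acc_g2 + (1 - rho) * grad ** 2
        # Scale the gradient by RMS(past updates) / RMS(gradients)
        dx = -numpy.sqrt(acc_dx2 + eps) / numpy.sqrt(acc_g2 + eps) * grad
        # Decaying average of squared updates, used to scale the next step
        acc_dx2 = rho * acc_dx2 + (1 - rho) * dx ** 2
        return param + dx, acc_g2, acc_dx2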