diff options
Diffstat (limited to 'model.py')
-rw-r--r-- | model.py | 28 |
1 file changed, 19 insertions, 9 deletions
@@ -5,10 +5,13 @@ from argparse import ArgumentParser import numpy import theano +from theano import printing from theano import tensor from theano.ifelse import ifelse -from blocks.bricks import MLP, Rectifier, Linear +from blocks.filter import VariableFilter + +from blocks.bricks import MLP, Rectifier, Linear, Sigmoid, Identity from blocks.bricks.lookup import LookupTable from blocks.initialization import IsotropicGaussian, Constant @@ -18,7 +21,7 @@ from fuel.transformers import Batch from fuel.streams import DataStream from fuel.schemes import ConstantScheme -from blocks.algorithms import GradientDescent, Scale +from blocks.algorithms import GradientDescent, Scale, AdaDelta from blocks.graph import ComputationGraph from blocks.main_loop import MainLoop from blocks.extensions import Printing @@ -43,7 +46,7 @@ dim_embed = 50 dim_hidden = 200 learning_rate = 0.01 -batch_size = 64 +batch_size = 32 def main(): # The input and the targets @@ -65,25 +68,31 @@ def main(): client_embed = client_embed_table.apply(x_client).flatten(ndim=2) stand_embed = stand_embed_table.apply(x_stand).flatten(ndim=2) inputs = tensor.concatenate([x_firstk, x_lastk, - client_embed, stand_embed], + client_embed.zeros_like(), stand_embed.zeros_like()], axis=1) + # inputs = theano.printing.Print("inputs")(inputs) hidden = hidden_layer.apply(inputs) + # hidden = theano.printing.Print("hidden")(hidden) outputs = output_layer.apply(hidden) # Calculate the cost # cost = (outputs - y).norm(2, axis=1).mean() # outputs = numpy.array([[ -8.621953, 41.162142]], dtype='float32') + 0 * outputs - cost = hdist.hdist(outputs, y).mean() + cost = (outputs - y).norm(2, axis=1).mean() cost.name = 'cost' + hcost = hdist.hdist(outputs, y).mean() + hcost.name = 'hcost' # Initialization client_embed_table.weights_init = IsotropicGaussian(0.001) + stand_embed_table.weights_init = IsotropicGaussian(0.001) hidden_layer.weights_init = IsotropicGaussian(0.01) hidden_layer.biases_init = Constant(0.001) 
output_layer.weights_init = IsotropicGaussian(0.001) output_layer.biases_init = Constant(0.001) client_embed_table.initialize() + stand_embed_table.initialize() hidden_layer.initialize() output_layer.initialize() @@ -107,13 +116,14 @@ def main(): # Training cg = ComputationGraph(cost) + params = VariableFilter(bricks=[Linear])(cg.parameters) algorithm = GradientDescent( cost=cost, - # step_rule=AdaDelta(decay_rate=0.5), - step_rule=Scale(learning_rate=learning_rate), - params=cg.parameters) + step_rule=AdaDelta(decay_rate=0.5), + # step_rule=Scale(learning_rate=learning_rate), + params=params) - extensions=[DataStreamMonitoring([cost], valid_stream, + extensions=[DataStreamMonitoring([cost, hcost], valid_stream, prefix='valid', every_n_batches=1000), Printing(every_n_batches=1000), |