From b637e0bc7b123fe41ea2247ebb7aa311c88b81e0 Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Thu, 2 Jul 2015 11:30:41 -0400
Subject: Step rule & dropout params cleanup

---
 config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py | 7 +++++--
 train.py                                                     | 7 ++++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py b/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py
index e1bd840..4476879 100644
--- a/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py
+++ b/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py
@@ -2,6 +2,9 @@ import os
 import cPickle
 
 from blocks.initialization import IsotropicGaussian, Constant
+from blocks.bricks import Rectifier, Tanh, Logistic
+from blocks.filter import VariableFilter
+from blocks import roles
 
 import data
 from model.joint_simple_mlp_tgtcls import Model, Stream
@@ -46,8 +49,8 @@ embed_weights_init = IsotropicGaussian(0.01)
 mlp_weights_init = IsotropicGaussian(0.1)
 mlp_biases_init = Constant(0.01)
 
-apply_dropout = True
-dropout_p = 0.5
+dropout = 0.5
+dropout_inputs = VariableFilter(bricks=[Rectifier], name='output')
 
 # use adadelta, so no learning_rate or momentum
 batch_size = 200
diff --git a/train.py b/train.py
index 0d40f84..1b01833 100755
--- a/train.py
+++ b/train.py
@@ -132,12 +132,17 @@ if __name__ == "__main__":
         parameters_size += reduce(operator.mul, value.get_value().shape, 1)
     logger.info('Total number of parameters: %d in %d matrices' % (parameters_size, len(cg.get_params())))
 
+    if hasattr(config, 'step_rule'):
+        step_rule = config.step_rule
+    else:
+        step_rule = AdaDelta()
+
     params = cg.parameters
     algorithm = GradientDescent(
         cost=cost,
         step_rule=CompositeRule([
             ElementwiseRemoveNotFinite(),
-            config.step_rule,
+            step_rule
         ]),
         params=params)
 
--
cgit v1.2.3
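
Note on the dropout cleanup: the boolean apply_dropout flag and the
dropout_p probability are replaced by dropout (the drop probability) and
dropout_inputs (a Blocks VariableFilter that picks out which graph
variables get dropped, here the outputs of every Rectifier brick).
Below is a minimal, self-contained sketch of how such a pair of values
can be consumed with Blocks' apply_dropout graph transform; the toy MLP
and its dimensions are made up for illustration, not taken from this
repository:

    import theano.tensor as tensor
    from blocks.bricks import MLP, Rectifier, Identity
    from blocks.initialization import IsotropicGaussian, Constant
    from blocks.filter import VariableFilter
    from blocks.graph import ComputationGraph, apply_dropout

    x = tensor.matrix('features')
    y = tensor.matrix('targets')
    mlp = MLP(activations=[Rectifier(), Identity()], dims=[10, 50, 1],
              weights_init=IsotropicGaussian(0.1),
              biases_init=Constant(0.01))
    mlp.initialize()
    cost = tensor.sqr(y - mlp.apply(x)).mean()

    # The two config values introduced by this commit:
    dropout = 0.5
    dropout_inputs = VariableFilter(bricks=[Rectifier], name='output')

    cg = ComputationGraph([cost])
    # Calling the filter on cg.variables returns the Rectifier output
    # variables; apply_dropout then rebuilds the graph with dropout
    # (p=0.5) applied to exactly those variables.
    cg = apply_dropout(cg, dropout_inputs(cg.variables), dropout)
    dropout_cost = cg.outputs[0]

Expressing the dropout targets as a VariableFilter in the config keeps
the training script generic: each config decides which bricks' outputs
are dropped instead of train.py hard-coding that choice behind a flag.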
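
Note on the step-rule cleanup: train.py now falls back to AdaDelta()
whenever a config module does not define a step_rule attribute, so only
configs that want a different optimizer need to set it. For example, a
config could override the default with plain momentum SGD (hypothetical
values, not taken from any config in this patch):

    from blocks.algorithms import Momentum

    # Picked up by train.py via hasattr(config, 'step_rule');
    # configs without this attribute fall back to AdaDelta().
    step_rule = Momentum(learning_rate=0.001, momentum=0.9)

Whatever rule is chosen, train.py still wraps it in
CompositeRule([ElementwiseRemoveNotFinite(), step_rule]), so non-finite
gradient entries are scrubbed before the update regardless of the
configured optimizer.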