-rw-r--r-- | config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py | 7
-rwxr-xr-x | train.py | 7
2 files changed, 11 insertions, 3 deletions
diff --git a/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py b/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py
index e1bd840..4476879 100644
--- a/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py
+++ b/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py
@@ -2,6 +2,9 @@
 import os
 import cPickle
 from blocks.initialization import IsotropicGaussian, Constant
+from blocks.bricks import Rectifier, Tanh, Logistic
+from blocks.filter import VariableFilter
+from blocks import roles
 
 import data
 from model.joint_simple_mlp_tgtcls import Model, Stream
@@ -46,8 +49,8 @@
 embed_weights_init = IsotropicGaussian(0.01)
 mlp_weights_init = IsotropicGaussian(0.1)
 mlp_biases_init = Constant(0.01)
 
-apply_dropout = True
-dropout_p = 0.5
+dropout = 0.5
+dropout_inputs = VariableFilter(bricks=[Rectifier], name='output')
 # use adadelta, so no learning_rate or momentum
 batch_size = 200
diff --git a/train.py b/train.py
--- a/train.py
+++ b/train.py
@@ -132,12 +132,17 @@ if __name__ == "__main__":
         parameters_size += reduce(operator.mul, value.get_value().shape, 1)
     logger.info('Total number of parameters: %d in %d matrices' % (parameters_size, len(cg.get_params())))
 
+    if hasattr(config, 'step_rule'):
+        step_rule = config.step_rule
+    else:
+        step_rule = AdaDelta()
+
     params = cg.parameters
     algorithm = GradientDescent(
         cost=cost,
         step_rule=CompositeRule([
             ElementwiseRemoveNotFinite(),
-            config.step_rule,
+            step_rule
         ]),
         params=params)
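Note, for context (not part of the commit): in Blocks, a dropout/dropout_inputs pair like the one this config now defines is typically consumed by rewriting the computation graph. The sketch below assumes train.py builds a ComputationGraph named cg over cost, consistent with the hunk above; ComputationGraph, apply_dropout, and VariableFilter are real Blocks APIs, while the surrounding control flow is illustrative.

from blocks.graph import ComputationGraph, apply_dropout

cg = ComputationGraph(cost)
if hasattr(config, 'dropout'):
    # config.dropout_inputs is the VariableFilter defined in the config;
    # calling it on cg.variables selects the Rectifier outputs to drop.
    dropped = config.dropout_inputs(cg.variables)
    cg = apply_dropout(cg, dropped, config.dropout)
    cost = cg.outputs[0]

The train.py hunk makes config.step_rule optional, defaulting to AdaDelta(). A config that wants a different optimizer can now simply define one; for example, a hypothetical override using Blocks' built-in Momentum rule:

from blocks.algorithms import Momentum

# Optional per-config step rule; picked up by the hasattr() check in train.py.
step_rule = Momentum(learning_rate=0.001, momentum=0.9)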