-rw-r--r--  config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py  7
-rwxr-xr-x  train.py  7
2 files changed, 11 insertions, 3 deletions
diff --git a/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py b/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py
index e1bd840..4476879 100644
--- a/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py
+++ b/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py
@@ -2,6 +2,9 @@ import os
 import cPickle
 
 from blocks.initialization import IsotropicGaussian, Constant
+from blocks.bricks import Rectifier, Tanh, Logistic
+from blocks.filter import VariableFilter
+from blocks import roles
 
 import data
 from model.joint_simple_mlp_tgtcls import Model, Stream
@@ -46,8 +49,8 @@ embed_weights_init = IsotropicGaussian(0.01)
 mlp_weights_init = IsotropicGaussian(0.1)
 mlp_biases_init = Constant(0.01)
 
-apply_dropout = True
-dropout_p = 0.5
+dropout = 0.5
+dropout_inputs = VariableFilter(bricks=[Rectifier], name='output')
 
 # use adadelta, so no learning_rate or momentum
 batch_size = 200
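
The config change above drops the old apply_dropout/dropout_p flags in favour of a dropout probability plus an unapplied VariableFilter that selects which graph variables receive dropout (here, the outputs of every Rectifier brick); the extra Tanh and Logistic imports presumably let sibling configs filter on those brick types too. As an illustrative sketch (not necessarily the project's actual code; cg, cost and config are placeholder names), a training script could consume these two values with Blocks' standard blocks.graph.apply_dropout helper:

    from blocks.graph import ComputationGraph, apply_dropout

    cg = ComputationGraph(cost)  # cost: the model's cost variable
    if hasattr(config, 'dropout'):
        # config.dropout_inputs is a VariableFilter; calling it on
        # cg.variables picks out the Rectifier outputs to drop.
        dropped = config.dropout_inputs(cg.variables)
        cg = apply_dropout(cg, dropped, config.dropout)
    cost = cg.outputs[0]  # cost with dropout applied
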
diff --git a/train.py b/train.py
index 0d40f84..1b01833 100755
--- a/train.py
+++ b/train.py
@@ -132,12 +132,17 @@ if __name__ == "__main__":
         parameters_size += reduce(operator.mul, value.get_value().shape, 1)
     logger.info('Total number of parameters: %d in %d matrices' % (parameters_size, len(cg.get_params())))
 
+    if hasattr(config, 'step_rule'):
+        step_rule = config.step_rule
+    else:
+        step_rule = AdaDelta()
+
     params = cg.parameters
     algorithm = GradientDescent(
         cost=cost,
         step_rule=CompositeRule([
             ElementwiseRemoveNotFinite(),
-            config.step_rule,
+            step_rule
         ]),
         params=params)
 
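
The train.py change makes step_rule an optional config attribute, falling back to AdaDelta (matching the "# use adadelta" comment in the config above) when a config does not define one. A config wanting a different optimizer can now simply set the attribute; a hypothetical example using a stock Blocks step rule:

    # hypothetical config snippet; Momentum is illustrative, any
    # Blocks step rule object would work here
    from blocks.algorithms import Momentum
    step_rule = Momentum(learning_rate=0.001, momentum=0.9)

Equivalently, the hasattr/else block could be collapsed to step_rule = getattr(config, 'step_rule', AdaDelta()), at the cost of constructing the default AdaDelta even when it goes unused.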