diff options
author | Alex Auvolat <alex.auvolat@ens.fr> | 2015-07-20 13:57:49 -0400 |
---|---|---|
committer | Alex Auvolat <alex.auvolat@ens.fr> | 2015-07-20 13:57:49 -0400 |
commit | 99c37ecef356c20673b4ecd5030749e3a6abcf7a (patch) | |
tree | 3289721d7882d811f2584ee9701255cb2cd694c1 /dgsrnn.py | |
parent | 7bf692d9ae344ccef044923f131f5ce8de85b0b4 (diff) | |
download | text-rnn-99c37ecef356c20673b4ecd5030749e3a6abcf7a.tar.gz text-rnn-99c37ecef356c20673b4ecd5030749e3a6abcf7a.zip |
New model CCHLSTM ; other models broken.
Please enter the commit message for your changes. Lines starting
Diffstat (limited to 'dgsrnn.py')
-rw-r--r-- | dgsrnn.py | 33 |
1 files changed, 22 insertions, 11 deletions
@@ -2,6 +2,8 @@ import theano from theano import tensor import numpy +from theano.tensor.shared_randomstreams import RandomStreams + from blocks.algorithms import Momentum, AdaDelta, RMSProp, Adam from blocks.bricks import Activation, Tanh, Logistic, Softmax, Rectifier, Linear, MLP, Initializable, Identity from blocks.bricks.base import application, lazy @@ -13,6 +15,8 @@ from blocks.filter import VariableFilter from blocks.roles import WEIGHT, INITIAL_STATE, add_role from blocks.graph import ComputationGraph, apply_noise, apply_dropout +rng = RandomStreams() + class TRectifier(Activation): @application(inputs=['input_'], outputs=['output']) def apply(self, input_): @@ -21,8 +25,8 @@ class TRectifier(Activation): # An epoch will be composed of 'num_seqs' sequences of len 'seq_len' # divided in chunks of lengh 'seq_div_size' num_seqs = 10 -seq_len = 2000 -seq_div_size = 100 +seq_len = 1000 +seq_div_size = 5 io_dim = 256 @@ -36,20 +40,21 @@ output_hidden_activations = [] weight_noise_std = 0.05 -output_h_dropout = 0.5 +output_h_dropout = 0.0 +drop_update = 0.0 -l1_state = 0.01 -l1_reset = 0.01 +l1_state = 0.00 +l1_reset = 0.1 step_rule = 'momentum' -learning_rate = 0.01 +learning_rate = 0.001 momentum = 0.99 -param_desc = '%s,t%s,o%s-n%s-d%s-L1:%s,%s-%s' % ( +param_desc = '%s,t%s,o%s-n%s-d%s,%s-L1:%s,%s-%s' % ( repr(state_dim), repr(transition_hidden), repr(output_hidden), repr(weight_noise_std), - repr(output_h_dropout), + repr(output_h_dropout), repr(drop_update), repr(l1_state), repr(l1_reset), step_rule ) @@ -105,13 +110,15 @@ class DGSRNN(BaseRecurrent, Initializable): return self.state_dim return super(GFGRU, self).get_dim(name) - @recurrent(sequences=['inputs'], states=['state'], + @recurrent(sequences=['inputs', 'drop_updates_mask'], states=['state'], outputs=['state', 'reset'], contexts=[]) - def apply(self, inputs=None, state=None): + def apply(self, inputs=None, drop_updates_mask=None, state=None): inter_v = self.inter.apply(tensor.concatenate([inputs, state], axis=1)) reset_v = self.reset.apply(inter_v) update_v = self.update.apply(inter_v) + reset_v = reset_v * drop_updates_mask + new_state = state * (1 - reset_v) + reset_v * update_v return new_state, reset_v @@ -141,7 +148,11 @@ class Model(): prev_state = theano.shared(numpy.zeros((num_seqs, state_dim)).astype(theano.config.floatX), name='state') - states, resets = dgsrnn.apply(in_onehot.dimshuffle(1, 0, 2), state=prev_state) + states, resets = dgsrnn.apply(inputs=in_onehot.dimshuffle(1, 0, 2), + drop_updates_mask=rng.binomial(size=(inp.shape[1], inp.shape[0], state_dim), + p=1-drop_update, + dtype=theano.config.floatX), + state=prev_state) states = states.dimshuffle(1, 0, 2) resets = resets.dimshuffle(1, 0, 2) |