author | Alex Auvolat <alex@adnab.me> | 2016-03-08 13:26:28 +0100
---|---|---
committer | Alex Auvolat <alex@adnab.me> | 2016-03-08 13:26:28 +0100
commit | 2f479926c16d2911d0dd878c21de082abfc5b237 (patch) |
tree | b399e9ad9af04a9449334dff1a47449808b7ca13 /cchlstm.py |
parent | 23093608e0edc43477c3a2ed804ae1016790f7e4 (diff) |
download | text-rnn-2f479926c16d2911d0dd878c21de082abfc5b237.tar.gz text-rnn-2f479926c16d2911d0dd878c21de082abfc5b237.zip |
Revive project
Diffstat (limited to 'cchlstm.py')
-rw-r--r-- | cchlstm.py | 248 |
1 file changed, 0 insertions, 248 deletions
diff --git a/cchlstm.py b/cchlstm.py
deleted file mode 100644
index 78c9a1f..0000000
--- a/cchlstm.py
+++ /dev/null
@@ -1,248 +0,0 @@
-import theano
-from theano import tensor
-import numpy
-
-from theano.tensor.shared_randomstreams import RandomStreams
-
-from blocks.algorithms import Momentum, AdaDelta, RMSProp
-from blocks.bricks import Tanh, Softmax, Linear, MLP, Initializable
-from blocks.bricks.lookup import LookupTable
-from blocks.bricks.recurrent import LSTM, BaseRecurrent, recurrent
-from blocks.initialization import IsotropicGaussian, Constant
-
-from blocks.filter import VariableFilter
-from blocks.roles import WEIGHT
-from blocks.graph import ComputationGraph, apply_noise, apply_dropout
-
-rng = RandomStreams()
-
-# An epoch will be composed of 'num_seqs' sequences of len 'seq_len'
-# divided in chunks of length 'seq_div_size'
-num_seqs = 50
-seq_len = 2000
-seq_div_size = 100
-
-io_dim = 256
-
-# Model structure
-hidden_dims = [512, 512, 512, 512, 512]
-activation_function = Tanh()
-
-cond_cert = [0.5, 0.5, 0.5, 0.5]
-block_prob = [0.1, 0.1, 0.1, 0.1]
-
-# Regularization
-w_noise_std = 0.02
-
-# Step rule
-step_rule = 'adadelta'
-learning_rate = 0.1
-momentum = 0.9
-
-
-param_desc = '%s(x%sp%s)-n%s-%dx%d(%d)-%s' % (
-        repr(hidden_dims), repr(cond_cert), repr(block_prob),
-        repr(w_noise_std),
-        num_seqs, seq_len, seq_div_size,
-        step_rule
-        )
-
-save_freq = 5
-on_irc = False
-
-# parameters for sample generation
-sample_len = 200
-sample_temperature = 0.7 #0.5
-sample_freq = 1
-
-if step_rule == 'rmsprop':
-    step_rule = RMSProp()
-elif step_rule == 'adadelta':
-    step_rule = AdaDelta()
-elif step_rule == 'momentum':
-    step_rule = Momentum(learning_rate=learning_rate, momentum=momentum)
-else:
-    assert(False)
-
-
-class CCHLSTM(BaseRecurrent, Initializable):
-    def __init__(self, io_dim, hidden_dims, cond_cert, activation=None, **kwargs):
-        super(CCHLSTM, self).__init__(**kwargs)
-
-        self.cond_cert = cond_cert
-
-        self.io_dim = io_dim
-        self.hidden_dims = hidden_dims
-
-        self.children = []
-        self.layers = []
-
-        self.softmax = Softmax()
-        self.children.append(self.softmax)
-
-        for i, d in enumerate(hidden_dims):
-            i0 = LookupTable(length=io_dim,
-                             dim=4*d,
-                             name='i0-%d'%i)
-            self.children.append(i0)
-
-            if i > 0:
-                i1 = Linear(input_dim=hidden_dims[i-1],
-                            output_dim=4*d,
-                            name='i1-%d'%i)
-                self.children.append(i1)
-            else:
-                i1 = None
-
-            lstm = LSTM(dim=d, activation=activation,
-                        name='LSTM-%d'%i)
-            self.children.append(lstm)
-
-            o = Linear(input_dim=d,
-                       output_dim=io_dim,
-                       name='o-%d'%i)
-            self.children.append(o)
-
-            self.layers.append((i0, i1, lstm, o))
-
-    @recurrent(contexts=[])
-    def apply(self, inputs, **kwargs):
-
-        l0i, _, l0l, l0o = self.layers[0]
-        l0iv = l0i.apply(inputs)
-        new_states0, new_cells0 = l0l.apply(states=kwargs['states0'],
-                                            cells=kwargs['cells0'],
-                                            inputs=l0iv,
-                                            iterate=False)
-        l0ov = l0o.apply(new_states0)
-
-        pos = l0ov
-        ps = new_states0
-
-        passnext = tensor.ones((inputs.shape[0],))
-        out_sc = [new_states0, new_cells0, passnext]
-
-        for i, (cch, (i0, i1, l, o)) in enumerate(zip(self.cond_cert, self.layers[1:])):
-            pop = self.softmax.apply(pos)
-            best = pop.max(axis=1)
-            passnext = passnext * tensor.le(best, cch) * kwargs['pass%d'%i]
-
-            i0v = i0.apply(inputs)
-            i1v = i1.apply(ps)
-
-            prev_states = kwargs['states%d'%i]
-            prev_cells = kwargs['cells%d'%i]
-            new_states, new_cells = l.apply(inputs=i0v + i1v,
-                                            states=prev_states,
-                                            cells=prev_cells,
-                                            iterate=False)
-            new_states = tensor.switch(passnext[:, None], new_states, prev_states)
-            new_cells = tensor.switch(passnext[:, None], new_cells, prev_cells)
-            out_sc += [new_states, new_cells, passnext]
-
-            ov = o.apply(new_states)
-            pos = tensor.switch(passnext[:, None], pos + ov, pos)
-            ps = new_states
-
-        return [pos] + out_sc
-
-    def get_dim(self, name):
-        dims = {'pred': self.io_dim}
-        for i, d in enumerate(self.hidden_dims):
-            dims['states%d'%i] = dims['cells%d'%i] = d
-        if name in dims:
-            return dims[name]
-        return super(CCHLSTM, self).get_dim(name)
-
-    @apply.property('sequences')
-    def apply_sequences(self):
-        return ['inputs'] + ['pass%d'%i for i in range(len(self.hidden_dims)-1)]
-
-    @apply.property('states')
-    def apply_states(self):
-        ret = []
-        for i in range(len(self.hidden_dims)):
-            ret += ['states%d'%i, 'cells%d'%i]
-        return ret
-
-    @apply.property('outputs')
-    def apply_outputs(self):
-        ret = ['pred']
-        for i in range(len(self.hidden_dims)):
-            ret += ['states%d'%i, 'cells%d'%i, 'active%d'%i]
        return ret
-
-
-class Model():
-    def __init__(self):
-        inp = tensor.lmatrix('bytes')
-
-        # Make state vars
-        state_vars = {}
-        for i, d in enumerate(hidden_dims):
-            state_vars['states%d'%i] = theano.shared(numpy.zeros((num_seqs, d))
-                                                          .astype(theano.config.floatX),
-                                                     name='states%d'%i)
-            state_vars['cells%d'%i] = theano.shared(numpy.zeros((num_seqs, d))
-                                                         .astype(theano.config.floatX),
-                                                    name='cells%d'%i)
-        # Construct brick
-        cchlstm = CCHLSTM(io_dim=io_dim,
-                          hidden_dims=hidden_dims,
-                          cond_cert=cond_cert,
-                          activation=activation_function)
-
-        # Random pass
-        passdict = {}
-        for i, p in enumerate(block_prob):
-            passdict['pass%d'%i] = rng.binomial(size=(inp.shape[1], inp.shape[0]), p=1-p)
-
-        # Apply it
-        outs = cchlstm.apply(inputs=inp.dimshuffle(1, 0),
-                             **dict(state_vars.items() + passdict.items()))
-        states = []
-        active_prop = []
-        for i in range(len(hidden_dims)):
-            states.append((state_vars['states%d'%i], outs[3*i+1][-1, :, :]))
-            states.append((state_vars['cells%d'%i], outs[3*i+2][-1, :, :]))
-            active_prop.append(outs[3*i+3].mean())
-            active_prop[-1].name = 'active_prop_%d'%i
-
-        out = outs[0].dimshuffle(1, 0, 2)
-
-        # Do prediction and calculate cost
-        pred = out.argmax(axis=2)
-
-        cost = Softmax().categorical_cross_entropy(inp[:, 1:].flatten(),
-                                                   out[:, :-1, :].reshape((inp.shape[0]*(inp.shape[1]-1),
-                                                                           io_dim)))
-        error_rate = tensor.neq(inp[:, 1:].flatten(), pred[:, :-1].flatten()).mean()
-
-        # Initialize all bricks
-        for brick in [cchlstm]:
-            brick.weights_init = IsotropicGaussian(0.1)
-            brick.biases_init = Constant(0.)
-            brick.initialize()
-
-        # Apply noise and dropout
-        cg = ComputationGraph([cost, error_rate])
-        if w_noise_std > 0:
-            noise_vars = VariableFilter(roles=[WEIGHT])(cg)
-            cg = apply_noise(cg, noise_vars, w_noise_std)
-        [cost_reg, error_rate_reg] = cg.outputs
-
-        self.sgd_cost = cost_reg
-        self.monitor_vars = [[cost, cost_reg],
-                             [error_rate, error_rate_reg],
-                             active_prop]
-
-        cost.name = 'cost'
-        cost_reg.name = 'cost_reg'
-        error_rate.name = 'error_rate'
-        error_rate_reg.name = 'error_rate_reg'
-
-        self.out = out
-        self.pred = pred
-
-        self.states = states
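The deleted brick implements per-layer conditional computation: layer 0 always runs, and each higher layer is applied only where the softmax of the logits accumulated so far is still uncertain (its maximum probability is at most that layer's `cond_cert` threshold) and where the random `pass%d` mask, drawn from `block_prob`, allows it; `tensor.switch(passnext[:, None], ...)` then keeps the previous states and logits for the inactive batch rows. Below is a minimal plain-numpy sketch of that gating for one time step, not the Blocks/Theano brick itself; `cchlstm_step`, its argument layout, and the dummy `step` callback are illustrative assumptions (the real upper layers also receive the input byte and the lower layer's state).

```python
import numpy as np

def softmax(x):
    e = np.exp(x - x.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

def cchlstm_step(x_logits, states, cond_cert, rand_pass, step):
    """One time step of confidence-gated stacking over a batch (sketch).

    x_logits : (batch, io_dim) output logits of the always-on layer 0
    states   : list of (batch, dim) states for the upper layers
    cond_cert: certainty threshold per upper layer (cf. cond_cert above)
    rand_pass: (n_layers, batch) 0/1 mask (cf. the 'pass%d' sequences)
    step     : stand-in for an LSTM step, state -> (new_state, out_logits)
    """
    pos = x_logits                          # running prediction logits
    passnext = np.ones(x_logits.shape[0])   # 1 = this row still climbs the stack
    for i, (st, cc) in enumerate(zip(states, cond_cert)):
        # Go up only where the prediction is still uncertain and not blocked.
        best = softmax(pos).max(axis=1)
        passnext = passnext * (best <= cc) * rand_pass[i]
        new_st, out = step(st)
        # Inactive rows keep their old state and logits (tensor.switch analogue).
        states[i] = np.where(passnext[:, None] > 0, new_st, st)
        pos = np.where(passnext[:, None] > 0, pos + out, pos)
    return pos, states

# Tiny usage example with dummy shapes matching the config above.
batch, n_out, dim = 3, 256, 512
states = [np.zeros((batch, dim)) for _ in range(4)]
rand_pass = (np.random.rand(4, batch) > 0.1).astype(float)   # block_prob = 0.1
step = lambda st: (st + 1.0, 0.01 * np.random.randn(batch, n_out))
logits, states = cchlstm_step(np.zeros((batch, n_out)), states,
                              [0.5, 0.5, 0.5, 0.5], rand_pass, step)
```

In the Theano brick, the mean of each `passnext` vector is what gets monitored as `active_prop_%d`: the fraction of the batch that each layer actually processed.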