author     Alex Auvolat <alex@adnab.me>   2016-03-08 14:39:28 +0100
committer  Alex Auvolat <alex@adnab.me>   2016-03-08 14:39:28 +0100
commit     3601cb8e9d99ccd3b7e8791415bb64206b6c530e
tree       33ac40f1dff913891c168e77af193bd3bd6f74f2
parent     2f479926c16d2911d0dd878c21de082abfc5b237
Fix!
 config/hpc-lstm-1.py |  3 ++-
 model/hpc_lstm.py    | 31 +++++++++++++++++++++----------
 2 files changed, 23 insertions(+), 11 deletions(-)
diff --git a/config/hpc-lstm-1.py b/config/hpc-lstm-1.py
index e4009d5..ad8b121 100644
--- a/config/hpc-lstm-1.py
+++ b/config/hpc-lstm-1.py
@@ -14,7 +14,8 @@ seq_div_size = 100
 hidden_dims = [128, 128, 256, 512]
 cost_factors = [1., 1., 1., 1.]
-hidden_q = [0.02, 0.02, 0.05, 0.05]
+hidden_q = [0.1, 0.2, 0.2, 0.4]
+error_scale_factor = [2., 1.5, 1.5, 1.5]
 
 activation_function = Tanh()
 
 out_hidden = [512]
diff --git a/model/hpc_lstm.py b/model/hpc_lstm.py
index 8c9cd90..3e4e878 100644
--- a/model/hpc_lstm.py
+++ b/model/hpc_lstm.py
@@ -28,7 +28,10 @@ class Model():
         hidden = []
         costs = []
         next_target = in_onehot.dimshuffle(1, 0, 2)
-        for i, (hdim, cf, q) in enumerate(zip(config.hidden_dims, config.cost_factors, config.hidden_q)):
+        for i, (hdim, cf, q, esf) in enumerate(zip(config.hidden_dims,
+                                                   config.cost_factors,
+                                                   config.hidden_q,
+                                                   config.error_scale_factor)):
             init_state = theano.shared(numpy.zeros((config.num_seqs, hdim)).astype(theano.config.floatX),
                                        name='st0_%d'%i)
             init_cell = theano.shared(numpy.zeros((config.num_seqs, hdim)).astype(theano.config.floatX),
@@ -41,19 +44,25 @@ class Model():
             linear2 = Linear(input_dim=hdim, output_dim=config.io_dim, name='lstm_out_%d'%i)
             tanh = Tanh('lstm_out_tanh_%d'%i)
             bricks += [linear, lstm, linear2, tanh]
+            if i > 0:
+                linear1 = Linear(input_dim=config.hidden_dims[i-1], output_dim=4*hdim,
+                                 name='lstm_in2_%d'%i)
+                bricks += [linear1]
 
             inter = linear.apply(theano.gradient.disconnected_grad(next_target))
+            if i > 0:
+                inter += linear1.apply(theano.gradient.disconnected_grad(hidden[-1][:-1,:,:]))
             new_hidden, new_cells = lstm.apply(inter, states=init_state, cells=init_cell)
             states.append((init_state, new_hidden[-1, :, :]))
             states.append((init_cell, new_cells[-1, :, :]))
 
-            hidden += [tensor.concatenate([init_state[None,:,:], new_hidden[:-1,:,:]],axis=0)]
+            hidden += [tensor.concatenate([init_state[None,:,:], new_hidden],axis=0)]
 
-            pred = tanh.apply(linear2.apply(hidden[-1]))
+            pred = tanh.apply(linear2.apply(hidden[-1][:-1,:,:]))
             diff = next_target - pred
 
             costs += [numpy.float32(cf) * ((abs(next_target)+q)*(diff**2)).sum(axis=2).mean()]
-            next_target = diff
+            next_target = diff*esf
 
 
         # Construct output from hidden states
@@ -65,7 +74,8 @@ class Model():
             pred_linear = Linear(input_dim=dim, output_dim=out_dims[0],
                                  name='pred_linear_%d'%i)
             bricks.append(pred_linear)
-            out_parts.append(pred_linear.apply(theano.gradient.disconnected_grad(state)))
+            lin = state if i == 0 else theano.gradient.disconnected_grad(state)
+            out_parts.append(pred_linear.apply(lin))
 
         # Do prediction and calculate cost
         out = sum(out_parts)
@@ -77,14 +87,15 @@ class Model():
                       +[Identity()],
                       name='out_mlp')
             bricks.append(mlp)
-            out = mlp.apply(out.reshape((inp.shape[0]*inp.shape[1],-1))).reshape((inp.shape[0],inp.shape[1],-1))
+            out = mlp.apply(out.reshape((inp.shape[0]*(inp.shape[1]+1),-1))
+                           ).reshape((inp.shape[0],inp.shape[1]+1,-1))
 
         pred = out.argmax(axis=2)
 
         cost = Softmax().categorical_cross_entropy(inp.flatten(),
-                                                   out.reshape((inp.shape[0]*inp.shape[1],
+                                                   out[:,:-1,:].reshape((inp.shape[0]*inp.shape[1],
                                                                 config.io_dim))).mean()
-        error_rate = tensor.neq(inp.flatten(), pred.flatten()).mean()
+        error_rate = tensor.neq(inp.flatten(), pred[:,:-1].flatten()).mean()
 
         sgd_cost = cost + sum(costs)
 
@@ -106,8 +117,8 @@ class Model():
         self.monitor_vars = [costs,
                              [cost], [error_rate]]
 
-        self.out = out
-        self.pred = pred
+        self.out = out[:,1:,:]
+        self.pred = pred[:,1:]
 
         self.states = states
 
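For context, the substance of this fix is twofold: the output time indexing is shifted by one step (the +1 reshapes and the [:,:-1]/[:,1:] slices) so each position is scored against the next character rather than the current one, and each layer's prediction error is now multiplied by a per-layer error_scale_factor before being fed upward as the next layer's target. The following is a minimal NumPy sketch of that rescaled error cascade only, not code from this repo: layer_predict is a hypothetical stand-in, with fixed random weights, for the Linear -> LSTM -> Linear -> Tanh stack each real layer uses.

import numpy as np

# Hypothetical stand-in for one layer's Linear -> LSTM -> Linear -> Tanh stack;
# deterministic random weights play the role of trained parameters.
def layer_predict(layer_idx, target):
    rng = np.random.default_rng(layer_idx)
    w = rng.standard_normal((target.shape[-1], target.shape[-1]))
    return np.tanh(target @ w)

hidden_q = [0.1, 0.2, 0.2, 0.4]              # new per-layer cost weighting offsets
error_scale_factor = [2., 1.5, 1.5, 1.5]     # new: rescales the residual between layers

# Toy batch of one-hot targets, playing the role of in_onehot.
next_target = np.eye(4)[np.array([0, 2, 1, 3])].astype(np.float32)

costs = []
for i, (q, esf) in enumerate(zip(hidden_q, error_scale_factor)):
    pred = layer_predict(i, next_target)     # layer i predicts its own target
    diff = next_target - pred                # per-layer prediction error
    # Same weighting as the model's cost term ((abs(next_target)+q)*(diff**2));
    # axis=2 there is the feature axis, here the toy arrays are 2-D.
    costs.append(((np.abs(next_target) + q) * diff**2).sum(axis=-1).mean())
    next_target = diff * esf                 # the changed line: scale the error fed upward

print(costs)                                 # one weighted MSE term per layer

Multiplying the residual by esf > 1 counteracts the shrinkage of the target as successive layers explain away part of the signal, which is presumably also why the new hidden_q values grow with depth instead of staying near zero.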