summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alex Auvolat <alex@adnab.me>, 2016-03-08 14:39:28 +0100
committer: Alex Auvolat <alex@adnab.me>, 2016-03-08 14:39:28 +0100
commit: 3601cb8e9d99ccd3b7e8791415bb64206b6c530e (patch)
tree: 33ac40f1dff913891c168e77af193bd3bd6f74f2
parent: 2f479926c16d2911d0dd878c21de082abfc5b237 (diff)
download: text-rnn-3601cb8e9d99ccd3b7e8791415bb64206b6c530e.tar.gz
          text-rnn-3601cb8e9d99ccd3b7e8791415bb64206b6c530e.zip
Fix!
-rw-r--r-- config/hpc-lstm-1.py | 3
-rw-r--r-- model/hpc_lstm.py | 31
2 files changed, 23 insertions, 11 deletions
diff --git a/config/hpc-lstm-1.py b/config/hpc-lstm-1.py
index e4009d5..ad8b121 100644
--- a/config/hpc-lstm-1.py
+++ b/config/hpc-lstm-1.py
@@ -14,7 +14,8 @@ seq_div_size = 100
hidden_dims = [128, 128, 256, 512]
cost_factors = [1., 1., 1., 1.]
-hidden_q = [0.02, 0.02, 0.05, 0.05]
+hidden_q = [0.1, 0.2, 0.2, 0.4]
+error_scale_factor = [2., 1.5, 1.5, 1.5]
activation_function = Tanh()
out_hidden = [512]
diff --git a/model/hpc_lstm.py b/model/hpc_lstm.py
index 8c9cd90..3e4e878 100644
--- a/model/hpc_lstm.py
+++ b/model/hpc_lstm.py
@@ -28,7 +28,10 @@ class Model():
hidden = []
costs = []
next_target = in_onehot.dimshuffle(1, 0, 2)
- for i, (hdim, cf, q) in enumerate(zip(config.hidden_dims, config.cost_factors, config.hidden_q)):
+ for i, (hdim, cf, q, esf) in enumerate(zip(config.hidden_dims,
+ config.cost_factors,
+ config.hidden_q,
+ config.error_scale_factor)):
init_state = theano.shared(numpy.zeros((config.num_seqs, hdim)).astype(theano.config.floatX),
name='st0_%d'%i)
init_cell = theano.shared(numpy.zeros((config.num_seqs, hdim)).astype(theano.config.floatX),
@@ -41,19 +44,25 @@ class Model():
linear2 = Linear(input_dim=hdim, output_dim=config.io_dim, name='lstm_out_%d'%i)
tanh = Tanh('lstm_out_tanh_%d'%i)
bricks += [linear, lstm, linear2, tanh]
+ if i > 0:
+ linear1 = Linear(input_dim=config.hidden_dims[i-1], output_dim=4*hdim,
+ name='lstm_in2_%d'%i)
+ bricks += [linear1]
inter = linear.apply(theano.gradient.disconnected_grad(next_target))
+ if i > 0:
+ inter += linear1.apply(theano.gradient.disconnected_grad(hidden[-1][:-1,:,:]))
new_hidden, new_cells = lstm.apply(inter,
states=init_state,
cells=init_cell)
states.append((init_state, new_hidden[-1, :, :]))
states.append((init_cell, new_cells[-1, :, :]))
- hidden += [tensor.concatenate([init_state[None,:,:], new_hidden[:-1,:,:]],axis=0)]
- pred = tanh.apply(linear2.apply(hidden[-1]))
+ hidden += [tensor.concatenate([init_state[None,:,:], new_hidden],axis=0)]
+ pred = tanh.apply(linear2.apply(hidden[-1][:-1,:,:]))
diff = next_target - pred
costs += [numpy.float32(cf) * ((abs(next_target)+q)*(diff**2)).sum(axis=2).mean()]
- next_target = diff
+ next_target = diff*esf
# Construct output from hidden states
@@ -65,7 +74,8 @@ class Model():
pred_linear = Linear(input_dim=dim, output_dim=out_dims[0],
name='pred_linear_%d'%i)
bricks.append(pred_linear)
- out_parts.append(pred_linear.apply(theano.gradient.disconnected_grad(state)))
+ lin = state if i == 0 else theano.gradient.disconnected_grad(state)
+ out_parts.append(pred_linear.apply(lin))
# Do prediction and calculate cost
out = sum(out_parts)
@@ -77,14 +87,15 @@ class Model():
+[Identity()],
name='out_mlp')
bricks.append(mlp)
- out = mlp.apply(out.reshape((inp.shape[0]*inp.shape[1],-1))).reshape((inp.shape[0],inp.shape[1],-1))
+ out = mlp.apply(out.reshape((inp.shape[0]*(inp.shape[1]+1),-1))
+ ).reshape((inp.shape[0],inp.shape[1]+1,-1))
pred = out.argmax(axis=2)
cost = Softmax().categorical_cross_entropy(inp.flatten(),
- out.reshape((inp.shape[0]*inp.shape[1],
+ out[:,:-1,:].reshape((inp.shape[0]*inp.shape[1],
config.io_dim))).mean()
- error_rate = tensor.neq(inp.flatten(), pred.flatten()).mean()
+ error_rate = tensor.neq(inp.flatten(), pred[:,:-1].flatten()).mean()
sgd_cost = cost + sum(costs)
@@ -106,8 +117,8 @@ class Model():
self.monitor_vars = [costs, [cost],
[error_rate]]
- self.out = out
- self.pred = pred
+ self.out = out[:,1:,:]
+ self.pred = pred[:,1:]
self.states = states