summaryrefslogtreecommitdiff
path: root/model/lstm.py
diff options
context:
space:
mode:
Diffstat (limited to 'model/lstm.py')
-rw-r--r--model/lstm.py43
1 files changed, 32 insertions, 11 deletions
diff --git a/model/lstm.py b/model/lstm.py
index 4d715d5..d928c88 100644
--- a/model/lstm.py
+++ b/model/lstm.py
@@ -19,30 +19,34 @@ class Model():
inp[:, :, None]).astype(theano.config.floatX)
in_onehot.name = 'in_onehot'
+ costs_xreg = []
+
# Construct hidden states
- dims = [config.io_dim] + config.hidden_dims
+ dims = [config.io_dim]
hidden = [in_onehot.dimshuffle(1, 0, 2)]
bricks = []
states = []
- for i in xrange(1, len(dims)):
- init_state = theano.shared(numpy.zeros((config.num_seqs, dims[i])).astype(theano.config.floatX),
+ for i in xrange(1, len(config.layers)+1):
+ p = config.layers[i-1]
+
+ init_state = theano.shared(numpy.zeros((config.num_seqs, p['dim'])).astype(theano.config.floatX),
name='st0_%d'%i)
- init_cell = theano.shared(numpy.zeros((config.num_seqs, dims[i])).astype(theano.config.floatX),
+ init_cell = theano.shared(numpy.zeros((config.num_seqs, p['dim'])).astype(theano.config.floatX),
name='cell0_%d'%i)
- linear = Linear(input_dim=dims[i-1], output_dim=4*dims[i],
+ linear = Linear(input_dim=dims[i-1], output_dim=4*p['dim'],
name="lstm_in_%d"%i)
bricks.append(linear)
inter = linear.apply(hidden[-1])
if config.i2h_all and i > 1:
- linear2 = Linear(input_dim=dims[0], output_dim=4*dims[i],
+ linear2 = Linear(input_dim=dims[0], output_dim=4*p['dim'],
name="lstm_in0_%d"%i)
bricks.append(linear2)
inter = inter + linear2.apply(hidden[0])
inter.name = 'inter_bis_%d'%i
- lstm = LSTM(dim=dims[i], activation=config.activation_function,
+ lstm = LSTM(dim=p['dim'], activation=config.activation_function,
name="lstm_rec_%d"%i)
bricks.append(lstm)
@@ -52,6 +56,17 @@ class Model():
states.append((init_state, new_hidden[-1, :, :]))
states.append((init_cell, new_cells[-1, :, :]))
+ if 'xreg' in p and p['xreg'] is not None:
+ n, s, w1, w2, w3 = p['xreg']
+ cost_x1 = w1 * ((new_hidden.mean(axis=2) - s)**2).mean()
+ cost_x2 = w2 * ((new_hidden.mean(axis=(0,1)) - s)**2).mean()
+ cost_x3 = -w3 * abs(new_hidden - s).mean()
+ cost_x1.name = 'cost_x1_%d'%i
+ cost_x2.name = 'cost_x2_%d'%i
+ cost_x3.name = 'cost_x3_%d'%i
+ costs_xreg += [cost_x1, cost_x2, cost_x3]
+
+ dims.append(p['dim'])
hidden.append(new_hidden)
for i, (u, v) in enumerate(states):
@@ -79,13 +94,17 @@ class Model():
print "**** inp", inp.dtype
print "**** out", out.dtype
print "**** pred", pred.dtype
- cost = Softmax().categorical_cross_entropy(inp[:, 1:].flatten(),
+ cost0 = Softmax().categorical_cross_entropy(inp[:, 1:].flatten(),
out[:, :-1, :].reshape((inp.shape[0]*(inp.shape[1]-1),
config.io_dim))).mean()
+ cost0.name = 'cost0'
error_rate = tensor.neq(inp[:, 1:].flatten(), pred[:, :-1].flatten()).astype(theano.config.floatX).mean()
- print "**** cost", cost.dtype
+ print "**** cost0", cost0.dtype
print "**** error_rate", error_rate.dtype
+ costs = [cost0] + costs_xreg
+ cost = sum(costs)
+
# Initialize all bricks
for brick in bricks:
brick.weights_init = IsotropicGaussian(0.1)
@@ -93,13 +112,14 @@ class Model():
brick.initialize()
# Apply noise and dropout
- cg = ComputationGraph([cost, error_rate])
+ cg = ComputationGraph([cost, error_rate] + costs)
if config.w_noise_std > 0:
noise_vars = VariableFilter(roles=[WEIGHT])(cg)
cg = apply_noise(cg, noise_vars, config.w_noise_std)
if config.i_dropout > 0:
cg = apply_dropout(cg, hidden[1:], config.i_dropout)
- [cost_reg, error_rate_reg] = cg.outputs
+ [cost_reg, error_rate_reg] = cg.outputs[:2]
+ costs_reg = cg.outputs[2:]
print "**** cost_reg", cost_reg.dtype
print "**** error_rate_reg", error_rate_reg.dtype
@@ -119,6 +139,7 @@ class Model():
error_rate.name = 'error_rate'
error_rate_reg.name = 'error_rate_reg'
self.monitor_vars = [[cost_reg],
+ costs_reg,
[error_rate_reg]]
self.out = out