summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- config/hpc-lstm-1.py | 10
-rw-r--r-- model/hpc_lstm.py | 18
2 files changed, 17 insertions, 11 deletions
diff --git a/config/hpc-lstm-1.py b/config/hpc-lstm-1.py
index dac0ff1..abd4e38 100644
--- a/config/hpc-lstm-1.py
+++ b/config/hpc-lstm-1.py
@@ -1,10 +1,17 @@
+import numpy
+from numpy.random import RandomState
+
from blocks.algorithms import AdaDelta, Momentum
from blocks.bricks import Tanh, Rectifier
from model.hpc_lstm import Model
dataset = 'data/logcompil-2016-03-07.txt'
+
io_dim = 256
+repr_dim = 512
+embedding_matrix = (RandomState(42).binomial(1, 20./512., ((io_dim, repr_dim)))
+ -RandomState(123).binomial(1, 20./512., ((io_dim, repr_dim))))
# An epoch will be composed of 'num_seqs' sequences of len 'seq_len'
# divided in chunks of length 'seq_div_size'
@@ -13,9 +20,8 @@ seq_len = 2000
seq_div_size = 100
hidden_dims = [128, 128, 256, 512]
-cost_factors = [10., 1., 1., 1.]
+cost_factors = [1., 1., 1., 1.]
hidden_q = [0.1, 0.15, 0.22, 0.33]
-error_scale_factor = [2., 1.5, 1.5, 1.5]
activation_function = Tanh()
out_hidden = [512]
diff --git a/model/hpc_lstm.py b/model/hpc_lstm.py
index 5bad8af..395646c 100644
--- a/model/hpc_lstm.py
+++ b/model/hpc_lstm.py
@@ -17,9 +17,10 @@ class Model():
def __init__(self, config):
inp = tensor.imatrix('bytes')
- in_onehot = tensor.eq(tensor.arange(config.io_dim, dtype='int16').reshape((1, 1, config.io_dim)),
- inp[:, :, None])
- in_onehot.name = 'in_onehot'
+ embed = theano.shared(config.embedding_matrix.astype(theano.config.floatX),
+ name='embedding_matrix')
+ in_repr = embed[inp.flatten(), :].reshape((inp.shape[0], inp.shape[1], config.repr_dim))
+ in_repr.name = 'in_repr'
bricks = []
states = []
@@ -27,21 +28,20 @@ class Model():
# Construct predictive LSTM hierarchy
hidden = []
costs = []
- next_target = in_onehot.dimshuffle(1, 0, 2)
- for i, (hdim, cf, q, esf) in enumerate(zip(config.hidden_dims,
+ next_target = in_repr.dimshuffle(1, 0, 2)
+ for i, (hdim, cf, q) in enumerate(zip(config.hidden_dims,
config.cost_factors,
- config.hidden_q,
- config.error_scale_factor)):
+ config.hidden_q)):
init_state = theano.shared(numpy.zeros((config.num_seqs, hdim)).astype(theano.config.floatX),
name='st0_%d'%i)
init_cell = theano.shared(numpy.zeros((config.num_seqs, hdim)).astype(theano.config.floatX),
name='cell0_%d'%i)
- linear = Linear(input_dim=config.io_dim, output_dim=4*hdim,
+ linear = Linear(input_dim=config.repr_dim, output_dim=4*hdim,
name="lstm_in_%d"%i)
lstm = LSTM(dim=hdim, activation=config.activation_function,
name="lstm_rec_%d"%i)
- linear2 = Linear(input_dim=hdim, output_dim=config.io_dim, name='lstm_out_%d'%i)
+ linear2 = Linear(input_dim=hdim, output_dim=config.repr_dim, name='lstm_out_%d'%i)
tanh = Tanh('lstm_out_tanh_%d'%i)
bricks += [linear, lstm, linear2, tanh]
if i > 0: