diff options
-rw-r--r-- | config/hpc-lstm-1.py | 10 | ||||
-rw-r--r-- | model/hpc_lstm.py | 18 |
2 files changed, 17 insertions, 11 deletions
diff --git a/config/hpc-lstm-1.py b/config/hpc-lstm-1.py index dac0ff1..abd4e38 100644 --- a/config/hpc-lstm-1.py +++ b/config/hpc-lstm-1.py @@ -1,10 +1,17 @@ +import numpy +from numpy.random import RandomState + from blocks.algorithms import AdaDelta, Momentum from blocks.bricks import Tanh, Rectifier from model.hpc_lstm import Model dataset = 'data/logcompil-2016-03-07.txt' + io_dim = 256 +repr_dim = 512 +embedding_matrix = (RandomState(42).binomial(1, 20./512., ((io_dim, repr_dim))) + -RandomState(123).binomial(1, 20./512., ((io_dim, repr_dim)))) # An epoch will be composed of 'num_seqs' sequences of len 'seq_len' # divided in chunks of lengh 'seq_div_size' @@ -13,9 +20,8 @@ seq_len = 2000 seq_div_size = 100 hidden_dims = [128, 128, 256, 512] -cost_factors = [10., 1., 1., 1.] +cost_factors = [1., 1., 1., 1.] hidden_q = [0.1, 0.15, 0.22, 0.33] -error_scale_factor = [2., 1.5, 1.5, 1.5] activation_function = Tanh() out_hidden = [512] diff --git a/model/hpc_lstm.py b/model/hpc_lstm.py index 5bad8af..395646c 100644 --- a/model/hpc_lstm.py +++ b/model/hpc_lstm.py @@ -17,9 +17,10 @@ class Model(): def __init__(self, config): inp = tensor.imatrix('bytes') - in_onehot = tensor.eq(tensor.arange(config.io_dim, dtype='int16').reshape((1, 1, config.io_dim)), - inp[:, :, None]) - in_onehot.name = 'in_onehot' + embed = theano.shared(config.embedding_matrix.astype(theano.config.floatX), + name='embedding_matrix') + in_repr = embed[inp.flatten(), :].reshape((inp.shape[0], inp.shape[1], config.repr_dim)) + in_repr.name = 'in_repr' bricks = [] states = [] @@ -27,21 +28,20 @@ class Model(): # Construct predictive LSTM hierarchy hidden = [] costs = [] - next_target = in_onehot.dimshuffle(1, 0, 2) - for i, (hdim, cf, q, esf) in enumerate(zip(config.hidden_dims, + next_target = in_repr.dimshuffle(1, 0, 2) + for i, (hdim, cf, q) in enumerate(zip(config.hidden_dims, config.cost_factors, - config.hidden_q, - config.error_scale_factor)): + config.hidden_q)): init_state = theano.shared(numpy.zeros((config.num_seqs, hdim)).astype(theano.config.floatX), name='st0_%d'%i) init_cell = theano.shared(numpy.zeros((config.num_seqs, hdim)).astype(theano.config.floatX), name='cell0_%d'%i) - linear = Linear(input_dim=config.io_dim, output_dim=4*hdim, + linear = Linear(input_dim=config.repr_dim, output_dim=4*hdim, name="lstm_in_%d"%i) lstm = LSTM(dim=hdim, activation=config.activation_function, name="lstm_rec_%d"%i) - linear2 = Linear(input_dim=hdim, output_dim=config.io_dim, name='lstm_out_%d'%i) + linear2 = Linear(input_dim=hdim, output_dim=config.repr_dim, name='lstm_out_%d'%i) tanh = Tanh('lstm_out_tanh_%d'%i) bricks += [linear, lstm, linear2, tanh] if i > 0: |