diff options
-rw-r--r-- | config/hpc-lstm-1.py | 4 | ||||
-rw-r--r-- | gentext.py | 1 | ||||
-rw-r--r-- | model/hpc_lstm.py | 10 |
3 files changed, 9 insertions, 6 deletions
diff --git a/config/hpc-lstm-1.py b/config/hpc-lstm-1.py index ad8b121..dac0ff1 100644 --- a/config/hpc-lstm-1.py +++ b/config/hpc-lstm-1.py @@ -13,8 +13,8 @@ seq_len = 2000 seq_div_size = 100 hidden_dims = [128, 128, 256, 512] -cost_factors = [1., 1., 1., 1.] -hidden_q = [0.1, 0.2, 0.2, 0.4] +cost_factors = [10., 1., 1., 1.] +hidden_q = [0.1, 0.15, 0.22, 0.33] error_scale_factor = [2., 1.5, 1.5, 1.5] activation_function = Tanh() @@ -59,3 +59,4 @@ class GenText(SimpleExtension): +# vim: set sts=4 ts=4 sw=4 tw=0 et : diff --git a/model/hpc_lstm.py b/model/hpc_lstm.py index 3e4e878..5bad8af 100644 --- a/model/hpc_lstm.py +++ b/model/hpc_lstm.py @@ -49,6 +49,7 @@ class Model(): name='lstm_in2_%d'%i) bricks += [linear1] + next_target = tensor.cast(next_target, dtype=theano.config.floatX) inter = linear.apply(theano.gradient.disconnected_grad(next_target)) if i > 0: inter += linear1.apply(theano.gradient.disconnected_grad(hidden[-1][:-1,:,:])) @@ -60,9 +61,10 @@ class Model(): hidden += [tensor.concatenate([init_state[None,:,:], new_hidden],axis=0)] pred = tanh.apply(linear2.apply(hidden[-1][:-1,:,:])) + costs += [numpy.float32(cf) * (-next_target * pred).sum(axis=2).mean()] + costs += [numpy.float32(cf) * q * abs(pred).sum(axis=2).mean()] diff = next_target - pred - costs += [numpy.float32(cf) * ((abs(next_target)+q)*(diff**2)).sum(axis=2).mean()] - next_target = diff*esf + next_target = tensor.ge(diff, 0.5) - tensor.le(diff, -0.5) # Construct output from hidden states @@ -74,7 +76,7 @@ class Model(): pred_linear = Linear(input_dim=dim, output_dim=out_dims[0], name='pred_linear_%d'%i) bricks.append(pred_linear) - lin = state if i == 0 else theano.gradient.disconnected_grad(state) + lin = theano.gradient.disconnected_grad(state) out_parts.append(pred_linear.apply(lin)) # Do prediction and calculate cost @@ -98,7 +100,7 @@ class Model(): error_rate = tensor.neq(inp.flatten(), pred[:,:-1].flatten()).mean() sgd_cost = cost + sum(costs) - + # Initialize all bricks for brick in bricks: brick.weights_init = IsotropicGaussian(0.1) |