3 files changed, 9 insertions, 6 deletions
diff --git a/config/hpc-lstm-1.py b/config/hpc-lstm-1.py
index ad8b121..dac0ff1 100644
--- a/config/hpc-lstm-1.py
+++ b/config/hpc-lstm-1.py
@@ -13,8 +13,8 @@ seq_len = 2000
 seq_div_size = 100
 
 hidden_dims = [128, 128, 256, 512]
-cost_factors = [1., 1., 1., 1.]
-hidden_q = [0.1, 0.2, 0.2, 0.4]
+cost_factors = [10., 1., 1., 1.]
+hidden_q = [0.1, 0.15, 0.22, 0.33]
 error_scale_factor = [2., 1.5, 1.5, 1.5]
 activation_function = Tanh()
 
diff --git a/gentext.py b/gentext.py
index 2079602..1d5d4a6 100644
--- a/gentext.py
+++ b/gentext.py
@@ -59,3 +59,4 @@ class GenText(SimpleExtension):
 
 
 
+# vim: set sts=4 ts=4 sw=4 tw=0 et :
diff --git a/model/hpc_lstm.py b/model/hpc_lstm.py
index 3e4e878..5bad8af 100644
--- a/model/hpc_lstm.py
+++ b/model/hpc_lstm.py
@@ -49,6 +49,7 @@ class Model():
                                  name='lstm_in2_%d'%i)
                 bricks += [linear1]
 
+            next_target = tensor.cast(next_target, dtype=theano.config.floatX)
             inter = linear.apply(theano.gradient.disconnected_grad(next_target))
             if i > 0:
                 inter += linear1.apply(theano.gradient.disconnected_grad(hidden[-1][:-1,:,:]))
@@ -60,9 +61,10 @@ class Model():
 
             hidden += [tensor.concatenate([init_state[None,:,:], new_hidden],axis=0)]
             pred = tanh.apply(linear2.apply(hidden[-1][:-1,:,:]))
+            costs += [numpy.float32(cf) * (-next_target * pred).sum(axis=2).mean()]
+            costs += [numpy.float32(cf) * q * abs(pred).sum(axis=2).mean()]
             diff = next_target - pred
-            costs += [numpy.float32(cf) * ((abs(next_target)+q)*(diff**2)).sum(axis=2).mean()]
-            next_target = diff*esf
+            next_target = tensor.ge(diff, 0.5) - tensor.le(diff, -0.5)
 
 
         # Construct output from hidden states
@@ -74,7 +76,7 @@ class Model():
             pred_linear = Linear(input_dim=dim, output_dim=out_dims[0],
                                 name='pred_linear_%d'%i)
             bricks.append(pred_linear)
-            lin = state if i == 0 else theano.gradient.disconnected_grad(state)
+            lin = theano.gradient.disconnected_grad(state)
             out_parts.append(pred_linear.apply(lin))
 
         # Do prediction and calculate cost
@@ -98,7 +100,7 @@ class Model():
         error_rate = tensor.neq(inp.flatten(), pred[:,:-1].flatten()).mean()
 
         sgd_cost = cost + sum(costs)
-
+            
         # Initialize all bricks
         for brick in bricks:
             brick.weights_init = IsotropicGaussian(0.1)