summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- REMARKS 1
-rw-r--r-- config/lstm-xreg-relu.py 48
-rw-r--r-- config/lstm-xreg.py 4
-rw-r--r-- model/lstm.py 6
4 files changed, 55 insertions, 4 deletions
diff --git a/REMARKS b/REMARKS
new file mode 100644
index 0000000..c44ca5f
--- /dev/null
+++ b/REMARKS
@@ -0,0 +1 @@
+- lstm-xreg-relu : does not converge at all, cost is stuck around 3.6 and error rate is 86%
diff --git a/config/lstm-xreg-relu.py b/config/lstm-xreg-relu.py
new file mode 100644
index 0000000..7697558
--- /dev/null
+++ b/config/lstm-xreg-relu.py
@@ -0,0 +1,48 @@
+from blocks.algorithms import AdaDelta
+from blocks.bricks import Tanh, Rectifier
+
+from model.lstm import Model
+
+dataset = 'data/logcompil.txt'
+io_dim = 256
+
+# An epoch will be composed of 'num_seqs' sequences of len 'seq_len'
+# divided into chunks of length 'seq_div_size'
+num_seqs = 50
+seq_len = 5000
+seq_div_size = 200
+
+layers = [
+ {'dim': 1024,
+ 'xreg': (768, 0.1, 50, 5, 5)
+ },
+ {'dim': 1024,
+ 'xreg': (768, 0.1, 50, 5, 5)
+ },
+ {'dim': 1024,
+ },
+]
+activation_function = Rectifier()
+
+i2h_all = True # input to all hidden layers or only first layer
+h2o_all = True # all hidden layers to output or only last layer
+
+w_noise_std = 0.02
+i_dropout = 0.5
+
+l1_reg = 0
+
+step_rule = AdaDelta()
+
+# parameter saving freq (number of batches)
+monitor_freq = 100
+save_freq = 100
+
+# used for sample generation and IRC mode
+sample_temperature = 0.7 #0.5
+
+# do we want to generate samples at times during training?
+sample_len = 1000
+sample_freq = 100
+sample_init = '\nalex\ttu crois?\n'
+
diff --git a/config/lstm-xreg.py b/config/lstm-xreg.py
index 66f7c51..f8c5094 100644
--- a/config/lstm-xreg.py
+++ b/config/lstm-xreg.py
@@ -14,10 +14,10 @@ seq_div_size = 200
layers = [
{'dim': 1024,
- 'xreg': (768, 0.1, 10, 10, 6)
+ 'xreg': (768, 0.1, 10, 10, 10, 2)
},
{'dim': 1024,
- 'xreg': (768, 0.1, 10, 10, 6)
+ 'xreg': (768, 0.1, 10, 10, 10, 5)
},
{'dim': 1024,
},
diff --git a/model/lstm.py b/model/lstm.py
index d928c88..10b090c 100644
--- a/model/lstm.py
+++ b/model/lstm.py
@@ -57,14 +57,16 @@ class Model():
states.append((init_cell, new_cells[-1, :, :]))
if 'xreg' in p and p['xreg'] is not None:
- n, s, w1, w2, w3 = p['xreg']
+ n, s, w1, w2, w3, w4 = p['xreg']
cost_x1 = w1 * ((new_hidden.mean(axis=2) - s)**2).mean()
cost_x2 = w2 * ((new_hidden.mean(axis=(0,1)) - s)**2).mean()
cost_x3 = -w3 * abs(new_hidden - s).mean()
+ cost_x4 = w4 * abs(new_hidden[:-1,:,:]-new_hidden[1:,:,:]).mean()
cost_x1.name = 'cost_x1_%d'%i
cost_x2.name = 'cost_x2_%d'%i
cost_x3.name = 'cost_x3_%d'%i
- costs_xreg += [cost_x1, cost_x2, cost_x3]
+ cost_x4.name = 'cost_x4_%d'%i
+ costs_xreg += [cost_x1, cost_x2, cost_x3, cost_x4]
dims.append(p['dim'])
hidden.append(new_hidden)