Xreg hyperparameter change

author: Alex Auvolat <alex@adnab.me> 2016-04-26 16:33:02 +0200
committer: Alex Auvolat <alex@adnab.me> 2016-04-26 16:33:02 +0200
commit: b5584610a14578d0f3ebf9eea3067a0284f67288 (patch)
tree: cc23207cf82a6ee0d0bc121d1f0f6bd6e2a8e531
parent: 760587b5d9771257160fac216dfcfff852de3ccc (diff)
download: text-rnn-b5584610a14578d0f3ebf9eea3067a0284f67288.tar.gz
text-rnn-b5584610a14578d0f3ebf9eea3067a0284f67288.zip
4 files changed, 55 insertions, 4 deletions
diff --git a/REMARKS b/REMARKS
new file mode 100644
index 0000000..c44ca5f
--- /dev/null
+++ b/REMARKS
@@ -0,0 +1 @@
+- lstm-xreg-relu : does not converge at all, cost is stuck around 3.6 and error rate is 86%
diff --git a/config/lstm-xreg-relu.py b/config/lstm-xreg-relu.py
new file mode 100644
index 0000000..7697558
--- /dev/null
+++ b/config/lstm-xreg-relu.py
@@ -0,0 +1,48 @@
+from blocks.algorithms import AdaDelta
+from blocks.bricks import Tanh, Rectifier
+
+from model.lstm import Model
+
+dataset = 'data/logcompil.txt'
+io_dim = 256
+
+# An epoch will be composed of 'num_seqs' sequences of len 'seq_len'
+# divided in chunks of length 'seq_div_size'
+num_seqs = 50
+seq_len = 5000
+seq_div_size = 200
+
+layers = [
+	{'dim':		1024,
+	 'xreg': 	(768, 0.1, 50, 5, 5)
+	},
+	{'dim':		1024,
+	 'xreg': 	(768, 0.1, 50, 5, 5)
+	},
+	{'dim':		1024,
+	},
+]
+activation_function = Rectifier()
+
+i2h_all = True             # input to all hidden layers or only first layer
+h2o_all = True             # all hidden layers to output or only last layer
+
+w_noise_std = 0.02
+i_dropout = 0.5
+
+l1_reg = 0
+
+step_rule = AdaDelta()
+
+# parameter saving freq (number of batches)
+monitor_freq = 100
+save_freq = 100
+
+# used for sample generation and IRC mode
+sample_temperature = 0.7 #0.5
+
+# do we want to generate samples at times during training?
+sample_len = 1000
+sample_freq = 100
+sample_init = '\nalex\ttu crois?\n'
+
diff --git a/config/lstm-xreg.py b/config/lstm-xreg.py
index 66f7c51..f8c5094 100644
--- a/config/lstm-xreg.py
+++ b/config/lstm-xreg.py
@@ -14,10 +14,10 @@ seq_div_size = 200
 
 layers = [
 	{'dim':		1024,
-	 'xreg': 	(768, 0.1, 10, 10, 6)
+	 'xreg': 	(768, 0.1, 10, 10, 10, 2)
 	},
 	{'dim':		1024,
-	 'xreg': 	(768, 0.1, 10, 10, 6)
+	 'xreg': 	(768, 0.1, 10, 10, 10, 5)
 	},
 	{'dim':		1024,
 	},
diff --git a/model/lstm.py b/model/lstm.py
index d928c88..10b090c 100644
--- a/model/lstm.py
+++ b/model/lstm.py
@@ -57,14 +57,16 @@ class Model():
             states.append((init_cell, new_cells[-1, :, :]))
 
             if 'xreg' in p and p['xreg'] is not None:
-                n, s, w1, w2, w3 = p['xreg']
+                n, s, w1, w2, w3, w4 = p['xreg']
                 cost_x1 = w1 * ((new_hidden.mean(axis=2) - s)**2).mean()
                 cost_x2 = w2 * ((new_hidden.mean(axis=(0,1)) - s)**2).mean()
                 cost_x3 = -w3 * abs(new_hidden - s).mean()
+                cost_x4 = w4 * abs(new_hidden[:-1,:,:]-new_hidden[1:,:,:]).mean()
                 cost_x1.name = 'cost_x1_%d'%i
                 cost_x2.name = 'cost_x2_%d'%i
                 cost_x3.name = 'cost_x3_%d'%i
-                costs_xreg += [cost_x1, cost_x2, cost_x3]
+                cost_x4.name = 'cost_x4_%d'%i
+                costs_xreg += [cost_x1, cost_x2, cost_x3, cost_x4]
 
             dims.append(p['dim'])
             hidden.append(new_hidden)
author	Alex Auvolat <alex@adnab.me>	2016-04-26 16:33:02 +0200
committer	Alex Auvolat <alex@adnab.me>	2016-04-26 16:33:02 +0200
commit	b5584610a14578d0f3ebf9eea3067a0284f67288 (patch)
tree	cc23207cf82a6ee0d0bc121d1f0f6bd6e2a8e531
parent	760587b5d9771257160fac216dfcfff852de3ccc (diff)
download	text-rnn-b5584610a14578d0f3ebf9eea3067a0284f67288.tar.gz text-rnn-b5584610a14578d0f3ebf9eea3067a0284f67288.zip