author | Thomas Mesnard <thomas.mesnard@ens.fr> | 2015-12-28 20:51:50 +0100 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2016-04-21 10:21:42 +0200 |
commit | 072d26e766931007a0f243674f7dfdff5c3104e9 (patch) | |
tree | ae3639f4ff3f8e0e3e9767c15322171aa6f2169e /ctc.py | |
parent | e8e37dee0c5c846b1aa2dd24dc99095191f72a9b (diff) | |
download | pgm-ctc-072d26e766931007a0f243674f7dfdff5c3104e9.tar.gz pgm-ctc-072d26e766931007a0f243674f7dfdff5c3104e9.zip |
Add plot
More TIMIT; log domain
TIMIT: more complexity
Nice poster
Beautify code (mostly, add comments)
Add final stuff.
Diffstat (limited to 'ctc.py')
-rw-r--r-- | ctc.py | 69 |
1 file changed, 67 insertions, 2 deletions
```diff
@@ -73,12 +73,77 @@ class CTC(Brick):
                         sequences=[probs, probs_mask],
                         outputs_info=[alpha0, c0])
 
-        prob = tensor.log(c).sum(axis=0) + tensor.log(alpha[-1][tensor.arange(B), 2*l_len.astype('int32')-1]
-                                                    + alpha[-1][tensor.arange(B), 2*l_len.astype('int32')])
+        # c = theano.printing.Print('c')(c)
+        last_alpha = alpha[-1]
+        # last_alpha = theano.printing.Print('a-1')(last_alpha)
+
+        prob = tensor.log(c).sum(axis=0) + tensor.log(last_alpha[tensor.arange(B), 2*l_len.astype('int32')-1]
+                                                    + last_alpha[tensor.arange(B), 2*l_len.astype('int32')]
+                                                    + 1e-30)
 
         # return the log probability of the labellings
         return -prob
 
+    def apply_log_domain(self, l, probs, l_len=None, probs_mask=None):
+        # Does the same computation as apply, but alpha is kept in the log domain.
+        # This avoids numerical underflow issues that were not corrected in the previous version.
+
+        def _log(a):
+            return tensor.log(tensor.clip(a, 1e-12, 1e12))
+
+        def _log_add(a, b):
+            maximum = tensor.maximum(a, b)
+            return maximum + tensor.log1p(tensor.exp(a + b - 2 * maximum))
+
+        def _log_mul(a, b):
+            return a + b
+
+        # See comments above
+        B = probs.shape[1]
+        C = probs.shape[2] - 1
+        L = l.shape[0]
+        S = 2 * L + 1
+
+        l_blk = C * tensor.ones((S, B), dtype='int32')
+        l_blk = tensor.set_subtensor(l_blk[1::2, :], l)
+        l_blk = l_blk.T                 # now l_blk is B x S
+
+        alpha0 = tensor.concatenate([tensor.ones((B, 1)),
+                                     tensor.zeros((B, S-1))
+                                     ], axis=1)
+        alpha0 = _log(alpha0)
+
+        l_blk_2 = tensor.concatenate([-tensor.ones((B, 2)), l_blk[:, :-2]], axis=1)
+        l_case2 = tensor.neq(l_blk, C) * tensor.neq(l_blk, l_blk_2)
+
+        def recursion(p, p_mask, prev_alpha):
+            prev_alpha_1 = tensor.concatenate([tensor.zeros((B, 1)), prev_alpha[:, :-1]], axis=1)
+            prev_alpha_2 = tensor.concatenate([tensor.zeros((B, 2)), prev_alpha[:, :-2]], axis=1)
+
+            alpha_bar1 = tensor.set_subtensor(prev_alpha[:, 1:], _log_add(prev_alpha[:, 1:], prev_alpha[:, :-1]))
+            alpha_bar2 = tensor.set_subtensor(alpha_bar1[:, 2:], _log_add(alpha_bar1[:, 2:], prev_alpha[:, :-2]))
+
+            alpha_bar = tensor.switch(l_case2, alpha_bar2, alpha_bar1)
+
+            probs = _log(p[tensor.arange(B)[:, None].repeat(S, axis=1).flatten(), l_blk.flatten()].reshape((B, S)))
+            next_alpha = _log_mul(alpha_bar, probs)
+            next_alpha = tensor.switch(p_mask[:, None], next_alpha, prev_alpha)
+
+            return next_alpha
+
+        alpha, _ = scan(fn=recursion,
+                        sequences=[probs, probs_mask],
+                        outputs_info=[alpha0])
+
+        last_alpha = alpha[-1]
+        # last_alpha = theano.printing.Print('a-1')(last_alpha)
+
+        prob = _log_add(last_alpha[tensor.arange(B), 2*l_len.astype('int32')-1],
+                        last_alpha[tensor.arange(B), 2*l_len.astype('int32')])
+
+        # return the negative log probability of the labellings
+        return -prob
+
     def best_path_decoding(self, probs, probs_mask=None):
         # probs is T x B x C+1
```
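As a side note on the log-domain trick introduced by this commit: the new `_log_add` helper relies on the identity log(e^a + e^b) = max(a, b) + log1p(e^(a + b - 2·max(a, b))), which keeps the exponent non-positive. The following standalone sketch (plain NumPy, for illustration only; the committed code operates on Theano tensors, and the function name and sample values here are made up) shows the identity and why the naive computation underflows:

```python
import numpy as np

def log_add(a, b):
    # Stable log(exp(a) + exp(b)): max + log1p(exp(min - max)).
    # Note a + b - 2*max(a, b) == min(a, b) - max(a, b), so exp() gets a non-positive argument.
    m = np.maximum(a, b)
    return m + np.log1p(np.exp(a + b - 2 * m))

a, b = -800.0, -801.0                   # exp() of either value underflows to 0.0 in float64
print(log_add(a, b))                    # ~ -799.687, finite
print(np.log(np.exp(a) + np.exp(b)))    # -inf: the naive version underflows
```

This is why the forward recursion over the blank-interleaved label sequence (length S = 2L+1) can be accumulated over long utterances without the probabilities collapsing to zero, which the earlier `apply` method only papered over with the `1e-30` additive constant.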