#!/usr/bin/env python2

import importlib
import logging
import operator
import os
import sys
from functools import reduce

from theano import tensor

from blocks import roles
from blocks.algorithms import (AdaDelta, CompositeRule, GradientDescent,
                               RemoveNotFinite, StepRule, Momentum)
from blocks.extensions import Printing, FinishAfter
from blocks.extensions.monitoring import (DataStreamMonitoring,
                                          TrainingDataMonitoring)

import blocks
blocks.config.default_seed = 123    # fix the global seed for reproducibility

# Plotting is optional: blocks.extras may not be installed.
try:
    from blocks.extras.extensions.plot import Plot
    use_plot = True
except ImportError:
    use_plot = False

from blocks.filter import VariableFilter
from blocks.graph import ComputationGraph, apply_dropout, apply_noise
from blocks.main_loop import MainLoop
from blocks.model import Model

from ext_saveload import SaveLoadParams
from ext_test import RunOnTest

logger = logging.getLogger(__name__)


if __name__ == "__main__":
    # Make logger.info output visible on the console.
    logging.basicConfig(level=logging.INFO)

    if len(sys.argv) != 2:
        print >> sys.stderr, 'Usage: %s config' % sys.argv[0]
        sys.exit(1)
    model_name = sys.argv[1]
    config = importlib.import_module('.%s' % model_name, 'config')

    # Log every simple (scalar/sequence) setting found in the config module.
    logger.info('# Configuration: %s' % config.__name__)
    for key in dir(config):
        if not key.startswith('__') and isinstance(getattr(config, key),
                                                   (int, str, list, tuple)):
            logger.info('    %20s %s' % (key, str(getattr(config, key))))

    model = config.Model(config)
    model.initialize()

    stream = config.Stream(config)
    inputs = stream.inputs()
    req_vars = model.cost.inputs

    train_stream = stream.train(req_vars)
    valid_stream = stream.valid(req_vars)

    # Monitor the training cost plus every variable tagged with the COST role.
    cost = model.cost(**inputs)
    cg = ComputationGraph(cost)
    monitored = set([cost] + VariableFilter(roles=[roles.COST])(cg.variables))

    # Use a separate validation cost if the model defines one.
    valid_monitored = monitored
    if hasattr(model, 'valid_cost'):
        valid_cost = model.valid_cost(**inputs)
        valid_cg = ComputationGraph(valid_cost)
        valid_monitored = set(
            [valid_cost] + VariableFilter(roles=[roles.COST])(valid_cg.variables))

    # Apply dropout and/or noise as graph transformations, then rebuild the
    # cost from the transformed graph.
    if hasattr(config, 'dropout') and config.dropout < 1.0:
        cg = apply_dropout(cg, config.dropout_inputs(cg), config.dropout)
    if hasattr(config, 'noise') and config.noise > 0.0:
        cg = apply_noise(cg, config.noise_inputs(cg), config.noise)
    cost = cg.outputs[0]
    cg = Model(cost)

    logger.info('# Parameter shapes:')
    parameters_size = 0
    for key, value in cg.get_params().iteritems():
        logger.info('    %20s %s' % (value.get_value().shape, key))
        parameters_size += reduce(operator.mul, value.get_value().shape, 1)
    logger.info('Total number of parameters: %d in %d matrices'
                % (parameters_size, len(cg.get_params())))

    # Fall back to AdaDelta when the config does not specify a step rule.
    if hasattr(config, 'step_rule'):
        step_rule = config.step_rule
    else:
        step_rule = AdaDelta()

    # RemoveNotFinite runs first so NaN/inf steps never reach the parameters.
    params = cg.parameters
    algorithm = GradientDescent(
        cost=cost,
        step_rule=CompositeRule([RemoveNotFinite(), step_rule]),
        params=params)

    plot_vars = [['valid_' + x.name for x in valid_monitored]]
    logger.info('Plotted variables: %s' % str(plot_vars))

    dump_path = os.path.join('model_data', model_name) + '.pkl'
    logger.info('Dump path: %s' % dump_path)

    extensions = [TrainingDataMonitoring(monitored, prefix='train',
                                         every_n_batches=1000),
                  DataStreamMonitoring(valid_monitored, valid_stream,
                                       prefix='valid',
                                       every_n_batches=1000),
                  Printing(every_n_batches=1000),

                  SaveLoadParams(dump_path, cg,
                                 before_training=True,   # before training -> load params
                                 every_n_batches=1000,   # every N batches -> save params
                                 after_epoch=True,       # after epoch -> save params
                                 after_training=True,    # after training -> save params
                                 ),

                  RunOnTest(model_name,
                            model,
                            stream,
                            every_n_batches=1000),
                  ]

    if use_plot:
        extensions.append(Plot(model_name, channels=plot_vars,
                               every_n_batches=500))

    main_loop = MainLoop(
        model=cg,
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=extensions)
    main_loop.run()
    main_loop.profile.report()
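
# ----------------------------------------------------------------------------
# For reference, a minimal sketch of the interface this script assumes a
# config module (config/<model_name>.py) exposes, inferred from the attribute
# accesses above. The bodies below are hypothetical placeholders, not code
# from this repository:
#
#     class Model(object):
#         def __init__(self, config): ...   # build the network from config
#         def initialize(self): ...         # initialize brick parameters
#         # cost: assumed to be an @application method, so it exposes
#         # cost.inputs and returns a scalar Theano expression when called
#         # as cost(**inputs)
#
#     class Stream(object):
#         def __init__(self, config): ...
#         def inputs(self): ...             # dict of Theano input variables
#         def train(self, req_vars): ...    # training data stream
#         def valid(self, req_vars): ...    # validation data stream
#
#     dropout = 0.5      # optional: values < 1.0 enable dropout
#     noise = 0.01       # optional: values > 0.0 enable additive noise
#     step_rule = Momentum(learning_rate=0.001, momentum=0.9)  # optional
# ----------------------------------------------------------------------------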