deepmind_attentive_reader.py (882B)
# Hyper-parameter settings for an attentive-reader model configuration.
# NOTE(review): presumably imported as a module by the training entry point,
# which reads these module-level names -- confirm against the caller.

from blocks.bricks import Tanh
from blocks.algorithms import (
    BasicMomentum,
    AdaDelta,
    RMSProp,
    Adam,
    CompositeRule,
    StepClipping,
    Momentum,
)
from blocks.initialization import IsotropicGaussian, Constant

from model.attentive_reader import Model


# --- Batching ---------------------------------------------------------------
batch_size = 32
# NOTE(review): looks like the number of batches gathered together before
# sorting -- confirm against the data-stream code.
sort_batch_count = 20

shuffle_questions = True

concat_ctx_and_question = False

# --- Vocabulary / embedding sizes -------------------------------------------
n_entities = 550
embed_size = 200

# --- Recurrent encoders -----------------------------------------------------
# Sizes are lists; a single entry is given for each encoder here.
ctx_lstm_size = [256]
ctx_skip_connections = True

question_lstm_size = [256]
question_skip_connections = True

# --- Attention MLP ----------------------------------------------------------
# One hidden size paired with one Tanh activation instance.
attention_mlp_hidden = [100]
attention_mlp_activations = [Tanh()]

# --- Output MLP -------------------------------------------------------------
# Both lists are empty in this configuration.
out_mlp_hidden = []
out_mlp_activations = []

# --- Optimisation -----------------------------------------------------------
# RMSProp followed by BasicMomentum, combined through CompositeRule.
step_rule = CompositeRule(
    [
        RMSProp(decay_rate=0.95, learning_rate=5e-5),
        BasicMomentum(momentum=0.9),
    ]
)

# --- Regularisation ---------------------------------------------------------
dropout = 0.2
w_noise = 0.0

# --- Monitoring intervals ---------------------------------------------------
# NOTE(review): units (iterations vs. batches) are not visible here -- confirm.
valid_freq = 1000
save_freq = 1000
print_freq = 100

# --- Parameter initialisation -----------------------------------------------
weights_init = IsotropicGaussian(0.01)
biases_init = Constant(0.0)