taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git
Log | Files | Refs | README

rnn_lag_tgtcls_1.py (1265B)


      1 import os
      2 import cPickle
      3 
      4 from blocks import roles
      5 from blocks.bricks import Rectifier
      6 from blocks.filter import VariableFilter
      7 from blocks.initialization import IsotropicGaussian, Constant
      8 
      9 import data
     10 from model.rnn_lag_tgtcls import Model, Stream
     11 
     12 class EmbedderConfig(object):
     13     __slots__ = ('dim_embeddings', 'embed_weights_init')
     14 
     15 pre_embedder = EmbedderConfig()
     16 pre_embedder.embed_weights_init = IsotropicGaussian(0.001)
     17 pre_embedder.dim_embeddings = [ 
     18     ('week_of_year', 52, 10),
     19     ('day_of_week', 7, 10),
     20     ('qhour_of_day', 24 * 4, 10),
     21     ('day_type', 3, 10),
     22     ('taxi_id', 448, 10),
     23 ]
     24 
     25 post_embedder = EmbedderConfig()
     26 post_embedder.embed_weights_init = IsotropicGaussian(0.001)
     27 post_embedder.dim_embeddings = [ 
     28     ('origin_call', data.origin_call_train_size, 10), 
     29     ('origin_stand', data.stands_size, 10),
     30 ]
     31 
     32 with open(os.path.join(data.path, 'arrival-clusters.pkl')) as f: tgtcls = cPickle.load(f)
     33 
     34 hidden_state_dim = 100 
     35 weights_init = IsotropicGaussian(0.01)
     36 biases_init = Constant(0.001)
     37 
     38 rec_to_out_dims = [200, 1000]
     39 in_to_rec_dims = [200]
     40 
     41 dropout = 0.5
     42 dropout_inputs = VariableFilter(bricks=[Rectifier], name='output')
     43 
     44 noise = 0.01
     45 noise_inputs = VariableFilter(roles=[roles.PARAMETER])
     46 
     47 batch_size = 10
     48 batch_sort_size = 10