taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git
Log | Files | Refs | README

commit 88cdc3f8047a05bc5971eaa915ca6626f89a3e78
parent bd08e452093bba68fe2d79b1e9da76488b203720
Author: AdeB <adbrebs@gmail.com>
Date:   Wed, 24 Jun 2015 15:12:15 -0400

New configs; moved the training step rule out of train.py into the config files.

Diffstat:
Mconfig/dest_simple_mlp_emb_only.py | 22+++++++++++-----------
Aconfig/memory_network_adeb.py | 46++++++++++++++++++++++++++++++++++++++++++++++
Mtest.py | 5+++--
Mtrain.py | 5++---
4 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/config/dest_simple_mlp_emb_only.py b/config/dest_simple_mlp_emb_only.py @@ -6,26 +6,26 @@ from model.mlp_emb import Model, Stream use_cuts_for_training = True dim_embeddings = [ - ('origin_call', data.origin_call_train_size, 10), - ('origin_stand', data.stands_size, 10), - ('week_of_year', 52, 10), - ('day_of_week', 7, 10), + # ('origin_call', data.origin_call_train_size, 100), + # ('origin_stand', data.stands_size, 100), + # ('week_of_year', 52, 100), + # ('day_of_week', 7, 100), ('qhour_of_day', 24 * 4, 10), - ('day_type', 3, 10), + ('day_type', 3, 1), ] dim_input = sum(x for (_, _, x) in dim_embeddings) -dim_hidden = [200, 100] +dim_hidden = [10, 10] output_mode = "destination" dim_output = 2 -embed_weights_init = IsotropicGaussian(0.001) +embed_weights_init = IsotropicGaussian(0.01) mlp_weights_init = IsotropicGaussian(0.01) -mlp_biases_init = Constant(0.001) +mlp_biases_init = IsotropicGaussian(0.001) -learning_rate = 0.0001 -momentum = 0.99 -batch_size = 32 +learning_rate = 0.001 +momentum = 0.9 +batch_size = 100 valid_set = 'cuts/test_times_0' max_splits = 100 diff --git a/config/memory_network_adeb.py b/config/memory_network_adeb.py @@ -0,0 +1,46 @@ +from blocks.initialization import IsotropicGaussian, Constant +from blocks.algorithms import AdaDelta, CompositeRule, GradientDescent, RemoveNotFinite, StepRule, Momentum + +import data +from model.memory_network import Model, Stream + + +n_begin_end_pts = 5 # how many points we consider at the beginning and end of the known trajectory + +dim_embeddings = [ + ('origin_call', data.origin_call_train_size, 10), + ('origin_stand', data.stands_size, 10), + ('week_of_year', 52, 10), + ('day_of_week', 7, 10), + ('qhour_of_day', 24 * 4, 10), + ('day_type', 3, 10), +] + + +class MLPConfig(object): + __slots__ = ('dim_input', 'dim_hidden', 'dim_output', 'weights_init', 'biases_init') + +prefix_encoder = MLPConfig() +prefix_encoder.dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings) 
+prefix_encoder.dim_hidden = [100, 100] +prefix_encoder.weights_init = IsotropicGaussian(0.001) +prefix_encoder.biases_init = Constant(0.0001) + +candidate_encoder = MLPConfig() +candidate_encoder.dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings) +candidate_encoder.dim_hidden = [100, 100] +candidate_encoder.weights_init = IsotropicGaussian(0.001) +candidate_encoder.biases_init = Constant(0.0001) + + +embed_weights_init = IsotropicGaussian(0.001) + +step_rule = Momentum(learning_rate=0.001, momentum=0.9) +batch_size = 32 + +valid_set = 'cuts/test_times_0' +max_splits = 1 +num_cuts = 1000 + +train_candidate_size = 1000 +valid_candidate_size = 10000 diff --git a/test.py b/test.py @@ -1,11 +1,11 @@ #!/usr/bin/env python +import cPickle import sys import os import importlib import csv -from blocks.dump import load_parameter_values from blocks.model import Model @@ -24,7 +24,8 @@ if __name__ == "__main__": test_stream = stream.test(req_vars_test) model = Model(model_config.predict(**inputs)) - parameters = load_parameter_values(os.path.join('model_data', model_name, 'params.npz')) + with open(os.path.join('model_data', "{}.pkl".format(model_name))) as f: + parameters = cPickle.load(f) model.set_param_values(parameters) if 'destination' in outputs: diff --git a/train.py b/train.py @@ -11,7 +11,7 @@ from functools import reduce from theano import tensor from blocks import roles -from blocks.algorithms import AdaDelta, CompositeRule, GradientDescent, RemoveNotFinite, StepRule +from blocks.algorithms import AdaDelta, CompositeRule, GradientDescent, RemoveNotFinite, StepRule, Momentum from blocks.extensions import Printing, FinishAfter, SimpleExtension from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring @@ -136,8 +136,7 @@ if __name__ == "__main__": cost=cost, step_rule=CompositeRule([ ElementwiseRemoveNotFinite(), - AdaDelta(), - #Momentum(learning_rate=config.learning_rate, momentum=config.momentum), + 
config.step_rule, ]), params=params)