taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git
Log | Files | Refs | README

commit 95b565afb7e1c2a6eb23ca9f7c13cd6efaf55a39
parent 1556e9087f7e49bd75c8e236d2d3fb4fd936dc40
Author: Alex Auvolat <alex.auvolat@ens.fr>
Date:   Tue,  5 May 2015 09:30:32 -0400

New config (added a hidden layer), small changes to train.py

Diffstat:
A config/simple_mlp_tgtcls_1.py | 25 +++++++++++++++++++++++++
M train.py | 4 ++--
2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/config/simple_mlp_tgtcls_1.py b/config/simple_mlp_tgtcls_1.py
@@ -0,0 +1,25 @@
+import cPickle
+
+import data
+
+import model.simple_mlp_tgtcls as model
+
+n_dow = 7 # number of division for dayofweek/dayofmonth/hourofday
+n_dom = 31
+n_hour = 24
+
+n_begin_end_pts = 5 # how many points we consider at the beginning and end of the known trajectory
+n_end_pts = 5
+
+n_valid = 1000
+
+with open(data.DATA_PATH + "/arrival-clusters.pkl") as f: tgtcls = cPickle.load(f)
+
+dim_embed = 10
+dim_input = n_begin_end_pts * 2 * 2 + dim_embed + dim_embed
+dim_hidden = [500]
+dim_output = tgtcls.shape[0]
+
+learning_rate = 0.0001
+momentum = 0.99
+batch_size = 32
diff --git a/train.py b/train.py
@@ -110,7 +110,7 @@ def main():
 # Checkpoint('model.pkl', every_n_batches=100),
 Dump('model_data/' + model_name, every_n_batches=1000),
 LoadFromDump('model_data/' + model_name),
- FinishAfter(after_epoch=5)
+ FinishAfter(after_epoch=10),
 ]
 main_loop = MainLoop(
@@ -124,7 +124,7 @@ def main():
 # Produce an output on the test data
 test_stream = setup_test_stream()
- outfile = open("test-output.csv", "w")
+ outfile = open("test-output-%s.csv" % model_name, "w")
 outcsv = csv.writer(outfile)
 outcsv.writerow(["TRIP_ID", "LATITUDE", "LONGITUDE"])
 for out in apply_model.Apply(outputs=outputs, stream=test_stream, return_vars=['trip_id', 'outputs']):