taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git
Log | Files | Refs | README

commit 4f68132cc9c62860bd3368e559e430961c872636
parent 6d946f29f7548c75e97f30c4356dbac200ee6cce
Author: Étienne Simon <esimon@esimon.eu>
Date:   Mon, 18 May 2015 18:57:08 -0400

Use os.path.join and close files

Diffstat:
M config/dest_simple_mlp_tgtcls_0_cs.py | 3 ++-
M config/dest_simple_mlp_tgtcls_1_cs.py | 3 ++-
M config/dest_simple_mlp_tgtcls_1_cswdt.py | 3 ++-
M config/dest_simple_mlp_tgtcls_1_cswdtx.py | 3 ++-
M config/dest_simple_mlp_tgtcls_1_cswdtx_alexandre.py | 3 ++-
M config/joint_simple_mlp_tgtcls_111_cswdtx.py | 3 ++-
M config/joint_simple_mlp_tgtcls_111_cswdtx_noise_dout.py | 3 ++-
M config/joint_simple_mlp_tgtcls_1_cswdtx.py | 3 ++-
M data/rfc4180.py | 10 ++++++----
M test.py | 9 +++++++--
10 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/config/dest_simple_mlp_tgtcls_0_cs.py b/config/dest_simple_mlp_tgtcls_0_cs.py
@@ -1,3 +1,4 @@
+import os
 import cPickle
 
 from blocks.initialization import IsotropicGaussian, Constant
@@ -11,7 +12,7 @@ n_end_pts = 5
 
 n_valid = 1000
 
-with open("%s/arrival-clusters.pkl" % data.path) as f: tgtcls = cPickle.load(f)
+with open(os.path.join(data.path, 'arrival-clusters.pkl')) as f: tgtcls = cPickle.load(f)
 
 dim_embeddings = [
     ('origin_call', data.origin_call_train_size, 10),
diff --git a/config/dest_simple_mlp_tgtcls_1_cs.py b/config/dest_simple_mlp_tgtcls_1_cs.py
@@ -1,3 +1,4 @@
+import os
 import cPickle
 
 from blocks.initialization import IsotropicGaussian, Constant
@@ -11,7 +12,7 @@ n_end_pts = 5
 
 n_valid = 1000
 
-with open("%s/arrival-clusters.pkl" % data.path) as f: tgtcls = cPickle.load(f)
+with open(os.path.join(data.path, 'arrival-clusters.pkl')) as f: tgtcls = cPickle.load(f)
 
 dim_embeddings = [
     ('origin_call', data.origin_call_train_size, 10),
diff --git a/config/dest_simple_mlp_tgtcls_1_cswdt.py b/config/dest_simple_mlp_tgtcls_1_cswdt.py
@@ -1,3 +1,4 @@
+import os
 import cPickle
 
 from blocks.initialization import IsotropicGaussian, Constant
@@ -11,7 +12,7 @@ n_end_pts = 5
 
 n_valid = 1000
 
-with open("%s/arrival-clusters.pkl" % data.path) as f: tgtcls = cPickle.load(f)
+with open(os.path.join(data.path, 'arrival-clusters.pkl')) as f: tgtcls = cPickle.load(f)
 
 dim_embeddings = [
     ('origin_call', data.origin_call_train_size, 10),
diff --git a/config/dest_simple_mlp_tgtcls_1_cswdtx.py b/config/dest_simple_mlp_tgtcls_1_cswdtx.py
@@ -1,3 +1,4 @@
+import os
 import cPickle
 
 from blocks.initialization import IsotropicGaussian, Constant
@@ -11,7 +12,7 @@ n_end_pts = 5
 
 n_valid = 1000
 
-with open("%s/arrival-clusters.pkl" % data.path) as f: tgtcls = cPickle.load(f)
+with open(os.path.join(data.path, 'arrival-clusters.pkl')) as f: tgtcls = cPickle.load(f)
 
 dim_embeddings = [
     ('origin_call', data.origin_call_train_size, 10),
diff --git a/config/dest_simple_mlp_tgtcls_1_cswdtx_alexandre.py b/config/dest_simple_mlp_tgtcls_1_cswdtx_alexandre.py
@@ -1,3 +1,4 @@
+import os
 import cPickle
 
 from blocks.initialization import IsotropicGaussian, Constant
@@ -11,7 +12,7 @@ n_end_pts = 5
 
 n_valid = 1000
 
-with open("%s/arrival-clusters.pkl" % data.path) as f: tgtcls = cPickle.load(f)
+with open(os.path.join(data.path, 'arrival-clusters.pkl')) as f: tgtcls = cPickle.load(f)
 
 dim_embeddings = [
     ('origin_call', data.origin_call_train_size, 10),
diff --git a/config/joint_simple_mlp_tgtcls_111_cswdtx.py b/config/joint_simple_mlp_tgtcls_111_cswdtx.py
@@ -1,3 +1,4 @@
+import os
 import cPickle
 
 from blocks.initialization import IsotropicGaussian, Constant
@@ -11,7 +12,7 @@ n_end_pts = 5
 
 n_valid = 1000
 
-with open("%s/arrival-clusters.pkl" % data.path) as f:
+with open(os.path.join(data.path, 'arrival-clusters.pkl')) as f:
     dest_tgtcls = cPickle.load(f)
 
 # generate target classes for time prediction as a Fibonacci sequence
diff --git a/config/joint_simple_mlp_tgtcls_111_cswdtx_noise_dout.py b/config/joint_simple_mlp_tgtcls_111_cswdtx_noise_dout.py
@@ -1,3 +1,4 @@
+import os
 import cPickle
 
 from blocks import roles
@@ -14,7 +15,7 @@ n_end_pts = 5
 
 n_valid = 1000
 
-with open("%s/arrival-clusters.pkl" % data.path) as f:
+with open(os.path.join(data.path, 'arrival-clusters.pkl')) as f:
     dest_tgtcls = cPickle.load(f)
 
 # generate target classes for time prediction as a Fibonacci sequence
diff --git a/config/joint_simple_mlp_tgtcls_1_cswdtx.py b/config/joint_simple_mlp_tgtcls_1_cswdtx.py
@@ -1,3 +1,4 @@
+import os
 import cPickle
 
 from blocks.initialization import IsotropicGaussian, Constant
@@ -11,7 +12,7 @@ n_end_pts = 5
 
 n_valid = 1000
 
-with open("%s/arrival-clusters.pkl" % data.path) as f:
+with open(os.path.join(data.path, 'arrival-clusters.pkl')) as f:
     dest_tgtcls = cPickle.load(f)
 
 # generate target classes for time prediction as a Fibonacci sequence
diff --git a/data/rfc4180.py b/data/rfc4180.py
@@ -1,6 +1,7 @@
 import ast
 import csv
 import numpy
+import os
 
 from fuel.datasets import Dataset
 from fuel.streams import DataStream
@@ -90,15 +91,16 @@ taxi_columns_valid = taxi_columns + [
     ("time", lambda l: int(l[11])),
 ]
 
-train_file="%s/train.csv" % data.path
-valid_file="%s/valid2-cut.csv" % data.path
-test_file="%s/test.csv" % data.path
+train_file = os.path.join(data.path, 'train.csv')
+valid_file = os.path.join(data.path, 'valid2-cut.csv')
+test_file = os.path.join(data.path, 'test.csv')
 
 train_data=TaxiData(train_file, taxi_columns, has_header=True)
 valid_data = TaxiData(valid_file, taxi_columns_valid)
 test_data = TaxiData(test_file, taxi_columns, has_header=True)
 
-valid_trips = [l for l in open("%s/valid2-cut-ids.txt" % data.path)]
+with open(os.path.join(data.path, 'valid2-cut-ids.txt')) as f:
+    valid_trips = [l for l in f]
 
 def train_it():
     return DataIterator(DataStream(train_data))
diff --git a/test.py b/test.py
@@ -28,11 +28,11 @@ if __name__ == "__main__":
     model.set_param_values(parameters)
 
     if 'destination' in outputs:
-        dest_outfile = open("output/test-dest-output-%s.csv" % model_name, "w")
+        dest_outfile = open(os.path.join('output', 'test-dest-output-%s.csv' % model_name), 'w')
         dest_outcsv = csv.writer(dest_outfile)
         dest_outcsv.writerow(["TRIP_ID", "LATITUDE", "LONGITUDE"])
     if 'duration' in outputs:
-        time_outfile = open("output/test-time-output-%s.csv" % model_name, "w")
+        time_outfile = open(os.path.join('output', 'test-time-output-%s.csv' % model_name), 'w')
         time_outcsv = csv.writer(time_outfile)
         time_outcsv.writerow(["TRIP_ID", "TRAVEL_TIME"])
 
@@ -46,3 +46,8 @@ if __name__ == "__main__":
         if 'duration' in outputs:
             duration = output_values[outputs.index('duration')]
             time_outcsv.writerow([d['trip_id'][0], duration[0]])
+
+    if 'destination' in outputs:
+        dest_outfile.close()
+    if 'duration' in outputs:
+        time_outfile.close()