taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git
Log | Files | Refs | README

commit 71bb4d90da2bad933fdca48d1879886fe7aa9bc8
parent de76aae44b6c0cbe9ab42c7ae215c3ae9e4e4055
Author: Alex Auvolat <alex.auvolat@ens.fr>
Date:   Mon,  4 May 2015 13:15:33 -0400

Add make_valid_cut

Diffstat:
A make_valid_cut.py | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+), 0 deletions(-)

"""make_valid_cut.py: build a validation set by cutting training trips.

Reads ``train.csv``, and for every trip that is in progress at one of the
timestamps listed in ``cuts``, writes to ``cutvalid.csv`` a row holding the
trip truncated at that timestamp together with its true final point and its
total duration.

The script runs unchanged on Python 2.7 and Python 3.
"""

# Cuts the training dataset at the following timestamps :

cuts = [
    1376503200,
    1380616200,
    1381167900,
    1383364800,
    1387722600,
]

import random
import csv
import ast

# Time elapsed between two consecutive polyline points, in the same unit as
# the timestamps (presumably seconds -- confirm against the dataset spec).
SAMPLE_PERIOD = 15


def cut_trip(l, timestamps):
    """Return the output rows produced by one input row.

    Parameters:
        l: one CSV row as a list of strings.  The last column is the literal
           representation of the polyline (a list of points) and column 5 is
           read as the integer start time of the trip.
        timestamps: iterable of cut timestamps to test the trip against.

    Returns:
        A list with one row per timestamp that falls inside the trip
        (both ends inclusive).  Each row is the input row without its last
        column, followed by: the string form of the polyline prefix known at
        the cut, the two coordinates of the last polyline point, and the
        total trip duration.  Trips with an empty polyline yield no rows.
    """
    polyline = ast.literal_eval(l[-1])
    if not polyline:
        return []

    start = int(l[5])
    duration = SAMPLE_PERIOD * (len(polyline) - 1)

    rows = []
    for ts in timestamps:
        if start <= ts <= start + duration:
            # keep it
            # Floor division keeps n an int on Python 3 as well; the point
            # recorded at or just before the cut is still included (+ 1).
            n = (ts - start) // SAMPLE_PERIOD + 1
            cut = polyline[:n]
            rows.append(l[:-1] + [
                str(cut),
                polyline[-1][0],
                polyline[-1][1],
                duration,
            ])
    return rows


def main():
    """Read train.csv and write every cut trip to cutvalid.csv."""
    # `with` guarantees both files are closed even if a row fails to parse.
    with open("train.csv") as f:
        fr = csv.reader(f)
        next(fr)  # skip the header line; no header is written to the output
        with open("cutvalid.csv", "w") as g:
            gw = csv.writer(g)
            for l in fr:
                for row in cut_trip(l, cuts):
                    # Single-argument form prints the same text on Python 2
                    # and Python 3.
                    print(row)
                    gw.writerow(row)


if __name__ == "__main__":
    main()