taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git
Log | Files | Refs | README

commit 3a694dde577103f269ff888c19c820712fbab96a
parent 1e8da55c32746e7bf898717c032144b056256d3c
Author: Alex Auvolat <alex.auvolat@ens.fr>
Date:   Mon, 20 Jul 2015 17:40:20 -0400

Large validation set

Diffstat:
A data/cuts/large_valid.py | 9 +++++++++
M data/make_valid_cut.py | 5 +++++
2 files changed, 14 insertions(+), 0 deletions(-)

diff --git a/data/cuts/large_valid.py b/data/cuts/large_valid.py
@@ -0,0 +1,9 @@
+import random
+
+begin = 1372636853
+end = 1404172787
+
+random.seed(1234)
+cuts = []
+for i in range(500):
+    cuts.append(random.randrange(begin, end))
diff --git a/data/make_valid_cut.py b/data/make_valid_cut.py
@@ -17,6 +17,8 @@ _fields = ['trip_id', 'call_type', 'origin_call', 'origin_stand', 'taxi_id', 'ti
 def make_valid(cutfile, outpath):
     cuts = importlib.import_module('.%s' % cutfile, 'data.cuts').cuts

+    print "Number of cuts:", len(cuts)
+
     valid = []

     for line in taxi_it('train'):
@@ -39,6 +41,9 @@ def make_valid(cutfile, outpath):
                     'travel_time': 15 * (len(latitude)-1)
                 })
                 valid.append(line)
+                break
+
+    print "Number of trips in validation set:", len(valid)

     file = h5py.File(outpath, 'a')
     clen = file['trip_id'].shape[0]