# taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git
Log | Files | Refs | README

pvalue.py (1309B)

```      1 #!/usr/bin/env python
2
3 import os
4 import sys
5
6 import math
7 import numpy
8
9 import data
10
11 # Haversine distance calculation
12 # --------- -------- -----------
13
# Sphere radius used by hdist; presumably kilometres (6371 km is the
# Earth's mean radius), so distances come out in the same unit.
rearth = 6371.
# Multiplicative factor converting degrees to radians.
deg2rad = math.pi / 180.

def hdist(a, b):
    """Return the haversine (great-circle) distance between a and b.

    Parameters
    ----------
    a, b : array-like
        Coordinates in degrees whose last axis holds the latitude at
        index 0 and the longitude at index 1.  The usual input is a pair
        of ``(n, 2)`` arrays, but a single ``[lat, lon]`` point is also
        accepted and is broadcast against the other argument.

    Returns
    -------
    numpy.ndarray
        Distances on a sphere of radius ``rearth``, one per coordinate
        pair (a 0-d array when both inputs are single points).
    """
    a = numpy.asarray(a, dtype=float)
    b = numpy.asarray(b, dtype=float)

    # Ellipsis indexing keeps the (n, 2) behaviour and additionally lets
    # a lone [lat, lon] point be used.
    lat1 = a[..., 0] * deg2rad
    lon1 = a[..., 1] * deg2rad
    lat2 = b[..., 0] * deg2rad
    lon2 = b[..., 1] * deg2rad

    # No abs() needed: the differences are only used through sin(x/2)**2,
    # which is an even function.
    dlat = lat1 - lat2
    dlon = lon1 - lon2

    al = (numpy.sin(dlat / 2) ** 2
          + numpy.cos(lat1) * numpy.cos(lat2) * numpy.sin(dlon / 2) ** 2)
    # Rounding can push the haversine term marginally outside [0, 1]
    # (e.g. near-antipodal points); clamp so sqrt(1 - al) never gives NaN.
    al = numpy.clip(al, 0., 1.)

    # Half of the central angle between the two points.
    d = numpy.arctan2(numpy.sqrt(al), numpy.sqrt(1. - al))

    return 2. * rearth * d
32
33
35 # ---- --- ------
36
38     return numpy.genfromtxt(f, delimiter=',', skip_header=1)[:, 1:3]
39
41
# Load every command-line argument whose name contains '.csv' and compute,
# for each loaded table, its per-row haversine error against `answer`.
# NOTE(review): `readcsv` and `answer` are defined outside this section;
# `answer` is presumably the reference coordinates -- confirm.
tables = [readcsv(f) for f in sys.argv if '.csv' in f]
etables = [hdist(t, answer) for t in tables]

# Calculate p-values
# --------- --------

# Population variance of each table's errors.  It depends on one table
# only, so compute it once here instead of once per (i, j) pair.
evars = [numpy.mean((e - numpy.mean(e))**2) for e in etables]

# pvalue[i, j] = 1 - Phi(mean(e_j - e_i) / sqrt(var)), where Phi is the
# standard normal CDF and var is the average of the two error variances.
# The diagonal is never written and stays at zero.
pvalue = numpy.zeros((len(tables), len(tables)))

for i, a in enumerate(etables):
    for j, b in enumerate(etables):
        if i == j:
            continue
        d = b - a
        var = (evars[i] + evars[j]) / 2.
        # .5 * (1 + erf(x / sqrt(2))) is the standard normal CDF at x.
        pv = 1 - .5 * (1 + math.erf(numpy.mean(d) / numpy.sqrt(2 * var)))
        pvalue[i, j] = pv

# Single-argument call form: same output under Python 2, valid in Python 3.
print(pvalue)
```