# taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git
Log | Files | Refs | README

maps.py (1464B)

```      1 import cPickle
2 import numpy as np
3 import matplotlib.pyplot as plt
4
5 import data
6 from data.hdf5 import taxi_it
7
8
def compute_number_coordinates():
    """Count the coordinates of the training set, print and return the total.

    Each ride yielded by ``taxi_it('train')`` contributes the length of its
    ``'latitude'`` sequence to the total.
    """
    total = sum(len(trip['latitude']) for trip in taxi_it('train'))

    # Single-argument parenthesised form: prints the same under Python 2.
    print(total)

    return total
18
19
def extract_coordinates(n_coordinates=None):
    """Extract coordinates from the dataset and store them in a numpy array.

    Every (latitude, longitude) pair of every ride yielded by
    ``taxi_it('train')`` is copied into one ``(n_coordinates, 2)`` float32
    array (column 0: latitude, column 1: longitude). The array is then
    pickled to ``data.path + "/coordinates_array.pkl"``.

    Arguments:
        n_coordinates: number of rows to allocate. When None, it is obtained
            from compute_number_coordinates(), which costs one extra pass
            over the dataset. If the value is smaller than the real number
            of points, the fill loop raises IndexError; if it is larger, the
            trailing rows stay at zero.

    Returns nothing; the number of points actually written is printed.
    """
    if n_coordinates is None:
        n_coordinates = compute_number_coordinates()

    coordinates = np.zeros((n_coordinates, 2), dtype="float32")

    c = 0
    for ride in taxi_it('train'):
        for point in zip(ride['latitude'], ride['longitude']):
            coordinates[c] = point
            c += 1

    # Lets the caller compare the written count against n_coordinates.
    print(c)

    # The context manager guarantees the file is flushed and closed even if
    # pickling fails. A binary protocol is used because the default ASCII
    # protocol 0 is very bulky and slow for a large numeric array; load()
    # detects the protocol automatically, so readers need no change.
    with open(data.path + "/coordinates_array.pkl", "wb") as pkl_file:
        cPickle.dump(coordinates, pkl_file, cPickle.HIGHEST_PROTOCOL)
37
38
def draw_map(coordinates, xrg, yrg, bins=2000, output_path=None):
    """Draw a log-scaled 2D histogram of the coordinates and save it as an image.

    Arguments:
        coordinates: 2D array; column 0 is binned along the first histogram
            axis and column 1 along the second.
        xrg: [min, max] range applied to column 0.
        yrg: [min, max] range applied to column 1.
        bins: number of bins along each axis (default 2000).
        output_path: file the figure is saved to. When None, defaults to
            ``data.path + "/analysis/xyhmap2.png"``.
    """
    if output_path is None:
        output_path = data.path + "/analysis/xyhmap2.png"

    print("Start drawing")
    fig = plt.figure(figsize=(30, 30), dpi=100, facecolor='w', edgecolor='k')
    try:
        # Only the counts are needed; the bin edges are discarded.
        hist, _, _ = np.histogram2d(coordinates[:, 0], coordinates[:, 1],
                                    bins=bins, range=[xrg, yrg])

        # Empty bins still yield log(0) = -inf as before; only numpy's
        # divide-by-zero RuntimeWarning is silenced.
        with np.errstate(divide='ignore'):
            log_hist = np.log(hist)

        plt.imshow(log_hist)
        # imshow puts row 0 at the top; flip so column-0 values grow upwards.
        plt.gca().invert_yaxis()
        plt.savefig(output_path)
    finally:
        # Release the large figure so repeated calls do not accumulate
        # open figures in pyplot's global state.
        plt.close(fig)
48
49
if __name__ == "__main__":
    # Passing the point count explicitly skips the counting pass over the
    # dataset (presumably the value printed by an earlier run of
    # compute_number_coordinates() -- confirm if the dataset changes).
    extract_coordinates(n_coordinates=83409386)

    # Reload the array that extract_coordinates just pickled; the context
    # manager makes sure the file handle is closed.
    with open(data.path + "/coordinates_array.pkl", "rb") as pkl_file:
        coordinates = cPickle.load(pkl_file)

    xrg = [41.05, 41.25]   # range for column 0 (latitude)
    yrg = [-8.75, -8.55]   # range for column 1 (longitude)
    draw_map(coordinates, xrg, yrg)
```