taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git
Log | Files | Refs | README

HTTPServer.py (4409B)


      1 #!/usr/bin/env python
      2 
      3 import os
      4 import sys
      5 import urllib
      6 import SimpleHTTPServer
      7 import SocketServer
      8 from cStringIO import StringIO
      9 
     10 import data
     11 from data.hdf5 import TaxiDataset
     12 from visualizer import Vlist, Path
     13 
     14 
     15 visualizer_path = os.path.join(data.path, 'visualizer')
     16 source_path = os.path.split(os.path.realpath(__file__))[0]
     17 
     18 test_data = None
     19 train_data = None
     20 
     21 class VisualizerHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
     22     def send_head(self):
     23         spath = self.path.split('?')[0]
     24         path = spath.split('/')[1:]
     25         if len(path) == 1:
     26             if path[0] == '':
     27                 path[0] = 'index.html'
     28             file_path = os.path.join(source_path, path[0])
     29             return self.send_file(file_path)
     30         elif path[0] == 'ls':
     31             return self.send_datalist()
     32         elif path[0] == 'get':
     33             return self.send_file(os.path.join(visualizer_path, spath[5:]))
     34         elif path[0] == 'extract':
     35             return self.send_extract(spath[9:])
     36 
     37     def send_file(self, file_path):
     38         file_path = urllib.unquote(file_path)
     39         ctype = self.guess_type(file_path)
     40 
     41         try:
     42             f = open(file_path, 'rb')
     43         except IOError:
     44             self.send_error(404, 'File not found')
     45             return None
     46         try:
     47             self.send_response(200)
     48             self.send_header('Content-type', ctype)
     49             fs = os.fstat(f.fileno())
     50             self.send_header('Content-Length', str(fs[6]))
     51             self.send_header('Last-Modified', self.date_time_string(fs.st_mtime))
     52             self.end_headers()
     53             return f
     54         except:
     55             f.close()
     56             raise
     57 
     58     def send_datalist(self):
     59         l = []
     60         for path, subs, files in os.walk(visualizer_path):
     61             for file in files:
     62                 mtime = os.stat('%s/%s' % (path, file))[8]
     63                 l.append('{"path":["%s"],"name":"%s","mtime":%d}' % ('","'.join(path[len(visualizer_path):].split('/')), file, mtime))
     64         l.sort()
     65         f = StringIO()
     66         f.write("[")
     67         f.write(','.join(l))
     68         f.write("]")
     69         length = f.tell()
     70         f.seek(0)
     71         self.send_response(200)
     72         encoding = sys.getfilesystemencoding()
     73         self.send_header("Content-type", "text/html; charset=%s" % encoding)
     74         self.send_header("Content-Length", str(length))
     75         self.end_headers()
     76         return f
     77 
     78     def send_extract(self, query):
     79         f = StringIO()
     80         query = urllib.unquote(query)
     81         content = Vlist()
     82         for (i,sub) in enumerate(query.split(',')):
     83             r = sub.split('-')
     84             if len(r)==1:
     85                 if sub.strip()[0].lower()=='t':
     86                     sub=sub.strip()[1:]
     87                     content.append(Path(test_data.extract(int(sub)), 'T%s<br>'%sub))
     88                 else:
     89                     content.append(Path(train_data.extract(int(sub)), '%s<br>'%sub))
     90             elif len(r)==2:
     91                 test = False
     92                 if r[0].strip()[0].lower()=='t':
     93                     test = True
     94                     r[0]=r[0].strip()[1:]
     95                     if r[1].strip()[0].lower()=='t':
     96                         r[1]=r[1].strip()[1:]
     97                 for i in xrange(int(r[0]), int(r[1])+1):
     98                     if test:
     99                         content.append(Path(test_data.extract(i), 'T%d<br>'%i))
    100                     else:
    101                         content.append(Path(train_data.extract(i), '%d<br>'%i))
    102             elif len(r)>2:
    103                 self.send_error(404, 'File not found')
    104                 return None
    105         content.write(f)
    106         length = f.tell()
    107         f.seek(0)
    108         self.send_response(200)
    109         encoding = sys.getfilesystemencoding()
    110         self.send_header("Content-type", "text/html; charset=%s" % encoding)
    111         self.send_header("Content-Length", str(length))
    112         self.end_headers()
    113         return f
    114 
    115 if __name__ == '__main__':
    116     if len(sys.argv) < 2:
    117         print >>sys.stderr, 'Usage: %s port [--no-hdf5]' % sys.argv[0]
    118 
    119     if '--no-hdf5' not in sys.argv:
    120         print >>sys.stderr, 'Loading dataset...',
    121         path = os.path.join(data.path, 'data.hdf5')
    122         train_data = TaxiDataset('train')
    123         test_data = TaxiDataset('test')
    124         print >>sys.stderr, 'done'
    125 
    126     httpd = SocketServer.TCPServer(('', int(sys.argv[1])), VisualizerHTTPRequestHandler)
    127     httpd.serve_forever()