From 39e549f05e568e4153381f025b3a0f256e9a7b7a Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 22 May 2015 10:00:15 -0400 Subject: Make indexing faster by indexing only one column and querying a range --- data/cut.py | 6 ++++-- data/make_time_index.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'data') diff --git a/data/cut.py b/data/cut.py index 1253434..7853030 100644 --- a/data/cut.py +++ b/data/cut.py @@ -24,8 +24,10 @@ class TaxiTimeCutScheme(IterationScheme): with sqlite3.connect(self.dbfile) as db: c = db.cursor() for cut in cuts: - l = l + [i for (i,) in - c.execute('SELECT trip FROM trip_times WHERE begin <= ? AND end >= ?', (cut, cut))] + part = [i for (i,) in + c.execute('SELECT trip FROM trip_times WHERE begin >= ? AND begin <= ? AND end >= ?', + (cut - 40000, cut, cut))] + l = l + part return iter_(l) diff --git a/data/make_time_index.py b/data/make_time_index.py index c51d075..c2838e0 100755 --- a/data/make_time_index.py +++ b/data/make_time_index.py @@ -39,7 +39,7 @@ def make_valid(outpath): c.executemany('INSERT INTO trip_times(trip, begin, end) VALUES(?, ?, ?)', times) timedb.commit() print "Creating index..." - c.execute('''CREATE INDEX trip_time_index ON trip_times (begin, end)''') + c.execute('''CREATE INDEX trip_begin_index ON trip_times (begin)''') if __name__ == '__main__': -- cgit v1.2.3