aboutsummaryrefslogtreecommitdiff
path: root/pvalue.py
diff options
context:
space:
mode:
authorAlex Auvolat <alex.auvolat@ens.fr>2015-07-29 12:06:00 -0400
committerAlex Auvolat <alex.auvolat@ens.fr>2015-07-29 12:06:00 -0400
commitdefab74395f2ddb2641bba6ab8d18bdedde7a334 (patch)
tree17821ee924e2bdcd2927ed1e61cbf410dac40108 /pvalue.py
parentca40e5c81d385e1422cebe40e009d7e93b95bfbb (diff)
downloadtaxi-defab74395f2ddb2641bba6ab8d18bdedde7a334.tar.gz
taxi-defab74395f2ddb2641bba6ab8d18bdedde7a334.zip
p-value calculation script
Diffstat (limited to 'pvalue.py')
-rwxr-xr-xpvalue.py60
1 files changed, 60 insertions, 0 deletions
diff --git a/pvalue.py b/pvalue.py
new file mode 100755
index 0000000..15c6e10
--- /dev/null
+++ b/pvalue.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+import os
+import sys
+
+import math
+import numpy
+
+import data
+
+# Haversine distance calculation
+# --------- -------- -----------
+
+# Earth radius used to scale the central angle (6371 is the mean radius in km).
+rearth = 6371.
+# Multiplicative factor converting degrees to radians.
+deg2rad = math.pi / 180.
+
+def hdist(a, b):
+    """Return the haversine (great-circle) distance between paired points.
+
+    ``a`` and ``b`` are array-likes whose last axis holds the latitude at
+    index 0 and the longitude at index 1, both in degrees.  An ``(n, 2)``
+    table is accepted, as is a single point of shape ``(2,)``; the two
+    arguments broadcast against each other.
+
+    The result has one entry per pair of points and is expressed in the
+    unit of ``rearth``.
+    """
+    a = numpy.asarray(a, dtype=float)
+    b = numpy.asarray(b, dtype=float)
+
+    lat1 = a[..., 0] * deg2rad
+    lon1 = a[..., 1] * deg2rad
+    lat2 = b[..., 0] * deg2rad
+    lon2 = b[..., 1] * deg2rad
+
+    dlat = numpy.abs(lat1 - lat2)
+    dlon = numpy.abs(lon1 - lon2)
+
+    al = (numpy.sin(dlat / 2) ** 2
+          + numpy.cos(lat1) * numpy.cos(lat2) * (numpy.sin(dlon / 2) ** 2))
+    # Rounding can push `al` marginally outside [0, 1] (for instance with
+    # nearly antipodal points), which would turn sqrt(1 - al) into NaN.
+    al = numpy.clip(al, 0., 1.)
+    d = numpy.arctan2(numpy.sqrt(al), numpy.sqrt(1. - al))
+
+    # `d` is half the central angle, hence the factor 2.
+    return 2. * rearth * d
+
+
+# Read the inputs
+# ---- --- ------
+
+def readcsv(f):
+    """Load columns 1 and 2 of a comma-separated file as a 2-D array.
+
+    ``f`` is anything ``numpy.genfromtxt`` accepts (a path or a file-like
+    object).  The first line is skipped as a header and column 0 is
+    dropped, so the result has one row per data line and two columns.
+    """
+    table = numpy.genfromtxt(f, delimiter=',', skip_header=1)
+    # genfromtxt collapses a file with a single data row into a 1-D array;
+    # restore the row axis so the column slice below still applies.
+    return numpy.atleast_2d(table)[:, 1:3]
+
+# Reference coordinates, read from test_answer.csv under data.path.
+answer = readcsv(os.path.join(data.path, 'test_answer.csv'))
+
+# Every command-line argument whose name contains '.csv' is loaded as a table
+# to compare.  sys.argv[0] is the script itself, so it is never a candidate.
+tables = [readcsv(f) for f in sys.argv[1:] if '.csv' in f]
+# Per-row haversine distance between each table and the reference.
+etables = [hdist(t, answer) for t in tables]
+
+# Calculate p-values
+# --------- --------
+
+def _pvalue_matrix(errors):
+    """Return the square matrix of pairwise p-values for ``errors``.
+
+    ``errors`` is a list of equally long 1-D arrays.  Entry ``[i, j]`` is
+    ``1 - Phi(z)`` where ``Phi`` is the standard normal CDF and ``z`` is the
+    mean of ``errors[j] - errors[i]`` divided by the square root of the
+    average of the two arrays' variances.  The diagonal is left at zero.
+
+    NOTE(review): ``z`` is scaled by the per-sample standard deviation, with
+    no 1/sqrt(n) standard-error factor -- confirm this is the intended test.
+    """
+    count = len(errors)
+    result = numpy.zeros((count, count))
+
+    # Each array's (population) variance does not depend on the pairing, so
+    # compute it once instead of inside the double loop.
+    variances = [numpy.mean((e - numpy.mean(e))**2) for e in errors]
+
+    for i, a in enumerate(errors):
+        for j, b in enumerate(errors):
+            if i == j:
+                continue
+            var = (variances[i] + variances[j]) / 2.
+            z = numpy.mean(b - a) / numpy.sqrt(2 * var)
+            result[i, j] = 1 - .5 * (1 + math.erf(z))
+
+    return result
+
+
+pvalue = _pvalue_matrix(etables)
+
+# The parenthesised single-argument form prints the same text under Python 2
+# and is also valid Python 3.
+print(pvalue)