aboutsummaryrefslogtreecommitdiff
path: root/pvalue.py
blob: 15c6e1070e0c6b6fd070aa7d1d1aa07e66e52161 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python

import os
import sys

import math
import numpy

import data

# Haversine distance calculation
# --------- -------- -----------

rearth = 6371.
deg2rad = 3.141592653589793 / 180.

def hdist(a, b):
    lat1 = a[:, 0] * deg2rad
    lon1 = a[:, 1] * deg2rad
    lat2 = b[:, 0] * deg2rad
    lon2 = b[:, 1] * deg2rad

    dlat = abs(lat1-lat2)
    dlon = abs(lon1-lon2)

    al = numpy.sin(dlat/2)**2  + numpy.cos(lat1) * numpy.cos(lat2) * (numpy.sin(dlon/2)**2)
    d = numpy.arctan2(numpy.sqrt(al), numpy.sqrt(1.-al))

    hd = 2. * rearth * d

    return hd


# Read the inputs
# ---- --- ------

def readcsv(f):
    return numpy.genfromtxt(f, delimiter=',', skip_header=1)[:, 1:3]

answer = readcsv(os.path.join(data.path, 'test_answer.csv'))

tables = [readcsv(f) for f in sys.argv if '.csv' in f]
etables = [hdist(t, answer) for t in tables]

# Calculate p-values
# --------- --------

pvalue = numpy.zeros((len(tables), len(tables)))

for i, a in enumerate(etables):
    for j, b in enumerate(etables):
        if i == j:
            continue
        d = b - a
        var = (numpy.mean((a - numpy.mean(a))**2)
                + numpy.mean((b - numpy.mean(b))**2)) / 2.
        pv = 1 - .5 * (1 + math.erf(numpy.mean(d) / numpy.sqrt(2 * var)))
        pvalue[i, j] = pv

print pvalue