diff options
Diffstat (limited to 'data_analysis/cluster_arrival.py')
-rw-r--r-- | data_analysis/cluster_arrival.py | 27 |
1 files changed, 27 insertions, 0 deletions
diff --git a/data_analysis/cluster_arrival.py b/data_analysis/cluster_arrival.py new file mode 100644 index 0000000..fd4ea04 --- /dev/null +++ b/data_analysis/cluster_arrival.py @@ -0,0 +1,27 @@ +import matplotlib.pyplot as plt +import numpy +import cPickle +import scipy.misc + +from sklearn.cluster import MeanShift, estimate_bandwidth +from sklearn.datasets.samples_generator import make_blobs +from itertools import cycle + +print "Reading arrival point list" +with open("arrivals.pkl") as f: + pts = cPickle.load(f) + +print "Doing clustering" +bw = estimate_bandwidth(pts, quantile=.1, n_samples=1000) +print bw +bw = 0.001 + +ms = MeanShift(bandwidth=bw, bin_seeding=True, min_bin_freq=5) +ms.fit(pts) +cluster_centers = ms.cluster_centers_ + +print "Clusters shape: ", cluster_centers.shape + +with open("arrival-cluters.pkl", "w") as f: + cPickle.dump(cluster_centers, f, protocol=cPickle.HIGHEST_PROTOCOL) + |