aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Auvolat <alex@adnab.me>2021-02-21 15:11:15 +0100
committerAlex Auvolat <alex@adnab.me>2021-02-21 15:11:15 +0100
commite59322041a0f894db0dfbd32805303b64048c810 (patch)
tree382202f7abbddff078b665e7bbc09d3a3a099ad6
parent217b0dfd683cda0a082fb33636b38f0172a4af31 (diff)
downloadgarage-e59322041a0f894db0dfbd32805303b64048c810.tar.gz
garage-e59322041a0f894db0dfbd32805303b64048c810.zip
Evaluate hash functions
-rw-r--r--script/simulate_ring.py47
1 files changed, 25 insertions, 22 deletions
diff --git a/script/simulate_ring.py b/script/simulate_ring.py
index fa875d4e..674b1190 100644
--- a/script/simulate_ring.py
+++ b/script/simulate_ring.py
@@ -2,6 +2,13 @@ import hashlib
import bisect
import xxhash
+def hash_str(s):
+ xxh = xxhash.xxh64()
+ xxh.update(s.encode('ascii'))
+ return xxh.hexdigest()
+
+def sha256_str(s):
+ return hashlib.sha256(s.encode('ascii')).hexdigest()
def walk_ring_from_pos(tokens, dcs, start, rep):
ret = []
@@ -29,22 +36,24 @@ def count_tokens_per_node(tokens):
for node, ntok in sorted(list(tokens_of_node.items())):
print(node, ": ", ntok)
+
def method1(nodes):
tokens = []
dcs = set()
for (node_id, dc, n_tokens) in nodes:
dcs |= set([dc])
for i in range(n_tokens):
- token = hashlib.sha256(f"{node_id} {i}".encode('ascii')).hexdigest()
+ token = hash_str(f"{node_id} {i}")
tokens.append((token, dc, node_id))
tokens.sort(key=lambda tok: tok[0])
+ print(tokens)
count_tokens_per_node(tokens)
space_of_node = {}
def walk_ring(v, rep):
- i = bisect.bisect_left([tok for tok, _, _ in tokens], hashlib.sha256(v).hexdigest())
+ i = bisect.bisect_left([tok for tok, _, _ in tokens], hash_str(v))
return walk_ring_from_pos(tokens, dcs, i, rep)
return walk_ring
@@ -57,10 +66,7 @@ def method2(nodes):
h, hn, hndc = None, None, None
for (node_id, node_dc, n_tokens) in nodes:
for tok in range(n_tokens):
- #hnode = hashlib.sha256(f"{i} {node_id} {tok}".encode('ascii')).hexdigest()
- xxh = xxhash.xxh64()
- xxh.update(f"partition {i} node {node_id} token {tok}".encode('ascii'))
- hnode = xxh.digest()
+ hnode = hash_str(f"partition {i} node {node_id} token {tok}")
if h is None or hnode < h:
h = hnode
hn = node_id
@@ -74,8 +80,10 @@ def method2(nodes):
def walk_ring(v, rep):
- vh = hashlib.sha256(v).digest()
- i = (vh[0]<<8 | vh[1]) % (2**partition_bits)
+ xxh = xxhash.xxh32()
+ xxh.update(v.encode('ascii'))
+ vh = xxh.intdigest()
+ i = vh % (2**partition_bits)
return walk_ring_from_pos(partition_nodes, dcs, i, rep)
return walk_ring
@@ -85,7 +93,7 @@ def method2(nodes):
def evaluate_method(walk_ring):
node_data_counts = {}
for i in range(100000):
- nodes = walk_ring(f"{i}".encode('ascii'), 3)
+ nodes = walk_ring(f"{i}", 3)
for n in nodes:
if n not in node_data_counts:
node_data_counts[n] = 0
@@ -96,25 +104,20 @@ def evaluate_method(walk_ring):
if __name__ == "__main__":
-
- nodes = [('digitale', 'atuin', 10),
- ('drosera', 'atuin', 10),
- ('datura', 'atuin', 10),
- ('io', 'jupiter', 20),
- ('meta', 'pipo', 10),
- ('mega', 'pipo', 10),
- ('mina', 'pipo', 10),
- #('moni', 'pipo', 10),
- #('mimi', 'pipo', 10),
- #('mesi', 'pipo', 10),
- ]
-
print("------")
print("method 1")
+ nodes = [('digitale', 'atuin', 64),
+ ('drosera', 'atuin', 64),
+ ('datura', 'atuin', 64),
+ ('io', 'jupiter', 128)]
method1_walk_ring = method1(nodes)
evaluate_method(method1_walk_ring)
print("------")
print("method 2")
+ nodes = [('digitale', 'atuin', 10),
+ ('drosera', 'atuin', 10),
+ ('datura', 'atuin', 10),
+ ('io', 'jupiter', 20)]
method2_walk_ring = method2(nodes)
evaluate_method(method2_walk_ring)