aboutsummaryrefslogtreecommitdiff
path: root/script/simulate_ring.py
blob: fa875d4ef580ccf6ced17fd2066ab3957c695583 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import hashlib
import bisect
import xxhash


def walk_ring_from_pos(tokens, dcs, start, rep):
    ret = []
    ret_dcs = set()
    delta = 0
    while len(ret) < rep:
        i = (start + delta) % len(tokens)
        delta = delta + 1

        (token_k, token_dc, token_node) = tokens[i]
        if token_dc not in ret_dcs:
            ret_dcs |= set([token_dc])
            ret.append(token_node)
        elif len(ret_dcs) == len(dcs) and token_node not in ret:
            ret.append(token_node)
    return ret

def count_tokens_per_node(tokens):
    tokens_of_node = {}
    for _, _, token_node in tokens:
        if token_node not in tokens_of_node:
            tokens_of_node[token_node] = 0
        tokens_of_node[token_node] += 1
    print("#tokens per node:")
    for node, ntok in sorted(list(tokens_of_node.items())):
        print(node, ": ", ntok)

def method1(nodes):
    tokens = []
    dcs = set()
    for (node_id, dc, n_tokens) in nodes:
        dcs |= set([dc])
        for i in range(n_tokens):
            token = hashlib.sha256(f"{node_id} {i}".encode('ascii')).hexdigest()
            tokens.append((token, dc, node_id))
    tokens.sort(key=lambda tok: tok[0])

    count_tokens_per_node(tokens)

    space_of_node = {}

    def walk_ring(v, rep):
        i = bisect.bisect_left([tok for tok, _, _ in tokens], hashlib.sha256(v).hexdigest())
        return walk_ring_from_pos(tokens, dcs, i, rep)

    return walk_ring


def method2(nodes):
    partition_bits = 10
    partitions = list(range(2**partition_bits))
    def partition_node(i):
        h, hn, hndc = None, None, None
        for (node_id, node_dc, n_tokens) in nodes:
            for tok in range(n_tokens):
                #hnode = hashlib.sha256(f"{i} {node_id} {tok}".encode('ascii')).hexdigest()
                xxh = xxhash.xxh64()
                xxh.update(f"partition {i} node {node_id} token {tok}".encode('ascii'))
                hnode = xxh.digest()
                if h is None or hnode < h:
                    h = hnode
                    hn = node_id
                    hndc = node_dc
        return (i, hndc, hn)

    partition_nodes = [partition_node(i) for i in partitions]
    count_tokens_per_node(partition_nodes)

    dcs = list(set(node_dc for _, node_dc, _ in nodes))

        
    def walk_ring(v, rep):
        vh = hashlib.sha256(v).digest()
        i = (vh[0]<<8 | vh[1]) % (2**partition_bits)
        return walk_ring_from_pos(partition_nodes, dcs, i, rep)

    return walk_ring



def evaluate_method(walk_ring):
    node_data_counts = {}
    for i in range(100000):
        nodes = walk_ring(f"{i}".encode('ascii'), 3)
        for n in nodes:
            if n not in node_data_counts:
                node_data_counts[n] = 0
            node_data_counts[n] += 1
    print("Number of data items per node:")
    for n, v in sorted(list(node_data_counts.items())):
        print(n, ": ", v)


if __name__ == "__main__":

    nodes = [('digitale', 'atuin', 10),
             ('drosera', 'atuin', 10),
             ('datura', 'atuin', 10),
             ('io', 'jupiter', 20),
             ('meta', 'pipo', 10),
             ('mega', 'pipo', 10),
             ('mina', 'pipo', 10),
             #('moni', 'pipo', 10),
             #('mimi', 'pipo', 10),
             #('mesi', 'pipo', 10),
             ]

    print("------")
    print("method 1")
    method1_walk_ring = method1(nodes)
    evaluate_method(method1_walk_ring)

    print("------")
    print("method 2")
    method2_walk_ring = method2(nodes)
    evaluate_method(method2_walk_ring)