aboutsummaryrefslogtreecommitdiff
path: root/shard/lib/data/merkletree.ex
blob: 90361a37b61a5d83b83565060f8350c5e1de7546 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
defmodule SData.MerkleTree do
  @moduledoc"""
  A Merkle tree structure for storing metadata for a big file.
  """

  alias SData.PageStore, as: Store

  @block_size 4096
  @tree_arity 64

  defstruct [:root, :store]

  defmodule Page do
    defstruct [:nblk, :child_nblk, :list]

    defimpl SData.Page do
      def refs(page) do
        if page.child_nblk == 1 do
          []
        else
          page.list
        end
      end
    end
  end

  @doc"""
  Create a Merkle tree for indexing a file.
  """
  def create(file, store \\ SData.LocalStore.new()) do
    %File.Stat{size: size} = File.stat!(file)
    nblk = div(size, @block_size) + (if rem(size, @block_size) == 0 do 0 else 1 end)
    fh = File.open!(file, [:binary, :read])
    create_file_aux(fh, store, 0, nblk, @tree_arity)
  end

  defp create_file_aux(fh, store, first_blk, nblk, divc) do
    cond do
      divc < nblk ->
        create_file_aux(fh, store, first_blk, nblk, divc * @tree_arity)
      divc == @tree_arity and nblk <= divc ->
        hashes = for i <- first_blk .. (first_blk + nblk - 1) do
            {:ok, blk} = :file.pread(fh, i * @block_size, @block_size)
            :crypto.hash(:sha256, blk)
        end
        page = %Page{nblk: nblk, child_nblk: 1, list: hashes}
        {hash, store} = Store.put(store, page)
        %__MODULE__{root: hash, store: store}
      divc > @tree_arity and nblk <= divc ->
        sub_divc = div(divc, @tree_arity)
        n_sub_minus_one = div(nblk, sub_divc)
        {sub_hashes, store} = Enum.reduce(0..n_sub_minus_one, {[], store}, fn i, {sh, store} ->
          sub_first = first_blk + i * sub_divc
          sub_n = (if i == n_sub_minus_one do rem(nblk, sub_divc) else sub_divc end)
          %__MODULE__{root: hash, store: store} = create_file_aux(fh, store, sub_first, sub_n, sub_divc)
          {[hash | sh], store}
        end)
        page = %Page{nblk: nblk, child_nblk: sub_divc, list: Enum.reverse(sub_hashes)}
        {hash, store} = Store.put(store, page)
        %__MODULE__{root: hash, store: store}
    end
  end

  @doc"""
  Get the number of blocks in the tree
  """
  def block_count(mt) do
    %Page{nblk: nblk} = Store.get(mt.store, mt.root)
    nblk
  end

  @doc"""
  Get the hash of block number i
  """
  def get(mt, i) do
    %Page{child_nblk: cn, list: list} = Store.get(mt.store, mt.root)
    if cn == 1 do
      Enum.fetch!(list, i)
    else
      pos = div(i, cn)
      subtree = %{mt | root: Enum.fetch!(list, pos)}
      subpos = rem(i, cn)
      get(subtree, subpos)
    end
  end

  @doc"""
  Get the hashes of all blocks in a range
  """
  def get_range(mt, range) do
    range |> Enum.map(&(get(mt, &1))) # TODO: do this efficiently
  end
end