defmodule SData.MerkleTree do @moduledoc""" A Merkle tree structure for storing metadata for a big file. """ alias SData.PageStore, as: Store @block_size 8192 @tree_arity 256 defstruct [:root, :store] defmodule Page do defstruct [:nblk, :child_nblk, :list] defimpl SData.Page do def refs(page) do if page.child_nblk == 1 do [] else page.list end end end end @doc""" Get the block size used by merkle trees """ def block_size() do @block_size end @doc""" Create a Merkle tree for indexing a file. """ def create(file, store \\ SData.LocalStore.new()) do %File.Stat{size: size} = File.stat!(file) nblk = div(size, @block_size) + (if rem(size, @block_size) == 0 do 0 else 1 end) fh = File.open!(file, [:binary, :read]) create_file_aux(fh, store, 0, nblk, @tree_arity) end defp create_file_aux(fh, store, first_blk, nblk, divc) do cond do divc < nblk -> create_file_aux(fh, store, first_blk, nblk, divc * @tree_arity) divc == @tree_arity and nblk <= divc -> hashes = for i <- first_blk .. (first_blk + nblk - 1) do {:ok, blk} = :file.pread(fh, i * @block_size, @block_size) :crypto.hash(:sha256, blk) end page = %Page{nblk: nblk, child_nblk: 1, list: hashes} {hash, store} = Store.put(store, page) %__MODULE__{root: hash, store: store} divc > @tree_arity and nblk <= divc -> sub_divc = div(divc, @tree_arity) n_sub_minus_one = div(nblk, sub_divc) {sub_hashes, store} = Enum.reduce(0..n_sub_minus_one, {[], store}, fn i, {sh, store} -> sub_first = first_blk + i * sub_divc sub_n = (if i == n_sub_minus_one do rem(nblk, sub_divc) else sub_divc end) %__MODULE__{root: hash, store: store} = create_file_aux(fh, store, sub_first, sub_n, sub_divc) {[hash | sh], store} end) page = %Page{nblk: nblk, child_nblk: sub_divc, list: Enum.reverse(sub_hashes)} {hash, store} = Store.put(store, page) %__MODULE__{root: hash, store: store} end end @doc""" Get the number of blocks in the tree """ def block_count(mt) do %Page{nblk: nblk} = Store.get(mt.store, mt.root) nblk end @doc""" Get the hash of block number i """ def get(mt, i) do %Page{child_nblk: cn, list: list} = Store.get(mt.store, mt.root) if cn == 1 do Enum.fetch!(list, i) else pos = div(i, cn) subtree = %{mt | root: Enum.fetch!(list, pos)} subpos = rem(i, cn) get(subtree, subpos) end end @doc""" Get the hashes of all blocks """ def get_all(mt) do %Page{child_nblk: cn, list: list} = Store.get(mt.store, mt.root) if cn == 1 do list else list |> Enum.map(&(%{mt | root: &1})) |> Enum.map(&get_all/1) |> Enum.reduce([], &(&2++&1)) end end end