defmodule SData.MerkleTree do
@moduledoc"""
A Merkle tree structure for storing metadata for a big file.
"""
alias SData.PageStore, as: Store
@block_size 8192
@tree_arity 256
defstruct [:root, :store]
defmodule Page do
defstruct [:nblk, :child_nblk, :list]
defimpl SData.Page do
def refs(page) do
if page.child_nblk == 1 do
[]
else
page.list
end
end
end
end
@doc"""
Get the block size used by merkle trees
"""
def block_size() do
@block_size
end
@doc"""
Create a Merkle tree for indexing a file.
"""
def create(file, store \\ SData.LocalStore.new()) do
%File.Stat{size: size} = File.stat!(file)
nblk = div(size, @block_size) + (if rem(size, @block_size) == 0 do 0 else 1 end)
fh = File.open!(file, [:binary, :read])
create_file_aux(fh, store, 0, nblk, @tree_arity)
end
defp create_file_aux(fh, store, first_blk, nblk, divc) do
cond do
divc < nblk ->
create_file_aux(fh, store, first_blk, nblk, divc * @tree_arity)
divc == @tree_arity and nblk <= divc ->
hashes = for i <- first_blk .. (first_blk + nblk - 1) do
{:ok, blk} = :file.pread(fh, i * @block_size, @block_size)
:crypto.hash(:sha256, blk)
end
page = %Page{nblk: nblk, child_nblk: 1, list: hashes}
{hash, store} = Store.put(store, page)
%__MODULE__{root: hash, store: store}
divc > @tree_arity and nblk <= divc ->
sub_divc = div(divc, @tree_arity)
n_sub_minus_one = div(nblk, sub_divc)
{sub_hashes, store} = Enum.reduce(0..n_sub_minus_one, {[], store}, fn i, {sh, store} ->
sub_first = first_blk + i * sub_divc
sub_n = (if i == n_sub_minus_one do rem(nblk, sub_divc) else sub_divc end)
%__MODULE__{root: hash, store: store} = create_file_aux(fh, store, sub_first, sub_n, sub_divc)
{[hash | sh], store}
end)
page = %Page{nblk: nblk, child_nblk: sub_divc, list: Enum.reverse(sub_hashes)}
{hash, store} = Store.put(store, page)
%__MODULE__{root: hash, store: store}
end
end
@doc"""
Get the number of blocks in the tree
"""
def block_count(mt) do
%Page{nblk: nblk} = Store.get(mt.store, mt.root)
nblk
end
@doc"""
Get the hash of block number i
"""
def get(mt, i) do
%Page{child_nblk: cn, list: list} = Store.get(mt.store, mt.root)
if cn == 1 do
Enum.fetch!(list, i)
else
pos = div(i, cn)
subtree = %{mt | root: Enum.fetch!(list, pos)}
subpos = rem(i, cn)
get(subtree, subpos)
end
end
@doc"""
Get the hashes of all blocks in a range
"""
def get_range(mt, range) do
range |> Enum.map(&(get(mt, &1))) # TODO: do this efficiently
end
end