diff options
Diffstat (limited to 'shard/lib/data')
-rw-r--r-- | shard/lib/data/data.ex | 6 | ||||
-rw-r--r-- | shard/lib/data/merkletree.ex | 93 |
2 files changed, 99 insertions, 0 deletions
diff --git a/shard/lib/data/data.ex b/shard/lib/data/data.ex index 78c73cd..33dca09 100644 --- a/shard/lib/data/data.ex +++ b/shard/lib/data/data.ex @@ -26,6 +26,12 @@ defmodule SData do :crypto.hash(algo, bin) end + def file_hash(path, algo \\ :sha256) do + File.stream!(path, [], 65536) + |> Enum.reduce(:crypto.hash_init(algo), &(:crypto.hash_update(&2, &1))) + |> :crypto.hash_final() + end + def term_unbin(bin) do :erlang.binary_to_term(bin, [:safe]) end diff --git a/shard/lib/data/merkletree.ex b/shard/lib/data/merkletree.ex new file mode 100644 index 0000000..90361a3 --- /dev/null +++ b/shard/lib/data/merkletree.ex @@ -0,0 +1,93 @@ +defmodule SData.MerkleTree do + @moduledoc""" + A Merkle tree structure for storing metadata for a big file. + """ + + alias SData.PageStore, as: Store + + @block_size 4096 + @tree_arity 64 + + defstruct [:root, :store] + + defmodule Page do + defstruct [:nblk, :child_nblk, :list] + + defimpl SData.Page do + def refs(page) do + if page.child_nblk == 1 do + [] + else + page.list + end + end + end + end + + @doc""" + Create a Merkle tree for indexing a file. + """ + def create(file, store \\ SData.LocalStore.new()) do + %File.Stat{size: size} = File.stat!(file) + nblk = div(size, @block_size) + (if rem(size, @block_size) == 0 do 0 else 1 end) + fh = File.open!(file, [:binary, :read]) + create_file_aux(fh, store, 0, nblk, @tree_arity) + end + + defp create_file_aux(fh, store, first_blk, nblk, divc) do + cond do + divc < nblk -> + create_file_aux(fh, store, first_blk, nblk, divc * @tree_arity) + divc == @tree_arity and nblk <= divc -> + hashes = for i <- first_blk .. (first_blk + nblk - 1) do + {:ok, blk} = :file.pread(fh, i * @block_size, @block_size) + :crypto.hash(:sha256, blk) + end + page = %Page{nblk: nblk, child_nblk: 1, list: hashes} + {hash, store} = Store.put(store, page) + %__MODULE__{root: hash, store: store} + divc > @tree_arity and nblk <= divc -> + sub_divc = div(divc, @tree_arity) + n_sub_minus_one = div(nblk, sub_divc) + {sub_hashes, store} = Enum.reduce(0..n_sub_minus_one, {[], store}, fn i, {sh, store} -> + sub_first = first_blk + i * sub_divc + sub_n = (if i == n_sub_minus_one do rem(nblk, sub_divc) else sub_divc end) + %__MODULE__{root: hash, store: store} = create_file_aux(fh, store, sub_first, sub_n, sub_divc) + {[hash | sh], store} + end) + page = %Page{nblk: nblk, child_nblk: sub_divc, list: Enum.reverse(sub_hashes)} + {hash, store} = Store.put(store, page) + %__MODULE__{root: hash, store: store} + end + end + + @doc""" + Get the number of blocks in the tree + """ + def block_count(mt) do + %Page{nblk: nblk} = Store.get(mt.store, mt.root) + nblk + end + + @doc""" + Get the hash of block number i + """ + def get(mt, i) do + %Page{child_nblk: cn, list: list} = Store.get(mt.store, mt.root) + if cn == 1 do + Enum.fetch!(list, i) + else + pos = div(i, cn) + subtree = %{mt | root: Enum.fetch!(list, pos)} + subpos = rem(i, cn) + get(subtree, subpos) + end + end + + @doc""" + Get the hashes of all blocks in a range + """ + def get_range(mt, range) do + range |> Enum.map(&(get(mt, &1))) # TODO: do this efficiently + end +end |