aboutsummaryrefslogtreecommitdiff
path: root/shard/lib/data
diff options
context:
space:
mode:
authorAlex Auvolat <alex@adnab.me>2018-11-02 16:26:59 +0100
committerAlex Auvolat <alex@adnab.me>2018-11-02 16:26:59 +0100
commita26dd9284352000cca6338b68c03594dcd900494 (patch)
treeb51c1a9ba734d0fba9a7d4a97df4ddca85dafbca /shard/lib/data
parent353769402b6fd2ca4ea1807c2733e161a768f85e (diff)
downloadshard-a26dd9284352000cca6338b68c03594dcd900494.tar.gz
shard-a26dd9284352000cca6338b68c03594dcd900494.zip
WIP for file upload (Merkle tree for signatures)
Diffstat (limited to 'shard/lib/data')
-rw-r--r--shard/lib/data/data.ex6
-rw-r--r--shard/lib/data/merkletree.ex93
2 files changed, 99 insertions, 0 deletions
diff --git a/shard/lib/data/data.ex b/shard/lib/data/data.ex
index 78c73cd..33dca09 100644
--- a/shard/lib/data/data.ex
+++ b/shard/lib/data/data.ex
@@ -26,6 +26,12 @@ defmodule SData do
:crypto.hash(algo, bin)
end
+ def file_hash(path, algo \\ :sha256) do
+ File.stream!(path, [], 65536)
+ |> Enum.reduce(:crypto.hash_init(algo), &(:crypto.hash_update(&2, &1)))
+ |> :crypto.hash_final()
+ end
+
def term_unbin(bin) do
:erlang.binary_to_term(bin, [:safe])
end
diff --git a/shard/lib/data/merkletree.ex b/shard/lib/data/merkletree.ex
new file mode 100644
index 0000000..90361a3
--- /dev/null
+++ b/shard/lib/data/merkletree.ex
@@ -0,0 +1,93 @@
+defmodule SData.MerkleTree do
+ @moduledoc"""
+ A Merkle tree structure for storing metadata for a big file.
+ """
+
+ alias SData.PageStore, as: Store
+
+ @block_size 4096
+ @tree_arity 64
+
+ defstruct [:root, :store]
+
+ defmodule Page do
+ defstruct [:nblk, :child_nblk, :list]
+
+ defimpl SData.Page do
+ def refs(page) do
+ if page.child_nblk == 1 do
+ []
+ else
+ page.list
+ end
+ end
+ end
+ end
+
+ @doc"""
+ Create a Merkle tree for indexing a file.
+ """
+ def create(file, store \\ SData.LocalStore.new()) do
+ %File.Stat{size: size} = File.stat!(file)
+ nblk = div(size, @block_size) + (if rem(size, @block_size) == 0 do 0 else 1 end)
+ fh = File.open!(file, [:binary, :read])
+ create_file_aux(fh, store, 0, nblk, @tree_arity)
+ end
+
+ defp create_file_aux(fh, store, first_blk, nblk, divc) do
+ cond do
+ divc < nblk ->
+ create_file_aux(fh, store, first_blk, nblk, divc * @tree_arity)
+ divc == @tree_arity and nblk <= divc ->
+ hashes = for i <- first_blk .. (first_blk + nblk - 1) do
+ {:ok, blk} = :file.pread(fh, i * @block_size, @block_size)
+ :crypto.hash(:sha256, blk)
+ end
+ page = %Page{nblk: nblk, child_nblk: 1, list: hashes}
+ {hash, store} = Store.put(store, page)
+ %__MODULE__{root: hash, store: store}
+ divc > @tree_arity and nblk <= divc ->
+ sub_divc = div(divc, @tree_arity)
+ n_sub_minus_one = div(nblk, sub_divc)
+ {sub_hashes, store} = Enum.reduce(0..n_sub_minus_one, {[], store}, fn i, {sh, store} ->
+ sub_first = first_blk + i * sub_divc
+ sub_n = (if i == n_sub_minus_one do rem(nblk, sub_divc) else sub_divc end)
+ %__MODULE__{root: hash, store: store} = create_file_aux(fh, store, sub_first, sub_n, sub_divc)
+ {[hash | sh], store}
+ end)
+ page = %Page{nblk: nblk, child_nblk: sub_divc, list: Enum.reverse(sub_hashes)}
+ {hash, store} = Store.put(store, page)
+ %__MODULE__{root: hash, store: store}
+ end
+ end
+
+ @doc"""
+ Get the number of blocks in the tree
+ """
+ def block_count(mt) do
+ %Page{nblk: nblk} = Store.get(mt.store, mt.root)
+ nblk
+ end
+
+ @doc"""
+ Get the hash of block number i
+ """
+ def get(mt, i) do
+ %Page{child_nblk: cn, list: list} = Store.get(mt.store, mt.root)
+ if cn == 1 do
+ Enum.fetch!(list, i)
+ else
+ pos = div(i, cn)
+ subtree = %{mt | root: Enum.fetch!(list, pos)}
+ subpos = rem(i, cn)
+ get(subtree, subpos)
+ end
+ end
+
+ @doc"""
+ Get the hashes of all blocks in a range
+ """
+ def get_range(mt, range) do
+ range |> Enum.map(&(get(mt, &1))) # TODO: do this efficiently
+ end
+end