diff options
Diffstat (limited to 'shard/lib')
-rw-r--r-- | shard/lib/app/file.ex | 114 | ||||
-rw-r--r-- | shard/lib/data/data.ex | 6 | ||||
-rw-r--r-- | shard/lib/data/merkletree.ex | 93 | ||||
-rw-r--r-- | shard/lib/net/manager.ex | 2 |
4 files changed, 215 insertions, 0 deletions
diff --git a/shard/lib/app/file.ex b/shard/lib/app/file.ex new file mode 100644 index 0000000..a874222 --- /dev/null +++ b/shard/lib/app/file.ex @@ -0,0 +1,114 @@ +defmodule SApp.File do + @moduledoc""" + Shard application for a file identified by its infohash. The file cannot be modified. + + The infohash is the hash of an info struct containing: + + %Info{ + merkle_root: hash + file_hash: hash + size: int + mime_type: string + } + + The file is cut in blocks of 4kb that are collected in a 64-ary Merkle tree. + """ + + use GenServer + + require Logger + + alias SData.MerkleTree, as: MT + + defmodule Manifest do + @moduledoc""" + Manifest for a file. + The file is identified by the root hash of its Merkle tree and by its mime type. + """ + defstruct [:infohash] + + defimpl Shard.Manifest do + def module(_m), do: SApp.File + def is_valid?(m) do + byte_size(m.infohash) == 32 + end + end + end + + defmodule Info do + @moduledoc""" + A file info struct. + """ + defstruct [:merkle_root, :file_hash, :size, :mime_type] + end + + defmodule State do + defstruct [:infohash, :id, :manifest, :netgroup, :info, :infobin, :store, :missing, :path] + end + + def start_link(manifest) do + GenServer.start_link(__MODULE__, manifest) + end + + def init(manifest) do + %Manifest{infohash: infohash} = manifest + id = SData.term_hash manifest + + Shard.Manager.dispatch_to(id, nil, self()) + {infobin, info} = case Shard.Manager.load_state(id) do + nil -> {nil, nil} + infobin -> {infobin, SData.term_unbin infobin} + end + netgroup = %SNet.PubShardGroup{id: id} + SNet.Group.init_lookup(netgroup, self()) + + path = [Application.get_env(:shard, :data_path), "#{id|>Base.encode16}"] |> Path.join + + {:ok, store} = SApp.PageStore.start_link(id, :meta, netgroup) + + {:ok, %State{ + id: id, infohash: infohash, manifest: manifest, netgroup: netgroup, + infobin: infobin, info: info, store: store, missing: nil, path: path + }} + end + + def handle_cast({:init_with, file_path, infobin, mt}, state) do + info = SData.term_unbin(infobin) + for {k, v} <- mt.store do + {^k, _} = SData.PageStore.put(state.store, v) + end + File.copy!(file_path, state.path) + new_state = %{state | + infobin: infobin, + info: info, + } + {:noreply, new_state} + end + + # TODO networking etc + + # ========= + # INTERFACE + # ========= + + @doc""" + Create a File shard from a file path + """ + def create(path, mime_type) do + %File.Stat{size: size} = File.stat!(path) + mt = MT.create(path) + hash = SData.file_hash(path) + + info = %Info{ + merkle_root: mt.root, + file_hash: hash, + size: size, + mime_type: mime_type, + } + infobin = SData.term_bin(info) + infohash = SData.bin_hash(infobin) + manifest = %Manifest{infohash: infohash} + pid = Shard.Manager.find_or_start(manifest) + GenServer.cast(pid, {:init_with, path, infobin, mt}) + end +end diff --git a/shard/lib/data/data.ex b/shard/lib/data/data.ex index 78c73cd..33dca09 100644 --- a/shard/lib/data/data.ex +++ b/shard/lib/data/data.ex @@ -26,6 +26,12 @@ defmodule SData do :crypto.hash(algo, bin) end + def file_hash(path, algo \\ :sha256) do + File.stream!(path, [], 65536) + |> Enum.reduce(:crypto.hash_init(algo), &(:crypto.hash_update(&2, &1))) + |> :crypto.hash_final() + end + def term_unbin(bin) do :erlang.binary_to_term(bin, [:safe]) end diff --git a/shard/lib/data/merkletree.ex b/shard/lib/data/merkletree.ex new file mode 100644 index 0000000..90361a3 --- /dev/null +++ b/shard/lib/data/merkletree.ex @@ -0,0 +1,93 @@ +defmodule SData.MerkleTree do + @moduledoc""" + A Merkle tree structure for storing metadata for a big file. + """ + + alias SData.PageStore, as: Store + + @block_size 4096 + @tree_arity 64 + + defstruct [:root, :store] + + defmodule Page do + defstruct [:nblk, :child_nblk, :list] + + defimpl SData.Page do + def refs(page) do + if page.child_nblk == 1 do + [] + else + page.list + end + end + end + end + + @doc""" + Create a Merkle tree for indexing a file. + """ + def create(file, store \\ SData.LocalStore.new()) do + %File.Stat{size: size} = File.stat!(file) + nblk = div(size, @block_size) + (if rem(size, @block_size) == 0 do 0 else 1 end) + fh = File.open!(file, [:binary, :read]) + create_file_aux(fh, store, 0, nblk, @tree_arity) + end + + defp create_file_aux(fh, store, first_blk, nblk, divc) do + cond do + divc < nblk -> + create_file_aux(fh, store, first_blk, nblk, divc * @tree_arity) + divc == @tree_arity and nblk <= divc -> + hashes = for i <- first_blk .. (first_blk + nblk - 1) do + {:ok, blk} = :file.pread(fh, i * @block_size, @block_size) + :crypto.hash(:sha256, blk) + end + page = %Page{nblk: nblk, child_nblk: 1, list: hashes} + {hash, store} = Store.put(store, page) + %__MODULE__{root: hash, store: store} + divc > @tree_arity and nblk <= divc -> + sub_divc = div(divc, @tree_arity) + n_sub_minus_one = div(nblk, sub_divc) + {sub_hashes, store} = Enum.reduce(0..n_sub_minus_one, {[], store}, fn i, {sh, store} -> + sub_first = first_blk + i * sub_divc + sub_n = (if i == n_sub_minus_one do rem(nblk, sub_divc) else sub_divc end) + %__MODULE__{root: hash, store: store} = create_file_aux(fh, store, sub_first, sub_n, sub_divc) + {[hash | sh], store} + end) + page = %Page{nblk: nblk, child_nblk: sub_divc, list: Enum.reverse(sub_hashes)} + {hash, store} = Store.put(store, page) + %__MODULE__{root: hash, store: store} + end + end + + @doc""" + Get the number of blocks in the tree + """ + def block_count(mt) do + %Page{nblk: nblk} = Store.get(mt.store, mt.root) + nblk + end + + @doc""" + Get the hash of block number i + """ + def get(mt, i) do + %Page{child_nblk: cn, list: list} = Store.get(mt.store, mt.root) + if cn == 1 do + Enum.fetch!(list, i) + else + pos = div(i, cn) + subtree = %{mt | root: Enum.fetch!(list, pos)} + subpos = rem(i, cn) + get(subtree, subpos) + end + end + + @doc""" + Get the hashes of all blocks in a range + """ + def get_range(mt, range) do + range |> Enum.map(&(get(mt, &1))) # TODO: do this efficiently + end +end diff --git a/shard/lib/net/manager.ex b/shard/lib/net/manager.ex index 759c5f0..fb92f13 100644 --- a/shard/lib/net/manager.ex +++ b/shard/lib/net/manager.ex @@ -90,6 +90,8 @@ defmodule SNet.Manager do @doc""" Connect to a peer specified by ip address and port + + peer_info := {:inet, ip, port} """ def add_peer(peer_info, opts \\ []) do GenServer.call(__MODULE__, {:add_peer, peer_info, opts[:auth], opts[:callback]}) |