diff options
author | Alex Auvolat <alex@adnab.me> | 2020-04-22 19:25:15 +0000 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2020-04-22 19:25:15 +0000 |
commit | 231cb32955080557b05c7dde7d7adee664457e0e (patch) | |
tree | 612030d7dbfff10376033be13232cda9e7fc1113 | |
parent | 8971f34c816c42a0eb2bbcead5ac7f05854ddfb6 (diff) | |
download | garage-231cb32955080557b05c7dde7d7adee664457e0e.tar.gz garage-231cb32955080557b05c7dde7d7adee664457e0e.zip |
Do not delete block if just a single replication error. Write TODO stuff.
-rw-r--r-- | TODO | 34 | ||||
-rw-r--r-- | src/block.rs | 20 |
2 files changed, 36 insertions, 18 deletions
```diff
@@ -1,18 +1,36 @@
-Replication
------------
-
-Finish the thing that sends blocks to other nodes if needed before deleting them locally.
+Testing
+-------
 
 How are we going to test that our replication method works correctly?
 We will have to introduce lots of dummy data and then add/remove nodes many times.
 
-Repair:
-- re-propagate block ref table to rc
+
+Improvements
+------------
+
+Membership: keep IP addresses of failed nodes and try to reping them regularly
+
+RPC client/server: do not go through the serialization+HTTP+TLS+deserialization when doing a request to ourself.
+
+RPC requests: unify quorum + timeout in a "RequestStrategy" class,
+and add to the request strategy whether or not the request should continue in the background
+once `quorum` valid responses have been received
+
+
+Attaining S3 compatibility
+--------------------------
+
+- table for access keys
+- S3 request signature verification
+- api_server following the S3 semantics for get/put/delete
+- implement object listing
+- possibly other necessary endpoints ?
 
 
-To do list
-----------
+Lower priority
+--------------
 
 - less a priority: hinted handoff
+- repair: re-propagate block ref table to rc
 - FIXME in rpc_server when garage shuts down and futures can be interrupted
   (tokio::spawn should be replaced by a new function background::spawn_joinable)
diff --git a/src/block.rs b/src/block.rs
index 46abcf02..6c785f89 100644
--- a/src/block.rs
+++ b/src/block.rs
@@ -278,25 +278,25 @@ impl BlockManager {
             let who_needs = join_all(who_needs_fut).await;
 
             let mut need_nodes = vec![];
-            let mut errors = 0;
             for (node, needed) in who.into_iter().zip(who_needs.iter()) {
                 match needed {
                     Ok(Message::NeedBlockReply(true)) => {
                         need_nodes.push(node);
                     }
-                    Err(_) => {
-                        errors += 1;
+                    Err(e) => {
+                        return Err(Error::Message(format!(
+                            "Should delete block, but unable to confirm that all other nodes that need it have it: {}",
+                            e
+                        )));
+                    }
+                    _ => {
+                        return Err(Error::Message(format!(
+                            "Unexpected response to NeedBlockQuery RPC"
+                        )));
                     }
-                    _ => (),
                 }
             }
 
-            if errors > (garage.system.config.data_replication_factor - 1) / 2 {
-                return Err(Error::Message(format!(
-                    "Should delete block, but not enough nodes confirm that they have it."
-                )));
-            }
-
             if need_nodes.len() > 0 {
                 let put_block_message = self.read_block(hash).await?;
                 let put_responses = self
```