about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alex Auvolat <alex@adnab.me>, 2020-04-22 19:25:15 +0000
committer: Alex Auvolat <alex@adnab.me>, 2020-04-22 19:25:15 +0000
commit: 231cb32955080557b05c7dde7d7adee664457e0e (patch)
tree: 612030d7dbfff10376033be13232cda9e7fc1113
parent: 8971f34c816c42a0eb2bbcead5ac7f05854ddfb6 (diff)
download: garage-231cb32955080557b05c7dde7d7adee664457e0e.tar.gz
          garage-231cb32955080557b05c7dde7d7adee664457e0e.zip
Do not delete block if just a single replication error. Write TODO stuff.
-rw-r--r--  TODO          34
-rw-r--r--  src/block.rs  20
2 files changed, 36 insertions, 18 deletions
diff --git a/TODO b/TODO
index 1b5f466d..a8ac6a49 100644
--- a/TODO
+++ b/TODO
@@ -1,18 +1,36 @@
-Replication
------------
-
-Finish the thing that sends blocks to other nodes if needed before deleting them locally.
+Testing
+-------
How are we going to test that our replication method works correctly?
We will have to introduce lots of dummy data and then add/remove nodes many times.
-Repair:
-- re-propagate block ref table to rc
+
+Improvements
+------------
+
+Membership: keep IP addresses of failed nodes and try to reping them regularly
+
+RPC client/server: do not go through the serialization+HTTP+TLS+deserialization when doing a request to ourself.
+
+RPC requests: unify quorum + timeout in a "RequestStrategy" class,
+and add to the request strategy whether or not the request should continue in the background
+once `quorum` valid responses have been received
+
+
+Attaining S3 compatibility
+--------------------------
+
+- table for access keys
+- S3 request signature verification
+- api_server following the S3 semantics for get/put/delete
+- implement object listing
+- possibly other necessary endpoints ?
-To do list
-----------
+Lower priority
+--------------
- less a priority: hinted handoff
+- repair: re-propagate block ref table to rc
- FIXME in rpc_server when garage shuts down and futures can be interrupted
(tokio::spawn should be replaced by a new function background::spawn_joinable)
diff --git a/src/block.rs b/src/block.rs
index 46abcf02..6c785f89 100644
--- a/src/block.rs
+++ b/src/block.rs
@@ -278,25 +278,25 @@ impl BlockManager {
let who_needs = join_all(who_needs_fut).await;
let mut need_nodes = vec![];
- let mut errors = 0;
for (node, needed) in who.into_iter().zip(who_needs.iter()) {
match needed {
Ok(Message::NeedBlockReply(true)) => {
need_nodes.push(node);
}
- Err(_) => {
- errors += 1;
+ Err(e) => {
+ return Err(Error::Message(format!(
+ "Should delete block, but unable to confirm that all other nodes that need it have it: {}",
+ e
+ )));
+ }
+ _ => {
+ return Err(Error::Message(format!(
+ "Unexpected response to NeedBlockQuery RPC"
+ )));
}
- _ => (),
}
}
- if errors > (garage.system.config.data_replication_factor - 1) / 2 {
- return Err(Error::Message(format!(
- "Should delete block, but not enough nodes confirm that they have it."
- )));
- }
-
if need_nodes.len() > 0 {
let put_block_message = self.read_block(hash).await?;
let put_responses = self