aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Alex Auvolat <alex@adnab.me> 2022-09-01 16:30:44 +0200
committer Alex Auvolat <alex@adnab.me> 2022-09-01 16:30:44 +0200
commit df094bd8075332bb765b8b44c9b19cf2485e9ca8 (patch)
tree 3cc838ad263c10960903b8b865e356d14eef9f60 /src
parent f3bf34b6a18c547c5fb29346787648048c093d52 (diff)
download garage-df094bd8075332bb765b8b44c9b19cf2485e9ca8.tar.gz
garage-df094bd8075332bb765b8b44c9b19cf2485e9ca8.zip
Less strict timeouts
Diffstat (limited to 'src')
-rw-r--r-- src/block/manager.rs 8
-rw-r--r-- src/rpc/rpc_helper.rs 2
-rw-r--r-- src/rpc/system.rs 6
-rw-r--r-- src/table/gc.rs 3
-rw-r--r-- src/table/sync.rs 3
-rw-r--r-- src/table/table.rs 2
6 files changed, 15 insertions, 9 deletions
diff --git a/src/block/manager.rs b/src/block/manager.rs
index b9f6fc0f..00438648 100644
--- a/src/block/manager.rs
+++ b/src/block/manager.rs
@@ -48,10 +48,14 @@ use crate::repair::*;
pub const INLINE_THRESHOLD: usize = 3072;
// Timeout for RPCs that read and write blocks to remote nodes
-const BLOCK_RW_TIMEOUT: Duration = Duration::from_secs(30);
+const BLOCK_RW_TIMEOUT: Duration = Duration::from_secs(60);
// Timeout for RPCs that ask other nodes whether they need a copy
// of a given block before we delete it locally
-const NEED_BLOCK_QUERY_TIMEOUT: Duration = Duration::from_secs(5);
+// The timeout here is relatively low because we don't want to block
+// the entire resync loop when some nodes are not responding.
+// Nothing will be deleted if the nodes don't answer the queries,
+// we will just retry later.
+const NEED_BLOCK_QUERY_TIMEOUT: Duration = Duration::from_secs(15);
// The delay between the time where a resync operation fails
// and the time when it is retried, with exponential backoff
diff --git a/src/rpc/rpc_helper.rs b/src/rpc/rpc_helper.rs
index 6c79c502..e9575261 100644
--- a/src/rpc/rpc_helper.rs
+++ b/src/rpc/rpc_helper.rs
@@ -31,7 +31,7 @@ use garage_util::metrics::RecordDuration;
use crate::metrics::RpcMetrics;
use crate::ring::Ring;
-const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
+const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
// Don't allow more than 100 concurrent outgoing RPCs.
const MAX_CONCURRENT_REQUESTS: usize = 100;
diff --git a/src/rpc/system.rs b/src/rpc/system.rs
index 5858660e..d7ef2140 100644
--- a/src/rpc/system.rs
+++ b/src/rpc/system.rs
@@ -38,7 +38,7 @@ use crate::rpc_helper::*;
const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60);
const STATUS_EXCHANGE_INTERVAL: Duration = Duration::from_secs(10);
-const PING_TIMEOUT: Duration = Duration::from_secs(2);
+const SYSTEM_RPC_TIMEOUT: Duration = Duration::from_secs(15);
/// Version tag used for version check upon Netapp connection
pub const GARAGE_VERSION_TAG: u64 = 0x6761726167650007; // garage 0x0007
@@ -561,7 +561,7 @@ impl System {
.broadcast(
&self.system_endpoint,
SystemRpc::AdvertiseStatus(local_status),
- RequestStrategy::with_priority(PRIO_HIGH).with_timeout(PING_TIMEOUT),
+ RequestStrategy::with_priority(PRIO_HIGH).with_timeout(SYSTEM_RPC_TIMEOUT),
)
.await;
@@ -685,7 +685,7 @@ impl System {
&self.system_endpoint,
peer,
SystemRpc::PullClusterLayout,
- RequestStrategy::with_priority(PRIO_HIGH).with_timeout(PING_TIMEOUT),
+ RequestStrategy::with_priority(PRIO_HIGH).with_timeout(SYSTEM_RPC_TIMEOUT),
)
.await;
if let Ok(SystemRpc::AdvertiseClusterLayout(layout)) = resp {
diff --git a/src/table/gc.rs b/src/table/gc.rs
index 12218d97..6cae9701 100644
--- a/src/table/gc.rs
+++ b/src/table/gc.rs
@@ -25,7 +25,8 @@ use crate::replication::*;
use crate::schema::*;
const TABLE_GC_BATCH_SIZE: usize = 1024;
-const TABLE_GC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
+// Same timeout as NEED_BLOCK_QUERY_TIMEOUT in block manager
+const TABLE_GC_RPC_TIMEOUT: Duration = Duration::from_secs(15);
// GC delay for table entries: 1 day (24 hours)
// (the delay before the entry is added in the GC todo list
diff --git a/src/table/sync.rs b/src/table/sync.rs
index b3756a5e..62b88a58 100644
--- a/src/table/sync.rs
+++ b/src/table/sync.rs
@@ -24,7 +24,8 @@ use crate::merkle::*;
use crate::replication::*;
use crate::*;
-const TABLE_SYNC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
+// Sync RPC can contain a lot of data, so have a 1min timeout
+const TABLE_SYNC_RPC_TIMEOUT: Duration = Duration::from_secs(60);
// Do anti-entropy every 10 minutes
const ANTI_ENTROPY_INTERVAL: Duration = Duration::from_secs(10 * 60);
diff --git a/src/table/table.rs b/src/table/table.rs
index 3c211728..51f3837f 100644
--- a/src/table/table.rs
+++ b/src/table/table.rs
@@ -31,7 +31,7 @@ use crate::schema::*;
use crate::sync::*;
use crate::util::*;
-pub const TABLE_RPC_TIMEOUT: Duration = Duration::from_secs(10);
+pub const TABLE_RPC_TIMEOUT: Duration = Duration::from_secs(30);
pub struct Table<F: TableSchema + 'static, R: TableReplication + 'static> {
pub system: Arc<System>,