author     Alex Auvolat <alex@adnab.me>   2021-10-14 11:50:12 +0200
committer  Alex Auvolat <alex@adnab.me>   2021-10-19 23:38:38 +0200
commit     e6da0dc90098a7c830e14f6f0dce61e8e7132d3a (patch)
tree       2d4375e390435d0552417726e58223a49536775c /src
parent     dc017a0cab40cb2f33a01b420bb1b04038abb875 (diff)
download   garage-e6da0dc90098a7c830e14f6f0dce61e8e7132d3a.tar.gz, garage-e6da0dc90098a7c830e14f6f0dce61e8e7132d3a.zip
First port of Garage to Netapp
Diffstat (limited to 'src')
-rw-r--r-- | src/garage/Cargo.toml | 4
-rw-r--r-- | src/garage/admin_rpc.rs | 80
-rw-r--r-- | src/garage/cli.rs | 168
-rw-r--r-- | src/garage/main.rs | 71
-rw-r--r-- | src/garage/server.rs | 87
-rw-r--r-- | src/model/Cargo.toml | 3
-rw-r--r-- | src/model/block.rs | 109
-rw-r--r-- | src/model/garage.rs | 43
-rw-r--r-- | src/rpc/Cargo.toml | 12
-rw-r--r-- | src/rpc/lib.rs | 8
-rw-r--r-- | src/rpc/membership.rs | 722
-rw-r--r-- | src/rpc/ring.rs | 11
-rw-r--r-- | src/rpc/rpc_client.rs | 369
-rw-r--r-- | src/rpc/rpc_helper.rs | 206
-rw-r--r-- | src/rpc/rpc_server.rs | 247
-rw-r--r-- | src/rpc/system.rs | 363
-rw-r--r-- | src/rpc/tls_util.rs | 140
-rw-r--r-- | src/table/Cargo.toml | 2
-rw-r--r-- | src/table/data.rs | 2
-rw-r--r-- | src/table/gc.rs | 80
-rw-r--r-- | src/table/replication/fullcopy.rs | 13
-rw-r--r-- | src/table/replication/parameters.rs | 6
-rw-r--r-- | src/table/replication/sharded.rs | 7
-rw-r--r-- | src/table/sync.rs | 97
-rw-r--r-- | src/table/table.rs | 110
-rw-r--r-- | src/util/Cargo.toml | 3
-rw-r--r-- | src/util/config.rs | 63
-rw-r--r-- | src/util/error.rs | 11
28 files changed, 1082 insertions, 1955 deletions
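For orientation before the hunks below: the core of this commit is that the old `RpcClient`/`RpcServer` machinery (HTTP+TLS, per-request timeouts, `RpcMessage`) is replaced by netapp endpoints. The following sketch condenses the new pattern as it appears in the `admin_rpc.rs` and `block.rs` hunks; the `ExampleRpc`/`ExampleHandler` names and the endpoint path are illustrative only, and exact netapp signatures should be checked against the crate rather than taken from this sketch.

```rust
use std::sync::Arc;

use async_trait::async_trait;
use serde::{Deserialize, Serialize};

use garage_rpc::*;
use garage_util::error::Error;
use netapp::NetApp;

/// Illustrative RPC message set (not part of the commit).
#[derive(Debug, Serialize, Deserialize)]
pub enum ExampleRpc {
    Ping,
    Ok(String),
    Error(String),
}

// Each RPC enum now declares its response type, replacing `impl RpcMessage for ...`.
impl Message for ExampleRpc {
    type Response = ExampleRpc;
}

pub struct ExampleHandler {
    endpoint: Arc<Endpoint<ExampleRpc, Self>>,
}

impl ExampleHandler {
    // Endpoints are obtained from the shared NetApp instance and the handler
    // registers itself, replacing the old rpc_server.add_handler() closures.
    pub fn new(netapp: Arc<NetApp>) -> Arc<Self> {
        let endpoint = netapp.endpoint("garage_doc/example.rs/Rpc".into());
        let handler = Arc::new(Self { endpoint });
        handler.endpoint.set_handler(handler.clone());
        handler
    }

    // Outgoing calls address peers by NodeID with a priority, instead of
    // RpcAddrClient + SocketAddr + explicit timeout.
    pub async fn ping(&self, peer: NodeID) -> Result<(), Error> {
        match self
            .endpoint
            .call(&peer, &ExampleRpc::Ping, PRIO_NORMAL)
            .await?
        {
            ExampleRpc::Ok(msg) => println!("pong: {}", msg),
            resp => return Err(Error::Message(format!("Unexpected reply: {:?}", resp))),
        }
        Ok(())
    }
}

// Incoming requests are dispatched through EndpointHandler; errors are folded
// into an Error variant of the message enum, as done for AdminRpc and BlockRpc below.
#[async_trait]
impl EndpointHandler<ExampleRpc> for ExampleHandler {
    async fn handle(self: &Arc<Self>, message: &ExampleRpc, _from: NodeID) -> ExampleRpc {
        match message {
            ExampleRpc::Ping => ExampleRpc::Ok("pong".to_string()),
            _ => ExampleRpc::Error("Unexpected RPC message".to_string()),
        }
    }
}
```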
diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml index 09ed3e1e..032d1cf6 100644 --- a/src/garage/Cargo.toml +++ b/src/garage/Cargo.toml @@ -27,6 +27,8 @@ hex = "0.4" log = "0.4" pretty_env_logger = "0.4" rand = "0.8" +async-trait = "0.1.7" +sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" } sled = "0.34" @@ -38,3 +40,5 @@ toml = "0.5" futures = "0.3" futures-util = "0.3" tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } + +netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" } diff --git a/src/garage/admin_rpc.rs b/src/garage/admin_rpc.rs index fe5a9d88..b9e57c40 100644 --- a/src/garage/admin_rpc.rs +++ b/src/garage/admin_rpc.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use std::fmt::Write; use std::sync::Arc; +use async_trait::async_trait; use serde::{Deserialize, Serialize}; use garage_util::error::Error; @@ -10,8 +11,7 @@ use garage_table::crdt::Crdt; use garage_table::replication::*; use garage_table::*; -use garage_rpc::rpc_client::*; -use garage_rpc::rpc_server::*; +use garage_rpc::*; use garage_model::bucket_table::*; use garage_model::garage::Garage; @@ -19,10 +19,8 @@ use garage_model::key_table::*; use crate::cli::*; use crate::repair::Repair; -use crate::*; -pub const ADMIN_RPC_TIMEOUT: Duration = Duration::from_secs(30); -pub const ADMIN_RPC_PATH: &str = "_admin"; +pub const ADMIN_RPC_PATH: &str = "garage/admin_rpc.rs/Rpc"; #[derive(Debug, Serialize, Deserialize)] pub enum AdminRpc { @@ -33,41 +31,31 @@ pub enum AdminRpc { // Replies Ok(String), + Error(String), BucketList(Vec<String>), BucketInfo(Bucket), KeyList(Vec<(String, String)>), KeyInfo(Key), } -impl RpcMessage for AdminRpc {} +impl Message for AdminRpc { + type Response = AdminRpc; +} pub struct AdminRpcHandler { garage: Arc<Garage>, - rpc_client: Arc<RpcClient<AdminRpc>>, + endpoint: Arc<Endpoint<AdminRpc, Self>>, } impl AdminRpcHandler { pub fn new(garage: Arc<Garage>) -> Arc<Self> { - let rpc_client = garage.system.clone().rpc_client::<AdminRpc>(ADMIN_RPC_PATH); - Arc::new(Self { garage, rpc_client }) - } - - pub fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer) { - rpc_server.add_handler::<AdminRpc, _, _>(ADMIN_RPC_PATH.to_string(), move |msg, _addr| { - let self2 = self.clone(); - async move { - match msg { - AdminRpc::BucketOperation(bo) => self2.handle_bucket_cmd(bo).await, - AdminRpc::KeyOperation(ko) => self2.handle_key_cmd(ko).await, - AdminRpc::LaunchRepair(opt) => self2.handle_launch_repair(opt).await, - AdminRpc::Stats(opt) => self2.handle_stats(opt).await, - _ => Err(Error::BadRpc("Invalid RPC".to_string())), - } - } - }); + let endpoint = garage.system.netapp.endpoint(ADMIN_RPC_PATH.into()); + let admin = Arc::new(Self { garage, endpoint }); + admin.endpoint.set_handler(admin.clone()); + admin } - async fn handle_bucket_cmd(&self, cmd: BucketOperation) -> Result<AdminRpc, Error> { + async fn handle_bucket_cmd(&self, cmd: &BucketOperation) -> Result<AdminRpc, Error> { match cmd { BucketOperation::List => { let bucket_names = self @@ -187,7 +175,7 @@ impl AdminRpcHandler { } } - async fn handle_key_cmd(&self, cmd: KeyOperation) -> Result<AdminRpc, Error> { + async fn handle_key_cmd(&self, cmd: &KeyOperation) -> Result<AdminRpc, Error> { match cmd { KeyOperation::List => { let key_ids = self @@ -210,13 +198,13 @@ impl AdminRpcHandler { Ok(AdminRpc::KeyInfo(key)) } KeyOperation::New(query) => { - let key = Key::new(query.name); + let key 
= Key::new(query.name.clone()); self.garage.key_table.insert(&key).await?; Ok(AdminRpc::KeyInfo(key)) } KeyOperation::Rename(query) => { let mut key = self.get_existing_key(&query.key_pattern).await?; - key.name.update(query.new_name); + key.name.update(query.new_name.clone()); self.garage.key_table.insert(&key).await?; Ok(AdminRpc::KeyInfo(key)) } @@ -353,17 +341,18 @@ impl AdminRpcHandler { let mut failures = vec![]; let ring = self.garage.system.ring.borrow().clone(); for node in ring.config.members.keys() { + let node = NodeID::from_slice(node.as_slice()).unwrap(); if self - .rpc_client + .endpoint .call( - *node, - AdminRpc::LaunchRepair(opt_to_send.clone()), - ADMIN_RPC_TIMEOUT, + &node, + &AdminRpc::LaunchRepair(opt_to_send.clone()), + PRIO_NORMAL, ) .await .is_err() { - failures.push(*node); + failures.push(node); } } if failures.is_empty() { @@ -397,14 +386,16 @@ impl AdminRpcHandler { let ring = self.garage.system.ring.borrow().clone(); for node in ring.config.members.keys() { + let node = NodeID::from_slice(node.as_slice()).unwrap(); + let mut opt = opt.clone(); opt.all_nodes = false; writeln!(&mut ret, "\n======================").unwrap(); writeln!(&mut ret, "Stats for node {:?}:", node).unwrap(); match self - .rpc_client - .call(*node, AdminRpc::Stats(opt), ADMIN_RPC_TIMEOUT) + .endpoint + .call(&node, &AdminRpc::Stats(opt), PRIO_NORMAL) .await { Ok(AdminRpc::Ok(s)) => writeln!(&mut ret, "{}", s).unwrap(), @@ -495,4 +486,23 @@ impl AdminRpcHandler { .unwrap(); writeln!(to, " GC todo queue length: {}", t.data.gc_todo_len()).unwrap(); } + + async fn handle_rpc(self: &Arc<Self>, msg: &AdminRpc) -> Result<AdminRpc, Error> { + match msg { + AdminRpc::BucketOperation(bo) => self.handle_bucket_cmd(bo).await, + AdminRpc::KeyOperation(ko) => self.handle_key_cmd(ko).await, + AdminRpc::LaunchRepair(opt) => self.handle_launch_repair(opt.clone()).await, + AdminRpc::Stats(opt) => self.handle_stats(opt.clone()).await, + _ => Err(Error::BadRpc("Invalid RPC".to_string())), + } + } +} + +#[async_trait] +impl EndpointHandler<AdminRpc> for AdminRpcHandler { + async fn handle(self: &Arc<Self>, message: &AdminRpc, _from: NodeID) -> AdminRpc { + self.handle_rpc(message) + .await + .unwrap_or_else(|e| AdminRpc::Error(format!("{}", e))) + } } diff --git a/src/garage/cli.rs b/src/garage/cli.rs index f9e67fac..91ec5ab2 100644 --- a/src/garage/cli.rs +++ b/src/garage/cli.rs @@ -1,6 +1,5 @@ -use std::cmp::max; -use std::collections::HashSet; -use std::net::SocketAddr; +//use std::cmp::max; +//use std::collections::HashSet; use std::path::PathBuf; use serde::{Deserialize, Serialize}; @@ -8,11 +7,11 @@ use structopt::StructOpt; use garage_util::data::Uuid; use garage_util::error::Error; -use garage_util::time::*; +//use garage_util::time::*; -use garage_rpc::membership::*; use garage_rpc::ring::*; -use garage_rpc::rpc_client::*; +use garage_rpc::system::*; +use garage_rpc::*; use garage_model::bucket_table::*; use garage_model::key_table::*; @@ -298,54 +297,65 @@ pub struct StatsOpt { pub async fn cli_cmd( cmd: Command, - membership_rpc_cli: RpcAddrClient<Message>, - admin_rpc_cli: RpcAddrClient<AdminRpc>, - rpc_host: SocketAddr, + system_rpc_endpoint: &Endpoint<SystemRpc, ()>, + admin_rpc_endpoint: &Endpoint<AdminRpc, ()>, + rpc_host: NodeID, ) -> Result<(), Error> { match cmd { - Command::Status => cmd_status(membership_rpc_cli, rpc_host).await, + Command::Status => cmd_status(system_rpc_endpoint, rpc_host).await, Command::Node(NodeOperation::Configure(configure_opt)) => { - 
cmd_configure(membership_rpc_cli, rpc_host, configure_opt).await + cmd_configure(system_rpc_endpoint, rpc_host, configure_opt).await } Command::Node(NodeOperation::Remove(remove_opt)) => { - cmd_remove(membership_rpc_cli, rpc_host, remove_opt).await + cmd_remove(system_rpc_endpoint, rpc_host, remove_opt).await } Command::Bucket(bo) => { - cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::BucketOperation(bo)).await + cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::BucketOperation(bo)).await } - Command::Key(ko) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::KeyOperation(ko)).await, - Command::Repair(ro) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::LaunchRepair(ro)).await, - Command::Stats(so) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::Stats(so)).await, + Command::Key(ko) => { + cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::KeyOperation(ko)).await + } + Command::Repair(ro) => { + cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::LaunchRepair(ro)).await + } + Command::Stats(so) => cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::Stats(so)).await, _ => unreachable!(), } } -pub async fn cmd_status( - rpc_cli: RpcAddrClient<Message>, - rpc_host: SocketAddr, -) -> Result<(), Error> { +pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) -> Result<(), Error> { let status = match rpc_cli - .call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT) - .await?? + .call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL) + .await? { - Message::AdvertiseNodesUp(nodes) => nodes, + SystemRpc::ReturnKnownNodes(nodes) => nodes, resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), }; let config = match rpc_cli - .call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT) - .await?? + .call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL) + .await? 
{ - Message::AdvertiseConfig(cfg) => cfg, + SystemRpc::AdvertiseConfig(cfg) => cfg, resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), }; + println!("STATUS:"); + for node in status { + println!("{:?}", node); + } + println!("CONFIG: (v{})", config.version); + for (id, node) in config.members { + println!("{} {:?}", hex::encode(id.as_slice()), node); + } + + /* TODO let (hostname_len, addr_len, tag_len, zone_len) = status .iter() - .map(|adv| (adv, config.members.get(&adv.id))) - .map(|(adv, cfg)| { + .map(|(id, addr, _)| (addr, config.members.get(&adv.id))) + .map(|(addr, cfg)| { ( - adv.state_info.hostname.len(), - adv.addr.to_string().len(), + 8, + addr.to_string().len(), cfg.map(|c| c.tag.len()).unwrap_or(0), cfg.map(|c| c.zone.len()).unwrap_or(0), ) @@ -355,13 +365,13 @@ pub async fn cmd_status( }); println!("Healthy nodes:"); - for adv in status.iter().filter(|x| x.is_up) { + for (id, addr, _) in status.iter().filter(|(id, addr, is_up)| is_up) { if let Some(cfg) = config.members.get(&adv.id) { println!( "{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}", - id = adv.id, - host = adv.state_info.hostname, - addr = adv.addr, + id = id, + host = "", + addr = addr, tag = cfg.tag, zone = cfg.zone, capacity = cfg.capacity_string(), @@ -373,36 +383,36 @@ pub async fn cmd_status( } else { println!( "{id:?}\t{h}{h_pad}\t{addr}{a_pad}\tUNCONFIGURED/REMOVED", - id = adv.id, - h = adv.state_info.hostname, - addr = adv.addr, - h_pad = " ".repeat(hostname_len - adv.state_info.hostname.len()), - a_pad = " ".repeat(addr_len - adv.addr.to_string().len()), + id = id, + h = "", + addr = addr, + h_pad = " ".repeat(hostname_len - "".len()), + a_pad = " ".repeat(addr_len - addr.to_string().len()), ); } } - let status_keys = status.iter().map(|x| x.id).collect::<HashSet<_>>(); - let failure_case_1 = status.iter().any(|x| !x.is_up); + let status_keys = status.iter().map(|(id, _, _)| id).collect::<HashSet<_>>(); + let failure_case_1 = status.iter().any(|(_, _, is_up)| !is_up); let failure_case_2 = config .members .iter() .any(|(id, _)| !status_keys.contains(id)); if failure_case_1 || failure_case_2 { println!("\nFailed nodes:"); - for adv in status.iter().filter(|x| !x.is_up) { - if let Some(cfg) = config.members.get(&adv.id) { + for (id, addr) in status.iter().filter(|(_, _, is_up)| !is_up) { + if let Some(cfg) = config.members.get(&id) { println!( "{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}\tlast seen: {last_seen}s ago", - id=adv.id, - host=adv.state_info.hostname, - addr=adv.addr, + id=id, + host="", + addr=addr, tag=cfg.tag, zone=cfg.zone, capacity=cfg.capacity_string(), - last_seen=(now_msec() - adv.last_seen) / 1000, - h_pad=" ".repeat(hostname_len - adv.state_info.hostname.len()), - a_pad=" ".repeat(addr_len - adv.addr.to_string().len()), + last_seen=(now_msec() - 0) / 1000, + h_pad=" ".repeat(hostname_len - "".len()), + a_pad=" ".repeat(addr_len - addr.to_string().len()), t_pad=" ".repeat(tag_len - cfg.tag.len()), z_pad=" ".repeat(zone_len - cfg.zone.len()), ); @@ -411,12 +421,12 @@ pub async fn cmd_status( let (tag_len, zone_len) = config .members .iter() - .filter(|(&id, _)| !status.iter().any(|x| x.id == id)) + .filter(|(&id, _)| !status.iter().any(|(xid, _, _)| xid == id)) .map(|(_, cfg)| (cfg.tag.len(), cfg.zone.len())) .fold((0, 0), |(t, z), (mt, mz)| (max(t, mt), max(z, mz))); for (id, cfg) in config.members.iter() { - if !status.iter().any(|x| x.id == *id) { + if !status.iter().any(|(xid, _, _)| 
xid == *id) { println!( "{id:?}\t{tag}{t_pad}\t{zone}{z_pad}\t{capacity}\tnever seen", id = id, @@ -429,6 +439,7 @@ pub async fn cmd_status( } } } + */ Ok(()) } @@ -455,25 +466,30 @@ pub fn find_matching_node( } pub async fn cmd_configure( - rpc_cli: RpcAddrClient<Message>, - rpc_host: SocketAddr, + rpc_cli: &Endpoint<SystemRpc, ()>, + rpc_host: NodeID, args: ConfigureNodeOpt, ) -> Result<(), Error> { let status = match rpc_cli - .call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT) - .await?? + .call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL) + .await? { - Message::AdvertiseNodesUp(nodes) => nodes, + SystemRpc::ReturnKnownNodes(nodes) => nodes, resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), }; - let added_node = find_matching_node(status.iter().map(|x| x.id), &args.node_id)?; + let added_node = find_matching_node( + status + .iter() + .map(|(id, _, _)| Uuid::try_from(id.as_ref()).unwrap()), + &args.node_id, + )?; let mut config = match rpc_cli - .call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT) - .await?? + .call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL) + .await? { - Message::AdvertiseConfig(cfg) => cfg, + SystemRpc::AdvertiseConfig(cfg) => cfg, resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), }; @@ -527,25 +543,21 @@ pub async fn cmd_configure( config.version += 1; rpc_cli - .call( - &rpc_host, - &Message::AdvertiseConfig(config), - ADMIN_RPC_TIMEOUT, - ) - .await??; + .call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL) + .await?; Ok(()) } pub async fn cmd_remove( - rpc_cli: RpcAddrClient<Message>, - rpc_host: SocketAddr, + rpc_cli: &Endpoint<SystemRpc, ()>, + rpc_host: NodeID, args: RemoveNodeOpt, ) -> Result<(), Error> { let mut config = match rpc_cli - .call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT) - .await?? + .call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL) + .await? { - Message::AdvertiseConfig(cfg) => cfg, + SystemRpc::AdvertiseConfig(cfg) => cfg, resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), }; @@ -562,21 +574,17 @@ pub async fn cmd_remove( config.version += 1; rpc_cli - .call( - &rpc_host, - &Message::AdvertiseConfig(config), - ADMIN_RPC_TIMEOUT, - ) - .await??; + .call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL) + .await?; Ok(()) } pub async fn cmd_admin( - rpc_cli: RpcAddrClient<AdminRpc>, - rpc_host: SocketAddr, + rpc_cli: &Endpoint<AdminRpc, ()>, + rpc_host: NodeID, args: AdminRpc, ) -> Result<(), Error> { - match rpc_cli.call(&rpc_host, args, ADMIN_RPC_TIMEOUT).await?? { + match rpc_cli.call(&rpc_host, &args, PRIO_NORMAL).await? 
{ AdminRpc::Ok(msg) => { println!("{}", msg); } diff --git a/src/garage/main.rs b/src/garage/main.rs index 66828cba..7fe791b8 100644 --- a/src/garage/main.rs +++ b/src/garage/main.rs @@ -10,16 +10,16 @@ mod repair; mod server; use std::net::SocketAddr; -use std::sync::Arc; -use std::time::Duration; use structopt::StructOpt; -use garage_util::config::TlsConfig; +use netapp::util::parse_peer_addr; +use netapp::NetworkKey; + use garage_util::error::Error; -use garage_rpc::membership::*; -use garage_rpc::rpc_client::*; +use garage_rpc::system::*; +use garage_rpc::*; use admin_rpc::*; use cli::*; @@ -27,16 +27,14 @@ use cli::*; #[derive(StructOpt, Debug)] #[structopt(name = "garage")] struct Opt { - /// RPC connect to this host to execute client operations - #[structopt(short = "h", long = "rpc-host", default_value = "127.0.0.1:3901", parse(try_from_str = parse_address))] - pub rpc_host: SocketAddr, + /// Host to connect to for admin operations, in the format: + /// <public-key>@<ip>:<port> + #[structopt(short = "h", long = "rpc-host")] + pub rpc_host: Option<String>, - #[structopt(long = "ca-cert")] - pub ca_cert: Option<String>, - #[structopt(long = "client-cert")] - pub client_cert: Option<String>, - #[structopt(long = "client-key")] - pub client_key: Option<String>, + /// RPC secret network key for admin operations + #[structopt(short = "s", long = "rpc-secret")] + pub rpc_secret: Option<String>, #[structopt(subcommand)] cmd: Command, @@ -66,33 +64,20 @@ async fn main() { } async fn cli_command(opt: Opt) -> Result<(), Error> { - let tls_config = match (opt.ca_cert, opt.client_cert, opt.client_key) { - (Some(ca_cert), Some(client_cert), Some(client_key)) => Some(TlsConfig { - ca_cert, - node_cert: client_cert, - node_key: client_key, - }), - (None, None, None) => None, - _ => { - warn!("Missing one of: --ca-cert, --node-cert, --node-key. Not using TLS."); - None - } - }; - - let rpc_http_cli = - Arc::new(RpcHttpClient::new(8, &tls_config).expect("Could not create RPC client")); - let membership_rpc_cli = - RpcAddrClient::new(rpc_http_cli.clone(), MEMBERSHIP_RPC_PATH.to_string()); - let admin_rpc_cli = RpcAddrClient::new(rpc_http_cli.clone(), ADMIN_RPC_PATH.to_string()); - - cli_cmd(opt.cmd, membership_rpc_cli, admin_rpc_cli, opt.rpc_host).await -} - -fn parse_address(address: &str) -> Result<SocketAddr, String> { - use std::net::ToSocketAddrs; - address - .to_socket_addrs() - .map_err(|_| format!("Could not resolve {}", address))? 
- .next() - .ok_or_else(|| format!("Could not resolve {}", address)) + let net_key_hex_str = &opt.rpc_secret.expect("No RPC secret provided"); + let network_key = NetworkKey::from_slice( + &hex::decode(net_key_hex_str).expect("Invalid RPC secret key (bad hex)")[..], + ) + .expect("Invalid RPC secret provided (wrong length)"); + let (_pk, sk) = sodiumoxide::crypto::sign::ed25519::gen_keypair(); + + let netapp = NetApp::new(network_key, sk); + let (id, addr) = + parse_peer_addr(&opt.rpc_host.expect("No RPC host provided")).expect("Invalid RPC host"); + netapp.clone().try_connect(addr, id).await?; + + let system_rpc_endpoint = netapp.endpoint::<SystemRpc, ()>(SYSTEM_RPC_PATH.into()); + let admin_rpc_endpoint = netapp.endpoint::<AdminRpc, ()>(ADMIN_RPC_PATH.into()); + + cli_cmd(opt.cmd, &system_rpc_endpoint, &admin_rpc_endpoint, id).await } diff --git a/src/garage/server.rs b/src/garage/server.rs index 36f7de5c..0edf3e2d 100644 --- a/src/garage/server.rs +++ b/src/garage/server.rs @@ -1,7 +1,5 @@ use std::path::PathBuf; -use std::sync::Arc; -use futures_util::future::*; use tokio::sync::watch; use garage_util::background::*; @@ -10,21 +8,10 @@ use garage_util::error::Error; use garage_api::run_api_server; use garage_model::garage::Garage; -use garage_rpc::rpc_server::RpcServer; use garage_web::run_web_server; use crate::admin_rpc::*; -async fn shutdown_signal(send_cancel: watch::Sender<bool>) -> Result<(), Error> { - // Wait for the CTRL+C signal - tokio::signal::ctrl_c() - .await - .expect("failed to install CTRL+C signal handler"); - info!("Received CTRL+C, shutting down."); - send_cancel.send(true)?; - Ok(()) -} - async fn wait_from(mut chan: watch::Receiver<bool>) { while !*chan.borrow() { if chan.changed().await.is_err() { @@ -47,52 +34,46 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { .open() .expect("Unable to open sled DB"); - info!("Initialize RPC server..."); - let mut rpc_server = RpcServer::new(config.rpc_bind_addr, config.rpc_tls.clone()); - info!("Initializing background runner..."); - let (send_cancel, watch_cancel) = watch::channel(false); + let watch_cancel = netapp::util::watch_ctrl_c(); let (background, await_background_done) = BackgroundRunner::new(16, watch_cancel.clone()); info!("Initializing Garage main data store..."); - let garage = Garage::new(config.clone(), db, background, &mut rpc_server); - let bootstrap = garage.system.clone().bootstrap( - config.bootstrap_peers, - config.consul_host, - config.consul_service_name, - ); + let garage = Garage::new(config.clone(), db, background); + + let run_system = tokio::spawn(garage.system.clone().run(watch_cancel.clone())); info!("Crate admin RPC handler..."); - AdminRpcHandler::new(garage.clone()).register_handler(&mut rpc_server); - - info!("Initializing RPC and API servers..."); - let run_rpc_server = Arc::new(rpc_server).run(wait_from(watch_cancel.clone())); - let api_server = run_api_server(garage.clone(), wait_from(watch_cancel.clone())); - let web_server = run_web_server(garage, wait_from(watch_cancel.clone())); - - futures::try_join!( - bootstrap.map(|()| { - info!("Bootstrap done"); - Ok(()) - }), - run_rpc_server.map(|rv| { - info!("RPC server exited"); - rv - }), - api_server.map(|rv| { - info!("API server exited"); - rv - }), - web_server.map(|rv| { - info!("Web server exited"); - rv - }), - await_background_done.map(|rv| { - info!("Background runner exited: {:?}", rv); - Ok(()) - }), - shutdown_signal(send_cancel), - )?; + AdminRpcHandler::new(garage.clone()); + + info!("Initializing API 
server..."); + let api_server = tokio::spawn(run_api_server( + garage.clone(), + wait_from(watch_cancel.clone()), + )); + + info!("Initializing web server..."); + let web_server = tokio::spawn(run_web_server( + garage.clone(), + wait_from(watch_cancel.clone()), + )); + + // Stuff runs + + // When a cancel signal is sent, stuff stops + if let Err(e) = api_server.await? { + warn!("API server exited with error: {}", e); + } + if let Err(e) = web_server.await? { + warn!("Web server exited with error: {}", e); + } + + // Remove RPC handlers for system to break reference cycles + garage.system.netapp.drop_all_handlers(); + + // Await for last parts to end + run_system.await?; + await_background_done.await?; info!("Cleaning up..."); diff --git a/src/model/Cargo.toml b/src/model/Cargo.toml index 4d5d7f9d..a9ae5edf 100644 --- a/src/model/Cargo.toml +++ b/src/model/Cargo.toml @@ -17,6 +17,7 @@ garage_rpc = { version = "0.3.0", path = "../rpc" } garage_table = { version = "0.3.0", path = "../table" } garage_util = { version = "0.3.0", path = "../util" } +async-trait = "0.1.7" arc-swap = "1.0" hex = "0.4" log = "0.4" @@ -31,3 +32,5 @@ serde_bytes = "0.11" futures = "0.3" futures-util = "0.3" tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } + +netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" } diff --git a/src/model/block.rs b/src/model/block.rs index 348f0711..5574b7f6 100644 --- a/src/model/block.rs +++ b/src/model/block.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use std::time::Duration; use arc_swap::ArcSwapOption; +use async_trait::async_trait; use futures::future::*; use futures::select; use serde::{Deserialize, Serialize}; @@ -14,9 +15,8 @@ use garage_util::data::*; use garage_util::error::Error; use garage_util::time::*; -use garage_rpc::membership::System; -use garage_rpc::rpc_client::*; -use garage_rpc::rpc_server::*; +use garage_rpc::system::System; +use garage_rpc::*; use garage_table::replication::{TableReplication, TableShardedReplication}; @@ -36,8 +36,9 @@ const RESYNC_RETRY_TIMEOUT: Duration = Duration::from_secs(10); /// RPC messages used to share blocks of data between nodes #[derive(Debug, Serialize, Deserialize)] -pub enum Message { +pub enum BlockRpc { Ok, + Error(String), /// Message to ask for a block of data, by hash GetBlock(Hash), /// Message to send a block of data, either because requested, of for first delivery of new @@ -60,7 +61,9 @@ pub struct PutBlockMessage { pub data: Vec<u8>, } -impl RpcMessage for Message {} +impl Message for BlockRpc { + type Response = BlockRpc; +} /// The block manager, handling block exchange between nodes, and block storage on local node pub struct BlockManager { @@ -77,7 +80,7 @@ pub struct BlockManager { resync_notify: Notify, system: Arc<System>, - rpc_client: Arc<RpcClient<Message>>, + endpoint: Arc<Endpoint<BlockRpc, Self>>, pub(crate) garage: ArcSwapOption<Garage>, } @@ -87,7 +90,6 @@ impl BlockManager { data_dir: PathBuf, replication: TableShardedReplication, system: Arc<System>, - rpc_server: &mut RpcServer, ) -> Arc<Self> { let rc = db .open_tree("block_local_rc") @@ -97,8 +99,7 @@ impl BlockManager { .open_tree("block_local_resync_queue") .expect("Unable to open block_local_resync_queue tree"); - let rpc_path = "block_manager"; - let rpc_client = system.rpc_client::<Message>(rpc_path); + let endpoint = system.netapp.endpoint(format!("garage_model/block.rs/Rpc")); let block_manager = Arc::new(Self { replication, 
@@ -108,35 +109,19 @@ impl BlockManager { resync_queue, resync_notify: Notify::new(), system, - rpc_client, + endpoint, garage: ArcSwapOption::from(None), }); - block_manager - .clone() - .register_handler(rpc_server, rpc_path.into()); - block_manager - } - - fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) { - let self2 = self.clone(); - rpc_server.add_handler::<Message, _, _>(path, move |msg, _addr| { - let self2 = self2.clone(); - async move { self2.handle(&msg).await } - }); + block_manager.endpoint.set_handler(block_manager.clone()); - let self2 = self.clone(); - self.rpc_client - .set_local_handler(self.system.id, move |msg| { - let self2 = self2.clone(); - async move { self2.handle(&msg).await } - }); + block_manager } - async fn handle(self: Arc<Self>, msg: &Message) -> Result<Message, Error> { + async fn handle_rpc(self: Arc<Self>, msg: &BlockRpc) -> Result<BlockRpc, Error> { match msg { - Message::PutBlock(m) => self.write_block(&m.hash, &m.data).await, - Message::GetBlock(h) => self.read_block(h).await, - Message::NeedBlockQuery(h) => self.need_block(h).await.map(Message::NeedBlockReply), + BlockRpc::PutBlock(m) => self.write_block(&m.hash, &m.data).await, + BlockRpc::GetBlock(h) => self.read_block(h).await, + BlockRpc::NeedBlockQuery(h) => self.need_block(h).await.map(BlockRpc::NeedBlockReply), _ => Err(Error::BadRpc("Unexpected RPC message".to_string())), } } @@ -157,7 +142,7 @@ impl BlockManager { } /// Write a block to disk - async fn write_block(&self, hash: &Hash, data: &[u8]) -> Result<Message, Error> { + async fn write_block(&self, hash: &Hash, data: &[u8]) -> Result<BlockRpc, Error> { let _lock = self.data_dir_lock.lock().await; let mut path = self.block_dir(hash); @@ -165,18 +150,18 @@ impl BlockManager { path.push(hex::encode(hash)); if fs::metadata(&path).await.is_ok() { - return Ok(Message::Ok); + return Ok(BlockRpc::Ok); } let mut f = fs::File::create(path).await?; f.write_all(data).await?; drop(f); - Ok(Message::Ok) + Ok(BlockRpc::Ok) } /// Read block from disk, verifying it's integrity - async fn read_block(&self, hash: &Hash) -> Result<Message, Error> { + async fn read_block(&self, hash: &Hash) -> Result<BlockRpc, Error> { let path = self.block_path(hash); let mut f = match fs::File::open(&path).await { @@ -204,7 +189,7 @@ impl BlockManager { return Err(Error::CorruptData(*hash)); } - Ok(Message::PutBlock(PutBlockMessage { hash: *hash, data })) + Ok(BlockRpc::PutBlock(PutBlockMessage { hash: *hash, data })) } /// Check if this node should have a block, but don't actually have it @@ -346,17 +331,22 @@ impl BlockManager { } who.retain(|id| *id != self.system.id); - let msg = Arc::new(Message::NeedBlockQuery(*hash)); + let msg = Arc::new(BlockRpc::NeedBlockQuery(*hash)); let who_needs_fut = who.iter().map(|to| { - self.rpc_client - .call_arc(*to, msg.clone(), NEED_BLOCK_QUERY_TIMEOUT) + self.system.rpc.call_arc( + &self.endpoint, + *to, + msg.clone(), + RequestStrategy::with_priority(PRIO_NORMAL) + .with_timeout(NEED_BLOCK_QUERY_TIMEOUT), + ) }); let who_needs_resps = join_all(who_needs_fut).await; let mut need_nodes = vec![]; for (node, needed) in who.iter().zip(who_needs_resps.into_iter()) { match needed? 
{ - Message::NeedBlockReply(needed) => { + BlockRpc::NeedBlockReply(needed) => { if needed { need_nodes.push(*node); } @@ -377,11 +367,14 @@ impl BlockManager { ); let put_block_message = self.read_block(hash).await?; - self.rpc_client + self.system + .rpc .try_call_many( + &self.endpoint, &need_nodes[..], put_block_message, - RequestStrategy::with_quorum(need_nodes.len()) + RequestStrategy::with_priority(PRIO_NORMAL) + .with_quorum(need_nodes.len()) .with_timeout(BLOCK_RW_TIMEOUT), ) .await?; @@ -413,18 +406,21 @@ impl BlockManager { pub async fn rpc_get_block(&self, hash: &Hash) -> Result<Vec<u8>, Error> { let who = self.replication.read_nodes(&hash); let resps = self - .rpc_client + .system + .rpc .try_call_many( + &self.endpoint, &who[..], - Message::GetBlock(*hash), - RequestStrategy::with_quorum(1) + BlockRpc::GetBlock(*hash), + RequestStrategy::with_priority(PRIO_NORMAL) + .with_quorum(1) .with_timeout(BLOCK_RW_TIMEOUT) .interrupt_after_quorum(true), ) .await?; for resp in resps { - if let Message::PutBlock(msg) = resp { + if let BlockRpc::PutBlock(msg) = resp { return Ok(msg.data); } } @@ -437,11 +433,14 @@ impl BlockManager { /// Send block to nodes that should have it pub async fn rpc_put_block(&self, hash: Hash, data: Vec<u8>) -> Result<(), Error> { let who = self.replication.write_nodes(&hash); - self.rpc_client + self.system + .rpc .try_call_many( + &self.endpoint, &who[..], - Message::PutBlock(PutBlockMessage { hash, data }), - RequestStrategy::with_quorum(self.replication.write_quorum()) + BlockRpc::PutBlock(PutBlockMessage { hash, data }), + RequestStrategy::with_priority(PRIO_NORMAL) + .with_quorum(self.replication.write_quorum()) .with_timeout(BLOCK_RW_TIMEOUT), ) .await?; @@ -531,6 +530,16 @@ impl BlockManager { } } +#[async_trait] +impl EndpointHandler<BlockRpc> for BlockManager { + async fn handle(self: &Arc<Self>, message: &BlockRpc, _from: NodeID) -> BlockRpc { + self.clone() + .handle_rpc(message) + .await + .unwrap_or_else(|e| BlockRpc::Error(format!("{}", e))) + } +} + fn u64_from_be_bytes<T: AsRef<[u8]>>(bytes: T) -> u64 { assert!(bytes.as_ref().len() == 8); let mut x8 = [0u8; 8]; diff --git a/src/model/garage.rs b/src/model/garage.rs index c3594934..d4ea6f55 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -1,11 +1,11 @@ use std::sync::Arc; +use netapp::NetworkKey; + use garage_util::background::*; use garage_util::config::*; -use garage_rpc::membership::System; -use garage_rpc::rpc_client::RpcHttpClient; -use garage_rpc::rpc_server::RpcServer; +use garage_rpc::system::System; use garage_table::replication::ReplicationMode; use garage_table::replication::TableFullReplication; @@ -45,26 +45,25 @@ pub struct Garage { impl Garage { /// Create and run garage - pub fn new( - config: Config, - db: sled::Db, - background: Arc<BackgroundRunner>, - rpc_server: &mut RpcServer, - ) -> Arc<Self> { + pub fn new(config: Config, db: sled::Db, background: Arc<BackgroundRunner>) -> Arc<Self> { + let network_key = NetworkKey::from_slice( + &hex::decode(&config.rpc_secret).expect("Invalid RPC secret key")[..], + ) + .expect("Invalid RPC secret key"); + let replication_mode = ReplicationMode::parse(&config.replication_mode) .expect("Invalid replication_mode in config file."); info!("Initialize membership management system..."); - let rpc_http_client = Arc::new( - RpcHttpClient::new(config.max_concurrent_rpc_requests, &config.rpc_tls) - .expect("Could not create RPC client"), - ); let system = System::new( + network_key, config.metadata_dir.clone(), - rpc_http_client, 
background.clone(), - rpc_server, replication_mode.replication_factor(), + config.rpc_bind_addr, + config.bootstrap_peers.clone(), + config.consul_host.clone(), + config.consul_service_name.clone(), ); let data_rep_param = TableShardedReplication { @@ -87,13 +86,8 @@ impl Garage { }; info!("Initialize block manager..."); - let block_manager = BlockManager::new( - &db, - config.data_dir.clone(), - data_rep_param, - system.clone(), - rpc_server, - ); + let block_manager = + BlockManager::new(&db, config.data_dir.clone(), data_rep_param, system.clone()); info!("Initialize block_ref_table..."); let block_ref_table = Table::new( @@ -104,7 +98,6 @@ impl Garage { system.clone(), &db, "block_ref".to_string(), - rpc_server, ); info!("Initialize version_table..."); @@ -117,7 +110,6 @@ impl Garage { system.clone(), &db, "version".to_string(), - rpc_server, ); info!("Initialize object_table..."); @@ -130,7 +122,6 @@ impl Garage { system.clone(), &db, "object".to_string(), - rpc_server, ); info!("Initialize bucket_table..."); @@ -140,7 +131,6 @@ impl Garage { system.clone(), &db, "bucket".to_string(), - rpc_server, ); info!("Initialize key_table_table..."); @@ -150,7 +140,6 @@ impl Garage { system.clone(), &db, "key".to_string(), - rpc_server, ); info!("Initialize Garage..."); diff --git a/src/rpc/Cargo.toml b/src/rpc/Cargo.toml index f1204cdf..1100c737 100644 --- a/src/rpc/Cargo.toml +++ b/src/rpc/Cargo.toml @@ -22,7 +22,10 @@ bytes = "1.0" gethostname = "0.2" hex = "0.4" log = "0.4" +rand = "0.8" +sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" } +async-trait = "0.1.7" rmp-serde = "0.15" serde = { version = "1.0", default-features = false, features = ["derive", "rc"] } serde_json = "1.0" @@ -32,11 +35,6 @@ futures-util = "0.3" tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } tokio-stream = { version = "0.1", features = ["net"] } -http = "0.2" -hyper = { version = "0.14", features = ["full"] } -hyper-rustls = { version = "0.22", default-features = false } -rustls = "0.19" -tokio-rustls = "0.22" -webpki = "0.21" - +netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" } +hyper = "0.14" diff --git a/src/rpc/lib.rs b/src/rpc/lib.rs index 96561d0e..ea3f1139 100644 --- a/src/rpc/lib.rs +++ b/src/rpc/lib.rs @@ -4,10 +4,10 @@ extern crate log; mod consul; -pub(crate) mod tls_util; -pub mod membership; pub mod ring; +pub mod system; -pub mod rpc_client; -pub mod rpc_server; +pub mod rpc_helper; + +pub use rpc_helper::*; diff --git a/src/rpc/membership.rs b/src/rpc/membership.rs deleted file mode 100644 index a77eeed3..00000000 --- a/src/rpc/membership.rs +++ /dev/null @@ -1,722 +0,0 @@ -//! 
Module containing structs related to membership management -use std::collections::HashMap; -use std::fmt::Write as FmtWrite; -use std::io::{Read, Write}; -use std::net::{IpAddr, SocketAddr}; -use std::path::{Path, PathBuf}; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; -use std::time::Duration; - -use futures::future::join_all; -use futures::select; -use futures_util::future::*; -use serde::{Deserialize, Serialize}; -use tokio::sync::watch; -use tokio::sync::Mutex; - -use garage_util::background::BackgroundRunner; -use garage_util::data::*; -use garage_util::error::Error; -use garage_util::persister::Persister; -use garage_util::time::*; - -use crate::consul::get_consul_nodes; -use crate::ring::*; -use crate::rpc_client::*; -use crate::rpc_server::*; - -const PING_INTERVAL: Duration = Duration::from_secs(10); -const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60); -const PING_TIMEOUT: Duration = Duration::from_secs(2); -const MAX_FAILURES_BEFORE_CONSIDERED_DOWN: usize = 5; - -/// RPC endpoint used for calls related to membership -pub const MEMBERSHIP_RPC_PATH: &str = "_membership"; - -/// RPC messages related to membership -#[derive(Debug, Serialize, Deserialize)] -pub enum Message { - /// Response to successfull advertisements - Ok, - /// Message sent to detect other nodes status - Ping(PingMessage), - /// Ask other node for the nodes it knows. Answered with AdvertiseNodesUp - PullStatus, - /// Ask other node its config. Answered with AdvertiseConfig - PullConfig, - /// Advertisement of nodes the host knows up. Sent spontanously or in response to PullStatus - AdvertiseNodesUp(Vec<AdvertisedNode>), - /// Advertisement of nodes config. Sent spontanously or in response to PullConfig - AdvertiseConfig(NetworkConfig), -} - -impl RpcMessage for Message {} - -/// A ping, containing informations about status and config -#[derive(Debug, Serialize, Deserialize)] -pub struct PingMessage { - id: Uuid, - rpc_port: u16, - - status_hash: Hash, - config_version: u64, - - state_info: StateInfo, -} - -/// A node advertisement -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct AdvertisedNode { - /// Id of the node this advertisement relates to - pub id: Uuid, - /// IP and port of the node - pub addr: SocketAddr, - - /// Is the node considered up - pub is_up: bool, - /// When was the node last seen up, in milliseconds since UNIX epoch - pub last_seen: u64, - - pub state_info: StateInfo, -} - -/// This node's membership manager -pub struct System { - /// The id of this node - pub id: Uuid, - - persist_config: Persister<NetworkConfig>, - persist_status: Persister<Vec<AdvertisedNode>>, - rpc_local_port: u16, - - state_info: StateInfo, - - rpc_http_client: Arc<RpcHttpClient>, - rpc_client: Arc<RpcClient<Message>>, - - replication_factor: usize, - pub(crate) status: watch::Receiver<Arc<Status>>, - /// The ring - pub ring: watch::Receiver<Arc<Ring>>, - - update_lock: Mutex<Updaters>, - - /// The job runner of this node - pub background: Arc<BackgroundRunner>, -} - -struct Updaters { - update_status: watch::Sender<Arc<Status>>, - update_ring: watch::Sender<Arc<Ring>>, -} - -/// The status of each nodes, viewed by this node -#[derive(Debug, Clone)] -pub struct Status { - /// Mapping of each node id to its known status - pub nodes: HashMap<Uuid, Arc<StatusEntry>>, - /// Hash of `nodes`, used to detect when nodes have different views of the cluster - pub hash: Hash, -} - -/// The status of a single node -#[derive(Debug)] -pub struct StatusEntry { - /// The IP and port used to 
connect to this node - pub addr: SocketAddr, - /// Last time this node was seen - pub last_seen: u64, - /// Number of consecutive pings sent without reply to this node - pub num_failures: AtomicUsize, - pub state_info: StateInfo, -} - -impl StatusEntry { - /// is the node associated to this entry considered up - pub fn is_up(&self) -> bool { - self.num_failures.load(Ordering::SeqCst) < MAX_FAILURES_BEFORE_CONSIDERED_DOWN - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct StateInfo { - /// Hostname of the node - pub hostname: String, - /// Replication factor configured on the node - pub replication_factor: Option<usize>, // TODO Option is just for retrocompatibility. It should become a simple usize at some point -} - -impl Status { - fn handle_ping(&mut self, ip: IpAddr, info: &PingMessage) -> bool { - let addr = SocketAddr::new(ip, info.rpc_port); - let old_status = self.nodes.insert( - info.id, - Arc::new(StatusEntry { - addr, - last_seen: now_msec(), - num_failures: AtomicUsize::from(0), - state_info: info.state_info.clone(), - }), - ); - match old_status { - None => { - info!("Newly pingable node: {}", hex::encode(&info.id)); - true - } - Some(x) => x.addr != addr, - } - } - - fn recalculate_hash(&mut self) { - let mut nodes = self.nodes.iter().collect::<Vec<_>>(); - nodes.sort_unstable_by_key(|(id, _status)| *id); - - let mut nodes_txt = String::new(); - debug!("Current set of pingable nodes: --"); - for (id, status) in nodes { - debug!("{} {}", hex::encode(&id), status.addr); - writeln!(&mut nodes_txt, "{} {}", hex::encode(&id), status.addr).unwrap(); - } - debug!("END --"); - self.hash = blake2sum(nodes_txt.as_bytes()); - } - - fn to_serializable_membership(&self, system: &System) -> Vec<AdvertisedNode> { - let mut mem = vec![]; - for (node, status) in self.nodes.iter() { - let state_info = if *node == system.id { - system.state_info.clone() - } else { - status.state_info.clone() - }; - mem.push(AdvertisedNode { - id: *node, - addr: status.addr, - is_up: status.is_up(), - last_seen: status.last_seen, - state_info, - }); - } - mem - } -} - -fn gen_node_id(metadata_dir: &Path) -> Result<Uuid, Error> { - let mut id_file = metadata_dir.to_path_buf(); - id_file.push("node_id"); - if id_file.as_path().exists() { - let mut f = std::fs::File::open(id_file.as_path())?; - let mut d = vec![]; - f.read_to_end(&mut d)?; - if d.len() != 32 { - return Err(Error::Message("Corrupt node_id file".to_string())); - } - - let mut id = [0u8; 32]; - id.copy_from_slice(&d[..]); - Ok(id.into()) - } else { - let id = gen_uuid(); - - let mut f = std::fs::File::create(id_file.as_path())?; - f.write_all(id.as_slice())?; - Ok(id) - } -} - -impl System { - /// Create this node's membership manager - pub fn new( - metadata_dir: PathBuf, - rpc_http_client: Arc<RpcHttpClient>, - background: Arc<BackgroundRunner>, - rpc_server: &mut RpcServer, - replication_factor: usize, - ) -> Arc<Self> { - let id = gen_node_id(&metadata_dir).expect("Unable to read or generate node ID"); - info!("Node ID: {}", hex::encode(&id)); - - let persist_config = Persister::new(&metadata_dir, "network_config"); - let persist_status = Persister::new(&metadata_dir, "peer_info"); - - let net_config = match persist_config.load() { - Ok(x) => x, - Err(e) => { - match Persister::<garage_rpc_021::ring::NetworkConfig>::new( - &metadata_dir, - "network_config", - ) - .load() - { - Ok(old_config) => NetworkConfig::migrate_from_021(old_config), - Err(e2) => { - info!( - "No valid previous network configuration stored ({}, {}), starting 
fresh.", - e, e2 - ); - NetworkConfig::new() - } - } - } - }; - - let mut status = Status { - nodes: HashMap::new(), - hash: Hash::default(), - }; - status.recalculate_hash(); - let (update_status, status) = watch::channel(Arc::new(status)); - - let state_info = StateInfo { - hostname: gethostname::gethostname() - .into_string() - .unwrap_or_else(|_| "<invalid utf-8>".to_string()), - replication_factor: Some(replication_factor), - }; - - let ring = Ring::new(net_config, replication_factor); - let (update_ring, ring) = watch::channel(Arc::new(ring)); - - let rpc_path = MEMBERSHIP_RPC_PATH.to_string(); - let rpc_client = RpcClient::new( - RpcAddrClient::<Message>::new(rpc_http_client.clone(), rpc_path.clone()), - background.clone(), - status.clone(), - ); - - let sys = Arc::new(System { - id, - persist_config, - persist_status, - rpc_local_port: rpc_server.bind_addr.port(), - state_info, - rpc_http_client, - rpc_client, - replication_factor, - status, - ring, - update_lock: Mutex::new(Updaters { - update_status, - update_ring, - }), - background, - }); - sys.clone().register_handler(rpc_server, rpc_path); - sys - } - - fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) { - rpc_server.add_handler::<Message, _, _>(path, move |msg, addr| { - let self2 = self.clone(); - async move { - match msg { - Message::Ping(ping) => self2.handle_ping(&addr, &ping).await, - - Message::PullStatus => Ok(self2.handle_pull_status()), - Message::PullConfig => Ok(self2.handle_pull_config()), - Message::AdvertiseNodesUp(adv) => self2.handle_advertise_nodes_up(&adv).await, - Message::AdvertiseConfig(adv) => self2.handle_advertise_config(&adv).await, - - _ => Err(Error::BadRpc("Unexpected RPC message".to_string())), - } - } - }); - } - - /// Get an RPC client - pub fn rpc_client<M: RpcMessage + 'static>(self: &Arc<Self>, path: &str) -> Arc<RpcClient<M>> { - RpcClient::new( - RpcAddrClient::new(self.rpc_http_client.clone(), path.to_string()), - self.background.clone(), - self.status.clone(), - ) - } - - /// Save network configuration to disc - async fn save_network_config(self: Arc<Self>) -> Result<(), Error> { - let ring = self.ring.borrow().clone(); - self.persist_config - .save_async(&ring.config) - .await - .expect("Cannot save current cluster configuration"); - Ok(()) - } - - fn make_ping(&self) -> Message { - let status = self.status.borrow().clone(); - let ring = self.ring.borrow().clone(); - Message::Ping(PingMessage { - id: self.id, - rpc_port: self.rpc_local_port, - status_hash: status.hash, - config_version: ring.config.version, - state_info: self.state_info.clone(), - }) - } - - async fn broadcast(self: Arc<Self>, msg: Message, timeout: Duration) { - let status = self.status.borrow().clone(); - let to = status - .nodes - .keys() - .filter(|x| **x != self.id) - .cloned() - .collect::<Vec<_>>(); - self.rpc_client.call_many(&to[..], msg, timeout).await; - } - - /// Perform bootstraping, starting the ping loop - pub async fn bootstrap( - self: Arc<Self>, - peers: Vec<SocketAddr>, - consul_host: Option<String>, - consul_service_name: Option<String>, - ) { - let self2 = self.clone(); - self.background - .spawn_worker("discovery loop".to_string(), |stop_signal| { - self2.discovery_loop(peers, consul_host, consul_service_name, stop_signal) - }); - - let self2 = self.clone(); - self.background - .spawn_worker("ping loop".to_string(), |stop_signal| { - self2.ping_loop(stop_signal) - }); - } - - async fn ping_nodes(self: Arc<Self>, peers: Vec<(SocketAddr, Option<Uuid>)>) { - let ping_msg = 
self.make_ping(); - let ping_resps = join_all(peers.iter().map(|(addr, id_option)| { - let sys = self.clone(); - let ping_msg_ref = &ping_msg; - async move { - ( - id_option, - addr, - sys.rpc_client - .by_addr() - .call(&addr, ping_msg_ref, PING_TIMEOUT) - .await, - ) - } - })) - .await; - - let update_locked = self.update_lock.lock().await; - let mut status: Status = self.status.borrow().as_ref().clone(); - let ring = self.ring.borrow().clone(); - - let mut has_changes = false; - let mut to_advertise = vec![]; - - for (id_option, addr, ping_resp) in ping_resps { - if let Ok(Ok(Message::Ping(info))) = ping_resp { - let is_new = status.handle_ping(addr.ip(), &info); - if is_new { - has_changes = true; - to_advertise.push(AdvertisedNode { - id: info.id, - addr: *addr, - is_up: true, - last_seen: now_msec(), - state_info: info.state_info.clone(), - }); - } - if is_new || status.hash != info.status_hash { - self.background - .spawn_cancellable(self.clone().pull_status(info.id).map(Ok)); - } - if is_new || ring.config.version < info.config_version { - self.background - .spawn_cancellable(self.clone().pull_config(info.id).map(Ok)); - } - } else if let Some(id) = id_option { - if let Some(st) = status.nodes.get_mut(id) { - // we need to increment failure counter as call was done using by_addr so the - // counter was not auto-incremented - st.num_failures.fetch_add(1, Ordering::SeqCst); - if !st.is_up() { - warn!("Node {:?} seems to be down.", id); - if !ring.config.members.contains_key(id) { - info!("Removing node {:?} from status (not in config and not responding to pings anymore)", id); - status.nodes.remove(&id); - has_changes = true; - } - } - } - } - } - if has_changes { - status.recalculate_hash(); - } - self.update_status(&update_locked, status).await; - drop(update_locked); - - if !to_advertise.is_empty() { - self.broadcast(Message::AdvertiseNodesUp(to_advertise), PING_TIMEOUT) - .await; - } - } - - async fn handle_ping( - self: Arc<Self>, - from: &SocketAddr, - ping: &PingMessage, - ) -> Result<Message, Error> { - let update_locked = self.update_lock.lock().await; - let mut status: Status = self.status.borrow().as_ref().clone(); - - let is_new = status.handle_ping(from.ip(), ping); - if is_new { - status.recalculate_hash(); - } - let status_hash = status.hash; - let config_version = self.ring.borrow().config.version; - - self.update_status(&update_locked, status).await; - drop(update_locked); - - if is_new || status_hash != ping.status_hash { - self.background - .spawn_cancellable(self.clone().pull_status(ping.id).map(Ok)); - } - if is_new || config_version < ping.config_version { - self.background - .spawn_cancellable(self.clone().pull_config(ping.id).map(Ok)); - } - - Ok(self.make_ping()) - } - - fn handle_pull_status(&self) -> Message { - Message::AdvertiseNodesUp(self.status.borrow().to_serializable_membership(self)) - } - - fn handle_pull_config(&self) -> Message { - let ring = self.ring.borrow().clone(); - Message::AdvertiseConfig(ring.config.clone()) - } - - async fn handle_advertise_nodes_up( - self: Arc<Self>, - adv: &[AdvertisedNode], - ) -> Result<Message, Error> { - let mut to_ping = vec![]; - - let update_lock = self.update_lock.lock().await; - let mut status: Status = self.status.borrow().as_ref().clone(); - let mut has_changed = false; - let mut max_replication_factor = 0; - - for node in adv.iter() { - if node.id == self.id { - // learn our own ip address - let self_addr = SocketAddr::new(node.addr.ip(), self.rpc_local_port); - let old_self = status.nodes.insert( - 
node.id, - Arc::new(StatusEntry { - addr: self_addr, - last_seen: now_msec(), - num_failures: AtomicUsize::from(0), - state_info: self.state_info.clone(), - }), - ); - has_changed = match old_self { - None => true, - Some(x) => x.addr != self_addr, - }; - } else { - let ping_them = match status.nodes.get(&node.id) { - // Case 1: new node - None => true, - // Case 2: the node might have changed address - Some(our_node) => node.is_up && !our_node.is_up() && our_node.addr != node.addr, - }; - max_replication_factor = std::cmp::max( - max_replication_factor, - node.state_info.replication_factor.unwrap_or_default(), - ); - if ping_them { - to_ping.push((node.addr, Some(node.id))); - } - } - } - - if self.replication_factor < max_replication_factor { - error!("Some node have a higher replication factor ({}) than this one ({}). This is not supported and might lead to bugs", - max_replication_factor, - self.replication_factor); - std::process::exit(1); - } - if has_changed { - status.recalculate_hash(); - } - self.update_status(&update_lock, status).await; - drop(update_lock); - - if !to_ping.is_empty() { - self.background - .spawn_cancellable(self.clone().ping_nodes(to_ping).map(Ok)); - } - - Ok(Message::Ok) - } - - async fn handle_advertise_config( - self: Arc<Self>, - adv: &NetworkConfig, - ) -> Result<Message, Error> { - let update_lock = self.update_lock.lock().await; - let ring: Arc<Ring> = self.ring.borrow().clone(); - - if adv.version > ring.config.version { - let ring = Ring::new(adv.clone(), self.replication_factor); - update_lock.update_ring.send(Arc::new(ring))?; - drop(update_lock); - - self.background.spawn_cancellable( - self.clone() - .broadcast(Message::AdvertiseConfig(adv.clone()), PING_TIMEOUT) - .map(Ok), - ); - self.background.spawn(self.clone().save_network_config()); - } - - Ok(Message::Ok) - } - - async fn ping_loop(self: Arc<Self>, mut stop_signal: watch::Receiver<bool>) { - while !*stop_signal.borrow() { - let restart_at = tokio::time::sleep(PING_INTERVAL); - - let status = self.status.borrow().clone(); - let ping_addrs = status - .nodes - .iter() - .filter(|(id, _)| **id != self.id) - .map(|(id, status)| (status.addr, Some(*id))) - .collect::<Vec<_>>(); - - self.clone().ping_nodes(ping_addrs).await; - - select! 
{ - _ = restart_at.fuse() => {}, - _ = stop_signal.changed().fuse() => {}, - } - } - } - - async fn discovery_loop( - self: Arc<Self>, - bootstrap_peers: Vec<SocketAddr>, - consul_host: Option<String>, - consul_service_name: Option<String>, - mut stop_signal: watch::Receiver<bool>, - ) { - let consul_config = match (consul_host, consul_service_name) { - (Some(ch), Some(csn)) => Some((ch, csn)), - _ => None, - }; - - while !*stop_signal.borrow() { - let not_configured = self.ring.borrow().config.members.is_empty(); - let no_peers = self.status.borrow().nodes.len() < 3; - let bad_peers = self - .status - .borrow() - .nodes - .iter() - .filter(|(_, v)| v.is_up()) - .count() != self.ring.borrow().config.members.len(); - - if not_configured || no_peers || bad_peers { - info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers); - - let mut ping_list = bootstrap_peers - .iter() - .map(|ip| (*ip, None)) - .collect::<Vec<_>>(); - - if let Ok(peers) = self.persist_status.load_async().await { - ping_list.extend(peers.iter().map(|x| (x.addr, Some(x.id)))); - } - - if let Some((consul_host, consul_service_name)) = &consul_config { - match get_consul_nodes(consul_host, consul_service_name).await { - Ok(node_list) => { - ping_list.extend(node_list.iter().map(|a| (*a, None))); - } - Err(e) => { - warn!("Could not retrieve node list from Consul: {}", e); - } - } - } - - self.clone().ping_nodes(ping_list).await; - } - - let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL); - select! { - _ = restart_at.fuse() => {}, - _ = stop_signal.changed().fuse() => {}, - } - } - } - - // for some reason fixing this is causing compilation error, see https://github.com/rust-lang/rust-clippy/issues/7052 - #[allow(clippy::manual_async_fn)] - fn pull_status( - self: Arc<Self>, - peer: Uuid, - ) -> impl futures::future::Future<Output = ()> + Send + 'static { - async move { - let resp = self - .rpc_client - .call(peer, Message::PullStatus, PING_TIMEOUT) - .await; - if let Ok(Message::AdvertiseNodesUp(nodes)) = resp { - let _: Result<_, _> = self.handle_advertise_nodes_up(&nodes).await; - } - } - } - - async fn pull_config(self: Arc<Self>, peer: Uuid) { - let resp = self - .rpc_client - .call(peer, Message::PullConfig, PING_TIMEOUT) - .await; - if let Ok(Message::AdvertiseConfig(config)) = resp { - let _: Result<_, _> = self.handle_advertise_config(&config).await; - } - } - - async fn update_status(self: &Arc<Self>, updaters: &Updaters, status: Status) { - if status.hash != self.status.borrow().hash { - let mut list = status.to_serializable_membership(&self); - - // Combine with old peer list to make sure no peer is lost - if let Ok(old_list) = self.persist_status.load_async().await { - for pp in old_list { - if !list.iter().any(|np| pp.id == np.id) { - list.push(pp); - } - } - } - - if !list.is_empty() { - info!("Persisting new peer list ({} peers)", list.len()); - self.persist_status - .save_async(&list) - .await - .expect("Unable to persist peer list"); - } - } - - updaters - .update_status - .send(Arc::new(status)) - .expect("Could not update internal membership status"); - } -} diff --git a/src/rpc/ring.rs b/src/rpc/ring.rs index 90db8fd2..7cbab762 100644 --- a/src/rpc/ring.rs +++ b/src/rpc/ring.rs @@ -3,6 +3,8 @@ use std::collections::{HashMap, HashSet}; use std::convert::TryInto; +use netapp::NodeID; + use serde::{Deserialize, Serialize}; use garage_util::data::*; @@ -98,7 +100,7 @@ pub struct Ring { pub config: NetworkConfig, // Internal order of 
nodes used to make a more compact representation of the ring - nodes: Vec<Uuid>, + nodes: Vec<NodeID>, // The list of entries in the ring ring: Vec<RingEntry>, @@ -260,6 +262,11 @@ impl Ring { }) .collect::<Vec<_>>(); + let nodes = nodes + .iter() + .map(|id| NodeID::from_slice(id.as_slice()).unwrap()) + .collect::<Vec<_>>(); + Self { replication_factor, config, @@ -291,7 +298,7 @@ impl Ring { } /// Walk the ring to find the n servers in which data should be replicated - pub fn get_nodes(&self, position: &Hash, n: usize) -> Vec<Uuid> { + pub fn get_nodes(&self, position: &Hash, n: usize) -> Vec<NodeID> { if self.ring.len() != 1 << PARTITION_BITS { warn!("Ring not yet ready, read/writes will be lost!"); return vec![]; diff --git a/src/rpc/rpc_client.rs b/src/rpc/rpc_client.rs deleted file mode 100644 index 806c7e69..00000000 --- a/src/rpc/rpc_client.rs +++ /dev/null @@ -1,369 +0,0 @@ -//! Contain structs related to making RPCs -use std::borrow::Borrow; -use std::marker::PhantomData; -use std::net::SocketAddr; -use std::pin::Pin; -use std::sync::atomic::Ordering; -use std::sync::Arc; -use std::time::Duration; - -use arc_swap::ArcSwapOption; -use futures::future::Future; -use futures::stream::futures_unordered::FuturesUnordered; -use futures::stream::StreamExt; -use futures_util::future::FutureExt; -use hyper::client::{Client, HttpConnector}; -use hyper::{Body, Method, Request}; -use tokio::sync::{watch, Semaphore}; - -use garage_util::background::BackgroundRunner; -use garage_util::config::TlsConfig; -use garage_util::data::*; -use garage_util::error::{Error, RpcError}; - -use crate::membership::Status; -use crate::rpc_server::RpcMessage; -use crate::tls_util; - -const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10); - -/// Strategy to apply when making RPC -#[derive(Copy, Clone)] -pub struct RequestStrategy { - /// Max time to wait for reponse - pub rs_timeout: Duration, - /// Min number of response to consider the request successful - pub rs_quorum: usize, - /// Should requests be dropped after enough response are received - pub rs_interrupt_after_quorum: bool, -} - -impl RequestStrategy { - /// Create a RequestStrategy with default timeout and not interrupting when quorum reached - pub fn with_quorum(quorum: usize) -> Self { - RequestStrategy { - rs_timeout: DEFAULT_TIMEOUT, - rs_quorum: quorum, - rs_interrupt_after_quorum: false, - } - } - /// Set timeout of the strategy - pub fn with_timeout(mut self, timeout: Duration) -> Self { - self.rs_timeout = timeout; - self - } - /// Set if requests can be dropped after quorum has been reached - /// In general true for read requests, and false for write - pub fn interrupt_after_quorum(mut self, interrupt: bool) -> Self { - self.rs_interrupt_after_quorum = interrupt; - self - } -} - -/// Shortcut for a boxed async function taking a message, and resolving to another message or an -/// error -pub type LocalHandlerFn<M> = - Box<dyn Fn(Arc<M>) -> Pin<Box<dyn Future<Output = Result<M, Error>> + Send>> + Send + Sync>; - -/// Client used to send RPC -pub struct RpcClient<M: RpcMessage> { - status: watch::Receiver<Arc<Status>>, - background: Arc<BackgroundRunner>, - - local_handler: ArcSwapOption<(Uuid, LocalHandlerFn<M>)>, - - rpc_addr_client: RpcAddrClient<M>, -} - -impl<M: RpcMessage + 'static> RpcClient<M> { - /// Create a new RpcClient from an address, a job runner, and the status of all RPC servers - pub fn new( - rac: RpcAddrClient<M>, - background: Arc<BackgroundRunner>, - status: watch::Receiver<Arc<Status>>, - ) -> Arc<Self> { - 
Arc::new(Self { - rpc_addr_client: rac, - background, - status, - local_handler: ArcSwapOption::new(None), - }) - } - - /// Set the local handler, to process RPC to this node without network usage - pub fn set_local_handler<F, Fut>(&self, my_id: Uuid, handler: F) - where - F: Fn(Arc<M>) -> Fut + Send + Sync + 'static, - Fut: Future<Output = Result<M, Error>> + Send + 'static, - { - let handler_arc = Arc::new(handler); - let handler: LocalHandlerFn<M> = Box::new(move |msg| { - let handler_arc2 = handler_arc.clone(); - Box::pin(async move { handler_arc2(msg).await }) - }); - self.local_handler.swap(Some(Arc::new((my_id, handler)))); - } - - /// Get a RPC client to make calls using node's SocketAddr instead of its ID - pub fn by_addr(&self) -> &RpcAddrClient<M> { - &self.rpc_addr_client - } - - /// Make a RPC call - pub async fn call(&self, to: Uuid, msg: M, timeout: Duration) -> Result<M, Error> { - self.call_arc(to, Arc::new(msg), timeout).await - } - - /// Make a RPC call from a message stored in an Arc - pub async fn call_arc(&self, to: Uuid, msg: Arc<M>, timeout: Duration) -> Result<M, Error> { - if let Some(lh) = self.local_handler.load_full() { - let (my_id, local_handler) = lh.as_ref(); - if to.borrow() == my_id { - return local_handler(msg).await; - } - } - let status = self.status.borrow().clone(); - let node_status = match status.nodes.get(&to) { - Some(node_status) => { - if node_status.is_up() { - node_status - } else { - return Err(Error::from(RpcError::NodeDown(to))); - } - } - None => { - return Err(Error::Message(format!( - "Peer ID not found: {:?}", - to.borrow() - ))) - } - }; - match self - .rpc_addr_client - .call(&node_status.addr, msg, timeout) - .await - { - Err(rpc_error) => { - node_status.num_failures.fetch_add(1, Ordering::SeqCst); - Err(Error::from(rpc_error)) - } - Ok(x) => x, - } - } - - /// Make a RPC call to multiple servers, returning a Vec containing each result - pub async fn call_many(&self, to: &[Uuid], msg: M, timeout: Duration) -> Vec<Result<M, Error>> { - let msg = Arc::new(msg); - let mut resp_stream = to - .iter() - .map(|to| self.call_arc(*to, msg.clone(), timeout)) - .collect::<FuturesUnordered<_>>(); - - let mut results = vec![]; - while let Some(resp) = resp_stream.next().await { - results.push(resp); - } - results - } - - /// Make a RPC call to multiple servers, returning either a Vec of responses, or an error if - /// strategy could not be respected due to too many errors - pub async fn try_call_many( - self: &Arc<Self>, - to: &[Uuid], - msg: M, - strategy: RequestStrategy, - ) -> Result<Vec<M>, Error> { - let timeout = strategy.rs_timeout; - - let msg = Arc::new(msg); - let mut resp_stream = to - .to_vec() - .into_iter() - .map(|to| { - let self2 = self.clone(); - let msg = msg.clone(); - async move { self2.call_arc(to, msg, timeout).await } - }) - .collect::<FuturesUnordered<_>>(); - - let mut results = vec![]; - let mut errors = vec![]; - - while let Some(resp) = resp_stream.next().await { - match resp { - Ok(msg) => { - results.push(msg); - if results.len() >= strategy.rs_quorum { - break; - } - } - Err(e) => { - errors.push(e); - } - } - } - - if results.len() >= strategy.rs_quorum { - // Continue requests in background. 
- // Continue the remaining requests immediately using tokio::spawn - // but enqueue a task in the background runner - // to ensure that the process won't exit until the requests are done - // (if we had just enqueued the resp_stream.collect directly in the background runner, - // the requests might have been put on hold in the background runner's queue, - // in which case they might timeout or otherwise fail) - if !strategy.rs_interrupt_after_quorum { - let wait_finished_fut = tokio::spawn(async move { - resp_stream.collect::<Vec<_>>().await; - }); - self.background.spawn(wait_finished_fut.map(|_| Ok(()))); - } - - Ok(results) - } else { - let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>(); - Err(Error::from(RpcError::TooManyErrors(errors))) - } - } -} - -/// Thin wrapper arround an `RpcHttpClient` specifying the path of the request -pub struct RpcAddrClient<M: RpcMessage> { - phantom: PhantomData<M>, - - http_client: Arc<RpcHttpClient>, - path: String, -} - -impl<M: RpcMessage> RpcAddrClient<M> { - /// Create an RpcAddrClient from an HTTP client and the endpoint to reach for RPCs - pub fn new(http_client: Arc<RpcHttpClient>, path: String) -> Self { - Self { - phantom: PhantomData::default(), - http_client, - path, - } - } - - /// Make a RPC - pub async fn call<MB>( - &self, - to_addr: &SocketAddr, - msg: MB, - timeout: Duration, - ) -> Result<Result<M, Error>, RpcError> - where - MB: Borrow<M>, - { - self.http_client - .call(&self.path, to_addr, msg, timeout) - .await - } -} - -/// HTTP client used to make RPCs -pub struct RpcHttpClient { - request_limiter: Semaphore, - method: ClientMethod, -} - -enum ClientMethod { - Http(Client<HttpConnector, hyper::Body>), - Https(Client<tls_util::HttpsConnectorFixedDnsname<HttpConnector>, hyper::Body>), -} - -impl RpcHttpClient { - /// Create a new RpcHttpClient - pub fn new( - max_concurrent_requests: usize, - tls_config: &Option<TlsConfig>, - ) -> Result<Self, Error> { - let method = if let Some(cf) = tls_config { - let ca_certs = tls_util::load_certs(&cf.ca_cert).map_err(|e| { - Error::Message(format!("Failed to open CA certificate file: {:?}", e)) - })?; - let node_certs = tls_util::load_certs(&cf.node_cert) - .map_err(|e| Error::Message(format!("Failed to open certificate file: {:?}", e)))?; - let node_key = tls_util::load_private_key(&cf.node_key) - .map_err(|e| Error::Message(format!("Failed to open private key file: {:?}", e)))?; - - let mut config = rustls::ClientConfig::new(); - - for crt in ca_certs.iter() { - config.root_store.add(crt)?; - } - - config.set_single_client_cert([&node_certs[..], &ca_certs[..]].concat(), node_key)?; - - let connector = - tls_util::HttpsConnectorFixedDnsname::<HttpConnector>::new(config, "garage"); - - ClientMethod::Https(Client::builder().build(connector)) - } else { - ClientMethod::Http(Client::new()) - }; - Ok(RpcHttpClient { - method, - request_limiter: Semaphore::new(max_concurrent_requests), - }) - } - - /// Make a RPC - async fn call<M, MB>( - &self, - path: &str, - to_addr: &SocketAddr, - msg: MB, - timeout: Duration, - ) -> Result<Result<M, Error>, RpcError> - where - MB: Borrow<M>, - M: RpcMessage, - { - let uri = match self.method { - ClientMethod::Http(_) => format!("http://{}/{}", to_addr, path), - ClientMethod::Https(_) => format!("https://{}/{}", to_addr, path), - }; - - let req = Request::builder() - .method(Method::POST) - .uri(uri) - .body(Body::from(rmp_to_vec_all_named(msg.borrow())?))?; - - let resp_fut = match &self.method { - ClientMethod::Http(client) => 
client.request(req).fuse(), - ClientMethod::Https(client) => client.request(req).fuse(), - }; - - trace!("({}) Acquiring request_limiter slot...", path); - let slot = self.request_limiter.acquire().await; - trace!("({}) Got slot, doing request to {}...", path, to_addr); - let resp = tokio::time::timeout(timeout, resp_fut) - .await - .map_err(|e| { - debug!( - "RPC timeout to {}: {}", - to_addr, - debug_serialize(msg.borrow()) - ); - e - })? - .map_err(|e| { - warn!( - "RPC HTTP client error when connecting to {}: {}", - to_addr, e - ); - e - })?; - - let status = resp.status(); - trace!("({}) Request returned, got status {}", path, status); - let body = hyper::body::to_bytes(resp.into_body()).await?; - drop(slot); - - match rmp_serde::decode::from_read::<_, Result<M, String>>(&body[..])? { - Err(e) => Ok(Err(Error::RemoteError(e, status))), - Ok(x) => Ok(Ok(x)), - } - } -} diff --git a/src/rpc/rpc_helper.rs b/src/rpc/rpc_helper.rs new file mode 100644 index 00000000..c9458ee6 --- /dev/null +++ b/src/rpc/rpc_helper.rs @@ -0,0 +1,206 @@ +//! Contain structs related to making RPCs +use std::sync::Arc; +use std::time::Duration; + +use futures::future::join_all; +use futures::stream::futures_unordered::FuturesUnordered; +use futures::stream::StreamExt; +use futures_util::future::FutureExt; +use tokio::select; + +pub use netapp::endpoint::{Endpoint, EndpointHandler, Message}; +use netapp::peering::fullmesh::FullMeshPeeringStrategy; +pub use netapp::proto::*; +pub use netapp::{NetApp, NodeID}; + +use garage_util::background::BackgroundRunner; +use garage_util::error::{Error, RpcError}; + +const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10); + +/// Strategy to apply when making RPC +#[derive(Copy, Clone)] +pub struct RequestStrategy { + /// Max time to wait for reponse + pub rs_timeout: Duration, + /// Min number of response to consider the request successful + pub rs_quorum: Option<usize>, + /// Should requests be dropped after enough response are received + pub rs_interrupt_after_quorum: bool, + /// Request priority + pub rs_priority: RequestPriority, +} + +impl RequestStrategy { + /// Create a RequestStrategy with default timeout and not interrupting when quorum reached + pub fn with_priority(prio: RequestPriority) -> Self { + RequestStrategy { + rs_timeout: DEFAULT_TIMEOUT, + rs_quorum: None, + rs_interrupt_after_quorum: false, + rs_priority: prio, + } + } + /// Set quorum to be reached for request + pub fn with_quorum(mut self, quorum: usize) -> Self { + self.rs_quorum = Some(quorum); + self + } + /// Set timeout of the strategy + pub fn with_timeout(mut self, timeout: Duration) -> Self { + self.rs_timeout = timeout; + self + } + /// Set if requests can be dropped after quorum has been reached + /// In general true for read requests, and false for write + pub fn interrupt_after_quorum(mut self, interrupt: bool) -> Self { + self.rs_interrupt_after_quorum = interrupt; + self + } +} + +#[derive(Clone)] +pub struct RpcHelper { + pub(crate) fullmesh: Arc<FullMeshPeeringStrategy>, + pub(crate) background: Arc<BackgroundRunner>, +} + +impl RpcHelper { + pub async fn call<M, H>( + &self, + endpoint: &Endpoint<M, H>, + to: NodeID, + msg: M, + strat: RequestStrategy, + ) -> Result<M::Response, Error> + where + M: Message, + H: EndpointHandler<M>, + { + self.call_arc(endpoint, to, Arc::new(msg), strat).await + } + + pub async fn call_arc<M, H>( + &self, + endpoint: &Endpoint<M, H>, + to: NodeID, + msg: Arc<M>, + strat: RequestStrategy, + ) -> Result<M::Response, Error> + where + M: Message, + H: 
EndpointHandler<M>, + { + select! { + res = endpoint.call(&to, &msg, strat.rs_priority) => Ok(res?), + _ = tokio::time::sleep(strat.rs_timeout) => Err(Error::Rpc(RpcError::Timeout)), + } + } + + pub async fn call_many<M, H>( + &self, + endpoint: &Endpoint<M, H>, + to: &[NodeID], + msg: M, + strat: RequestStrategy, + ) -> Vec<(NodeID, Result<M::Response, Error>)> + where + M: Message, + H: EndpointHandler<M>, + { + let msg = Arc::new(msg); + let resps = join_all( + to.iter() + .map(|to| self.call_arc(endpoint, *to, msg.clone(), strat)), + ) + .await; + to.iter() + .cloned() + .zip(resps.into_iter()) + .collect::<Vec<_>>() + } + + pub async fn broadcast<M, H>( + &self, + endpoint: &Endpoint<M, H>, + msg: M, + strat: RequestStrategy, + ) -> Vec<(NodeID, Result<M::Response, Error>)> + where + M: Message, + H: EndpointHandler<M>, + { + let to = self + .fullmesh + .get_peer_list() + .iter() + .map(|p| p.id) + .collect::<Vec<_>>(); + self.call_many(endpoint, &to[..], msg, strat).await + } + + /// Make a RPC call to multiple servers, returning either a Vec of responses, or an error if + /// strategy could not be respected due to too many errors + pub async fn try_call_many<M, H>( + &self, + endpoint: &Arc<Endpoint<M, H>>, + to: &[NodeID], + msg: M, + strategy: RequestStrategy, + ) -> Result<Vec<M::Response>, Error> + where + M: Message + 'static, + H: EndpointHandler<M> + 'static, + { + let msg = Arc::new(msg); + let mut resp_stream = to + .to_vec() + .into_iter() + .map(|to| { + let self2 = self.clone(); + let msg = msg.clone(); + let endpoint2 = endpoint.clone(); + async move { self2.call_arc(&endpoint2, to, msg, strategy).await } + }) + .collect::<FuturesUnordered<_>>(); + + let mut results = vec![]; + let mut errors = vec![]; + let quorum = strategy.rs_quorum.unwrap_or(to.len()); + + while let Some(resp) = resp_stream.next().await { + match resp { + Ok(msg) => { + results.push(msg); + if results.len() >= quorum { + break; + } + } + Err(e) => { + errors.push(e); + } + } + } + + if results.len() >= quorum { + // Continue requests in background. + // Continue the remaining requests immediately using tokio::spawn + // but enqueue a task in the background runner + // to ensure that the process won't exit until the requests are done + // (if we had just enqueued the resp_stream.collect directly in the background runner, + // the requests might have been put on hold in the background runner's queue, + // in which case they might timeout or otherwise fail) + if !strategy.rs_interrupt_after_quorum { + let wait_finished_fut = tokio::spawn(async move { + resp_stream.collect::<Vec<_>>().await; + }); + self.background.spawn(wait_finished_fut.map(|_| Ok(()))); + } + + Ok(results) + } else { + let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>(); + Err(Error::from(RpcError::TooManyErrors(errors))) + } + } +} diff --git a/src/rpc/rpc_server.rs b/src/rpc/rpc_server.rs deleted file mode 100644 index 81361ab9..00000000 --- a/src/rpc/rpc_server.rs +++ /dev/null @@ -1,247 +0,0 @@ -//! 
Contains structs related to receiving RPCs -use std::collections::HashMap; -use std::net::SocketAddr; -use std::pin::Pin; -use std::sync::Arc; -use std::time::Instant; - -use futures::future::Future; -use futures_util::future::*; -use futures_util::stream::*; -use hyper::server::conn::AddrStream; -use hyper::service::{make_service_fn, service_fn}; -use hyper::{Body, Method, Request, Response, Server, StatusCode}; -use serde::{Deserialize, Serialize}; -use tokio::net::{TcpListener, TcpStream}; -use tokio_rustls::server::TlsStream; -use tokio_rustls::TlsAcceptor; -use tokio_stream::wrappers::TcpListenerStream; - -use garage_util::config::TlsConfig; -use garage_util::data::*; -use garage_util::error::Error; - -use crate::tls_util; - -/// Trait for messages that can be sent as RPC -pub trait RpcMessage: Serialize + for<'de> Deserialize<'de> + Send + Sync {} - -type ResponseFuture = Pin<Box<dyn Future<Output = Result<Response<Body>, Error>> + Send>>; -type Handler = Box<dyn Fn(Request<Body>, SocketAddr) -> ResponseFuture + Send + Sync>; - -/// Structure handling RPCs -pub struct RpcServer { - /// The address the RpcServer will bind - pub bind_addr: SocketAddr, - /// The tls configuration used for RPC - pub tls_config: Option<TlsConfig>, - - handlers: HashMap<String, Handler>, -} - -async fn handle_func<M, F, Fut>( - handler: Arc<F>, - req: Request<Body>, - sockaddr: SocketAddr, - name: Arc<String>, -) -> Result<Response<Body>, Error> -where - M: RpcMessage + 'static, - F: Fn(M, SocketAddr) -> Fut + Send + Sync + 'static, - Fut: Future<Output = Result<M, Error>> + Send + 'static, -{ - let begin_time = Instant::now(); - let whole_body = hyper::body::to_bytes(req.into_body()).await?; - let msg = rmp_serde::decode::from_read::<_, M>(&whole_body[..])?; - - trace!( - "Request message: {}", - serde_json::to_string(&msg) - .unwrap_or_else(|_| "<json error>".into()) - .chars() - .take(100) - .collect::<String>() - ); - - match handler(msg, sockaddr).await { - Ok(resp) => { - let resp_bytes = rmp_to_vec_all_named::<Result<M, String>>(&Ok(resp))?; - let rpc_duration = (Instant::now() - begin_time).as_millis(); - if rpc_duration > 100 { - debug!("RPC {} ok, took long: {} ms", name, rpc_duration,); - } - Ok(Response::new(Body::from(resp_bytes))) - } - Err(e) => { - let err_str = format!("{}", e); - let rep_bytes = rmp_to_vec_all_named::<Result<M, String>>(&Err(err_str))?; - let mut err_response = Response::new(Body::from(rep_bytes)); - *err_response.status_mut() = match e { - Error::BadRpc(_) => StatusCode::BAD_REQUEST, - _ => StatusCode::INTERNAL_SERVER_ERROR, - }; - warn!( - "RPC error ({}): {} ({} ms)", - name, - e, - (Instant::now() - begin_time).as_millis(), - ); - Ok(err_response) - } - } -} - -impl RpcServer { - /// Create a new RpcServer - pub fn new(bind_addr: SocketAddr, tls_config: Option<TlsConfig>) -> Self { - Self { - bind_addr, - tls_config, - handlers: HashMap::new(), - } - } - - /// Add handler handling request made to `name` - pub fn add_handler<M, F, Fut>(&mut self, name: String, handler: F) - where - M: RpcMessage + 'static, - F: Fn(M, SocketAddr) -> Fut + Send + Sync + 'static, - Fut: Future<Output = Result<M, Error>> + Send + 'static, - { - let name2 = Arc::new(name.clone()); - let handler_arc = Arc::new(handler); - let handler = Box::new(move |req: Request<Body>, sockaddr: SocketAddr| { - let handler2 = handler_arc.clone(); - let b: ResponseFuture = Box::pin(handle_func(handler2, req, sockaddr, name2.clone())); - b - }); - self.handlers.insert(name, handler); - } - - async fn handler( 
- self: Arc<Self>, - req: Request<Body>, - addr: SocketAddr, - ) -> Result<Response<Body>, Error> { - if req.method() != Method::POST { - let mut bad_request = Response::default(); - *bad_request.status_mut() = StatusCode::BAD_REQUEST; - return Ok(bad_request); - } - - let path = &req.uri().path()[1..].to_string(); - - let handler = match self.handlers.get(path) { - Some(h) => h, - None => { - let mut not_found = Response::default(); - *not_found.status_mut() = StatusCode::NOT_FOUND; - return Ok(not_found); - } - }; - - trace!("({}) Handling request", path); - - let resp_waiter = tokio::spawn(handler(req, addr)); - match resp_waiter.await { - Err(err) => { - warn!("Handler await error: {}", err); - let mut ise = Response::default(); - *ise.status_mut() = StatusCode::INTERNAL_SERVER_ERROR; - Ok(ise) - } - Ok(Err(err)) => { - trace!("({}) Request handler failed: {}", path, err); - let mut bad_request = Response::new(Body::from(format!("{}", err))); - *bad_request.status_mut() = StatusCode::BAD_REQUEST; - Ok(bad_request) - } - Ok(Ok(resp)) => { - trace!("({}) Request handler succeeded", path); - Ok(resp) - } - } - } - - /// Run the RpcServer - pub async fn run( - self: Arc<Self>, - shutdown_signal: impl Future<Output = ()>, - ) -> Result<(), Error> { - if let Some(tls_config) = self.tls_config.as_ref() { - let ca_certs = tls_util::load_certs(&tls_config.ca_cert)?; - let node_certs = tls_util::load_certs(&tls_config.node_cert)?; - let node_key = tls_util::load_private_key(&tls_config.node_key)?; - - let mut ca_store = rustls::RootCertStore::empty(); - for crt in ca_certs.iter() { - ca_store.add(crt)?; - } - - let mut config = - rustls::ServerConfig::new(rustls::AllowAnyAuthenticatedClient::new(ca_store)); - config.set_single_cert([&node_certs[..], &ca_certs[..]].concat(), node_key)?; - let tls_acceptor = Arc::new(TlsAcceptor::from(Arc::new(config))); - - let listener = TcpListener::bind(&self.bind_addr).await?; - let incoming = TcpListenerStream::new(listener).filter_map(|socket| async { - match socket { - Ok(stream) => match tls_acceptor.clone().accept(stream).await { - Ok(x) => Some(Ok::<_, hyper::Error>(x)), - Err(_e) => None, - }, - Err(_) => None, - } - }); - let incoming = hyper::server::accept::from_stream(incoming); - - let self_arc = self.clone(); - let service = make_service_fn(|conn: &TlsStream<TcpStream>| { - let client_addr = conn - .get_ref() - .0 - .peer_addr() - .unwrap_or_else(|_| ([0, 0, 0, 0], 0).into()); - let self_arc = self_arc.clone(); - async move { - Ok::<_, Error>(service_fn(move |req: Request<Body>| { - self_arc.clone().handler(req, client_addr).map_err(|e| { - warn!("RPC handler error: {}", e); - e - }) - })) - } - }); - - let server = Server::builder(incoming).serve(service); - - let graceful = server.with_graceful_shutdown(shutdown_signal); - info!("RPC server listening on http://{}", self.bind_addr); - - graceful.await?; - } else { - let self_arc = self.clone(); - let service = make_service_fn(move |conn: &AddrStream| { - let client_addr = conn.remote_addr(); - let self_arc = self_arc.clone(); - async move { - Ok::<_, Error>(service_fn(move |req: Request<Body>| { - self_arc.clone().handler(req, client_addr).map_err(|e| { - warn!("RPC handler error: {}", e); - e - }) - })) - } - }); - - let server = Server::bind(&self.bind_addr).serve(service); - - let graceful = server.with_graceful_shutdown(shutdown_signal); - info!("RPC server listening on http://{}", self.bind_addr); - - graceful.await?; - } - - Ok(()) - } -} diff --git a/src/rpc/system.rs b/src/rpc/system.rs 
new file mode 100644 index 00000000..7ccec945 --- /dev/null +++ b/src/rpc/system.rs @@ -0,0 +1,363 @@ +//! Module containing structs related to membership management +use std::io::{Read, Write}; +use std::net::SocketAddr; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::Duration; + +use arc_swap::ArcSwap; +use async_trait::async_trait; +use futures::{join, select}; +use futures_util::future::*; +use serde::{Deserialize, Serialize}; +use sodiumoxide::crypto::sign::ed25519; +use tokio::sync::watch; +use tokio::sync::Mutex; + +use netapp::endpoint::{Endpoint, EndpointHandler, Message}; +use netapp::peering::fullmesh::FullMeshPeeringStrategy; +use netapp::proto::*; +use netapp::{NetApp, NetworkKey, NodeID, NodeKey}; + +use garage_util::background::BackgroundRunner; +use garage_util::error::Error; +use garage_util::persister::Persister; +//use garage_util::time::*; + +//use crate::consul::get_consul_nodes; +use crate::ring::*; +use crate::rpc_helper::{RequestStrategy, RpcHelper}; + +const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60); +const PING_TIMEOUT: Duration = Duration::from_secs(2); + +/// RPC endpoint used for calls related to membership +pub const SYSTEM_RPC_PATH: &str = "garage_rpc/membership.rs/SystemRpc"; + +/// RPC messages related to membership +#[derive(Debug, Serialize, Deserialize, Clone)] +pub enum SystemRpc { + /// Response to successfull advertisements + Ok, + /// Error response + Error(String), + /// Ask other node its config. Answered with AdvertiseConfig + PullConfig, + /// Advertise Garage status. Answered with another AdvertiseStatus. + /// Exchanged with every node on a regular basis. + AdvertiseStatus(StateInfo), + /// Advertisement of nodes config. Sent spontanously or in response to PullConfig + AdvertiseConfig(NetworkConfig), + /// Get known nodes states + GetKnownNodes, + /// Return known nodes + ReturnKnownNodes(Vec<(NodeID, SocketAddr, bool)>), +} + +impl Message for SystemRpc { + type Response = SystemRpc; +} + +/// This node's membership manager +pub struct System { + /// The id of this node + pub id: NodeID, + + persist_config: Persister<NetworkConfig>, + + state_info: ArcSwap<StateInfo>, + + pub netapp: Arc<NetApp>, + fullmesh: Arc<FullMeshPeeringStrategy>, + pub rpc: RpcHelper, + + system_endpoint: Arc<Endpoint<SystemRpc, System>>, + + rpc_listen_addr: SocketAddr, + bootstrap_peers: Vec<(NodeID, SocketAddr)>, + consul_host: Option<String>, + consul_service_name: Option<String>, + replication_factor: usize, + + /// The ring + pub ring: watch::Receiver<Arc<Ring>>, + update_ring: Mutex<watch::Sender<Arc<Ring>>>, + + /// The job runner of this node + pub background: Arc<BackgroundRunner>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StateInfo { + /// Hostname of the node + pub hostname: String, + /// Replication factor configured on the node + pub replication_factor: usize, + /// Configuration version + pub config_version: u64, +} + +fn gen_node_key(metadata_dir: &Path) -> Result<NodeKey, Error> { + let mut id_file = metadata_dir.to_path_buf(); + id_file.push("node_id"); + if id_file.as_path().exists() { + let mut f = std::fs::File::open(id_file.as_path())?; + let mut d = vec![]; + f.read_to_end(&mut d)?; + if d.len() != 64 { + return Err(Error::Message("Corrupt node_id file".to_string())); + } + + let mut key = [0u8; 64]; + key.copy_from_slice(&d[..]); + Ok(NodeKey::from_slice(&key[..]).unwrap()) + } else { + let (key, _) = ed25519::gen_keypair(); + + let mut f = std::fs::File::create(id_file.as_path())?; + 
f.write_all(&key[..])?; + Ok(NodeKey::from_slice(&key[..]).unwrap()) + } +} + +impl System { + /// Create this node's membership manager + pub fn new( + network_key: NetworkKey, + metadata_dir: PathBuf, + background: Arc<BackgroundRunner>, + replication_factor: usize, + rpc_listen_addr: SocketAddr, + bootstrap_peers: Vec<(NodeID, SocketAddr)>, + consul_host: Option<String>, + consul_service_name: Option<String>, + ) -> Arc<Self> { + let node_key = gen_node_key(&metadata_dir).expect("Unable to read or generate node ID"); + info!("Node public key: {}", hex::encode(&node_key.public_key())); + + let persist_config = Persister::new(&metadata_dir, "network_config"); + + let net_config = match persist_config.load() { + Ok(x) => x, + Err(e) => { + match Persister::<garage_rpc_021::ring::NetworkConfig>::new( + &metadata_dir, + "network_config", + ) + .load() + { + Ok(old_config) => NetworkConfig::migrate_from_021(old_config), + Err(e2) => { + info!( + "No valid previous network configuration stored ({}, {}), starting fresh.", + e, e2 + ); + NetworkConfig::new() + } + } + } + }; + + let state_info = StateInfo { + hostname: gethostname::gethostname() + .into_string() + .unwrap_or_else(|_| "<invalid utf-8>".to_string()), + replication_factor: replication_factor, + config_version: net_config.version, + }; + + let ring = Ring::new(net_config, replication_factor); + let (update_ring, ring) = watch::channel(Arc::new(ring)); + + let netapp = NetApp::new(network_key, node_key); + let fullmesh = FullMeshPeeringStrategy::new(netapp.clone(), bootstrap_peers.clone()); + + let system_endpoint = netapp.endpoint(SYSTEM_RPC_PATH.into()); + + let sys = Arc::new(System { + id: netapp.id.clone(), + persist_config, + state_info: ArcSwap::new(Arc::new(state_info)), + netapp: netapp.clone(), + fullmesh: fullmesh.clone(), + rpc: RpcHelper { + fullmesh: fullmesh.clone(), + background: background.clone(), + }, + system_endpoint, + replication_factor, + rpc_listen_addr, + bootstrap_peers, + consul_host, + consul_service_name, + ring, + update_ring: Mutex::new(update_ring), + background: background.clone(), + }); + sys.system_endpoint.set_handler(sys.clone()); + sys + } + + /// Perform bootstraping, starting the ping loop + pub async fn run(self: Arc<Self>, must_exit: watch::Receiver<bool>) { + join!( + self.netapp + .clone() + .listen(self.rpc_listen_addr, None, must_exit.clone()), + self.fullmesh.clone().run(must_exit.clone()), + self.discovery_loop(must_exit.clone()), + ); + } + + // ---- INTERNALS ---- + + /// Save network configuration to disc + async fn save_network_config(self: Arc<Self>) -> Result<(), Error> { + let ring: Arc<Ring> = self.ring.borrow().clone(); + self.persist_config + .save_async(&ring.config) + .await + .expect("Cannot save current cluster configuration"); + Ok(()) + } + + fn update_state_info(&self) { + let mut new_si: StateInfo = self.state_info.load().as_ref().clone(); + + let ring = self.ring.borrow(); + new_si.config_version = ring.config.version; + self.state_info.swap(Arc::new(new_si)); + } + + fn handle_pull_config(&self) -> SystemRpc { + let ring = self.ring.borrow().clone(); + SystemRpc::AdvertiseConfig(ring.config.clone()) + } + + async fn handle_advertise_config( + self: Arc<Self>, + adv: &NetworkConfig, + ) -> Result<SystemRpc, Error> { + let update_ring = self.update_ring.lock().await; + let ring: Arc<Ring> = self.ring.borrow().clone(); + + if adv.version > ring.config.version { + let ring = Ring::new(adv.clone(), self.replication_factor); + update_ring.send(Arc::new(ring))?; + 
drop(update_ring); + + let self2 = self.clone(); + let adv2 = adv.clone(); + self.background.spawn_cancellable(async move { + self2 + .rpc + .broadcast( + &self2.system_endpoint, + SystemRpc::AdvertiseConfig(adv2), + RequestStrategy::with_priority(PRIO_NORMAL), + ) + .await; + Ok(()) + }); + self.background.spawn(self.clone().save_network_config()); + } + + Ok(SystemRpc::Ok) + } + + async fn discovery_loop(&self, mut stop_signal: watch::Receiver<bool>) { + /* TODO + let consul_config = match (&self.consul_host, &self.consul_service_name) { + (Some(ch), Some(csn)) => Some((ch.clone(), csn.clone())), + _ => None, + }; + */ + + while !*stop_signal.borrow() { + let not_configured = self.ring.borrow().config.members.is_empty(); + let no_peers = self.fullmesh.get_peer_list().len() < self.replication_factor; + let bad_peers = self + .fullmesh + .get_peer_list() + .iter() + .filter(|p| p.is_up()) + .count() != self.ring.borrow().config.members.len(); + + if not_configured || no_peers || bad_peers { + info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers); + + let ping_list = self.bootstrap_peers.clone(); + + /* + *TODO bring this back: persisted list of peers + if let Ok(peers) = self.persist_status.load_async().await { + ping_list.extend(peers.iter().map(|x| (x.addr, Some(x.id)))); + } + */ + + /* + * TODO bring this back: get peers from consul + if let Some((consul_host, consul_service_name)) = &consul_config { + match get_consul_nodes(consul_host, consul_service_name).await { + Ok(node_list) => { + ping_list.extend(node_list.iter().map(|a| (*a, None))); + } + Err(e) => { + warn!("Could not retrieve node list from Consul: {}", e); + } + } + } + */ + + for (node_id, node_addr) in ping_list { + tokio::spawn(self.netapp.clone().try_connect(node_addr, node_id)); + } + } + + let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL); + select! 
{ + _ = restart_at.fuse() => {}, + _ = stop_signal.changed().fuse() => {}, + } + } + } + + async fn pull_config(self: Arc<Self>, peer: NodeID) { + let resp = self + .rpc + .call( + &self.system_endpoint, + peer, + SystemRpc::PullConfig, + RequestStrategy::with_priority(PRIO_HIGH).with_timeout(PING_TIMEOUT), + ) + .await; + if let Ok(SystemRpc::AdvertiseConfig(config)) = resp { + let _: Result<_, _> = self.handle_advertise_config(&config).await; + } + } +} + +#[async_trait] +impl EndpointHandler<SystemRpc> for System { + async fn handle(self: &Arc<Self>, msg: &SystemRpc, _from: NodeID) -> SystemRpc { + let resp = match msg { + SystemRpc::PullConfig => Ok(self.handle_pull_config()), + SystemRpc::AdvertiseConfig(adv) => self.clone().handle_advertise_config(&adv).await, + SystemRpc::GetKnownNodes => { + let known_nodes = self + .fullmesh + .get_peer_list() + .iter() + .map(|n| (n.id, n.addr, n.is_up())) + .collect::<Vec<_>>(); + Ok(SystemRpc::ReturnKnownNodes(known_nodes)) + } + _ => Err(Error::BadRpc("Unexpected RPC message".to_string())), + }; + match resp { + Ok(r) => r, + Err(e) => SystemRpc::Error(format!("{}", e)), + } + } +} diff --git a/src/rpc/tls_util.rs b/src/rpc/tls_util.rs deleted file mode 100644 index 8189f93b..00000000 --- a/src/rpc/tls_util.rs +++ /dev/null @@ -1,140 +0,0 @@ -use core::future::Future; -use core::task::{Context, Poll}; -use std::pin::Pin; -use std::sync::Arc; -use std::{fs, io}; - -use futures_util::future::*; -use hyper::client::connect::Connection; -use hyper::client::HttpConnector; -use hyper::service::Service; -use hyper::Uri; -use hyper_rustls::MaybeHttpsStream; -use rustls::internal::pemfile; -use tokio::io::{AsyncRead, AsyncWrite}; -use tokio_rustls::TlsConnector; -use webpki::DNSNameRef; - -use garage_util::error::Error; - -pub fn load_certs(filename: &str) -> Result<Vec<rustls::Certificate>, Error> { - let certfile = fs::File::open(&filename)?; - let mut reader = io::BufReader::new(certfile); - - let certs = pemfile::certs(&mut reader).map_err(|_| { - Error::Message(format!( - "Could not deecode certificates from file: {}", - filename - )) - })?; - - if certs.is_empty() { - return Err(Error::Message(format!( - "Invalid certificate file: {}", - filename - ))); - } - Ok(certs) -} - -pub fn load_private_key(filename: &str) -> Result<rustls::PrivateKey, Error> { - let keydata = fs::read_to_string(filename)?; - - let mut buf1 = keydata.as_bytes(); - let rsa_keys = pemfile::rsa_private_keys(&mut buf1).unwrap_or_default(); - - let mut buf2 = keydata.as_bytes(); - let pkcs8_keys = pemfile::pkcs8_private_keys(&mut buf2).unwrap_or_default(); - - let mut keys = rsa_keys; - keys.extend(pkcs8_keys.into_iter()); - - if keys.len() != 1 { - return Err(Error::Message(format!( - "Invalid private key file: {} ({} private keys)", - filename, - keys.len() - ))); - } - Ok(keys[0].clone()) -} - -// ---- AWFUL COPYPASTA FROM HYPER-RUSTLS connector.rs -// ---- ALWAYS USE `garage` AS HOSTNAME FOR TLS VERIFICATION - -#[derive(Clone)] -pub struct HttpsConnectorFixedDnsname<T> { - http: T, - tls_config: Arc<rustls::ClientConfig>, - fixed_dnsname: &'static str, -} - -type BoxError = Box<dyn std::error::Error + Send + Sync>; - -impl HttpsConnectorFixedDnsname<HttpConnector> { - pub fn new(mut tls_config: rustls::ClientConfig, fixed_dnsname: &'static str) -> Self { - let mut http = HttpConnector::new(); - http.enforce_http(false); - tls_config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()]; - Self { - http, - tls_config: Arc::new(tls_config), - fixed_dnsname, - } - } -} 
- -impl<T> Service<Uri> for HttpsConnectorFixedDnsname<T> -where - T: Service<Uri>, - T::Response: Connection + AsyncRead + AsyncWrite + Send + Unpin + 'static, - T::Future: Send + 'static, - T::Error: Into<BoxError>, -{ - type Response = MaybeHttpsStream<T::Response>; - type Error = BoxError; - - #[allow(clippy::type_complexity)] - type Future = - Pin<Box<dyn Future<Output = Result<MaybeHttpsStream<T::Response>, BoxError>> + Send>>; - - fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> { - match self.http.poll_ready(cx) { - Poll::Ready(Ok(())) => Poll::Ready(Ok(())), - Poll::Ready(Err(e)) => Poll::Ready(Err(e.into())), - Poll::Pending => Poll::Pending, - } - } - - fn call(&mut self, dst: Uri) -> Self::Future { - let is_https = dst.scheme_str() == Some("https"); - - if !is_https { - let connecting_future = self.http.call(dst); - - let f = async move { - let tcp = connecting_future.await.map_err(Into::into)?; - - Ok(MaybeHttpsStream::Http(tcp)) - }; - f.boxed() - } else { - let cfg = self.tls_config.clone(); - let connecting_future = self.http.call(dst); - - let dnsname = - DNSNameRef::try_from_ascii_str(self.fixed_dnsname).expect("Invalid fixed dnsname"); - - let f = async move { - let tcp = connecting_future.await.map_err(Into::into)?; - let connector = TlsConnector::from(cfg); - let tls = connector - .connect(dnsname, tcp) - .await - .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; - Ok(MaybeHttpsStream::Https(tls)) - }; - f.boxed() - } - } -} diff --git a/src/table/Cargo.toml b/src/table/Cargo.toml index ccbd1748..996066dc 100644 --- a/src/table/Cargo.toml +++ b/src/table/Cargo.toml @@ -16,6 +16,7 @@ path = "lib.rs" garage_rpc = { version = "0.3.0", path = "../rpc" } garage_util = { version = "0.3.0", path = "../util" } +async-trait = "0.1.7" bytes = "1.0" hexdump = "0.1" log = "0.4" @@ -30,4 +31,3 @@ serde_bytes = "0.11" futures = "0.3" futures-util = "0.3" tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } - diff --git a/src/table/data.rs b/src/table/data.rs index e7e85e65..ffd494d5 100644 --- a/src/table/data.rs +++ b/src/table/data.rs @@ -9,7 +9,7 @@ use tokio::sync::Notify; use garage_util::data::*; use garage_util::error::*; -use garage_rpc::membership::System; +use garage_rpc::system::System; use crate::crdt::Crdt; use crate::replication::*; diff --git a/src/table/gc.rs b/src/table/gc.rs index 73e08827..c03648ef 100644 --- a/src/table/gc.rs +++ b/src/table/gc.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; +use async_trait::async_trait; use serde::{Deserialize, Serialize}; use serde_bytes::ByteBuf; @@ -13,9 +14,8 @@ use tokio::sync::watch; use garage_util::data::*; use garage_util::error::Error; -use garage_rpc::membership::System; -use garage_rpc::rpc_client::*; -use garage_rpc::rpc_server::*; +use garage_rpc::system::System; +use garage_rpc::*; use crate::data::*; use crate::replication::*; @@ -24,11 +24,11 @@ use crate::schema::*; const TABLE_GC_BATCH_SIZE: usize = 1024; const TABLE_GC_RPC_TIMEOUT: Duration = Duration::from_secs(30); -pub struct TableGc<F: TableSchema, R: TableReplication> { +pub struct TableGc<F: TableSchema + 'static, R: TableReplication + 'static> { system: Arc<System>, data: Arc<TableData<F, R>>, - rpc_client: Arc<RpcClient<GcRpc>>, + endpoint: Arc<Endpoint<GcRpc, Self>>, } #[derive(Serialize, Deserialize)] @@ -36,30 +36,30 @@ enum GcRpc { Update(Vec<ByteBuf>), 
DeleteIfEqualHash(Vec<(ByteBuf, Hash)>), Ok, + Error(String), } -impl RpcMessage for GcRpc {} +impl Message for GcRpc { + type Response = GcRpc; +} impl<F, R> TableGc<F, R> where F: TableSchema + 'static, R: TableReplication + 'static, { - pub(crate) fn launch( - system: Arc<System>, - data: Arc<TableData<F, R>>, - rpc_server: &mut RpcServer, - ) -> Arc<Self> { - let rpc_path = format!("table_{}/gc", data.name); - let rpc_client = system.rpc_client::<GcRpc>(&rpc_path); + pub(crate) fn launch(system: Arc<System>, data: Arc<TableData<F, R>>) -> Arc<Self> { + let endpoint = system + .netapp + .endpoint(format!("garage_table/gc.rs/Rpc:{}", data.name)); let gc = Arc::new(Self { system: system.clone(), data: data.clone(), - rpc_client, + endpoint, }); - gc.register_handler(rpc_server, rpc_path); + gc.endpoint.set_handler(gc.clone()); let gc1 = gc.clone(); system.background.spawn_worker( @@ -168,7 +168,7 @@ where async fn try_send_and_delete( &self, - nodes: Vec<Uuid>, + nodes: Vec<NodeID>, items: Vec<(ByteBuf, Hash, ByteBuf)>, ) -> Result<(), Error> { let n_items = items.len(); @@ -180,11 +180,15 @@ where deletes.push((k, vhash)); } - self.rpc_client + self.system + .rpc .try_call_many( + &self.endpoint, &nodes[..], GcRpc::Update(updates), - RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_GC_RPC_TIMEOUT), + RequestStrategy::with_priority(PRIO_BACKGROUND) + .with_quorum(nodes.len()) + .with_timeout(TABLE_GC_RPC_TIMEOUT), ) .await?; @@ -193,11 +197,15 @@ where self.data.name, n_items ); - self.rpc_client + self.system + .rpc .try_call_many( + &self.endpoint, &nodes[..], GcRpc::DeleteIfEqualHash(deletes.clone()), - RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_GC_RPC_TIMEOUT), + RequestStrategy::with_priority(PRIO_BACKGROUND) + .with_quorum(nodes.len()) + .with_timeout(TABLE_GC_RPC_TIMEOUT), ) .await?; @@ -217,24 +225,7 @@ where Ok(()) } - // ---- RPC HANDLER ---- - - fn register_handler(self: &Arc<Self>, rpc_server: &mut RpcServer, path: String) { - let self2 = self.clone(); - rpc_server.add_handler::<GcRpc, _, _>(path, move |msg, _addr| { - let self2 = self2.clone(); - async move { self2.handle_rpc(&msg).await } - }); - - let self2 = self.clone(); - self.rpc_client - .set_local_handler(self.system.id, move |msg| { - let self2 = self2.clone(); - async move { self2.handle_rpc(&msg).await } - }); - } - - async fn handle_rpc(self: &Arc<Self>, message: &GcRpc) -> Result<GcRpc, Error> { + async fn handle_rpc(&self, message: &GcRpc) -> Result<GcRpc, Error> { match message { GcRpc::Update(items) => { self.data.update_many(items)?; @@ -251,3 +242,16 @@ where } } } + +#[async_trait] +impl<F, R> EndpointHandler<GcRpc> for TableGc<F, R> +where + F: TableSchema + 'static, + R: TableReplication + 'static, +{ + async fn handle(self: &Arc<Self>, message: &GcRpc, _from: NodeID) -> GcRpc { + self.handle_rpc(message) + .await + .unwrap_or_else(|e| GcRpc::Error(format!("{}", e))) + } +} diff --git a/src/table/replication/fullcopy.rs b/src/table/replication/fullcopy.rs index 3ce7c0bf..b41c5360 100644 --- a/src/table/replication/fullcopy.rs +++ b/src/table/replication/fullcopy.rs @@ -1,7 +1,8 @@ use std::sync::Arc; -use garage_rpc::membership::System; use garage_rpc::ring::*; +use garage_rpc::system::System; +use garage_rpc::NodeID; use garage_util::data::*; use crate::replication::*; @@ -19,16 +20,20 @@ pub struct TableFullReplication { } impl TableReplication for TableFullReplication { - fn read_nodes(&self, _hash: &Hash) -> Vec<Uuid> { + fn read_nodes(&self, _hash: &Hash) -> Vec<NodeID> 
{ vec![self.system.id] } fn read_quorum(&self) -> usize { 1 } - fn write_nodes(&self, _hash: &Hash) -> Vec<Uuid> { + fn write_nodes(&self, _hash: &Hash) -> Vec<NodeID> { let ring = self.system.ring.borrow(); - ring.config.members.keys().cloned().collect::<Vec<_>>() + ring.config + .members + .keys() + .map(|id| NodeID::from_slice(id.as_slice()).unwrap()) + .collect::<Vec<_>>() } fn write_quorum(&self) -> usize { let nmembers = self.system.ring.borrow().config.members.len(); diff --git a/src/table/replication/parameters.rs b/src/table/replication/parameters.rs index 64996828..7fdfce67 100644 --- a/src/table/replication/parameters.rs +++ b/src/table/replication/parameters.rs @@ -1,5 +1,5 @@ use garage_rpc::ring::*; - +use garage_rpc::NodeID; use garage_util::data::*; /// Trait to describe how a table shall be replicated @@ -8,12 +8,12 @@ pub trait TableReplication: Send + Sync { // To understand various replication methods /// Which nodes to send read requests to - fn read_nodes(&self, hash: &Hash) -> Vec<Uuid>; + fn read_nodes(&self, hash: &Hash) -> Vec<NodeID>; /// Responses needed to consider a read succesfull fn read_quorum(&self) -> usize; /// Which nodes to send writes to - fn write_nodes(&self, hash: &Hash) -> Vec<Uuid>; + fn write_nodes(&self, hash: &Hash) -> Vec<NodeID>; /// Responses needed to consider a write succesfull fn write_quorum(&self) -> usize; fn max_write_errors(&self) -> usize; diff --git a/src/table/replication/sharded.rs b/src/table/replication/sharded.rs index 8081b892..ffe686a5 100644 --- a/src/table/replication/sharded.rs +++ b/src/table/replication/sharded.rs @@ -1,7 +1,8 @@ use std::sync::Arc; -use garage_rpc::membership::System; use garage_rpc::ring::*; +use garage_rpc::system::System; +use garage_rpc::NodeID; use garage_util::data::*; use crate::replication::*; @@ -25,7 +26,7 @@ pub struct TableShardedReplication { } impl TableReplication for TableShardedReplication { - fn read_nodes(&self, hash: &Hash) -> Vec<Uuid> { + fn read_nodes(&self, hash: &Hash) -> Vec<NodeID> { let ring = self.system.ring.borrow(); ring.get_nodes(&hash, self.replication_factor) } @@ -33,7 +34,7 @@ impl TableReplication for TableShardedReplication { self.read_quorum } - fn write_nodes(&self, hash: &Hash) -> Vec<Uuid> { + fn write_nodes(&self, hash: &Hash) -> Vec<NodeID> { let ring = self.system.ring.borrow(); ring.get_nodes(&hash, self.replication_factor) } diff --git a/src/table/sync.rs b/src/table/sync.rs index a3afbbba..c5db0987 100644 --- a/src/table/sync.rs +++ b/src/table/sync.rs @@ -2,6 +2,7 @@ use std::collections::VecDeque; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; +use async_trait::async_trait; use futures::select; use futures_util::future::*; use futures_util::stream::*; @@ -13,10 +14,9 @@ use tokio::sync::{mpsc, watch}; use garage_util::data::*; use garage_util::error::Error; -use garage_rpc::membership::System; use garage_rpc::ring::*; -use garage_rpc::rpc_client::*; -use garage_rpc::rpc_server::*; +use garage_rpc::system::System; +use garage_rpc::*; use crate::data::*; use crate::merkle::*; @@ -28,13 +28,13 @@ const TABLE_SYNC_RPC_TIMEOUT: Duration = Duration::from_secs(30); // Do anti-entropy every 10 minutes const ANTI_ENTROPY_INTERVAL: Duration = Duration::from_secs(10 * 60); -pub struct TableSyncer<F: TableSchema, R: TableReplication> { +pub struct TableSyncer<F: TableSchema + 'static, R: TableReplication + 'static> { system: Arc<System>, data: Arc<TableData<F, R>>, merkle: Arc<MerkleUpdater<F, R>>, todo: Mutex<SyncTodo>, - rpc_client: 
Arc<RpcClient<SyncRpc>>, + endpoint: Arc<Endpoint<SyncRpc, Self>>, } #[derive(Serialize, Deserialize)] @@ -45,9 +45,12 @@ pub(crate) enum SyncRpc { Node(MerkleNodeKey, MerkleNode), Items(Vec<Arc<ByteBuf>>), Ok, + Error(String), } -impl RpcMessage for SyncRpc {} +impl Message for SyncRpc { + type Response = SyncRpc; +} struct SyncTodo { todo: Vec<TodoPartition>, @@ -72,10 +75,10 @@ where system: Arc<System>, data: Arc<TableData<F, R>>, merkle: Arc<MerkleUpdater<F, R>>, - rpc_server: &mut RpcServer, ) -> Arc<Self> { - let rpc_path = format!("table_{}/sync", data.name); - let rpc_client = system.rpc_client::<SyncRpc>(&rpc_path); + let endpoint = system + .netapp + .endpoint(format!("garage_table/sync.rs/Rpc:{}", data.name)); let todo = SyncTodo { todo: vec![] }; @@ -84,10 +87,10 @@ where data: data.clone(), merkle, todo: Mutex::new(todo), - rpc_client, + endpoint, }); - syncer.register_handler(rpc_server, rpc_path); + syncer.endpoint.set_handler(syncer.clone()); let (busy_tx, busy_rx) = mpsc::unbounded_channel(); @@ -112,21 +115,6 @@ where syncer } - fn register_handler(self: &Arc<Self>, rpc_server: &mut RpcServer, path: String) { - let self2 = self.clone(); - rpc_server.add_handler::<SyncRpc, _, _>(path, move |msg, _addr| { - let self2 = self2.clone(); - async move { self2.handle_rpc(&msg).await } - }); - - let self2 = self.clone(); - self.rpc_client - .set_local_handler(self.system.id, move |msg| { - let self2 = self2.clone(); - async move { self2.handle_rpc(&msg).await } - }); - } - async fn watcher_task( self: Arc<Self>, mut must_exit: watch::Receiver<bool>, @@ -317,15 +305,19 @@ where async fn offload_items( self: &Arc<Self>, items: &[(Vec<u8>, Arc<ByteBuf>)], - nodes: &[Uuid], + nodes: &[NodeID], ) -> Result<(), Error> { let values = items.iter().map(|(_k, v)| v.clone()).collect::<Vec<_>>(); - self.rpc_client + self.system + .rpc .try_call_many( + &self.endpoint, nodes, SyncRpc::Items(values), - RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_SYNC_RPC_TIMEOUT), + RequestStrategy::with_priority(PRIO_BACKGROUND) + .with_quorum(nodes.len()) + .with_timeout(TABLE_SYNC_RPC_TIMEOUT), ) .await?; @@ -362,7 +354,7 @@ where async fn do_sync_with( self: Arc<Self>, partition: TodoPartition, - who: Uuid, + who: NodeID, must_exit: watch::Receiver<bool>, ) -> Result<(), Error> { let (root_ck_key, root_ck) = self.get_root_ck(partition.partition)?; @@ -378,11 +370,14 @@ where // Check if they have the same root checksum // If so, do nothing. let root_resp = self - .rpc_client + .system + .rpc .call( + &self.endpoint, who, SyncRpc::RootCkHash(partition.partition, root_ck_hash), - TABLE_SYNC_RPC_TIMEOUT, + RequestStrategy::with_priority(PRIO_BACKGROUND) + .with_timeout(TABLE_SYNC_RPC_TIMEOUT), ) .await?; @@ -430,8 +425,15 @@ where // Get Merkle node for this tree position at remote node // and compare it with local node let remote_node = match self - .rpc_client - .call(who, SyncRpc::GetNode(key.clone()), TABLE_SYNC_RPC_TIMEOUT) + .system + .rpc + .call( + &self.endpoint, + who, + SyncRpc::GetNode(key.clone()), + RequestStrategy::with_priority(PRIO_BACKGROUND) + .with_timeout(TABLE_SYNC_RPC_TIMEOUT), + ) .await? 
{ SyncRpc::Node(_, node) => node, @@ -478,7 +480,7 @@ where Ok(()) } - async fn send_items(&self, who: Uuid, item_value_list: Vec<Vec<u8>>) -> Result<(), Error> { + async fn send_items(&self, who: NodeID, item_value_list: Vec<Vec<u8>>) -> Result<(), Error> { info!( "({}) Sending {} items to {:?}", self.data.name, @@ -492,8 +494,15 @@ where .collect::<Vec<_>>(); let rpc_resp = self - .rpc_client - .call(who, SyncRpc::Items(values), TABLE_SYNC_RPC_TIMEOUT) + .system + .rpc + .call( + &self.endpoint, + who, + SyncRpc::Items(values), + RequestStrategy::with_priority(PRIO_BACKGROUND) + .with_timeout(TABLE_SYNC_RPC_TIMEOUT), + ) .await?; if let SyncRpc::Ok = rpc_resp { Ok(()) @@ -506,7 +515,6 @@ where } // ======= SYNCHRONIZATION PROCEDURE -- RECEIVER SIDE ====== - async fn handle_rpc(self: &Arc<Self>, message: &SyncRpc) -> Result<SyncRpc, Error> { match message { SyncRpc::RootCkHash(range, h) => { @@ -527,6 +535,19 @@ where } } +#[async_trait] +impl<F, R> EndpointHandler<SyncRpc> for TableSyncer<F, R> +where + F: TableSchema + 'static, + R: TableReplication + 'static, +{ + async fn handle(self: &Arc<Self>, message: &SyncRpc, _from: NodeID) -> SyncRpc { + self.handle_rpc(message) + .await + .unwrap_or_else(|e| SyncRpc::Error(format!("{}", e))) + } +} + impl SyncTodo { fn add_full_sync<F: TableSchema, R: TableReplication>( &mut self, diff --git a/src/table/table.rs b/src/table/table.rs index eb9bd25c..ad263343 100644 --- a/src/table/table.rs +++ b/src/table/table.rs @@ -2,6 +2,7 @@ use std::collections::{BTreeMap, HashMap}; use std::sync::Arc; use std::time::Duration; +use async_trait::async_trait; use futures::stream::*; use serde::{Deserialize, Serialize}; use serde_bytes::ByteBuf; @@ -9,9 +10,8 @@ use serde_bytes::ByteBuf; use garage_util::data::*; use garage_util::error::Error; -use garage_rpc::membership::System; -use garage_rpc::rpc_client::*; -use garage_rpc::rpc_server::*; +use garage_rpc::system::System; +use garage_rpc::*; use crate::crdt::Crdt; use crate::data::*; @@ -23,17 +23,18 @@ use crate::sync::*; const TABLE_RPC_TIMEOUT: Duration = Duration::from_secs(10); -pub struct Table<F: TableSchema, R: TableReplication> { +pub struct Table<F: TableSchema + 'static, R: TableReplication + 'static> { pub system: Arc<System>, pub data: Arc<TableData<F, R>>, pub merkle_updater: Arc<MerkleUpdater<F, R>>, pub syncer: Arc<TableSyncer<F, R>>, - rpc_client: Arc<RpcClient<TableRpc<F>>>, + endpoint: Arc<Endpoint<TableRpc<F>, Self>>, } #[derive(Serialize, Deserialize)] pub(crate) enum TableRpc<F: TableSchema> { Ok, + Error(String), ReadEntry(F::P, F::S), ReadEntryResponse(Option<ByteBuf>), @@ -44,7 +45,9 @@ pub(crate) enum TableRpc<F: TableSchema> { Update(Vec<Arc<ByteBuf>>), } -impl<F: TableSchema> RpcMessage for TableRpc<F> {} +impl<F: TableSchema> Message for TableRpc<F> { + type Response = TableRpc<F>; +} impl<F, R> Table<F, R> where @@ -59,32 +62,27 @@ where system: Arc<System>, db: &sled::Db, name: String, - rpc_server: &mut RpcServer, ) -> Arc<Self> { - let rpc_path = format!("table_{}", name); - let rpc_client = system.rpc_client::<TableRpc<F>>(&rpc_path); + let endpoint = system + .netapp + .endpoint(format!("garage_table/table.rs/Rpc:{}", name)); let data = TableData::new(system.clone(), name, instance, replication, db); let merkle_updater = MerkleUpdater::launch(&system.background, data.clone()); - let syncer = TableSyncer::launch( - system.clone(), - data.clone(), - merkle_updater.clone(), - rpc_server, - ); - TableGc::launch(system.clone(), data.clone(), rpc_server); + let syncer = 
TableSyncer::launch(system.clone(), data.clone(), merkle_updater.clone()); + TableGc::launch(system.clone(), data.clone()); let table = Arc::new(Self { system, data, merkle_updater, syncer, - rpc_client, + endpoint, }); - table.clone().register_handler(rpc_server, rpc_path); + table.endpoint.set_handler(table.clone()); table } @@ -97,11 +95,14 @@ where let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(e)?)); let rpc = TableRpc::<F>::Update(vec![e_enc]); - self.rpc_client + self.system + .rpc .try_call_many( + &self.endpoint, &who[..], rpc, - RequestStrategy::with_quorum(self.data.replication.write_quorum()) + RequestStrategy::with_priority(PRIO_NORMAL) + .with_quorum(self.data.replication.write_quorum()) .with_timeout(TABLE_RPC_TIMEOUT), ) .await?; @@ -123,7 +124,16 @@ where let call_futures = call_list.drain().map(|(node, entries)| async move { let rpc = TableRpc::<F>::Update(entries); - let resp = self.rpc_client.call(node, rpc, TABLE_RPC_TIMEOUT).await?; + let resp = self + .system + .rpc + .call( + &self.endpoint, + node, + rpc, + RequestStrategy::with_priority(PRIO_NORMAL).with_timeout(TABLE_RPC_TIMEOUT), + ) + .await?; Ok::<_, Error>((node, resp)) }); let mut resps = call_futures.collect::<FuturesUnordered<_>>(); @@ -152,11 +162,14 @@ where let rpc = TableRpc::<F>::ReadEntry(partition_key.clone(), sort_key.clone()); let resps = self - .rpc_client + .system + .rpc .try_call_many( + &self.endpoint, &who[..], rpc, - RequestStrategy::with_quorum(self.data.replication.read_quorum()) + RequestStrategy::with_priority(PRIO_NORMAL) + .with_quorum(self.data.replication.read_quorum()) .with_timeout(TABLE_RPC_TIMEOUT) .interrupt_after_quorum(true), ) @@ -208,11 +221,14 @@ where let rpc = TableRpc::<F>::ReadRange(partition_key.clone(), begin_sort_key, filter, limit); let resps = self - .rpc_client + .system + .rpc .try_call_many( + &self.endpoint, &who[..], rpc, - RequestStrategy::with_quorum(self.data.replication.read_quorum()) + RequestStrategy::with_priority(PRIO_NORMAL) + .with_quorum(self.data.replication.read_quorum()) .with_timeout(TABLE_RPC_TIMEOUT) .interrupt_after_quorum(true), ) @@ -261,36 +277,25 @@ where // =============== UTILITY FUNCTION FOR CLIENT OPERATIONS =============== - async fn repair_on_read(&self, who: &[Uuid], what: F::E) -> Result<(), Error> { + async fn repair_on_read(&self, who: &[NodeID], what: F::E) -> Result<(), Error> { let what_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(&what)?)); - self.rpc_client + self.system + .rpc .try_call_many( + &self.endpoint, who, TableRpc::<F>::Update(vec![what_enc]), - RequestStrategy::with_quorum(who.len()).with_timeout(TABLE_RPC_TIMEOUT), + RequestStrategy::with_priority(PRIO_NORMAL) + .with_quorum(who.len()) + .with_timeout(TABLE_RPC_TIMEOUT), ) .await?; Ok(()) } - // =============== HANDLERS FOR RPC OPERATIONS (SERVER SIDE) ============== - - fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) { - let self2 = self.clone(); - rpc_server.add_handler::<TableRpc<F>, _, _>(path, move |msg, _addr| { - let self2 = self2.clone(); - async move { self2.handle(&msg).await } - }); - - let self2 = self.clone(); - self.rpc_client - .set_local_handler(self.system.id, move |msg| { - let self2 = self2.clone(); - async move { self2.handle(&msg).await } - }); - } - - async fn handle(self: &Arc<Self>, msg: &TableRpc<F>) -> Result<TableRpc<F>, Error> { + // ====== RPC HANDLER ===== + // + async fn handle_rpc(self: &Arc<Self>, msg: &TableRpc<F>) -> Result<TableRpc<F>, Error> { match msg { TableRpc::ReadEntry(key, 
sort_key) => { let value = self.data.read_entry(key, sort_key)?; @@ -308,3 +313,16 @@ where } } } + +#[async_trait] +impl<F, R> EndpointHandler<TableRpc<F>> for Table<F, R> +where + F: TableSchema + 'static, + R: TableReplication + 'static, +{ + async fn handle(self: &Arc<Self>, msg: &TableRpc<F>, _from: NodeID) -> TableRpc<F> { + self.handle_rpc(msg) + .await + .unwrap_or_else(|e| TableRpc::<F>::Error(format!("{}", e))) + } +} diff --git a/src/util/Cargo.toml b/src/util/Cargo.toml index 91e0b2b9..0f041074 100644 --- a/src/util/Cargo.toml +++ b/src/util/Cargo.toml @@ -32,7 +32,6 @@ toml = "0.5" futures = "0.3" tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } +netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" } http = "0.2" hyper = "0.14" -rustls = "0.19" -webpki = "0.21" diff --git a/src/util/config.rs b/src/util/config.rs index 46b918a9..ee153dfa 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -3,8 +3,11 @@ use std::io::Read; use std::net::SocketAddr; use std::path::PathBuf; +use serde::de::Error as SerdeError; use serde::{de, Deserialize}; +use netapp::NodeID; + use crate::error::Error; /// Represent the whole configuration @@ -26,20 +29,20 @@ pub struct Config { // (we can add more aliases for this later) pub replication_mode: String, + /// RPC secret key: 32 bytes hex encoded + pub rpc_secret: String, + /// Address to bind for RPC pub rpc_bind_addr: SocketAddr, /// Bootstrap peers RPC address #[serde(deserialize_with = "deserialize_vec_addr")] - pub bootstrap_peers: Vec<SocketAddr>, + pub bootstrap_peers: Vec<(NodeID, SocketAddr)>, /// Consule host to connect to to discover more peers pub consul_host: Option<String>, /// Consul service name to use pub consul_service_name: Option<String>, - /// Configuration for RPC TLS - pub rpc_tls: Option<TlsConfig>, - /// Max number of concurrent RPC request #[serde(default = "default_max_concurrent_rpc_requests")] pub max_concurrent_rpc_requests: usize, @@ -59,17 +62,6 @@ pub struct Config { pub s3_web: WebConfig, } -/// Configuration for RPC TLS -#[derive(Deserialize, Debug, Clone)] -pub struct TlsConfig { - /// Path to certificate autority used for all nodes - pub ca_cert: String, - /// Path to public certificate for this node - pub node_cert: String, - /// Path to private key for this node - pub node_key: String, -} - /// Configuration for S3 api #[derive(Deserialize, Debug, Clone)] pub struct ApiConfig { @@ -115,27 +107,32 @@ pub fn read_config(config_file: PathBuf) -> Result<Config, Error> { Ok(toml::from_str(&config)?) } -fn deserialize_vec_addr<'de, D>(deserializer: D) -> Result<Vec<SocketAddr>, D::Error> +fn deserialize_vec_addr<'de, D>(deserializer: D) -> Result<Vec<(NodeID, SocketAddr)>, D::Error> where D: de::Deserializer<'de>, { use std::net::ToSocketAddrs; - Ok(<Vec<&str>>::deserialize(deserializer)? - .iter() - .filter_map(|&name| { - name.to_socket_addrs() - .map(|iter| (name, iter)) - .map_err(|_| warn!("Error resolving \"{}\"", name)) - .ok() - }) - .map(|(name, iter)| { - let v = iter.collect::<Vec<_>>(); - if v.is_empty() { - warn!("Error resolving \"{}\"", name) - } - v - }) - .flatten() - .collect()) + let mut ret = vec![]; + + for peer in <Vec<&str>>::deserialize(deserializer)? 
{ + let delim = peer + .find('@') + .ok_or_else(|| D::Error::custom("Invalid bootstrap peer: public key not specified"))?; + let (key, host) = peer.split_at(delim); + let pubkey = NodeID::from_slice(&hex::decode(&key).map_err(D::Error::custom)?) + .ok_or_else(|| D::Error::custom("Invalid bootstrap peer public key"))?; + let hosts = host[1..] + .to_socket_addrs() + .map_err(D::Error::custom)? + .collect::<Vec<_>>(); + if hosts.is_empty() { + return Err(D::Error::custom(format!("Error resolving {}", &host[1..]))); + } + for host in hosts { + ret.push((pubkey.clone(), host)); + } + } + + Ok(ret) } diff --git a/src/util/error.rs b/src/util/error.rs index c3d84e63..804a0d4d 100644 --- a/src/util/error.rs +++ b/src/util/error.rs @@ -11,8 +11,8 @@ pub enum RpcError { #[error(display = "Node is down: {:?}.", _0)] NodeDown(Uuid), - #[error(display = "Timeout: {}", _0)] - Timeout(#[error(source)] tokio::time::error::Elapsed), + #[error(display = "Timeout")] + Timeout, #[error(display = "HTTP error: {}", _0)] Http(#[error(source)] http::Error), @@ -45,11 +45,8 @@ pub enum Error { #[error(display = "Invalid HTTP header value: {}", _0)] HttpHeader(#[error(source)] http::header::ToStrError), - #[error(display = "TLS error: {}", _0)] - Tls(#[error(source)] rustls::TLSError), - - #[error(display = "PKI error: {}", _0)] - Pki(#[error(source)] webpki::Error), + #[error(display = "Netapp error: {}", _0)] + Netapp(#[error(source)] netapp::error::Error), #[error(display = "Sled error: {}", _0)] Sled(#[error(source)] sled::Error), |
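The pattern introduced by this port can be summarized as follows: a serializable enum implements netapp's `Message` trait, an `Endpoint` for it is registered on the node's `NetApp` instance, a handler is attached with `set_handler`, and outgoing calls go through `System::rpc` (the new `RpcHelper`) with a `RequestStrategy`. The minimal sketch below is not part of the commit; it mirrors `AdminRpcHandler` and `TableGc` above, the names `ExampleRpc`, `EXAMPLE_RPC_PATH` and `ExampleHandler` are invented for illustration, and it assumes the re-exports provided by `garage_rpc::*` in this version (`Endpoint`, `EndpointHandler`, `Message`, `NodeID`, `RequestStrategy`, `PRIO_*`).

// Illustrative sketch only, not part of this commit.
use std::sync::Arc;
use std::time::Duration;

use async_trait::async_trait;
use serde::{Deserialize, Serialize};

use garage_rpc::system::System;
use garage_rpc::*;
use garage_util::error::Error;

// Hypothetical endpoint path, in the same style as ADMIN_RPC_PATH above.
pub const EXAMPLE_RPC_PATH: &str = "garage/example.rs/Rpc";

#[derive(Debug, Serialize, Deserialize)]
pub enum ExampleRpc {
    Ping,
    Pong,
    Error(String),
}

impl Message for ExampleRpc {
    type Response = ExampleRpc;
}

pub struct ExampleHandler {
    system: Arc<System>,
    endpoint: Arc<Endpoint<ExampleRpc, Self>>,
}

impl ExampleHandler {
    pub fn new(system: Arc<System>) -> Arc<Self> {
        // Register an endpoint on the node's NetApp instance and attach this
        // struct as its handler, exactly as the admin and table RPCs do above.
        let endpoint = system.netapp.endpoint(EXAMPLE_RPC_PATH.into());
        let h = Arc::new(Self { system, endpoint });
        h.endpoint.set_handler(h.clone());
        h
    }

    // Send a Ping to one peer, with normal priority and a 10 s timeout.
    pub async fn ping(&self, peer: NodeID) -> Result<ExampleRpc, Error> {
        self.system
            .rpc
            .call(
                &self.endpoint,
                peer,
                ExampleRpc::Ping,
                RequestStrategy::with_priority(PRIO_NORMAL)
                    .with_timeout(Duration::from_secs(10)),
            )
            .await
    }
}

#[async_trait]
impl EndpointHandler<ExampleRpc> for ExampleHandler {
    async fn handle(self: &Arc<Self>, msg: &ExampleRpc, _from: NodeID) -> ExampleRpc {
        match msg {
            ExampleRpc::Ping => ExampleRpc::Pong,
            _ => ExampleRpc::Error("unexpected message".to_string()),
        }
    }
}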
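Quorum calls follow the same shape. The sketch below is again hypothetical and extends the previous one (same module, same imports, reusing `ExampleRpc` and `ExampleHandler`); it mirrors `Table::insert` and `TableGc::try_send_and_delete` above: `try_call_many` returns once `quorum` successful responses have arrived, and unless `interrupt_after_quorum(true)` is set, the remaining requests are completed by the background runner.

impl ExampleHandler {
    // Send Ping to a set of nodes and wait for `quorum` successful answers.
    pub async fn ping_quorum(&self, nodes: &[NodeID], quorum: usize) -> Result<(), Error> {
        self.system
            .rpc
            .try_call_many(
                &self.endpoint,
                nodes,
                ExampleRpc::Ping,
                RequestStrategy::with_priority(PRIO_BACKGROUND)
                    .with_quorum(quorum)
                    .with_timeout(Duration::from_secs(30)),
            )
            .await?;
        Ok(())
    }
}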