diff options
author | Alex Auvolat <alex@adnab.me> | 2021-03-18 19:27:02 +0100 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2021-03-18 19:27:02 +0100 |
commit | 4348bde180887f5185ca6da6024476e8e8fb2fe6 (patch) | |
tree | 8a35f0e4229d15665af7673eebcaa1b3d75a73cb /src/garage | |
parent | 5b659b28ce6bef15072d2fc93f777aa8ff73b2d8 (diff) | |
parent | 4eb16e886388f35d2bdee52b16922421004cf132 (diff) | |
download | garage-4348bde180887f5185ca6da6024476e8e8fb2fe6.tar.gz garage-4348bde180887f5185ca6da6024476e8e8fb2fe6.zip |
Merge branch 'dev-0.2'
Diffstat (limited to 'src/garage')
-rw-r--r-- | src/garage/Cargo.toml | 13 | ||||
-rw-r--r-- | src/garage/admin_rpc.rs | 189 | ||||
-rw-r--r-- | src/garage/cli.rs | 552 | ||||
-rw-r--r-- | src/garage/main.rs | 512 | ||||
-rw-r--r-- | src/garage/repair.rs | 83 | ||||
-rw-r--r-- | src/garage/server.rs | 63 |
6 files changed, 793 insertions, 619 deletions
diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml index 03bc472d..c1817bf2 100644 --- a/src/garage/Cargo.toml +++ b/src/garage/Cargo.toml @@ -21,21 +21,20 @@ garage_model = { version = "0.1.1", path = "../model" } garage_api = { version = "0.1.1", path = "../api" } garage_web = { version = "0.1.1", path = "../web" } -bytes = "0.4" -rand = "0.7" -hex = "0.3" -sha2 = "0.8" +bytes = "1.0" +rand = "0.8" +hex = "0.4" log = "0.4" pretty_env_logger = "0.4" +git-version = "0.3.4" sled = "0.34" -old_sled = { package = "sled", version = "0.31" } structopt = { version = "0.3", default-features = false } toml = "0.5" -rmp-serde = "0.14.3" +rmp-serde = "0.15" serde = { version = "1.0", default-features = false, features = ["derive", "rc"] } futures = "0.3" futures-util = "0.3" -tokio = { version = "0.2", default-features = false, features = ["rt-core", "rt-threaded", "io-driver", "net", "tcp", "time", "macros", "sync", "signal", "fs"] } +tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } diff --git a/src/garage/admin_rpc.rs b/src/garage/admin_rpc.rs index e1981e3a..df00fcaf 100644 --- a/src/garage/admin_rpc.rs +++ b/src/garage/admin_rpc.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; +use std::fmt::Write; use std::sync::Arc; use serde::{Deserialize, Serialize}; @@ -5,6 +7,7 @@ use serde::{Deserialize, Serialize}; use garage_util::error::Error; use garage_table::crdt::CRDT; +use garage_table::replication::*; use garage_table::*; use garage_rpc::rpc_client::*; @@ -14,6 +17,7 @@ use garage_model::bucket_table::*; use garage_model::garage::Garage; use garage_model::key_table::*; +use crate::cli::*; use crate::repair::Repair; use crate::*; @@ -25,6 +29,7 @@ pub enum AdminRPC { BucketOperation(BucketOperation), KeyOperation(KeyOperation), LaunchRepair(RepairOpt), + Stats(StatsOpt), // Replies Ok(String), @@ -55,6 +60,7 @@ impl AdminRpcHandler { AdminRPC::BucketOperation(bo) => self2.handle_bucket_cmd(bo).await, AdminRPC::KeyOperation(ko) => self2.handle_key_cmd(ko).await, AdminRPC::LaunchRepair(opt) => self2.handle_launch_repair(opt).await, + AdminRPC::Stats(opt) => self2.handle_stats(opt).await, _ => Err(Error::BadRPC(format!("Invalid RPC"))), } } @@ -116,7 +122,7 @@ impl AdminRpcHandler { for (key_id, _, _) in bucket.authorized_keys() { if let Some(key) = self.garage.key_table.get(&EmptyKey, key_id).await? { if !key.deleted.get() { - self.update_key_bucket(key, &bucket.name, false, false) + self.update_key_bucket(&key, &bucket.name, false, false) .await?; } } else { @@ -128,31 +134,31 @@ impl AdminRpcHandler { Ok(AdminRPC::Ok(format!("Bucket {} was deleted.", query.name))) } BucketOperation::Allow(query) => { - let key = self.get_existing_key(&query.key_id).await?; + let key = self.get_existing_key(&query.key_pattern).await?; let bucket = self.get_existing_bucket(&query.bucket).await?; let allow_read = query.read || key.allow_read(&query.bucket); let allow_write = query.write || key.allow_write(&query.bucket); - self.update_key_bucket(key, &query.bucket, allow_read, allow_write) + self.update_key_bucket(&key, &query.bucket, allow_read, allow_write) .await?; - self.update_bucket_key(bucket, &query.key_id, allow_read, allow_write) + self.update_bucket_key(bucket, &key.key_id, allow_read, allow_write) .await?; Ok(AdminRPC::Ok(format!( "New permissions for {} on {}: read {}, write {}.", - &query.key_id, &query.bucket, allow_read, allow_write + &key.key_id, &query.bucket, allow_read, allow_write ))) } BucketOperation::Deny(query) => { - let key = self.get_existing_key(&query.key_id).await?; + let key = self.get_existing_key(&query.key_pattern).await?; let bucket = self.get_existing_bucket(&query.bucket).await?; let allow_read = !query.read && key.allow_read(&query.bucket); let allow_write = !query.write && key.allow_write(&query.bucket); - self.update_key_bucket(key, &query.bucket, allow_read, allow_write) + self.update_key_bucket(&key, &query.bucket, allow_read, allow_write) .await?; - self.update_bucket_key(bucket, &query.key_id, allow_read, allow_write) + self.update_bucket_key(bucket, &key.key_id, allow_read, allow_write) .await?; Ok(AdminRPC::Ok(format!( "New permissions for {} on {}: read {}, write {}.", - &query.key_id, &query.bucket, allow_read, allow_write + &key.key_id, &query.bucket, allow_read, allow_write ))) } BucketOperation::Website(query) => { @@ -187,7 +193,12 @@ impl AdminRpcHandler { let key_ids = self .garage .key_table - .get_range(&EmptyKey, None, Some(DeletedFilter::NotDeleted), 10000) + .get_range( + &EmptyKey, + None, + Some(KeyFilter::Deleted(DeletedFilter::NotDeleted)), + 10000, + ) .await? .iter() .map(|k| (k.key_id.to_string(), k.name.get().clone())) @@ -195,7 +206,7 @@ impl AdminRpcHandler { Ok(AdminRPC::KeyList(key_ids)) } KeyOperation::Info(query) => { - let key = self.get_existing_key(&query.key_id).await?; + let key = self.get_existing_key(&query.key_pattern).await?; Ok(AdminRPC::KeyInfo(key)) } KeyOperation::New(query) => { @@ -204,13 +215,13 @@ impl AdminRpcHandler { Ok(AdminRPC::KeyInfo(key)) } KeyOperation::Rename(query) => { - let mut key = self.get_existing_key(&query.key_id).await?; + let mut key = self.get_existing_key(&query.key_pattern).await?; key.name.update(query.new_name); self.garage.key_table.insert(&key).await?; Ok(AdminRPC::KeyInfo(key)) } KeyOperation::Delete(query) => { - let key = self.get_existing_key(&query.key_id).await?; + let key = self.get_existing_key(&query.key_pattern).await?; if !query.yes { return Err(Error::BadRPC(format!( "Add --yes flag to really perform this operation" @@ -227,13 +238,24 @@ impl AdminRpcHandler { return Err(Error::Message(format!("Bucket not found: {}", ab_name))); } } - let del_key = Key::delete(key.key_id); + let del_key = Key::delete(key.key_id.to_string()); self.garage.key_table.insert(&del_key).await?; Ok(AdminRPC::Ok(format!( "Key {} was deleted successfully.", - query.key_id + key.key_id ))) } + KeyOperation::Import(query) => { + let prev_key = self.garage.key_table.get(&EmptyKey, &query.key_id) + .await?; + if prev_key.is_some() { + return Err(Error::Message(format!("Key {} already exists in data store. Even if it is deleted, we can't let you create a new key with the same ID. Sorry.", query.key_id))); + } + let imported_key = Key::import(&query.key_id, &query.secret_key, &query.name); + self.garage.key_table.insert(&imported_key).await?; + Ok(AdminRPC::KeyInfo(imported_key)) + + } } } @@ -250,14 +272,28 @@ impl AdminRpcHandler { )))) } - async fn get_existing_key(&self, id: &String) -> Result<Key, Error> { - self.garage + async fn get_existing_key(&self, pattern: &str) -> Result<Key, Error> { + let candidates = self + .garage .key_table - .get(&EmptyKey, id) + .get_range( + &EmptyKey, + None, + Some(KeyFilter::Matches(pattern.to_string())), + 10, + ) .await? + .into_iter() .filter(|k| !k.deleted.get()) - .map(Ok) - .unwrap_or(Err(Error::BadRPC(format!("Key {} does not exist", id)))) + .collect::<Vec<_>>(); + if candidates.len() != 1 { + Err(Error::Message(format!( + "{} matching keys", + candidates.len() + ))) + } else { + Ok(candidates.into_iter().next().unwrap()) + } } /// Update **bucket table** to inform of the new linked key @@ -290,11 +326,12 @@ impl AdminRpcHandler { /// Update **key table** to inform of the new linked bucket async fn update_key_bucket( &self, - mut key: Key, + key: &Key, bucket: &String, allow_read: bool, allow_write: bool, ) -> Result<(), Error> { + let mut key = key.clone(); let old_map = key.authorized_buckets.take_and_clear(); key.authorized_buckets.merge(&old_map.update_mutator( bucket.clone(), @@ -350,12 +387,118 @@ impl AdminRpcHandler { .background .spawn_worker("Repair worker".into(), move |must_exit| async move { repair.repair_worker(opt, must_exit).await - }) - .await; + }); Ok(AdminRPC::Ok(format!( "Repair launched on {:?}", self.garage.system.id ))) } } + + async fn handle_stats(&self, opt: StatsOpt) -> Result<AdminRPC, Error> { + if opt.all_nodes { + let mut ret = String::new(); + let ring = self.garage.system.ring.borrow().clone(); + + for node in ring.config.members.keys() { + let mut opt = opt.clone(); + opt.all_nodes = false; + + writeln!(&mut ret, "\n======================").unwrap(); + writeln!(&mut ret, "Stats for node {:?}:", node).unwrap(); + match self + .rpc_client + .call(*node, AdminRPC::Stats(opt), ADMIN_RPC_TIMEOUT) + .await + { + Ok(AdminRPC::Ok(s)) => writeln!(&mut ret, "{}", s).unwrap(), + Ok(x) => writeln!(&mut ret, "Bad answer: {:?}", x).unwrap(), + Err(e) => writeln!(&mut ret, "Error: {}", e).unwrap(), + } + } + Ok(AdminRPC::Ok(ret)) + } else { + Ok(AdminRPC::Ok(self.gather_stats_local(opt)?)) + } + } + + fn gather_stats_local(&self, opt: StatsOpt) -> Result<String, Error> { + let mut ret = String::new(); + writeln!( + &mut ret, + "\nGarage version: {}", + git_version::git_version!() + ) + .unwrap(); + + // Gather ring statistics + let ring = self.garage.system.ring.borrow().clone(); + let mut ring_nodes = HashMap::new(); + for r in ring.ring.iter() { + for n in r.nodes.iter() { + if !ring_nodes.contains_key(n) { + ring_nodes.insert(*n, 0usize); + } + *ring_nodes.get_mut(n).unwrap() += 1; + } + } + writeln!(&mut ret, "\nRing nodes & partition count:").unwrap(); + for (n, c) in ring_nodes.iter() { + writeln!(&mut ret, " {:?} {}", n, c).unwrap(); + } + + self.gather_table_stats(&mut ret, &self.garage.bucket_table, &opt)?; + self.gather_table_stats(&mut ret, &self.garage.key_table, &opt)?; + self.gather_table_stats(&mut ret, &self.garage.object_table, &opt)?; + self.gather_table_stats(&mut ret, &self.garage.version_table, &opt)?; + self.gather_table_stats(&mut ret, &self.garage.block_ref_table, &opt)?; + + writeln!(&mut ret, "\nBlock manager stats:").unwrap(); + if opt.detailed { + writeln!( + &mut ret, + " number of blocks: {}", + self.garage.block_manager.rc_len() + ) + .unwrap(); + } + writeln!( + &mut ret, + " resync queue length: {}", + self.garage.block_manager.resync_queue_len() + ) + .unwrap(); + + Ok(ret) + } + + fn gather_table_stats<F, R>( + &self, + to: &mut String, + t: &Arc<Table<F, R>>, + opt: &StatsOpt, + ) -> Result<(), Error> + where + F: TableSchema + 'static, + R: TableReplication + 'static, + { + writeln!(to, "\nTable stats for {}", t.data.name).unwrap(); + if opt.detailed { + writeln!(to, " number of items: {}", t.data.store.len()).unwrap(); + writeln!( + to, + " Merkle tree size: {}", + t.merkle_updater.merkle_tree_len() + ) + .unwrap(); + } + writeln!( + to, + " Merkle updater todo queue length: {}", + t.merkle_updater.todo_len() + ) + .unwrap(); + writeln!(to, " GC todo queue length: {}", t.data.gc_todo_len()).unwrap(); + Ok(()) + } } diff --git a/src/garage/cli.rs b/src/garage/cli.rs new file mode 100644 index 00000000..21bafebd --- /dev/null +++ b/src/garage/cli.rs @@ -0,0 +1,552 @@ +use std::collections::HashSet; +use std::net::SocketAddr; +use std::path::PathBuf; + +use serde::{Deserialize, Serialize}; +use structopt::StructOpt; + +use garage_util::error::Error; +use garage_util::time::*; + +use garage_rpc::membership::*; +use garage_rpc::ring::*; +use garage_rpc::rpc_client::*; + +use garage_model::bucket_table::*; +use garage_model::key_table::*; + +use crate::admin_rpc::*; + +#[derive(StructOpt, Debug)] +pub enum Command { + /// Run Garage server + #[structopt(name = "server")] + Server(ServerOpt), + + /// Get network status + #[structopt(name = "status")] + Status, + + /// Garage node operations + #[structopt(name = "node")] + Node(NodeOperation), + + /// Bucket operations + #[structopt(name = "bucket")] + Bucket(BucketOperation), + + /// Key operations + #[structopt(name = "key")] + Key(KeyOperation), + + /// Start repair of node data + #[structopt(name = "repair")] + Repair(RepairOpt), + + /// Gather node statistics + #[structopt(name = "stats")] + Stats(StatsOpt), +} + +#[derive(StructOpt, Debug)] +pub struct ServerOpt { + /// Configuration file + #[structopt(short = "c", long = "config", default_value = "./config.toml")] + pub config_file: PathBuf, +} + +#[derive(StructOpt, Debug)] +pub enum NodeOperation { + /// Configure Garage node + #[structopt(name = "configure")] + Configure(ConfigureNodeOpt), + + /// Remove Garage node from cluster + #[structopt(name = "remove")] + Remove(RemoveNodeOpt), +} + +#[derive(StructOpt, Debug)] +pub struct ConfigureNodeOpt { + /// Node to configure (prefix of hexadecimal node id) + node_id: String, + + /// Location (datacenter) of the node + #[structopt(short = "d", long = "datacenter")] + datacenter: Option<String>, + + /// Capacity (in relative terms, use 1 to represent your smallest server) + #[structopt(short = "c", long = "capacity")] + capacity: Option<u32>, + + /// Optionnal node tag + #[structopt(short = "t", long = "tag")] + tag: Option<String>, +} + +#[derive(StructOpt, Debug)] +pub struct RemoveNodeOpt { + /// Node to configure (prefix of hexadecimal node id) + node_id: String, + + /// If this flag is not given, the node won't be removed + #[structopt(long = "yes")] + yes: bool, +} + +#[derive(Serialize, Deserialize, StructOpt, Debug)] +pub enum BucketOperation { + /// List buckets + #[structopt(name = "list")] + List, + + /// Get bucket info + #[structopt(name = "info")] + Info(BucketOpt), + + /// Create bucket + #[structopt(name = "create")] + Create(BucketOpt), + + /// Delete bucket + #[structopt(name = "delete")] + Delete(DeleteBucketOpt), + + /// Allow key to read or write to bucket + #[structopt(name = "allow")] + Allow(PermBucketOpt), + + /// Allow key to read or write to bucket + #[structopt(name = "deny")] + Deny(PermBucketOpt), + + /// Expose as website or not + #[structopt(name = "website")] + Website(WebsiteOpt), +} + +#[derive(Serialize, Deserialize, StructOpt, Debug)] +pub struct WebsiteOpt { + /// Create + #[structopt(long = "allow")] + pub allow: bool, + + /// Delete + #[structopt(long = "deny")] + pub deny: bool, + + /// Bucket name + pub bucket: String, +} + +#[derive(Serialize, Deserialize, StructOpt, Debug)] +pub struct BucketOpt { + /// Bucket name + pub name: String, +} + +#[derive(Serialize, Deserialize, StructOpt, Debug)] +pub struct DeleteBucketOpt { + /// Bucket name + pub name: String, + + /// If this flag is not given, the bucket won't be deleted + #[structopt(long = "yes")] + pub yes: bool, +} + +#[derive(Serialize, Deserialize, StructOpt, Debug)] +pub struct PermBucketOpt { + /// Access key name or ID + #[structopt(long = "key")] + pub key_pattern: String, + + /// Allow/deny read operations + #[structopt(long = "read")] + pub read: bool, + + /// Allow/deny write operations + #[structopt(long = "write")] + pub write: bool, + + /// Bucket name + pub bucket: String, +} + +#[derive(Serialize, Deserialize, StructOpt, Debug)] +pub enum KeyOperation { + /// List keys + #[structopt(name = "list")] + List, + + /// Get key info + #[structopt(name = "info")] + Info(KeyOpt), + + /// Create new key + #[structopt(name = "new")] + New(KeyNewOpt), + + /// Rename key + #[structopt(name = "rename")] + Rename(KeyRenameOpt), + + /// Delete key + #[structopt(name = "delete")] + Delete(KeyDeleteOpt), + + /// Import key + #[structopt(name = "import")] + Import(KeyImportOpt), +} + +#[derive(Serialize, Deserialize, StructOpt, Debug)] +pub struct KeyOpt { + /// ID or name of the key + pub key_pattern: String, +} + +#[derive(Serialize, Deserialize, StructOpt, Debug)] +pub struct KeyNewOpt { + /// Name of the key + #[structopt(long = "name", default_value = "Unnamed key")] + pub name: String, +} + +#[derive(Serialize, Deserialize, StructOpt, Debug)] +pub struct KeyRenameOpt { + /// ID or name of the key + pub key_pattern: String, + + /// New name of the key + pub new_name: String, +} + +#[derive(Serialize, Deserialize, StructOpt, Debug)] +pub struct KeyDeleteOpt { + /// ID or name of the key + pub key_pattern: String, + + /// Confirm deletion + #[structopt(long = "yes")] + pub yes: bool, +} + +#[derive(Serialize, Deserialize, StructOpt, Debug)] +pub struct KeyImportOpt { + /// Access key ID + pub key_id: String, + + /// Secret access key + pub secret_key: String, + + /// Key name + #[structopt(short = "n", default_value = "Imported key")] + pub name: String, +} + +#[derive(Serialize, Deserialize, StructOpt, Debug, Clone)] +pub struct RepairOpt { + /// Launch repair operation on all nodes + #[structopt(short = "a", long = "all-nodes")] + pub all_nodes: bool, + + /// Confirm the launch of the repair operation + #[structopt(long = "yes")] + pub yes: bool, + + #[structopt(subcommand)] + pub what: Option<RepairWhat>, +} + +#[derive(Serialize, Deserialize, StructOpt, Debug, Eq, PartialEq, Clone)] +pub enum RepairWhat { + /// Only do a full sync of metadata tables + #[structopt(name = "tables")] + Tables, + /// Only repair (resync/rebalance) the set of stored blocks + #[structopt(name = "blocks")] + Blocks, + /// Only redo the propagation of object deletions to the version table (slow) + #[structopt(name = "versions")] + Versions, + /// Only redo the propagation of version deletions to the block ref table (extremely slow) + #[structopt(name = "block_refs")] + BlockRefs, +} + +#[derive(Serialize, Deserialize, StructOpt, Debug, Clone)] +pub struct StatsOpt { + /// Gather statistics from all nodes + #[structopt(short = "a", long = "all-nodes")] + pub all_nodes: bool, + + /// Gather detailed statistics (this can be long) + #[structopt(short = "d", long = "detailed")] + pub detailed: bool, +} + +pub async fn cli_cmd( + cmd: Command, + membership_rpc_cli: RpcAddrClient<Message>, + admin_rpc_cli: RpcAddrClient<AdminRPC>, + rpc_host: SocketAddr, +) -> Result<(), Error> { + match cmd { + Command::Status => cmd_status(membership_rpc_cli, rpc_host).await, + Command::Node(NodeOperation::Configure(configure_opt)) => { + cmd_configure(membership_rpc_cli, rpc_host, configure_opt).await + } + Command::Node(NodeOperation::Remove(remove_opt)) => { + cmd_remove(membership_rpc_cli, rpc_host, remove_opt).await + } + Command::Bucket(bo) => { + cmd_admin(admin_rpc_cli, rpc_host, AdminRPC::BucketOperation(bo)).await + } + Command::Key(ko) => cmd_admin(admin_rpc_cli, rpc_host, AdminRPC::KeyOperation(ko)).await, + Command::Repair(ro) => cmd_admin(admin_rpc_cli, rpc_host, AdminRPC::LaunchRepair(ro)).await, + Command::Stats(so) => cmd_admin(admin_rpc_cli, rpc_host, AdminRPC::Stats(so)).await, + _ => unreachable!(), + } +} + +pub async fn cmd_status( + rpc_cli: RpcAddrClient<Message>, + rpc_host: SocketAddr, +) -> Result<(), Error> { + let status = match rpc_cli + .call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT) + .await?? + { + Message::AdvertiseNodesUp(nodes) => nodes, + resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), + }; + let config = match rpc_cli + .call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT) + .await?? + { + Message::AdvertiseConfig(cfg) => cfg, + resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), + }; + + println!("Healthy nodes:"); + for adv in status.iter().filter(|x| x.is_up) { + if let Some(cfg) = config.members.get(&adv.id) { + println!( + "{:?}\t{}\t{}\t[{}]\t{}\t{}", + adv.id, adv.state_info.hostname, adv.addr, cfg.tag, cfg.datacenter, cfg.capacity + ); + } else { + println!( + "{:?}\t{}\t{}\tUNCONFIGURED/REMOVED", + adv.id, adv.state_info.hostname, adv.addr + ); + } + } + + let status_keys = status.iter().map(|x| x.id).collect::<HashSet<_>>(); + let failure_case_1 = status.iter().any(|x| !x.is_up); + let failure_case_2 = config + .members + .iter() + .any(|(id, _)| !status_keys.contains(id)); + if failure_case_1 || failure_case_2 { + println!("\nFailed nodes:"); + for adv in status.iter().filter(|x| !x.is_up) { + if let Some(cfg) = config.members.get(&adv.id) { + println!( + "{:?}\t{}\t{}\t[{}]\t{}\t{}\tlast seen: {}s ago", + adv.id, + adv.state_info.hostname, + adv.addr, + cfg.tag, + cfg.datacenter, + cfg.capacity, + (now_msec() - adv.last_seen) / 1000, + ); + } + } + for (id, cfg) in config.members.iter() { + if !status.iter().any(|x| x.id == *id) { + println!( + "{:?}\t{}\t{}\t{}\tnever seen", + id, cfg.tag, cfg.datacenter, cfg.capacity + ); + } + } + } + + Ok(()) +} + +pub async fn cmd_configure( + rpc_cli: RpcAddrClient<Message>, + rpc_host: SocketAddr, + args: ConfigureNodeOpt, +) -> Result<(), Error> { + let status = match rpc_cli + .call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT) + .await?? + { + Message::AdvertiseNodesUp(nodes) => nodes, + resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), + }; + + let mut candidates = vec![]; + for adv in status.iter() { + if hex::encode(&adv.id).starts_with(&args.node_id) { + candidates.push(adv.id); + } + } + if candidates.len() != 1 { + return Err(Error::Message(format!( + "{} matching nodes", + candidates.len() + ))); + } + + let mut config = match rpc_cli + .call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT) + .await?? + { + Message::AdvertiseConfig(cfg) => cfg, + resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), + }; + + let new_entry = match config.members.get(&candidates[0]) { + None => NetworkConfigEntry { + datacenter: args + .datacenter + .expect("Please specifiy a datacenter with the -d flag"), + capacity: args + .capacity + .expect("Please specifiy a capacity with the -c flag"), + tag: args.tag.unwrap_or("".to_string()), + }, + Some(old) => NetworkConfigEntry { + datacenter: args.datacenter.unwrap_or(old.datacenter.to_string()), + capacity: args.capacity.unwrap_or(old.capacity), + tag: args.tag.unwrap_or(old.tag.to_string()), + }, + }; + + config.members.insert(candidates[0].clone(), new_entry); + config.version += 1; + + rpc_cli + .call( + &rpc_host, + &Message::AdvertiseConfig(config), + ADMIN_RPC_TIMEOUT, + ) + .await??; + Ok(()) +} + +pub async fn cmd_remove( + rpc_cli: RpcAddrClient<Message>, + rpc_host: SocketAddr, + args: RemoveNodeOpt, +) -> Result<(), Error> { + let mut config = match rpc_cli + .call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT) + .await?? + { + Message::AdvertiseConfig(cfg) => cfg, + resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), + }; + + let mut candidates = vec![]; + for (key, _) in config.members.iter() { + if hex::encode(key).starts_with(&args.node_id) { + candidates.push(*key); + } + } + if candidates.len() != 1 { + return Err(Error::Message(format!( + "{} matching nodes", + candidates.len() + ))); + } + + if !args.yes { + return Err(Error::Message(format!( + "Add the flag --yes to really remove {:?} from the cluster", + candidates[0] + ))); + } + + config.members.remove(&candidates[0]); + config.version += 1; + + rpc_cli + .call( + &rpc_host, + &Message::AdvertiseConfig(config), + ADMIN_RPC_TIMEOUT, + ) + .await??; + Ok(()) +} + +pub async fn cmd_admin( + rpc_cli: RpcAddrClient<AdminRPC>, + rpc_host: SocketAddr, + args: AdminRPC, +) -> Result<(), Error> { + match rpc_cli.call(&rpc_host, args, ADMIN_RPC_TIMEOUT).await?? { + AdminRPC::Ok(msg) => { + println!("{}", msg); + } + AdminRPC::BucketList(bl) => { + println!("List of buckets:"); + for bucket in bl { + println!("{}", bucket); + } + } + AdminRPC::BucketInfo(bucket) => { + print_bucket_info(&bucket); + } + AdminRPC::KeyList(kl) => { + println!("List of keys:"); + for key in kl { + println!("{}\t{}", key.0, key.1); + } + } + AdminRPC::KeyInfo(key) => { + print_key_info(&key); + } + r => { + error!("Unexpected response: {:?}", r); + } + } + Ok(()) +} + +fn print_key_info(key: &Key) { + println!("Key name: {}", key.name.get()); + println!("Key ID: {}", key.key_id); + println!("Secret key: {}", key.secret_key); + if key.deleted.get() { + println!("Key is deleted."); + } else { + println!("Authorized buckets:"); + for (b, _, perm) in key.authorized_buckets.items().iter() { + println!("- {} R:{} W:{}", b, perm.allow_read, perm.allow_write); + } + } +} + +fn print_bucket_info(bucket: &Bucket) { + println!("Bucket name: {}", bucket.name); + match bucket.state.get() { + BucketState::Deleted => println!("Bucket is deleted."), + BucketState::Present(p) => { + println!("Authorized keys:"); + for (k, _, perm) in p.authorized_keys.items().iter() { + println!("- {} R:{} W:{}", k, perm.allow_read, perm.allow_write); + } + println!("Website access: {}", p.website.get()); + } + }; +} diff --git a/src/garage/main.rs b/src/garage/main.rs index 8757a1bb..6c86d0fb 100644 --- a/src/garage/main.rs +++ b/src/garage/main.rs @@ -4,289 +4,67 @@ extern crate log; mod admin_rpc; +mod cli; mod repair; mod server; -use std::collections::HashSet; use std::net::SocketAddr; -use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; -use serde::{Deserialize, Serialize}; use structopt::StructOpt; use garage_util::config::TlsConfig; -use garage_util::data::*; use garage_util::error::Error; use garage_rpc::membership::*; -use garage_rpc::ring::*; use garage_rpc::rpc_client::*; use admin_rpc::*; +use cli::*; #[derive(StructOpt, Debug)] #[structopt(name = "garage")] pub struct Opt { /// RPC connect to this host to execute client operations #[structopt(short = "h", long = "rpc-host", default_value = "127.0.0.1:3901")] - rpc_host: SocketAddr, + pub rpc_host: SocketAddr, #[structopt(long = "ca-cert")] - ca_cert: Option<String>, + pub ca_cert: Option<String>, #[structopt(long = "client-cert")] - client_cert: Option<String>, + pub client_cert: Option<String>, #[structopt(long = "client-key")] - client_key: Option<String>, + pub client_key: Option<String>, #[structopt(subcommand)] cmd: Command, } -#[derive(StructOpt, Debug)] -pub enum Command { - /// Run Garage server - #[structopt(name = "server")] - Server(ServerOpt), - - /// Get network status - #[structopt(name = "status")] - Status, - - /// Garage node operations - #[structopt(name = "node")] - Node(NodeOperation), - - /// Bucket operations - #[structopt(name = "bucket")] - Bucket(BucketOperation), - - /// Key operations - #[structopt(name = "key")] - Key(KeyOperation), - - /// Start repair of node data - #[structopt(name = "repair")] - Repair(RepairOpt), -} - -#[derive(StructOpt, Debug)] -pub struct ServerOpt { - /// Configuration file - #[structopt(short = "c", long = "config", default_value = "./config.toml")] - config_file: PathBuf, -} - -#[derive(StructOpt, Debug)] -pub enum NodeOperation { - /// Configure Garage node - #[structopt(name = "configure")] - Configure(ConfigureNodeOpt), - - /// Remove Garage node from cluster - #[structopt(name = "remove")] - Remove(RemoveNodeOpt), -} - -#[derive(StructOpt, Debug)] -pub struct ConfigureNodeOpt { - /// Node to configure (prefix of hexadecimal node id) - node_id: String, - - /// Location (datacenter) of the node - #[structopt(short = "d", long = "datacenter")] - datacenter: Option<String>, - - /// Capacity (in relative terms, use 1 to represent your smallest server) - #[structopt(short = "c", long = "capacity")] - capacity: Option<u32>, - - /// Optionnal node tag - #[structopt(short = "t", long = "tag")] - tag: Option<String>, -} - -#[derive(StructOpt, Debug)] -pub struct RemoveNodeOpt { - /// Node to configure (prefix of hexadecimal node id) - node_id: String, - - /// If this flag is not given, the node won't be removed - #[structopt(long = "yes")] - yes: bool, -} - -#[derive(Serialize, Deserialize, StructOpt, Debug)] -pub enum BucketOperation { - /// List buckets - #[structopt(name = "list")] - List, - - /// Get bucket info - #[structopt(name = "info")] - Info(BucketOpt), - - /// Create bucket - #[structopt(name = "create")] - Create(BucketOpt), - - /// Delete bucket - #[structopt(name = "delete")] - Delete(DeleteBucketOpt), - - /// Allow key to read or write to bucket - #[structopt(name = "allow")] - Allow(PermBucketOpt), - - /// Allow key to read or write to bucket - #[structopt(name = "deny")] - Deny(PermBucketOpt), - - /// Expose as website or not - #[structopt(name = "website")] - Website(WebsiteOpt), -} - -#[derive(Serialize, Deserialize, StructOpt, Debug)] -pub struct WebsiteOpt { - /// Create - #[structopt(long = "allow")] - pub allow: bool, - - /// Delete - #[structopt(long = "deny")] - pub deny: bool, - - /// Bucket name - pub bucket: String, -} - -#[derive(Serialize, Deserialize, StructOpt, Debug)] -pub struct BucketOpt { - /// Bucket name - pub name: String, -} - -#[derive(Serialize, Deserialize, StructOpt, Debug)] -pub struct DeleteBucketOpt { - /// Bucket name - pub name: String, - - /// If this flag is not given, the bucket won't be deleted - #[structopt(long = "yes")] - pub yes: bool, -} - -#[derive(Serialize, Deserialize, StructOpt, Debug)] -pub struct PermBucketOpt { - /// Access key ID - #[structopt(long = "key")] - pub key_id: String, - - /// Allow/deny read operations - #[structopt(long = "read")] - pub read: bool, - - /// Allow/deny write operations - #[structopt(long = "write")] - pub write: bool, - - /// Bucket name - pub bucket: String, -} - -#[derive(Serialize, Deserialize, StructOpt, Debug)] -pub enum KeyOperation { - /// List keys - #[structopt(name = "list")] - List, - - /// Get key info - #[structopt(name = "info")] - Info(KeyOpt), - - /// Create new key - #[structopt(name = "new")] - New(KeyNewOpt), - - /// Rename key - #[structopt(name = "rename")] - Rename(KeyRenameOpt), - - /// Delete key - #[structopt(name = "delete")] - Delete(KeyDeleteOpt), -} - -#[derive(Serialize, Deserialize, StructOpt, Debug)] -pub struct KeyOpt { - /// ID of the key - key_id: String, -} - -#[derive(Serialize, Deserialize, StructOpt, Debug)] -pub struct KeyNewOpt { - /// Name of the key - #[structopt(long = "name", default_value = "Unnamed key")] - name: String, -} - -#[derive(Serialize, Deserialize, StructOpt, Debug)] -pub struct KeyRenameOpt { - /// ID of the key - key_id: String, - - /// New name of the key - new_name: String, -} - -#[derive(Serialize, Deserialize, StructOpt, Debug)] -pub struct KeyDeleteOpt { - /// ID of the key - key_id: String, - - /// Confirm deletion - #[structopt(long = "yes")] - yes: bool, -} - -#[derive(Serialize, Deserialize, StructOpt, Debug, Clone)] -pub struct RepairOpt { - /// Launch repair operation on all nodes - #[structopt(short = "a", long = "all-nodes")] - pub all_nodes: bool, - - /// Confirm the launch of the repair operation - #[structopt(long = "yes")] - pub yes: bool, - - #[structopt(subcommand)] - pub what: Option<RepairWhat>, -} - -#[derive(Serialize, Deserialize, StructOpt, Debug, Eq, PartialEq, Clone)] -pub enum RepairWhat { - /// Only do a full sync of metadata tables - #[structopt(name = "tables")] - Tables, - /// Only repair (resync/rebalance) the set of stored blocks - #[structopt(name = "blocks")] - Blocks, - /// Only redo the propagation of object deletions to the version table (slow) - #[structopt(name = "versions")] - Versions, - /// Only redo the propagation of version deletions to the block ref table (extremely slow) - #[structopt(name = "block_refs")] - BlockRefs, -} - #[tokio::main] async fn main() { pretty_env_logger::init(); let opt = Opt::from_args(); + let res = if let Command::Server(server_opt) = opt.cmd { + // Abort on panic (same behavior as in Go) + std::panic::set_hook(Box::new(|panic_info| { + error!("{}", panic_info.to_string()); + std::process::abort(); + })); + + server::run_server(server_opt.config_file).await + } else { + cli_command(opt).await + }; + + if let Err(e) = res { + error!("{}", e); + } +} + +async fn cli_command(opt: Opt) -> Result<(), Error> { let tls_config = match (opt.ca_cert, opt.client_cert, opt.client_key) { (Some(ca_cert), Some(client_cert), Some(client_key)) => Some(TlsConfig { ca_cert, @@ -306,245 +84,5 @@ async fn main() { RpcAddrClient::new(rpc_http_cli.clone(), MEMBERSHIP_RPC_PATH.to_string()); let admin_rpc_cli = RpcAddrClient::new(rpc_http_cli.clone(), ADMIN_RPC_PATH.to_string()); - let resp = match opt.cmd { - Command::Server(server_opt) => { - // Abort on panic (same behavior as in Go) - std::panic::set_hook(Box::new(|panic_info| { - error!("{}", panic_info.to_string()); - std::process::abort(); - })); - - server::run_server(server_opt.config_file).await - } - Command::Status => cmd_status(membership_rpc_cli, opt.rpc_host).await, - Command::Node(NodeOperation::Configure(configure_opt)) => { - cmd_configure(membership_rpc_cli, opt.rpc_host, configure_opt).await - } - Command::Node(NodeOperation::Remove(remove_opt)) => { - cmd_remove(membership_rpc_cli, opt.rpc_host, remove_opt).await - } - Command::Bucket(bo) => { - cmd_admin(admin_rpc_cli, opt.rpc_host, AdminRPC::BucketOperation(bo)).await - } - Command::Key(bo) => { - cmd_admin(admin_rpc_cli, opt.rpc_host, AdminRPC::KeyOperation(bo)).await - } - Command::Repair(ro) => { - cmd_admin(admin_rpc_cli, opt.rpc_host, AdminRPC::LaunchRepair(ro)).await - } - }; - - if let Err(e) = resp { - error!("Error: {}", e); - } -} - -async fn cmd_status(rpc_cli: RpcAddrClient<Message>, rpc_host: SocketAddr) -> Result<(), Error> { - let status = match rpc_cli - .call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT) - .await?? - { - Message::AdvertiseNodesUp(nodes) => nodes, - resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), - }; - let config = match rpc_cli - .call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT) - .await?? - { - Message::AdvertiseConfig(cfg) => cfg, - resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), - }; - - println!("Healthy nodes:"); - for adv in status.iter().filter(|x| x.is_up) { - if let Some(cfg) = config.members.get(&adv.id) { - println!( - "{:?}\t{}\t{}\t[{}]\t{}\t{}", - adv.id, adv.state_info.hostname, adv.addr, cfg.tag, cfg.datacenter, cfg.capacity - ); - } else { - println!( - "{:?}\t{}\t{}\tUNCONFIGURED/REMOVED", - adv.id, adv.state_info.hostname, adv.addr - ); - } - } - - let status_keys = status.iter().map(|x| x.id).collect::<HashSet<_>>(); - let failure_case_1 = status.iter().any(|x| !x.is_up); - let failure_case_2 = config - .members - .iter() - .any(|(id, _)| !status_keys.contains(id)); - if failure_case_1 || failure_case_2 { - println!("\nFailed nodes:"); - for adv in status.iter().filter(|x| !x.is_up) { - if let Some(cfg) = config.members.get(&adv.id) { - println!( - "{:?}\t{}\t{}\t[{}]\t{}\t{}\tlast seen: {}s ago", - adv.id, - adv.state_info.hostname, - adv.addr, - cfg.tag, - cfg.datacenter, - cfg.capacity, - (now_msec() - adv.last_seen) / 1000, - ); - } - } - for (id, cfg) in config.members.iter() { - if !status.iter().any(|x| x.id == *id) { - println!( - "{:?}\t{}\t{}\t{}\tnever seen", - id, cfg.tag, cfg.datacenter, cfg.capacity - ); - } - } - } - - Ok(()) -} - -async fn cmd_configure( - rpc_cli: RpcAddrClient<Message>, - rpc_host: SocketAddr, - args: ConfigureNodeOpt, -) -> Result<(), Error> { - let status = match rpc_cli - .call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT) - .await?? - { - Message::AdvertiseNodesUp(nodes) => nodes, - resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), - }; - - let mut candidates = vec![]; - for adv in status.iter() { - if hex::encode(&adv.id).starts_with(&args.node_id) { - candidates.push(adv.id); - } - } - if candidates.len() != 1 { - return Err(Error::Message(format!( - "{} matching nodes", - candidates.len() - ))); - } - - let mut config = match rpc_cli - .call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT) - .await?? - { - Message::AdvertiseConfig(cfg) => cfg, - resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), - }; - - let new_entry = match config.members.get(&candidates[0]) { - None => NetworkConfigEntry { - datacenter: args - .datacenter - .expect("Please specifiy a datacenter with the -d flag"), - capacity: args - .capacity - .expect("Please specifiy a capacity with the -c flag"), - tag: args.tag.unwrap_or("".to_string()), - }, - Some(old) => NetworkConfigEntry { - datacenter: args.datacenter.unwrap_or(old.datacenter.to_string()), - capacity: args.capacity.unwrap_or(old.capacity), - tag: args.tag.unwrap_or(old.tag.to_string()), - }, - }; - - config.members.insert(candidates[0].clone(), new_entry); - config.version += 1; - - rpc_cli - .call( - &rpc_host, - &Message::AdvertiseConfig(config), - ADMIN_RPC_TIMEOUT, - ) - .await??; - Ok(()) -} - -async fn cmd_remove( - rpc_cli: RpcAddrClient<Message>, - rpc_host: SocketAddr, - args: RemoveNodeOpt, -) -> Result<(), Error> { - let mut config = match rpc_cli - .call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT) - .await?? - { - Message::AdvertiseConfig(cfg) => cfg, - resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), - }; - - let mut candidates = vec![]; - for (key, _) in config.members.iter() { - if hex::encode(key).starts_with(&args.node_id) { - candidates.push(*key); - } - } - if candidates.len() != 1 { - return Err(Error::Message(format!( - "{} matching nodes", - candidates.len() - ))); - } - - if !args.yes { - return Err(Error::Message(format!( - "Add the flag --yes to really remove {:?} from the cluster", - candidates[0] - ))); - } - - config.members.remove(&candidates[0]); - config.version += 1; - - rpc_cli - .call( - &rpc_host, - &Message::AdvertiseConfig(config), - ADMIN_RPC_TIMEOUT, - ) - .await??; - Ok(()) -} - -async fn cmd_admin( - rpc_cli: RpcAddrClient<AdminRPC>, - rpc_host: SocketAddr, - args: AdminRPC, -) -> Result<(), Error> { - match rpc_cli.call(&rpc_host, args, ADMIN_RPC_TIMEOUT).await?? { - AdminRPC::Ok(msg) => { - println!("{}", msg); - } - AdminRPC::BucketList(bl) => { - println!("List of buckets:"); - for bucket in bl { - println!("{}", bucket); - } - } - AdminRPC::BucketInfo(bucket) => { - println!("{:?}", bucket); - } - AdminRPC::KeyList(kl) => { - println!("List of keys:"); - for key in kl { - println!("{}\t{}", key.0, key.1); - } - } - AdminRPC::KeyInfo(key) => { - println!("{:?}", key); - } - r => { - error!("Unexpected response: {:?}", r); - } - } - Ok(()) + cli_cmd(opt.cmd, membership_rpc_cli, admin_rpc_cli, opt.rpc_host).await } diff --git a/src/garage/repair.rs b/src/garage/repair.rs index 297ae9cd..8200f1f0 100644 --- a/src/garage/repair.rs +++ b/src/garage/repair.rs @@ -16,7 +16,13 @@ pub struct Repair { } impl Repair { - pub async fn repair_worker( + pub async fn repair_worker(&self, opt: RepairOpt, must_exit: watch::Receiver<bool>) { + if let Err(e) = self.repair_worker_aux(opt, must_exit).await { + warn!("Repair worker failed with error: {}", e); + } + } + + async fn repair_worker_aux( &self, opt: RepairOpt, must_exit: watch::Receiver<bool>, @@ -25,41 +31,11 @@ impl Repair { if todo(RepairWhat::Tables) { info!("Launching a full sync of tables"); - self.garage - .bucket_table - .syncer - .load_full() - .unwrap() - .add_full_scan() - .await; - self.garage - .object_table - .syncer - .load_full() - .unwrap() - .add_full_scan() - .await; - self.garage - .version_table - .syncer - .load_full() - .unwrap() - .add_full_scan() - .await; - self.garage - .block_ref_table - .syncer - .load_full() - .unwrap() - .add_full_scan() - .await; - self.garage - .key_table - .syncer - .load_full() - .unwrap() - .add_full_scan() - .await; + self.garage.bucket_table.syncer.add_full_sync(); + self.garage.object_table.syncer.add_full_sync(); + self.garage.version_table.syncer.add_full_sync(); + self.garage.block_ref_table.syncer.add_full_sync(); + self.garage.key_table.syncer.add_full_sync(); } // TODO: wait for full sync to finish before proceeding to the rest? @@ -93,11 +69,13 @@ impl Repair { async fn repair_versions(&self, must_exit: &watch::Receiver<bool>) -> Result<(), Error> { let mut pos = vec![]; - while let Some((item_key, item_bytes)) = self.garage.version_table.store.get_gt(&pos)? { + while let Some((item_key, item_bytes)) = + self.garage.version_table.data.store.get_gt(&pos)? + { pos = item_key.to_vec(); let version = rmp_serde::decode::from_read_ref::<_, Version>(item_bytes.as_ref())?; - if version.deleted { + if version.deleted.get() { continue; } let object = self @@ -110,13 +88,7 @@ impl Repair { .versions() .iter() .any(|x| x.uuid == version.uuid && x.state != ObjectVersionState::Aborted), - None => { - warn!( - "Repair versions: object for version {:?} not found, skipping.", - version - ); - continue; - } + None => false, }; if !version_exists { info!("Repair versions: marking version as deleted: {:?}", version); @@ -127,7 +99,6 @@ impl Repair { version.bucket, version.key, true, - vec![], )) .await?; } @@ -142,11 +113,13 @@ impl Repair { async fn repair_block_ref(&self, must_exit: &watch::Receiver<bool>) -> Result<(), Error> { let mut pos = vec![]; - while let Some((item_key, item_bytes)) = self.garage.block_ref_table.store.get_gt(&pos)? { + while let Some((item_key, item_bytes)) = + self.garage.block_ref_table.data.store.get_gt(&pos)? + { pos = item_key.to_vec(); let block_ref = rmp_serde::decode::from_read_ref::<_, BlockRef>(item_bytes.as_ref())?; - if block_ref.deleted { + if block_ref.deleted.get() { continue; } let version = self @@ -154,16 +127,8 @@ impl Repair { .version_table .get(&block_ref.version, &EmptyKey) .await?; - let ref_exists = match version { - Some(v) => !v.deleted, - None => { - warn!( - "Block ref repair: version for block ref {:?} not found, skipping.", - block_ref - ); - continue; - } - }; + // The version might not exist if it has been GC'ed + let ref_exists = version.map(|v| !v.deleted.get()).unwrap_or(false); if !ref_exists { info!( "Repair block ref: marking block_ref as deleted: {:?}", @@ -174,7 +139,7 @@ impl Repair { .insert(&BlockRef { block: block_ref.block, version: block_ref.version, - deleted: true, + deleted: true.into(), }) .await?; } diff --git a/src/garage/server.rs b/src/garage/server.rs index 2e109f8b..c45a69b8 100644 --- a/src/garage/server.rs +++ b/src/garage/server.rs @@ -21,13 +21,13 @@ async fn shutdown_signal(send_cancel: watch::Sender<bool>) -> Result<(), Error> .await .expect("failed to install CTRL+C signal handler"); info!("Received CTRL+C, shutting down."); - send_cancel.broadcast(true)?; + send_cancel.send(true)?; Ok(()) } async fn wait_from(mut chan: watch::Receiver<bool>) -> () { - while let Some(exit_now) = chan.recv().await { - if exit_now { + while !*chan.borrow() { + if chan.changed().await.is_err() { return; } } @@ -40,37 +40,22 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { info!("Opening database..."); let mut db_path = config.metadata_dir.clone(); db_path.push("db"); - let db = match sled::open(&db_path) { - Ok(db) => db, - Err(e) => { - warn!("Old DB could not be openned ({}), attempting migration.", e); - let old = old_sled::open(&db_path).expect("Unable to open old DB for migration"); - let mut new_path = config.metadata_dir.clone(); - new_path.push("db2"); - let new = sled::open(&new_path).expect("Unable to open new DB for migration"); - new.import(old.export()); - if old.checksum().expect("unable to compute old db checksum") - != new.checksum().expect("unable to compute new db checksum") - { - panic!("db checksums don't match after migration"); - } - drop(new); - drop(old); - std::fs::remove_dir_all(&db_path).expect("Cannot remove old DB folder"); - std::fs::rename(new_path, &db_path) - .expect("Cannot move new DB folder to correct place"); - sled::open(db_path).expect("Unable to open new DB after migration") - } - }; + let db = sled::open(&db_path).expect("Unable to open sled DB"); info!("Initialize RPC server..."); let mut rpc_server = RpcServer::new(config.rpc_bind_addr.clone(), config.rpc_tls.clone()); info!("Initializing background runner..."); let (send_cancel, watch_cancel) = watch::channel(false); - let background = BackgroundRunner::new(16, watch_cancel.clone()); + let (background, await_background_done) = BackgroundRunner::new(16, watch_cancel.clone()); - let garage = Garage::new(config, db, background.clone(), &mut rpc_server).await; + info!("Initializing Garage main data store..."); + let garage = Garage::new(config.clone(), db, background, &mut rpc_server); + let bootstrap = garage.system.clone().bootstrap( + &config.bootstrap_peers[..], + config.consul_host, + config.consul_service_name, + ); info!("Crate admin RPC handler..."); AdminRpcHandler::new(garage.clone()).register_handler(&mut rpc_server); @@ -78,21 +63,13 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { info!("Initializing RPC and API servers..."); let run_rpc_server = Arc::new(rpc_server).run(wait_from(watch_cancel.clone())); let api_server = api_server::run_api_server(garage.clone(), wait_from(watch_cancel.clone())); - let web_server = web_server::run_web_server(garage.clone(), wait_from(watch_cancel.clone())); + let web_server = web_server::run_web_server(garage, wait_from(watch_cancel.clone())); futures::try_join!( - garage - .system - .clone() - .bootstrap( - &garage.config.bootstrap_peers[..], - garage.config.consul_host.clone(), - garage.config.consul_service_name.clone() - ) - .map(|rv| { - info!("Bootstrap done"); - Ok(rv) - }), + bootstrap.map(|rv| { + info!("Bootstrap done"); + Ok(rv) + }), run_rpc_server.map(|rv| { info!("RPC server exited"); rv @@ -105,9 +82,9 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { info!("Web server exited"); rv }), - background.run().map(|rv| { - info!("Background runner exited"); - Ok(rv) + await_background_done.map(|rv| { + info!("Background runner exited: {:?}", rv); + Ok(()) }), shutdown_signal(send_cancel), )?; |