diff options
author | Alex Auvolat <alex@adnab.me> | 2020-04-08 22:00:41 +0200 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2020-04-08 22:00:41 +0200 |
commit | bacc76a057bcd90d61bfe3584bd3cdbadc748364 (patch) | |
tree | b9a55aec2ef1fa7660bf539c02e651ea4053688c /src | |
parent | d50edcdb4f8b8ec00b1f0ffb6a3ebbb0e5afdc1f (diff) | |
download | garage-bacc76a057bcd90d61bfe3584bd3cdbadc748364.tar.gz garage-bacc76a057bcd90d61bfe3584bd3cdbadc748364.zip |
Some work in actually storing things
Diffstat (limited to 'src')
-rw-r--r-- | src/api_server.rs | 70 | ||||
-rw-r--r-- | src/data.rs | 24 | ||||
-rw-r--r-- | src/main.rs | 5 | ||||
-rw-r--r-- | src/membership.rs | 7 | ||||
-rw-r--r-- | src/proto.rs | 3 | ||||
-rw-r--r-- | src/rpc_server.rs | 24 | ||||
-rw-r--r-- | src/server.rs | 55 | ||||
-rw-r--r-- | src/table.rs | 116 | ||||
-rw-r--r-- | src/version_table.rs | 59 |
9 files changed, 287 insertions, 76 deletions
diff --git a/src/api_server.rs b/src/api_server.rs index c1b4d81d..8acd15d8 100644 --- a/src/api_server.rs +++ b/src/api_server.rs @@ -9,21 +9,21 @@ use hyper::{Body, Method, Request, Response, Server, StatusCode}; use futures::future::Future; use crate::error::Error; -use crate::membership::System; use crate::data::*; use crate::proto::*; use crate::rpc_client::*; +use crate::server::Garage; -pub async fn run_api_server(sys: Arc<System>, shutdown_signal: impl Future<Output=()>) -> Result<(), hyper::Error> { - let addr = ([0, 0, 0, 0], sys.config.api_port).into(); +pub async fn run_api_server(garage: Arc<Garage>, shutdown_signal: impl Future<Output=()>) -> Result<(), hyper::Error> { + let addr = ([0, 0, 0, 0], garage.system.config.api_port).into(); let service = make_service_fn(|conn: &AddrStream| { - let sys = sys.clone(); + let garage = garage.clone(); let client_addr = conn.remote_addr(); async move { Ok::<_, Error>(service_fn(move |req: Request<Body>| { - let sys = sys.clone(); - handler(sys, req, client_addr) + let garage = garage.clone(); + handler(garage, req, client_addr) })) } }); @@ -36,8 +36,8 @@ pub async fn run_api_server(sys: Arc<System>, shutdown_signal: impl Future<Outpu graceful.await } -async fn handler(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> { - match handler_inner(sys, req, addr).await { +async fn handler(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> { + match handler_inner(garage, req, addr).await { Ok(x) => Ok(x), Err(Error::BadRequest(e)) => { let mut bad_request = Response::new(Body::from(format!("{}\n", e))); @@ -53,7 +53,7 @@ async fn handler(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) -> Resu } } -async fn handler_inner(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> { +async fn handler_inner(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> { eprintln!("{} {} {}", addr, req.method(), req.uri()); let bucket = req.headers() @@ -75,7 +75,7 @@ async fn handler_inner(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) - .map(|x| x.to_str()) .unwrap_or(Ok("blob"))? .to_string(); - let version_uuid = handle_put(sys, &mime_type, &bucket, &key, req.into_body()).await?; + let version_uuid = handle_put(garage, &mime_type, &bucket, &key, req.into_body()).await?; Ok(Response::new(Body::from( format!("Version UUID: {:?}", version_uuid), ))) @@ -84,22 +84,24 @@ async fn handler_inner(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) - } } -async fn handle_put(sys: Arc<System>, +async fn handle_put(garage: Arc<Garage>, mime_type: &str, bucket: &str, key: &str, body: Body) -> Result<UUID, Error> { let version_uuid = gen_uuid(); - let mut chunker = BodyChunker::new(body, sys.config.block_size); + let mut chunker = BodyChunker::new(body, garage.system.config.block_size); let first_block = match chunker.next().await? { Some(x) => x, None => return Err(Error::BadRequest(format!("Empty body"))), }; - let mut version = VersionMeta{ + let version_key = VersionMetaKey{ bucket: bucket.to_string(), key: key.to_string(), + }; + let mut version_value = VersionMetaValue { timestamp: now_msec(), uuid: version_uuid.clone(), mime_type: mime_type.to_string(), @@ -107,27 +109,17 @@ async fn handle_put(sys: Arc<System>, is_complete: false, data: VersionData::DeleteMarker, }; - let version_who = sys.members.read().await - .walk_ring(&version_uuid, sys.config.meta_replication_factor); if first_block.len() < INLINE_THRESHOLD { - version.data = VersionData::Inline(first_block); - version.is_complete = true; - rpc_try_call_many(sys.clone(), - &version_who[..], - &Message::AdvertiseVersion(version), - (sys.config.meta_replication_factor+1)/2, - DEFAULT_TIMEOUT).await?; + version_value.data = VersionData::Inline(first_block); + version_value.is_complete = true; + garage.version_table.insert(&version_key, &version_value).await?; return Ok(version_uuid) } let first_block_hash = hash(&first_block[..]); - version.data = VersionData::FirstBlock(first_block_hash); - rpc_try_call_many(sys.clone(), - &version_who[..], - &Message::AdvertiseVersion(version.clone()), - (sys.config.meta_replication_factor+1)/2, - DEFAULT_TIMEOUT).await?; + version_value.data = VersionData::FirstBlock(first_block_hash); + garage.version_table.insert(&version_key, &version_value).await?; let block_meta = BlockMeta{ version_uuid: version_uuid.clone(), @@ -135,7 +127,7 @@ async fn handle_put(sys: Arc<System>, hash: hash(&first_block[..]), }; let mut next_offset = first_block.len(); - let mut put_curr_block = put_block(sys.clone(), block_meta, first_block); + let mut put_curr_block = put_block(garage.clone(), block_meta, first_block); loop { let (_, next_block) = futures::try_join!(put_curr_block, chunker.next())?; if let Some(block) = next_block { @@ -145,7 +137,7 @@ async fn handle_put(sys: Arc<System>, hash: hash(&block[..]), }; next_offset += block.len(); - put_curr_block = put_block(sys.clone(), block_meta, block); + put_curr_block = put_block(garage.clone(), block_meta, block); } else { break; } @@ -153,25 +145,21 @@ async fn handle_put(sys: Arc<System>, // TODO: if at any step we have an error, we should undo everything we did - version.is_complete = true; - rpc_try_call_many(sys.clone(), - &version_who[..], - &Message::AdvertiseVersion(version), - (sys.config.meta_replication_factor+1)/2, - DEFAULT_TIMEOUT).await?; + version_value.is_complete = true; + garage.version_table.insert(&version_key, &version_value).await?; Ok(version_uuid) } -async fn put_block(sys: Arc<System>, meta: BlockMeta, data: Vec<u8>) -> Result<(), Error> { - let who = sys.members.read().await - .walk_ring(&meta.hash, sys.config.meta_replication_factor); - rpc_try_call_many(sys.clone(), +async fn put_block(garage: Arc<Garage>, meta: BlockMeta, data: Vec<u8>) -> Result<(), Error> { + let who = garage.system.members.read().await + .walk_ring(&meta.hash, garage.system.config.meta_replication_factor); + rpc_try_call_many(garage.system.clone(), &who[..], &Message::PutBlock(PutBlockMessage{ meta, data, }), - (sys.config.meta_replication_factor+1)/2, + (garage.system.config.meta_replication_factor+1)/2, DEFAULT_TIMEOUT).await?; Ok(()) } diff --git a/src/data.rs b/src/data.rs index 3c71b782..bbe9aa1d 100644 --- a/src/data.rs +++ b/src/data.rs @@ -106,7 +106,7 @@ pub struct NetworkConfigEntry { // Data management -pub const INLINE_THRESHOLD: usize = 2048; +pub const INLINE_THRESHOLD: usize = 3072; #[derive(Debug, Serialize, Deserialize)] pub struct SplitpointMeta { @@ -118,27 +118,7 @@ pub struct SplitpointMeta { pub deleted: bool, } -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct VersionMeta { - pub bucket: String, - pub key: String, - - pub timestamp: u64, - pub uuid: UUID, - - pub mime_type: String, - pub size: u64, - pub is_complete: bool, - - pub data: VersionData, -} - -#[derive(Clone, Debug, Serialize, Deserialize)] -pub enum VersionData { - DeleteMarker, - Inline(#[serde(with="serde_bytes")] Vec<u8>), - FirstBlock(Hash), -} +pub use crate::version_table::*; #[derive(Clone, Debug, Serialize, Deserialize)] pub struct BlockMeta { diff --git a/src/main.rs b/src/main.rs index 1e4107c2..aa0f23dc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,12 @@ mod error; mod data; mod proto; + mod membership; +mod table; + +mod version_table; + mod server; mod rpc_server; mod rpc_client; diff --git a/src/membership.rs b/src/membership.rs index 69805f2a..e1eeae41 100644 --- a/src/membership.rs +++ b/src/membership.rs @@ -124,6 +124,11 @@ impl Members { i - 1 } }; + + self.walk_ring_from_pos(start, n) + } + + fn walk_ring_from_pos(&self, start: usize, n: usize) -> Vec<UUID> { let mut ret = vec![]; let mut datacenters = vec![]; @@ -143,7 +148,7 @@ impl Members { } ret - } + } } fn read_network_config(metadata_dir: &PathBuf) -> Result<NetworkConfig, Error> { diff --git a/src/proto.rs b/src/proto.rs index 04b8e2b2..99ab8fbe 100644 --- a/src/proto.rs +++ b/src/proto.rs @@ -18,7 +18,8 @@ pub enum Message { AdvertiseConfig(NetworkConfig), PutBlock(PutBlockMessage), - AdvertiseVersion(VersionMeta), + + TableRPC(String, #[serde(with = "serde_bytes")] Vec<u8>), } #[derive(Debug, Serialize, Deserialize)] diff --git a/src/rpc_server.rs b/src/rpc_server.rs index 55c7482b..eda300c4 100644 --- a/src/rpc_server.rs +++ b/src/rpc_server.rs @@ -1,6 +1,7 @@ use std::net::SocketAddr; use std::sync::Arc; +use futures_util::future::FutureExt; use bytes::IntoBuf; use hyper::service::{make_service_fn, service_fn}; use hyper::server::conn::AddrStream; @@ -9,7 +10,7 @@ use futures::future::Future; use crate::error::Error; use crate::proto::Message; -use crate::membership::System; +use crate::server::Garage; fn err_to_msg(x: Result<Message, Error>) -> Message { match x { @@ -18,7 +19,7 @@ fn err_to_msg(x: Result<Message, Error>) -> Message { } } -async fn handler(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> { +async fn handler(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> { if req.method() != &Method::POST { let mut bad_request = Response::default(); *bad_request.status_mut() = StatusCode::BAD_REQUEST; @@ -30,12 +31,21 @@ async fn handler(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) -> Resu eprintln!("RPC from {}: {:?}", addr, msg); + let sys = garage.system.clone(); let resp = err_to_msg(match &msg { Message::Ping(ping) => sys.handle_ping(&addr, ping).await, Message::PullStatus => sys.handle_pull_status().await, Message::PullConfig => sys.handle_pull_config().await, Message::AdvertiseNodesUp(adv) => sys.handle_advertise_nodes_up(adv).await, Message::AdvertiseConfig(adv) => sys.handle_advertise_config(adv).await, + Message::TableRPC(table, msg) => { + if let Some(rpc_handler) = garage.table_rpc_handlers.get(table) { + rpc_handler.handle(&msg[..]).await + .map(|rep| Message::TableRPC(table.to_string(), rep)) + } else { + Ok(Message::Error(format!("Unknown table: {}", table))) + } + } _ => Ok(Message::Error(format!("Unexpected message: {:?}", msg))), }); @@ -46,16 +56,16 @@ async fn handler(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) -> Resu } -pub async fn run_rpc_server(sys: Arc<System>, shutdown_signal: impl Future<Output=()>) -> Result<(), hyper::Error> { - let bind_addr = ([0, 0, 0, 0], sys.config.rpc_port).into(); +pub async fn run_rpc_server(garage: Arc<Garage>, shutdown_signal: impl Future<Output=()>) -> Result<(), hyper::Error> { + let bind_addr = ([0, 0, 0, 0], garage.system.config.rpc_port).into(); let service = make_service_fn(|conn: &AddrStream| { let client_addr = conn.remote_addr(); - let sys = sys.clone(); + let garage = garage.clone(); async move { Ok::<_, Error>(service_fn(move |req: Request<Body>| { - let sys = sys.clone(); - handler(sys, req, client_addr) + let garage = garage.clone(); + handler(garage, req, client_addr) })) } }); diff --git a/src/server.rs b/src/server.rs index d5da8c17..31f1cc28 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1,3 +1,4 @@ +use std::collections::HashMap; use std::io::{Read, Write}; use std::sync::Arc; use std::net::SocketAddr; @@ -6,10 +7,51 @@ use futures::channel::oneshot; use serde::Deserialize; use crate::data::*; +use crate::proto::*; use crate::error::Error; use crate::membership::System; use crate::api_server; use crate::rpc_server; +use crate::table::*; + +pub struct Garage { + pub db: sled::Db, + pub system: Arc<System>, + + pub table_rpc_handlers: HashMap<String, Box<dyn TableRpcHandler + Sync + Send>>, + + pub version_table: Arc<Table<VersionTable>>, +} + +impl Garage { + pub fn new(config: Config, id: UUID, db: sled::Db) -> Self { + let system = Arc::new(System::new(config, id)); + + let meta_rep_param = TableReplicationParams{ + replication_factor: system.config.meta_replication_factor, + write_quorum: (system.config.meta_replication_factor+1)/2, + read_quorum: (system.config.meta_replication_factor+1)/2, + timeout: DEFAULT_TIMEOUT, + }; + + let version_table = Arc::new(Table::new( + system.clone(), + &db, + "version".to_string(), + meta_rep_param.clone())); + + let mut garage = Self{ + db, + system: system.clone(), + table_rpc_handlers: HashMap::new(), + version_table, + }; + garage.table_rpc_handlers.insert( + garage.version_table.name.clone(), + garage.version_table.clone().rpc_handler()); + garage + } +} fn default_block_size() -> usize { 1048576 @@ -88,20 +130,25 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { let config = read_config(config_file) .expect("Unable to read config file"); + let mut db_path = config.metadata_dir.clone(); + db_path.push("garage_metadata"); + let db = sled::open(db_path) + .expect("Unable to open DB"); + let id = gen_node_id(&config.metadata_dir) .expect("Unable to read or generate node ID"); println!("Node ID: {}", hex::encode(&id)); - let sys = Arc::new(System::new(config, id)); + let garage = Arc::new(Garage::new(config, id, db)); let (tx1, rx1) = oneshot::channel(); let (tx2, rx2) = oneshot::channel(); - let rpc_server = rpc_server::run_rpc_server(sys.clone(), wait_from(rx1)); - let api_server = api_server::run_api_server(sys.clone(), wait_from(rx2)); + let rpc_server = rpc_server::run_rpc_server(garage.clone(), wait_from(rx1)); + let api_server = api_server::run_api_server(garage.clone(), wait_from(rx2)); tokio::spawn(shutdown_signal(vec![tx1, tx2])); - tokio::spawn(sys.bootstrap()); + tokio::spawn(garage.system.clone().bootstrap()); futures::try_join!(rpc_server, api_server)?; Ok(()) diff --git a/src/table.rs b/src/table.rs new file mode 100644 index 00000000..5c8e93a5 --- /dev/null +++ b/src/table.rs @@ -0,0 +1,116 @@ +use std::marker::PhantomData; +use std::time::Duration; +use std::sync::Arc; +use serde::{Serialize, Deserialize}; +use async_trait::async_trait; + +use crate::error::Error; +use crate::proto::*; +use crate::data::*; +use crate::membership::System; +use crate::rpc_client::*; + + +pub struct Table<F: TableFormat> { + phantom: PhantomData<F>, + + pub name: String, + + pub system: Arc<System>, + pub store: sled::Tree, + pub partitions: Vec<Partition>, + + pub param: TableReplicationParams, +} + +#[derive(Clone)] +pub struct TableReplicationParams { + pub replication_factor: usize, + pub read_quorum: usize, + pub write_quorum: usize, + pub timeout: Duration, +} + +#[async_trait] +pub trait TableRpcHandler { + async fn handle(&self, rpc: &[u8]) -> Result<Vec<u8>, Error>; +} + +struct TableRpcHandlerAdapter<F: TableFormat> { + table: Arc<Table<F>>, +} + +#[async_trait] +impl<F: TableFormat + 'static> TableRpcHandler for TableRpcHandlerAdapter<F> { + async fn handle(&self, rpc: &[u8]) -> Result<Vec<u8>, Error> { + let msg = rmp_serde::decode::from_read_ref::<_, TableRPC<F>>(rpc)?; + let rep = self.table.handle(msg).await?; + Ok(rmp_serde::encode::to_vec_named(&rep)?) + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub enum TableRPC<F: TableFormat> { + Update(F::K, F::V), +} + +pub struct Partition { + pub begin: Hash, + pub end: Hash, + pub other_nodes: Vec<UUID>, +} + +pub trait KeyHash { + fn hash(&self) -> Hash; +} + +pub trait ValueMerge { + fn merge(&mut self, other: &Self); +} + +#[async_trait] +pub trait TableFormat: Send + Sync { + type K: Clone + Serialize + for<'de> Deserialize<'de> + KeyHash + Send + Sync; + type V: Clone + Serialize + for<'de> Deserialize<'de> + ValueMerge + Send + Sync; + + async fn updated(&self, key: &Self::K, old: Option<&Self::V>, new: &Self::V); +} + +impl<F: TableFormat + 'static> Table<F> { + pub fn new(system: Arc<System>, db: &sled::Db, name: String, param: TableReplicationParams) -> Self { + let store = db.open_tree(&name) + .expect("Unable to open DB tree"); + Self{ + phantom: PhantomData::default(), + name, + system, + store, + partitions: Vec::new(), + param, + } + } + + pub fn rpc_handler(self: Arc<Self>) -> Box<dyn TableRpcHandler + Send + Sync> { + Box::new(TableRpcHandlerAdapter::<F>{ table: self }) + } + + pub async fn insert(&self, k: &F::K, v: &F::V) -> Result<(), Error> { + unimplemented!(); + + let hash = k.hash(); + let who = self.system.members.read().await + .walk_ring(&hash, self.param.replication_factor); + + let msg = rmp_serde::encode::to_vec_named(&TableRPC::<F>::Update(k.clone(), v.clone()))?; + rpc_try_call_many(self.system.clone(), + &who[..], + &Message::TableRPC(self.name.to_string(), msg), + self.param.write_quorum, + self.param.timeout).await?; + Ok(()) + } + + async fn handle(&self, msg: TableRPC<F>) -> Result<TableRPC<F>, Error> { + unimplemented!() + } +} diff --git a/src/version_table.rs b/src/version_table.rs new file mode 100644 index 00000000..d857ac12 --- /dev/null +++ b/src/version_table.rs @@ -0,0 +1,59 @@ +use std::sync::Arc; +use serde::{Serialize, Deserialize}; +use async_trait::async_trait; + +use crate::data::*; +use crate::table::*; +use crate::membership::System; + + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct VersionMetaKey { + pub bucket: String, + pub key: String, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct VersionMetaValue { + pub timestamp: u64, + pub uuid: UUID, + + pub mime_type: String, + pub size: u64, + pub is_complete: bool, + + pub data: VersionData, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub enum VersionData { + DeleteMarker, + Inline(#[serde(with="serde_bytes")] Vec<u8>), + FirstBlock(Hash), +} + +pub struct VersionTable { + system: Arc<System>, +} + +impl KeyHash for VersionMetaKey { + fn hash(&self) -> Hash { + hash(self.bucket.as_bytes()) + } +} + +impl ValueMerge for VersionMetaValue { + fn merge(&mut self, other: &Self) { + unimplemented!() + } +} + +#[async_trait] +impl TableFormat for VersionTable { + type K = VersionMetaKey; + type V = VersionMetaValue; + + async fn updated(&self, key: &Self::K, old: Option<&Self::V>, new: &Self::V) { + unimplemented!() + } +} |