about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/api_server.rs 70
-rw-r--r-- src/data.rs 24
-rw-r--r-- src/main.rs 5
-rw-r--r-- src/membership.rs 7
-rw-r--r-- src/proto.rs 3
-rw-r--r-- src/rpc_server.rs 24
-rw-r--r-- src/server.rs 55
-rw-r--r-- src/table.rs 116
-rw-r--r-- src/version_table.rs 59
9 files changed, 287 insertions, 76 deletions
diff --git a/src/api_server.rs b/src/api_server.rs
index c1b4d81d..8acd15d8 100644
--- a/src/api_server.rs
+++ b/src/api_server.rs
@@ -9,21 +9,21 @@ use hyper::{Body, Method, Request, Response, Server, StatusCode};
use futures::future::Future;
use crate::error::Error;
-use crate::membership::System;
use crate::data::*;
use crate::proto::*;
use crate::rpc_client::*;
+use crate::server::Garage;
-pub async fn run_api_server(sys: Arc<System>, shutdown_signal: impl Future<Output=()>) -> Result<(), hyper::Error> {
- let addr = ([0, 0, 0, 0], sys.config.api_port).into();
+pub async fn run_api_server(garage: Arc<Garage>, shutdown_signal: impl Future<Output=()>) -> Result<(), hyper::Error> {
+ let addr = ([0, 0, 0, 0], garage.system.config.api_port).into();
let service = make_service_fn(|conn: &AddrStream| {
- let sys = sys.clone();
+ let garage = garage.clone();
let client_addr = conn.remote_addr();
async move {
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
- let sys = sys.clone();
- handler(sys, req, client_addr)
+ let garage = garage.clone();
+ handler(garage, req, client_addr)
}))
}
});
@@ -36,8 +36,8 @@ pub async fn run_api_server(sys: Arc<System>, shutdown_signal: impl Future<Outpu
graceful.await
}
-async fn handler(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> {
- match handler_inner(sys, req, addr).await {
+async fn handler(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> {
+ match handler_inner(garage, req, addr).await {
Ok(x) => Ok(x),
Err(Error::BadRequest(e)) => {
let mut bad_request = Response::new(Body::from(format!("{}\n", e)));
@@ -53,7 +53,7 @@ async fn handler(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) -> Resu
}
}
-async fn handler_inner(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> {
+async fn handler_inner(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> {
eprintln!("{} {} {}", addr, req.method(), req.uri());
let bucket = req.headers()
@@ -75,7 +75,7 @@ async fn handler_inner(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) -
.map(|x| x.to_str())
.unwrap_or(Ok("blob"))?
.to_string();
- let version_uuid = handle_put(sys, &mime_type, &bucket, &key, req.into_body()).await?;
+ let version_uuid = handle_put(garage, &mime_type, &bucket, &key, req.into_body()).await?;
Ok(Response::new(Body::from(
format!("Version UUID: {:?}", version_uuid),
)))
@@ -84,22 +84,24 @@ async fn handler_inner(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) -
}
}
-async fn handle_put(sys: Arc<System>,
+async fn handle_put(garage: Arc<Garage>,
mime_type: &str,
bucket: &str, key: &str, body: Body)
-> Result<UUID, Error>
{
let version_uuid = gen_uuid();
- let mut chunker = BodyChunker::new(body, sys.config.block_size);
+ let mut chunker = BodyChunker::new(body, garage.system.config.block_size);
let first_block = match chunker.next().await? {
Some(x) => x,
None => return Err(Error::BadRequest(format!("Empty body"))),
};
- let mut version = VersionMeta{
+ let version_key = VersionMetaKey{
bucket: bucket.to_string(),
key: key.to_string(),
+ };
+ let mut version_value = VersionMetaValue {
timestamp: now_msec(),
uuid: version_uuid.clone(),
mime_type: mime_type.to_string(),
@@ -107,27 +109,17 @@ async fn handle_put(sys: Arc<System>,
is_complete: false,
data: VersionData::DeleteMarker,
};
- let version_who = sys.members.read().await
- .walk_ring(&version_uuid, sys.config.meta_replication_factor);
if first_block.len() < INLINE_THRESHOLD {
- version.data = VersionData::Inline(first_block);
- version.is_complete = true;
- rpc_try_call_many(sys.clone(),
- &version_who[..],
- &Message::AdvertiseVersion(version),
- (sys.config.meta_replication_factor+1)/2,
- DEFAULT_TIMEOUT).await?;
+ version_value.data = VersionData::Inline(first_block);
+ version_value.is_complete = true;
+ garage.version_table.insert(&version_key, &version_value).await?;
return Ok(version_uuid)
}
let first_block_hash = hash(&first_block[..]);
- version.data = VersionData::FirstBlock(first_block_hash);
- rpc_try_call_many(sys.clone(),
- &version_who[..],
- &Message::AdvertiseVersion(version.clone()),
- (sys.config.meta_replication_factor+1)/2,
- DEFAULT_TIMEOUT).await?;
+ version_value.data = VersionData::FirstBlock(first_block_hash);
+ garage.version_table.insert(&version_key, &version_value).await?;
let block_meta = BlockMeta{
version_uuid: version_uuid.clone(),
@@ -135,7 +127,7 @@ async fn handle_put(sys: Arc<System>,
hash: hash(&first_block[..]),
};
let mut next_offset = first_block.len();
- let mut put_curr_block = put_block(sys.clone(), block_meta, first_block);
+ let mut put_curr_block = put_block(garage.clone(), block_meta, first_block);
loop {
let (_, next_block) = futures::try_join!(put_curr_block, chunker.next())?;
if let Some(block) = next_block {
@@ -145,7 +137,7 @@ async fn handle_put(sys: Arc<System>,
hash: hash(&block[..]),
};
next_offset += block.len();
- put_curr_block = put_block(sys.clone(), block_meta, block);
+ put_curr_block = put_block(garage.clone(), block_meta, block);
} else {
break;
}
@@ -153,25 +145,21 @@ async fn handle_put(sys: Arc<System>,
// TODO: if at any step we have an error, we should undo everything we did
- version.is_complete = true;
- rpc_try_call_many(sys.clone(),
- &version_who[..],
- &Message::AdvertiseVersion(version),
- (sys.config.meta_replication_factor+1)/2,
- DEFAULT_TIMEOUT).await?;
+ version_value.is_complete = true;
+ garage.version_table.insert(&version_key, &version_value).await?;
Ok(version_uuid)
}
-async fn put_block(sys: Arc<System>, meta: BlockMeta, data: Vec<u8>) -> Result<(), Error> {
- let who = sys.members.read().await
- .walk_ring(&meta.hash, sys.config.meta_replication_factor);
- rpc_try_call_many(sys.clone(),
+async fn put_block(garage: Arc<Garage>, meta: BlockMeta, data: Vec<u8>) -> Result<(), Error> {
+ let who = garage.system.members.read().await
+ .walk_ring(&meta.hash, garage.system.config.meta_replication_factor);
+ rpc_try_call_many(garage.system.clone(),
&who[..],
&Message::PutBlock(PutBlockMessage{
meta,
data,
}),
- (sys.config.meta_replication_factor+1)/2,
+ (garage.system.config.meta_replication_factor+1)/2,
DEFAULT_TIMEOUT).await?;
Ok(())
}
diff --git a/src/data.rs b/src/data.rs
index 3c71b782..bbe9aa1d 100644
--- a/src/data.rs
+++ b/src/data.rs
@@ -106,7 +106,7 @@ pub struct NetworkConfigEntry {
// Data management
-pub const INLINE_THRESHOLD: usize = 2048;
+pub const INLINE_THRESHOLD: usize = 3072;
#[derive(Debug, Serialize, Deserialize)]
pub struct SplitpointMeta {
@@ -118,27 +118,7 @@ pub struct SplitpointMeta {
pub deleted: bool,
}
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct VersionMeta {
- pub bucket: String,
- pub key: String,
-
- pub timestamp: u64,
- pub uuid: UUID,
-
- pub mime_type: String,
- pub size: u64,
- pub is_complete: bool,
-
- pub data: VersionData,
-}
-
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub enum VersionData {
- DeleteMarker,
- Inline(#[serde(with="serde_bytes")] Vec<u8>),
- FirstBlock(Hash),
-}
+pub use crate::version_table::*;
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct BlockMeta {
diff --git a/src/main.rs b/src/main.rs
index 1e4107c2..aa0f23dc 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,12 @@
mod error;
mod data;
mod proto;
+
mod membership;
+mod table;
+
+mod version_table;
+
mod server;
mod rpc_server;
mod rpc_client;
diff --git a/src/membership.rs b/src/membership.rs
index 69805f2a..e1eeae41 100644
--- a/src/membership.rs
+++ b/src/membership.rs
@@ -124,6 +124,11 @@ impl Members {
i - 1
}
};
+
+ self.walk_ring_from_pos(start, n)
+ }
+
+ fn walk_ring_from_pos(&self, start: usize, n: usize) -> Vec<UUID> {
let mut ret = vec![];
let mut datacenters = vec![];
@@ -143,7 +148,7 @@ impl Members {
}
ret
- }
+ }
}
fn read_network_config(metadata_dir: &PathBuf) -> Result<NetworkConfig, Error> {
diff --git a/src/proto.rs b/src/proto.rs
index 04b8e2b2..99ab8fbe 100644
--- a/src/proto.rs
+++ b/src/proto.rs
@@ -18,7 +18,8 @@ pub enum Message {
AdvertiseConfig(NetworkConfig),
PutBlock(PutBlockMessage),
- AdvertiseVersion(VersionMeta),
+
+ TableRPC(String, #[serde(with = "serde_bytes")] Vec<u8>),
}
#[derive(Debug, Serialize, Deserialize)]
diff --git a/src/rpc_server.rs b/src/rpc_server.rs
index 55c7482b..eda300c4 100644
--- a/src/rpc_server.rs
+++ b/src/rpc_server.rs
@@ -1,6 +1,7 @@
use std::net::SocketAddr;
use std::sync::Arc;
+use futures_util::future::FutureExt;
use bytes::IntoBuf;
use hyper::service::{make_service_fn, service_fn};
use hyper::server::conn::AddrStream;
@@ -9,7 +10,7 @@ use futures::future::Future;
use crate::error::Error;
use crate::proto::Message;
-use crate::membership::System;
+use crate::server::Garage;
fn err_to_msg(x: Result<Message, Error>) -> Message {
match x {
@@ -18,7 +19,7 @@ fn err_to_msg(x: Result<Message, Error>) -> Message {
}
}
-async fn handler(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> {
+async fn handler(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> {
if req.method() != &Method::POST {
let mut bad_request = Response::default();
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
@@ -30,12 +31,21 @@ async fn handler(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) -> Resu
eprintln!("RPC from {}: {:?}", addr, msg);
+ let sys = garage.system.clone();
let resp = err_to_msg(match &msg {
Message::Ping(ping) => sys.handle_ping(&addr, ping).await,
Message::PullStatus => sys.handle_pull_status().await,
Message::PullConfig => sys.handle_pull_config().await,
Message::AdvertiseNodesUp(adv) => sys.handle_advertise_nodes_up(adv).await,
Message::AdvertiseConfig(adv) => sys.handle_advertise_config(adv).await,
+ Message::TableRPC(table, msg) => {
+ if let Some(rpc_handler) = garage.table_rpc_handlers.get(table) {
+ rpc_handler.handle(&msg[..]).await
+ .map(|rep| Message::TableRPC(table.to_string(), rep))
+ } else {
+ Ok(Message::Error(format!("Unknown table: {}", table)))
+ }
+ }
_ => Ok(Message::Error(format!("Unexpected message: {:?}", msg))),
});
@@ -46,16 +56,16 @@ async fn handler(sys: Arc<System>, req: Request<Body>, addr: SocketAddr) -> Resu
}
-pub async fn run_rpc_server(sys: Arc<System>, shutdown_signal: impl Future<Output=()>) -> Result<(), hyper::Error> {
- let bind_addr = ([0, 0, 0, 0], sys.config.rpc_port).into();
+pub async fn run_rpc_server(garage: Arc<Garage>, shutdown_signal: impl Future<Output=()>) -> Result<(), hyper::Error> {
+ let bind_addr = ([0, 0, 0, 0], garage.system.config.rpc_port).into();
let service = make_service_fn(|conn: &AddrStream| {
let client_addr = conn.remote_addr();
- let sys = sys.clone();
+ let garage = garage.clone();
async move {
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
- let sys = sys.clone();
- handler(sys, req, client_addr)
+ let garage = garage.clone();
+ handler(garage, req, client_addr)
}))
}
});
diff --git a/src/server.rs b/src/server.rs
index d5da8c17..31f1cc28 100644
--- a/src/server.rs
+++ b/src/server.rs
@@ -1,3 +1,4 @@
+use std::collections::HashMap;
use std::io::{Read, Write};
use std::sync::Arc;
use std::net::SocketAddr;
@@ -6,10 +7,51 @@ use futures::channel::oneshot;
use serde::Deserialize;
use crate::data::*;
+use crate::proto::*;
use crate::error::Error;
use crate::membership::System;
use crate::api_server;
use crate::rpc_server;
+use crate::table::*;
+
+pub struct Garage {
+ pub db: sled::Db,
+ pub system: Arc<System>,
+
+ pub table_rpc_handlers: HashMap<String, Box<dyn TableRpcHandler + Sync + Send>>,
+
+ pub version_table: Arc<Table<VersionTable>>,
+}
+
+impl Garage {
+ pub fn new(config: Config, id: UUID, db: sled::Db) -> Self {
+ let system = Arc::new(System::new(config, id));
+
+ let meta_rep_param = TableReplicationParams{
+ replication_factor: system.config.meta_replication_factor,
+ write_quorum: (system.config.meta_replication_factor+1)/2,
+ read_quorum: (system.config.meta_replication_factor+1)/2,
+ timeout: DEFAULT_TIMEOUT,
+ };
+
+ let version_table = Arc::new(Table::new(
+ system.clone(),
+ &db,
+ "version".to_string(),
+ meta_rep_param.clone()));
+
+ let mut garage = Self{
+ db,
+ system: system.clone(),
+ table_rpc_handlers: HashMap::new(),
+ version_table,
+ };
+ garage.table_rpc_handlers.insert(
+ garage.version_table.name.clone(),
+ garage.version_table.clone().rpc_handler());
+ garage
+ }
+}
fn default_block_size() -> usize {
1048576
@@ -88,20 +130,25 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
let config = read_config(config_file)
.expect("Unable to read config file");
+ let mut db_path = config.metadata_dir.clone();
+ db_path.push("garage_metadata");
+ let db = sled::open(db_path)
+ .expect("Unable to open DB");
+
let id = gen_node_id(&config.metadata_dir)
.expect("Unable to read or generate node ID");
println!("Node ID: {}", hex::encode(&id));
- let sys = Arc::new(System::new(config, id));
+ let garage = Arc::new(Garage::new(config, id, db));
let (tx1, rx1) = oneshot::channel();
let (tx2, rx2) = oneshot::channel();
- let rpc_server = rpc_server::run_rpc_server(sys.clone(), wait_from(rx1));
- let api_server = api_server::run_api_server(sys.clone(), wait_from(rx2));
+ let rpc_server = rpc_server::run_rpc_server(garage.clone(), wait_from(rx1));
+ let api_server = api_server::run_api_server(garage.clone(), wait_from(rx2));
tokio::spawn(shutdown_signal(vec![tx1, tx2]));
- tokio::spawn(sys.bootstrap());
+ tokio::spawn(garage.system.clone().bootstrap());
futures::try_join!(rpc_server, api_server)?;
Ok(())
diff --git a/src/table.rs b/src/table.rs
new file mode 100644
index 00000000..5c8e93a5
--- /dev/null
+++ b/src/table.rs
@@ -0,0 +1,116 @@
+use std::marker::PhantomData;
+use std::time::Duration;
+use std::sync::Arc;
+use serde::{Serialize, Deserialize};
+use async_trait::async_trait;
+
+use crate::error::Error;
+use crate::proto::*;
+use crate::data::*;
+use crate::membership::System;
+use crate::rpc_client::*;
+
+
+pub struct Table<F: TableFormat> {
+ phantom: PhantomData<F>,
+
+ pub name: String,
+
+ pub system: Arc<System>,
+ pub store: sled::Tree,
+ pub partitions: Vec<Partition>,
+
+ pub param: TableReplicationParams,
+}
+
+#[derive(Clone)]
+pub struct TableReplicationParams {
+ pub replication_factor: usize,
+ pub read_quorum: usize,
+ pub write_quorum: usize,
+ pub timeout: Duration,
+}
+
+#[async_trait]
+pub trait TableRpcHandler {
+ async fn handle(&self, rpc: &[u8]) -> Result<Vec<u8>, Error>;
+}
+
+struct TableRpcHandlerAdapter<F: TableFormat> {
+ table: Arc<Table<F>>,
+}
+
+#[async_trait]
+impl<F: TableFormat + 'static> TableRpcHandler for TableRpcHandlerAdapter<F> {
+ async fn handle(&self, rpc: &[u8]) -> Result<Vec<u8>, Error> {
+ let msg = rmp_serde::decode::from_read_ref::<_, TableRPC<F>>(rpc)?;
+ let rep = self.table.handle(msg).await?;
+ Ok(rmp_serde::encode::to_vec_named(&rep)?)
+ }
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub enum TableRPC<F: TableFormat> {
+ Update(F::K, F::V),
+}
+
+pub struct Partition {
+ pub begin: Hash,
+ pub end: Hash,
+ pub other_nodes: Vec<UUID>,
+}
+
+pub trait KeyHash {
+ fn hash(&self) -> Hash;
+}
+
+pub trait ValueMerge {
+ fn merge(&mut self, other: &Self);
+}
+
+#[async_trait]
+pub trait TableFormat: Send + Sync {
+ type K: Clone + Serialize + for<'de> Deserialize<'de> + KeyHash + Send + Sync;
+ type V: Clone + Serialize + for<'de> Deserialize<'de> + ValueMerge + Send + Sync;
+
+ async fn updated(&self, key: &Self::K, old: Option<&Self::V>, new: &Self::V);
+}
+
+impl<F: TableFormat + 'static> Table<F> {
+ pub fn new(system: Arc<System>, db: &sled::Db, name: String, param: TableReplicationParams) -> Self {
+ let store = db.open_tree(&name)
+ .expect("Unable to open DB tree");
+ Self{
+ phantom: PhantomData::default(),
+ name,
+ system,
+ store,
+ partitions: Vec::new(),
+ param,
+ }
+ }
+
+ pub fn rpc_handler(self: Arc<Self>) -> Box<dyn TableRpcHandler + Send + Sync> {
+ Box::new(TableRpcHandlerAdapter::<F>{ table: self })
+ }
+
+ pub async fn insert(&self, k: &F::K, v: &F::V) -> Result<(), Error> {
+ unimplemented!();
+
+ let hash = k.hash();
+ let who = self.system.members.read().await
+ .walk_ring(&hash, self.param.replication_factor);
+
+ let msg = rmp_serde::encode::to_vec_named(&TableRPC::<F>::Update(k.clone(), v.clone()))?;
+ rpc_try_call_many(self.system.clone(),
+ &who[..],
+ &Message::TableRPC(self.name.to_string(), msg),
+ self.param.write_quorum,
+ self.param.timeout).await?;
+ Ok(())
+ }
+
+ async fn handle(&self, msg: TableRPC<F>) -> Result<TableRPC<F>, Error> {
+ unimplemented!()
+ }
+}
diff --git a/src/version_table.rs b/src/version_table.rs
new file mode 100644
index 00000000..d857ac12
--- /dev/null
+++ b/src/version_table.rs
@@ -0,0 +1,59 @@
+use std::sync::Arc;
+use serde::{Serialize, Deserialize};
+use async_trait::async_trait;
+
+use crate::data::*;
+use crate::table::*;
+use crate::membership::System;
+
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct VersionMetaKey {
+ pub bucket: String,
+ pub key: String,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct VersionMetaValue {
+ pub timestamp: u64,
+ pub uuid: UUID,
+
+ pub mime_type: String,
+ pub size: u64,
+ pub is_complete: bool,
+
+ pub data: VersionData,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub enum VersionData {
+ DeleteMarker,
+ Inline(#[serde(with="serde_bytes")] Vec<u8>),
+ FirstBlock(Hash),
+}
+
+pub struct VersionTable {
+ system: Arc<System>,
+}
+
+impl KeyHash for VersionMetaKey {
+ fn hash(&self) -> Hash {
+ hash(self.bucket.as_bytes())
+ }
+}
+
+impl ValueMerge for VersionMetaValue {
+ fn merge(&mut self, other: &Self) {
+ unimplemented!()
+ }
+}
+
+#[async_trait]
+impl TableFormat for VersionTable {
+ type K = VersionMetaKey;
+ type V = VersionMetaValue;
+
+ async fn updated(&self, key: &Self::K, old: Option<&Self::V>, new: &Self::V) {
+ unimplemented!()
+ }
+}