aboutsummaryrefslogtreecommitdiff
path: root/src/store
diff options
context:
space:
mode:
authorAlex Auvolat <alex@adnab.me>2020-04-24 10:10:01 +0000
committerAlex Auvolat <alex@adnab.me>2020-04-24 10:10:01 +0000
commitd8f5e643bcee95969b59c309809710a38b0661e3 (patch)
tree9bb179f351f60fc0396db731cb8ca0fe25dde17e /src/store
parent51fb3799a153a0db990fc74a37563ec612e20fc2 (diff)
downloadgarage-d8f5e643bcee95969b59c309809710a38b0661e3.tar.gz
garage-d8f5e643bcee95969b59c309809710a38b0661e3.zip
Split code for modular compilation
Diffstat (limited to 'src/store')
-rw-r--r--src/store/block.rs506
-rw-r--r--src/store/block_ref_table.rs68
-rw-r--r--src/store/bucket_table.rs121
-rw-r--r--src/store/key_table.rs154
-rw-r--r--src/store/mod.rs7
-rw-r--r--src/store/object_table.rs165
-rw-r--r--src/store/repair.rs184
-rw-r--r--src/store/version_table.rs131
8 files changed, 0 insertions, 1336 deletions
diff --git a/src/store/block.rs b/src/store/block.rs
deleted file mode 100644
index e2ef32e0..00000000
--- a/src/store/block.rs
+++ /dev/null
@@ -1,506 +0,0 @@
-use std::path::PathBuf;
-use std::sync::Arc;
-use std::time::Duration;
-
-use arc_swap::ArcSwapOption;
-use futures::future::*;
-use futures::select;
-use futures::stream::*;
-use serde::{Deserialize, Serialize};
-use tokio::fs;
-use tokio::prelude::*;
-use tokio::sync::{watch, Mutex, Notify};
-
-use crate::data;
-use crate::data::*;
-use crate::error::Error;
-
-use crate::rpc::membership::System;
-use crate::rpc::rpc_client::*;
-use crate::rpc::rpc_server::*;
-
-use crate::table::table_sharded::TableShardedReplication;
-use crate::table::TableReplication;
-
-use crate::store::block_ref_table::*;
-
-use crate::server::Garage;
-
-pub const INLINE_THRESHOLD: usize = 3072;
-
-const BLOCK_RW_TIMEOUT: Duration = Duration::from_secs(42);
-const NEED_BLOCK_QUERY_TIMEOUT: Duration = Duration::from_secs(5);
-const RESYNC_RETRY_TIMEOUT: Duration = Duration::from_secs(10);
-
-#[derive(Debug, Serialize, Deserialize)]
-pub enum Message {
- Ok,
- GetBlock(Hash),
- PutBlock(PutBlockMessage),
- NeedBlockQuery(Hash),
- NeedBlockReply(bool),
-}
-
-#[derive(Debug, Serialize, Deserialize)]
-pub struct PutBlockMessage {
- pub hash: Hash,
-
- #[serde(with = "serde_bytes")]
- pub data: Vec<u8>,
-}
-
-impl RpcMessage for Message {}
-
-pub struct BlockManager {
- pub replication: TableShardedReplication,
- pub data_dir: PathBuf,
- pub data_dir_lock: Mutex<()>,
-
- pub rc: sled::Tree,
-
- pub resync_queue: sled::Tree,
- pub resync_notify: Notify,
-
- pub system: Arc<System>,
- rpc_client: Arc<RpcClient<Message>>,
- pub garage: ArcSwapOption<Garage>,
-}
-
-impl BlockManager {
- pub fn new(
- db: &sled::Db,
- data_dir: PathBuf,
- replication: TableShardedReplication,
- system: Arc<System>,
- rpc_server: &mut RpcServer,
- ) -> Arc<Self> {
- let rc = db
- .open_tree("block_local_rc")
- .expect("Unable to open block_local_rc tree");
- rc.set_merge_operator(rc_merge);
-
- let resync_queue = db
- .open_tree("block_local_resync_queue")
- .expect("Unable to open block_local_resync_queue tree");
-
- let rpc_path = "block_manager";
- let rpc_client = system.rpc_client::<Message>(rpc_path);
-
- let block_manager = Arc::new(Self {
- replication,
- data_dir,
- data_dir_lock: Mutex::new(()),
- rc,
- resync_queue,
- resync_notify: Notify::new(),
- system,
- rpc_client,
- garage: ArcSwapOption::from(None),
- });
- block_manager
- .clone()
- .register_handler(rpc_server, rpc_path.into());
- block_manager
- }
-
- fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) {
- let self2 = self.clone();
- rpc_server.add_handler::<Message, _, _>(path, move |msg, _addr| {
- let self2 = self2.clone();
- async move { self2.handle(&msg).await }
- });
-
- let self2 = self.clone();
- self.rpc_client
- .set_local_handler(self.system.id, move |msg| {
- let self2 = self2.clone();
- async move { self2.handle(&msg).await }
- });
- }
-
- async fn handle(self: Arc<Self>, msg: &Message) -> Result<Message, Error> {
- match msg {
- Message::PutBlock(m) => self.write_block(&m.hash, &m.data).await,
- Message::GetBlock(h) => self.read_block(h).await,
- Message::NeedBlockQuery(h) => self.need_block(h).await.map(Message::NeedBlockReply),
- _ => Err(Error::BadRequest(format!("Unexpected RPC message"))),
- }
- }
-
- pub async fn spawn_background_worker(self: Arc<Self>) {
- // Launch 2 simultaneous workers for background resync loop preprocessing
- for i in 0..2usize {
- let bm2 = self.clone();
- let background = self.system.background.clone();
- tokio::spawn(async move {
- tokio::time::delay_for(Duration::from_secs(10)).await;
- background
- .spawn_worker(format!("block resync worker {}", i), move |must_exit| {
- bm2.resync_loop(must_exit)
- })
- .await;
- });
- }
- }
-
- pub async fn write_block(&self, hash: &Hash, data: &[u8]) -> Result<Message, Error> {
- let _lock = self.data_dir_lock.lock().await;
-
- let mut path = self.block_dir(hash);
- fs::create_dir_all(&path).await?;
-
- path.push(hex::encode(hash));
- if fs::metadata(&path).await.is_ok() {
- return Ok(Message::Ok);
- }
-
- let mut f = fs::File::create(path).await?;
- f.write_all(data).await?;
- drop(f);
-
- Ok(Message::Ok)
- }
-
- pub async fn read_block(&self, hash: &Hash) -> Result<Message, Error> {
- let path = self.block_path(hash);
-
- let mut f = match fs::File::open(&path).await {
- Ok(f) => f,
- Err(e) => {
- // Not found but maybe we should have had it ??
- self.put_to_resync(hash, 0)?;
- return Err(Into::into(e));
- }
- };
- let mut data = vec![];
- f.read_to_end(&mut data).await?;
- drop(f);
-
- if data::hash(&data[..]) != *hash {
- let _lock = self.data_dir_lock.lock().await;
- warn!("Block {:?} is corrupted. Deleting and resyncing.", hash);
- fs::remove_file(path).await?;
- self.put_to_resync(&hash, 0)?;
- return Err(Error::CorruptData(*hash));
- }
-
- Ok(Message::PutBlock(PutBlockMessage { hash: *hash, data }))
- }
-
- pub async fn need_block(&self, hash: &Hash) -> Result<bool, Error> {
- let needed = self
- .rc
- .get(hash.as_ref())?
- .map(|x| u64_from_bytes(x.as_ref()) > 0)
- .unwrap_or(false);
- if needed {
- let path = self.block_path(hash);
- let exists = fs::metadata(&path).await.is_ok();
- Ok(!exists)
- } else {
- Ok(false)
- }
- }
-
- fn block_dir(&self, hash: &Hash) -> PathBuf {
- let mut path = self.data_dir.clone();
- path.push(hex::encode(&hash.as_slice()[0..1]));
- path.push(hex::encode(&hash.as_slice()[1..2]));
- path
- }
- fn block_path(&self, hash: &Hash) -> PathBuf {
- let mut path = self.block_dir(hash);
- path.push(hex::encode(hash.as_ref()));
- path
- }
-
- pub fn block_incref(&self, hash: &Hash) -> Result<(), Error> {
- let old_rc = self.rc.get(&hash)?;
- self.rc.merge(&hash, vec![1])?;
- if old_rc.map(|x| u64_from_bytes(&x[..]) == 0).unwrap_or(true) {
- self.put_to_resync(&hash, BLOCK_RW_TIMEOUT.as_millis() as u64)?;
- }
- Ok(())
- }
-
- pub fn block_decref(&self, hash: &Hash) -> Result<(), Error> {
- let new_rc = self.rc.merge(&hash, vec![0])?;
- if new_rc.map(|x| u64_from_bytes(&x[..]) == 0).unwrap_or(true) {
- self.put_to_resync(&hash, 0)?;
- }
- Ok(())
- }
-
- fn put_to_resync(&self, hash: &Hash, delay_millis: u64) -> Result<(), Error> {
- let when = now_msec() + delay_millis;
- trace!("Put resync_queue: {} {:?}", when, hash);
- let mut key = u64::to_be_bytes(when).to_vec();
- key.extend(hash.as_ref());
- self.resync_queue.insert(key, hash.as_ref())?;
- self.resync_notify.notify();
- Ok(())
- }
-
- async fn resync_loop(
- self: Arc<Self>,
- mut must_exit: watch::Receiver<bool>,
- ) -> Result<(), Error> {
- let mut n_failures = 0usize;
- while !*must_exit.borrow() {
- if let Some((time_bytes, hash_bytes)) = self.resync_queue.pop_min()? {
- let time_msec = u64_from_bytes(&time_bytes[0..8]);
- let now = now_msec();
- if now >= time_msec {
- let mut hash = [0u8; 32];
- hash.copy_from_slice(hash_bytes.as_ref());
- let hash = Hash::from(hash);
-
- if let Err(e) = self.resync_iter(&hash).await {
- warn!("Failed to resync block {:?}, retrying later: {}", hash, e);
- self.put_to_resync(&hash, RESYNC_RETRY_TIMEOUT.as_millis() as u64)?;
- n_failures += 1;
- if n_failures >= 10 {
- warn!("Too many resync failures, throttling.");
- tokio::time::delay_for(Duration::from_secs(1)).await;
- }
- } else {
- n_failures = 0;
- }
- } else {
- self.resync_queue.insert(time_bytes, hash_bytes)?;
- let delay = tokio::time::delay_for(Duration::from_millis(time_msec - now));
- select! {
- _ = delay.fuse() => (),
- _ = self.resync_notify.notified().fuse() => (),
- _ = must_exit.recv().fuse() => (),
- }
- }
- } else {
- select! {
- _ = self.resync_notify.notified().fuse() => (),
- _ = must_exit.recv().fuse() => (),
- }
- }
- }
- Ok(())
- }
-
- async fn resync_iter(&self, hash: &Hash) -> Result<(), Error> {
- let path = self.block_path(hash);
-
- let exists = fs::metadata(&path).await.is_ok();
- let needed = self
- .rc
- .get(hash.as_ref())?
- .map(|x| u64_from_bytes(x.as_ref()) > 0)
- .unwrap_or(false);
-
- if exists != needed {
- info!(
- "Resync block {:?}: exists {}, needed {}",
- hash, exists, needed
- );
- }
-
- if exists && !needed {
- let garage = self.garage.load_full().unwrap();
- let active_refs = garage
- .block_ref_table
- .get_range(&hash, None, Some(()), 1)
- .await?;
- let needed_by_others = !active_refs.is_empty();
- if needed_by_others {
- let ring = self.system.ring.borrow().clone();
- let who = self.replication.replication_nodes(&hash, &ring);
- let msg = Arc::new(Message::NeedBlockQuery(*hash));
- let who_needs_fut = who.iter().map(|to| {
- self.rpc_client
- .call_arc(*to, msg.clone(), NEED_BLOCK_QUERY_TIMEOUT)
- });
- let who_needs = join_all(who_needs_fut).await;
-
- let mut need_nodes = vec![];
- for (node, needed) in who.into_iter().zip(who_needs.iter()) {
- match needed {
- Ok(Message::NeedBlockReply(needed)) => {
- if *needed {
- need_nodes.push(node);
- }
- }
- Err(e) => {
- return Err(Error::Message(format!(
- "Should delete block, but unable to confirm that all other nodes that need it have it: {}",
- e
- )));
- }
- Ok(_) => {
- return Err(Error::Message(format!(
- "Unexpected response to NeedBlockQuery RPC"
- )));
- }
- }
- }
-
- if need_nodes.len() > 0 {
- let put_block_message = self.read_block(hash).await?;
- self.rpc_client
- .try_call_many(
- &need_nodes[..],
- put_block_message,
- RequestStrategy::with_quorum(need_nodes.len())
- .with_timeout(BLOCK_RW_TIMEOUT),
- )
- .await?;
- }
- }
- fs::remove_file(path).await?;
- self.resync_queue.remove(&hash)?;
- }
-
- if needed && !exists {
- // TODO find a way to not do this if they are sending it to us
- // Let's suppose this isn't an issue for now with the BLOCK_RW_TIMEOUT delay
- // between the RC being incremented and this part being called.
- let block_data = self.rpc_get_block(&hash).await?;
- self.write_block(hash, &block_data[..]).await?;
- }
-
- Ok(())
- }
-
- pub async fn rpc_get_block(&self, hash: &Hash) -> Result<Vec<u8>, Error> {
- let who = self.replication.read_nodes(&hash, &self.system);
- let resps = self
- .rpc_client
- .try_call_many(
- &who[..],
- Message::GetBlock(*hash),
- RequestStrategy::with_quorum(1)
- .with_timeout(BLOCK_RW_TIMEOUT)
- .interrupt_after_quorum(true),
- )
- .await?;
-
- for resp in resps {
- if let Message::PutBlock(msg) = resp {
- return Ok(msg.data);
- }
- }
- Err(Error::Message(format!(
- "Unable to read block {:?}: no valid blocks returned",
- hash
- )))
- }
-
- pub async fn rpc_put_block(&self, hash: Hash, data: Vec<u8>) -> Result<(), Error> {
- let who = self.replication.write_nodes(&hash, &self.system);
- self.rpc_client
- .try_call_many(
- &who[..],
- Message::PutBlock(PutBlockMessage { hash, data }),
- RequestStrategy::with_quorum(self.replication.write_quorum())
- .with_timeout(BLOCK_RW_TIMEOUT),
- )
- .await?;
- Ok(())
- }
-
- pub async fn repair_data_store(&self, must_exit: &watch::Receiver<bool>) -> Result<(), Error> {
- // 1. Repair blocks from RC table
- let garage = self.garage.load_full().unwrap();
- let mut last_hash = None;
- let mut i = 0usize;
- for entry in garage.block_ref_table.store.iter() {
- let (_k, v_bytes) = entry?;
- let block_ref = rmp_serde::decode::from_read_ref::<_, BlockRef>(v_bytes.as_ref())?;
- if Some(&block_ref.block) == last_hash.as_ref() {
- continue;
- }
- if !block_ref.deleted {
- last_hash = Some(block_ref.block);
- self.put_to_resync(&block_ref.block, 0)?;
- }
- i += 1;
- if i & 0xFF == 0 && *must_exit.borrow() {
- return Ok(());
- }
- }
-
- // 2. Repair blocks actually on disk
- let mut ls_data_dir = fs::read_dir(&self.data_dir).await?;
- while let Some(data_dir_ent) = ls_data_dir.next().await {
- let data_dir_ent = data_dir_ent?;
- let dir_name = data_dir_ent.file_name();
- let dir_name = match dir_name.into_string() {
- Ok(x) => x,
- Err(_) => continue,
- };
- if dir_name.len() != 2 || hex::decode(&dir_name).is_err() {
- continue;
- }
-
- let mut ls_data_dir_2 = match fs::read_dir(data_dir_ent.path()).await {
- Err(e) => {
- warn!(
- "Warning: could not list dir {:?}: {}",
- data_dir_ent.path().to_str(),
- e
- );
- continue;
- }
- Ok(x) => x,
- };
- while let Some(file) = ls_data_dir_2.next().await {
- let file = file?;
- let file_name = file.file_name();
- let file_name = match file_name.into_string() {
- Ok(x) => x,
- Err(_) => continue,
- };
- if file_name.len() != 64 {
- continue;
- }
- let hash_bytes = match hex::decode(&file_name) {
- Ok(h) => h,
- Err(_) => continue,
- };
- let mut hash = [0u8; 32];
- hash.copy_from_slice(&hash_bytes[..]);
- self.put_to_resync(&hash.into(), 0)?;
-
- if *must_exit.borrow() {
- return Ok(());
- }
- }
- }
- Ok(())
- }
-}
-
-fn u64_from_bytes(bytes: &[u8]) -> u64 {
- assert!(bytes.len() == 8);
- let mut x8 = [0u8; 8];
- x8.copy_from_slice(bytes);
- u64::from_be_bytes(x8)
-}
-
-fn rc_merge(_key: &[u8], old: Option<&[u8]>, new: &[u8]) -> Option<Vec<u8>> {
- let old = old.map(u64_from_bytes).unwrap_or(0);
- assert!(new.len() == 1);
- let new = match new[0] {
- 0 => {
- if old > 0 {
- old - 1
- } else {
- 0
- }
- }
- 1 => old + 1,
- _ => unreachable!(),
- };
- if new == 0 {
- None
- } else {
- Some(u64::to_be_bytes(new).to_vec())
- }
-}
diff --git a/src/store/block_ref_table.rs b/src/store/block_ref_table.rs
deleted file mode 100644
index c8a2a2a1..00000000
--- a/src/store/block_ref_table.rs
+++ /dev/null
@@ -1,68 +0,0 @@
-use async_trait::async_trait;
-use serde::{Deserialize, Serialize};
-use std::sync::Arc;
-
-use crate::background::*;
-use crate::data::*;
-use crate::error::Error;
-
-use crate::table::*;
-
-use crate::store::block::*;
-
-#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
-pub struct BlockRef {
- // Primary key
- pub block: Hash,
-
- // Sort key
- pub version: UUID,
-
- // Keep track of deleted status
- pub deleted: bool,
-}
-
-impl Entry<Hash, UUID> for BlockRef {
- fn partition_key(&self) -> &Hash {
- &self.block
- }
- fn sort_key(&self) -> &UUID {
- &self.version
- }
-
- fn merge(&mut self, other: &Self) {
- if other.deleted {
- self.deleted = true;
- }
- }
-}
-
-pub struct BlockRefTable {
- pub background: Arc<BackgroundRunner>,
- pub block_manager: Arc<BlockManager>,
-}
-
-#[async_trait]
-impl TableSchema for BlockRefTable {
- type P = Hash;
- type S = UUID;
- type E = BlockRef;
- type Filter = ();
-
- async fn updated(&self, old: Option<Self::E>, new: Option<Self::E>) -> Result<(), Error> {
- let block = &old.as_ref().or(new.as_ref()).unwrap().block;
- let was_before = old.as_ref().map(|x| !x.deleted).unwrap_or(false);
- let is_after = new.as_ref().map(|x| !x.deleted).unwrap_or(false);
- if is_after && !was_before {
- self.block_manager.block_incref(block)?;
- }
- if was_before && !is_after {
- self.block_manager.block_decref(block)?;
- }
- Ok(())
- }
-
- fn matches_filter(entry: &Self::E, _filter: &Self::Filter) -> bool {
- !entry.deleted
- }
-}
diff --git a/src/store/bucket_table.rs b/src/store/bucket_table.rs
deleted file mode 100644
index a9bdaa70..00000000
--- a/src/store/bucket_table.rs
+++ /dev/null
@@ -1,121 +0,0 @@
-use async_trait::async_trait;
-use serde::{Deserialize, Serialize};
-
-use crate::error::Error;
-use crate::table::*;
-
-#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
-pub struct Bucket {
- // Primary key
- pub name: String,
-
- // Timestamp and deletion
- // Upon version increment, all info is replaced
- pub timestamp: u64,
- pub deleted: bool,
-
- // Authorized keys
- authorized_keys: Vec<AllowedKey>,
-}
-
-impl Bucket {
- pub fn new(
- name: String,
- timestamp: u64,
- deleted: bool,
- authorized_keys: Vec<AllowedKey>,
- ) -> Self {
- let mut ret = Bucket {
- name,
- timestamp,
- deleted,
- authorized_keys: vec![],
- };
- for key in authorized_keys {
- ret.add_key(key)
- .expect("Duplicate AllowedKey in Bucket constructor");
- }
- ret
- }
- /// Add a key only if it is not already present
- pub fn add_key(&mut self, key: AllowedKey) -> Result<(), ()> {
- match self
- .authorized_keys
- .binary_search_by(|k| k.key_id.cmp(&key.key_id))
- {
- Err(i) => {
- self.authorized_keys.insert(i, key);
- Ok(())
- }
- Ok(_) => Err(()),
- }
- }
- pub fn authorized_keys(&self) -> &[AllowedKey] {
- &self.authorized_keys[..]
- }
- pub fn clear_keys(&mut self) {
- self.authorized_keys.clear();
- }
-}
-
-#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
-pub struct AllowedKey {
- pub key_id: String,
- pub timestamp: u64,
- pub allow_read: bool,
- pub allow_write: bool,
-}
-
-impl Entry<EmptyKey, String> for Bucket {
- fn partition_key(&self) -> &EmptyKey {
- &EmptyKey
- }
- fn sort_key(&self) -> &String {
- &self.name
- }
-
- fn merge(&mut self, other: &Self) {
- if other.timestamp < self.timestamp {
- *self = other.clone();
- return;
- }
- if self.timestamp > other.timestamp || self.deleted {
- return;
- }
-
- for ak in other.authorized_keys.iter() {
- match self
- .authorized_keys
- .binary_search_by(|our_ak| our_ak.key_id.cmp(&ak.key_id))
- {
- Ok(i) => {
- let our_ak = &mut self.authorized_keys[i];
- if ak.timestamp > our_ak.timestamp {
- *our_ak = ak.clone();
- }
- }
- Err(i) => {
- self.authorized_keys.insert(i, ak.clone());
- }
- }
- }
- }
-}
-
-pub struct BucketTable;
-
-#[async_trait]
-impl TableSchema for BucketTable {
- type P = EmptyKey;
- type S = String;
- type E = Bucket;
- type Filter = ();
-
- async fn updated(&self, _old: Option<Self::E>, _new: Option<Self::E>) -> Result<(), Error> {
- Ok(())
- }
-
- fn matches_filter(entry: &Self::E, _filter: &Self::Filter) -> bool {
- !entry.deleted
- }
-}
diff --git a/src/store/key_table.rs b/src/store/key_table.rs
deleted file mode 100644
index add6ab02..00000000
--- a/src/store/key_table.rs
+++ /dev/null
@@ -1,154 +0,0 @@
-use async_trait::async_trait;
-use serde::{Deserialize, Serialize};
-
-use crate::data::*;
-use crate::error::Error;
-use crate::table::*;
-
-#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
-pub struct Key {
- // Primary key
- pub key_id: String,
-
- // Associated secret key (immutable)
- pub secret_key: String,
-
- // Name
- pub name: String,
- pub name_timestamp: u64,
-
- // Deletion
- pub deleted: bool,
-
- // Authorized keys
- authorized_buckets: Vec<AllowedBucket>,
-}
-
-impl Key {
- pub fn new(name: String, buckets: Vec<AllowedBucket>) -> Self {
- let key_id = format!("GK{}", hex::encode(&rand::random::<[u8; 12]>()[..]));
- let secret_key = hex::encode(&rand::random::<[u8; 32]>()[..]);
- let mut ret = Self {
- key_id,
- secret_key,
- name,
- name_timestamp: now_msec(),
- deleted: false,
- authorized_buckets: vec![],
- };
- for b in buckets {
- ret.add_bucket(b)
- .expect("Duplicate AllowedBucket in Key constructor");
- }
- ret
- }
- pub fn delete(key_id: String) -> Self {
- Self {
- key_id,
- secret_key: "".into(),
- name: "".into(),
- name_timestamp: now_msec(),
- deleted: true,
- authorized_buckets: vec![],
- }
- }
- /// Add an authorized bucket, only if it wasn't there before
- pub fn add_bucket(&mut self, new: AllowedBucket) -> Result<(), ()> {
- match self
- .authorized_buckets
- .binary_search_by(|b| b.bucket.cmp(&new.bucket))
- {
- Err(i) => {
- self.authorized_buckets.insert(i, new);
- Ok(())
- }
- Ok(_) => Err(()),
- }
- }
- pub fn authorized_buckets(&self) -> &[AllowedBucket] {
- &self.authorized_buckets[..]
- }
- pub fn clear_buckets(&mut self) {
- self.authorized_buckets.clear();
- }
- pub fn allow_read(&self, bucket: &str) -> bool {
- self.authorized_buckets
- .iter()
- .find(|x| x.bucket.as_str() == bucket)
- .map(|x| x.allow_read)
- .unwrap_or(false)
- }
- pub fn allow_write(&self, bucket: &str) -> bool {
- self.authorized_buckets
- .iter()
- .find(|x| x.bucket.as_str() == bucket)
- .map(|x| x.allow_write)
- .unwrap_or(false)
- }
-}
-
-#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
-pub struct AllowedBucket {
- pub bucket: String,
- pub timestamp: u64,
- pub allow_read: bool,
- pub allow_write: bool,
-}
-
-impl Entry<EmptyKey, String> for Key {
- fn partition_key(&self) -> &EmptyKey {
- &EmptyKey
- }
- fn sort_key(&self) -> &String {
- &self.key_id
- }
-
- fn merge(&mut self, other: &Self) {
- if other.deleted {
- self.deleted = true;
- }
- if self.deleted {
- self.authorized_buckets.clear();
- return;
- }
- if other.name_timestamp > self.name_timestamp {
- self.name_timestamp = other.name_timestamp;
- self.name = other.name.clone();
- }
-
- for ab in other.authorized_buckets.iter() {
- match self
- .authorized_buckets
- .binary_search_by(|our_ab| our_ab.bucket.cmp(&ab.bucket))
- {
- Ok(i) => {
- let our_ab = &mut self.authorized_buckets[i];
- if ab.timestamp > our_ab.timestamp {
- *our_ab = ab.clone();
- }
- }
- Err(i) => {
- self.authorized_buckets.insert(i, ab.clone());
- }
- }
- }
- }
-}
-
-pub struct KeyTable;
-
-#[async_trait]
-impl TableSchema for KeyTable {
- type P = EmptyKey;
- type S = String;
- type E = Key;
- type Filter = ();
-
- async fn updated(&self, _old: Option<Self::E>, _new: Option<Self::E>) -> Result<(), Error> {
- Ok(())
- }
-
- fn matches_filter(entry: &Self::E, _filter: &Self::Filter) -> bool {
- !entry.deleted
- }
-}
diff --git a/src/store/mod.rs b/src/store/mod.rs
deleted file mode 100644
index 962264c4..00000000
--- a/src/store/mod.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-pub mod block;
-pub mod block_ref_table;
-pub mod bucket_table;
-pub mod key_table;
-pub mod object_table;
-pub mod repair;
-pub mod version_table;
diff --git a/src/store/object_table.rs b/src/store/object_table.rs
deleted file mode 100644
index f329a7f4..00000000
--- a/src/store/object_table.rs
+++ /dev/null
@@ -1,165 +0,0 @@
-use async_trait::async_trait;
-use serde::{Deserialize, Serialize};
-use std::sync::Arc;
-
-use crate::background::BackgroundRunner;
-use crate::data::*;
-use crate::error::Error;
-
-use crate::table::table_sharded::*;
-use crate::table::*;
-
-use crate::store::version_table::*;
-
-#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
-pub struct Object {
- // Primary key
- pub bucket: String,
-
- // Sort key
- pub key: String,
-
- // Data
- versions: Vec<ObjectVersion>,
-}
-
-impl Object {
- pub fn new(bucket: String, key: String, versions: Vec<ObjectVersion>) -> Self {
- let mut ret = Self {
- bucket,
- key,
- versions: vec![],
- };
- for v in versions {
- ret.add_version(v)
- .expect("Twice the same ObjectVersion in Object constructor");
- }
- ret
- }
- /// Adds a version if it wasn't already present
- pub fn add_version(&mut self, new: ObjectVersion) -> Result<(), ()> {
- match self
- .versions
- .binary_search_by(|v| v.cmp_key().cmp(&new.cmp_key()))
- {
- Err(i) => {
- self.versions.insert(i, new);
- Ok(())
- }
- Ok(_) => Err(()),
- }
- }
- pub fn versions(&self) -> &[ObjectVersion] {
- &self.versions[..]
- }
-}
-
-#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
-pub struct ObjectVersion {
- pub uuid: UUID,
- pub timestamp: u64,
-
- pub mime_type: String,
- pub size: u64,
- pub is_complete: bool,
-
- pub data: ObjectVersionData,
-}
-
-#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
-pub enum ObjectVersionData {
- DeleteMarker,
- Inline(#[serde(with = "serde_bytes")] Vec<u8>),
- FirstBlock(Hash),
-}
-
-impl ObjectVersion {
- fn cmp_key(&self) -> (u64, &UUID) {
- (self.timestamp, &self.uuid)
- }
-}
-
-impl Entry<String, String> for Object {
- fn partition_key(&self) -> &String {
- &self.bucket
- }
- fn sort_key(&self) -> &String {
- &self.key
- }
-
- fn merge(&mut self, other: &Self) {
- for other_v in other.versions.iter() {
- match self
- .versions
- .binary_search_by(|v| v.cmp_key().cmp(&other_v.cmp_key()))
- {
- Ok(i) => {
- let mut v = &mut self.versions[i];
- if other_v.size > v.size {
- v.size = other_v.size;
- }
- if other_v.is_complete && !v.is_complete {
- v.is_complete = true;
- }
- }
- Err(i) => {
- self.versions.insert(i, other_v.clone());
- }
- }
- }
- let last_complete = self
- .versions
- .iter()
- .enumerate()
- .rev()
- .filter(|(_, v)| v.is_complete)
- .next()
- .map(|(vi, _)| vi);
-
- if let Some(last_vi) = last_complete {
- self.versions = self.versions.drain(last_vi..).collect::<Vec<_>>();
- }
- }
-}
-
-pub struct ObjectTable {
- pub background: Arc<BackgroundRunner>,
- pub version_table: Arc<Table<VersionTable, TableShardedReplication>>,
-}
-
-#[async_trait]
-impl TableSchema for ObjectTable {
- type P = String;
- type S = String;
- type E = Object;
- type Filter = ();
-
- async fn updated(&self, old: Option<Self::E>, new: Option<Self::E>) -> Result<(), Error> {
- let version_table = self.version_table.clone();
- if let (Some(old_v), Some(new_v)) = (old, new) {
- // Propagate deletion of old versions
- for v in old_v.versions.iter() {
- if new_v
- .versions
- .binary_search_by(|nv| nv.cmp_key().cmp(&v.cmp_key()))
- .is_err()
- {
- let deleted_version = Version::new(
- v.uuid,
- old_v.bucket.clone(),
- old_v.key.clone(),
- true,
- vec![],
- );
- version_table.insert(&deleted_version).await?;
- }
- }
- }
- Ok(())
- }
-
- fn matches_filter(_entry: &Self::E, _filter: &Self::Filter) -> bool {
- // TODO
- true
- }
-}
diff --git a/src/store/repair.rs b/src/store/repair.rs
deleted file mode 100644
index 39c57fc1..00000000
--- a/src/store/repair.rs
+++ /dev/null
@@ -1,184 +0,0 @@
-use std::sync::Arc;
-
-use tokio::sync::watch;
-
-use crate::error::Error;
-use crate::server::Garage;
-use crate::table::*;
-
-use crate::store::block_ref_table::*;
-use crate::store::version_table::*;
-
-use crate::*;
-
-pub struct Repair {
- pub garage: Arc<Garage>,
-}
-
-impl Repair {
- pub async fn repair_worker(
- &self,
- opt: RepairOpt,
- must_exit: watch::Receiver<bool>,
- ) -> Result<(), Error> {
- let todo = |x| opt.what.as_ref().map(|y| *y == x).unwrap_or(true);
-
- if todo(RepairWhat::Tables) {
- info!("Launching a full sync of tables");
- self.garage
- .bucket_table
- .syncer
- .load_full()
- .unwrap()
- .add_full_scan()
- .await;
- self.garage
- .object_table
- .syncer
- .load_full()
- .unwrap()
- .add_full_scan()
- .await;
- self.garage
- .version_table
- .syncer
- .load_full()
- .unwrap()
- .add_full_scan()
- .await;
- self.garage
- .block_ref_table
- .syncer
- .load_full()
- .unwrap()
- .add_full_scan()
- .await;
- }
-
- // TODO: wait for full sync to finish before proceeding to the rest?
-
- if todo(RepairWhat::Versions) {
- info!("Repairing the versions table");
- self.repair_versions(&must_exit).await?;
- }
-
- if todo(RepairWhat::BlockRefs) {
- info!("Repairing the block refs table");
- self.repair_block_ref(&must_exit).await?;
- }
-
- if opt.what.is_none() {
- info!("Repairing the RC");
- self.repair_rc(&must_exit).await?;
- }
-
- if todo(RepairWhat::Blocks) {
- info!("Repairing the stored blocks");
- self.garage
- .block_manager
- .repair_data_store(&must_exit)
- .await?;
- }
-
- Ok(())
- }
-
- async fn repair_versions(&self, must_exit: &watch::Receiver<bool>) -> Result<(), Error> {
- let mut pos = vec![];
-
- while let Some((item_key, item_bytes)) = self.garage.version_table.store.get_gt(&pos)? {
- pos = item_key.to_vec();
-
- let version = rmp_serde::decode::from_read_ref::<_, Version>(item_bytes.as_ref())?;
- if version.deleted {
- continue;
- }
- let object = self
- .garage
- .object_table
- .get(&version.bucket, &version.key)
- .await?;
- let version_exists = match object {
- Some(o) => o.versions().iter().any(|x| x.uuid == version.uuid),
- None => {
- warn!(
- "Repair versions: object for version {:?} not found",
- version
- );
- false
- }
- };
- if !version_exists {
- info!("Repair versions: marking version as deleted: {:?}", version);
- self.garage
- .version_table
- .insert(&Version::new(
- version.uuid,
- version.bucket,
- version.key,
- true,
- vec![],
- ))
- .await?;
- }
-
- if *must_exit.borrow() {
- break;
- }
- }
- Ok(())
- }
-
- async fn repair_block_ref(&self, must_exit: &watch::Receiver<bool>) -> Result<(), Error> {
- let mut pos = vec![];
-
- while let Some((item_key, item_bytes)) = self.garage.block_ref_table.store.get_gt(&pos)? {
- pos = item_key.to_vec();
-
- let block_ref = rmp_serde::decode::from_read_ref::<_, BlockRef>(item_bytes.as_ref())?;
- if block_ref.deleted {
- continue;
- }
- let version = self
- .garage
- .version_table
- .get(&block_ref.version, &EmptyKey)
- .await?;
- let ref_exists = match version {
- Some(v) => !v.deleted,
- None => {
- warn!(
- "Block ref repair: version for block ref {:?} not found",
- block_ref
- );
- false
- }
- };
- if !ref_exists {
- info!(
- "Repair block ref: marking block_ref as deleted: {:?}",
- block_ref
- );
- self.garage
- .block_ref_table
- .insert(&BlockRef {
- block: block_ref.block,
- version: block_ref.version,
- deleted: true,
- })
- .await?;
- }
-
- if *must_exit.borrow() {
- break;
- }
- }
- Ok(())
- }
-
- async fn repair_rc(&self, _must_exit: &watch::Receiver<bool>) -> Result<(), Error> {
- // TODO
- warn!("repair_rc: not implemented");
- Ok(())
- }
-}
diff --git a/src/store/version_table.rs b/src/store/version_table.rs
deleted file mode 100644
index 6d304cda..00000000
--- a/src/store/version_table.rs
+++ /dev/null
@@ -1,131 +0,0 @@
-use async_trait::async_trait;
-use serde::{Deserialize, Serialize};
-use std::sync::Arc;
-
-use crate::background::BackgroundRunner;
-use crate::data::*;
-use crate::error::Error;
-
-use crate::table::table_sharded::*;
-use crate::table::*;
-
-use crate::store::block_ref_table::*;
-
-#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
-pub struct Version {
- // Primary key
- pub uuid: UUID,
-
- // Actual data: the blocks for this version
- pub deleted: bool,
- blocks: Vec<VersionBlock>,
-
- // Back link to bucket+key so that we can figure if
- // this was deleted later on
- pub bucket: String,
- pub key: String,
-}
-
-impl Version {
- pub fn new(
- uuid: UUID,
- bucket: String,
- key: String,
- deleted: bool,
- blocks: Vec<VersionBlock>,
- ) -> Self {
- let mut ret = Self {
- uuid,
- deleted,
- blocks: vec![],
- bucket,
- key,
- };
- for b in blocks {
- ret.add_block(b)
- .expect("Twice the same VersionBlock in Version constructor");
- }
- ret
- }
- /// Adds a block if it wasn't already present
- pub fn add_block(&mut self, new: VersionBlock) -> Result<(), ()> {
- match self.blocks.binary_search_by(|b| b.offset.cmp(&new.offset)) {
- Err(i) => {
- self.blocks.insert(i, new);
- Ok(())
- }
- Ok(_) => Err(()),
- }
- }
- pub fn blocks(&self) -> &[VersionBlock] {
- &self.blocks[..]
- }
-}
-
-#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
-pub struct VersionBlock {
- pub offset: u64,
- pub hash: Hash,
-}
-
-impl Entry<Hash, EmptyKey> for Version {
- fn partition_key(&self) -> &Hash {
- &self.uuid
- }
- fn sort_key(&self) -> &EmptyKey {
- &EmptyKey
- }
-
- fn merge(&mut self, other: &Self) {
- if other.deleted {
- self.deleted = true;
- self.blocks.clear();
- } else if !self.deleted {
- for bi in other.blocks.iter() {
- match self.blocks.binary_search_by(|x| x.offset.cmp(&bi.offset)) {
- Ok(_) => (),
- Err(pos) => {
- self.blocks.insert(pos, bi.clone());
- }
- }
- }
- }
- }
-}
-
-pub struct VersionTable {
- pub background: Arc<BackgroundRunner>,
- pub block_ref_table: Arc<Table<BlockRefTable, TableShardedReplication>>,
-}
-
-#[async_trait]
-impl TableSchema for VersionTable {
- type P = Hash;
- type S = EmptyKey;
- type E = Version;
- type Filter = ();
-
- async fn updated(&self, old: Option<Self::E>, new: Option<Self::E>) -> Result<(), Error> {
- let block_ref_table = self.block_ref_table.clone();
- if let (Some(old_v), Some(new_v)) = (old, new) {
- // Propagate deletion of version blocks
- if new_v.deleted && !old_v.deleted {
- let deleted_block_refs = old_v
- .blocks
- .iter()
- .map(|vb| BlockRef {
- block: vb.hash,
- version: old_v.uuid,
- deleted: true,
- })
- .collect::<Vec<_>>();
- block_ref_table.insert_many(&deleted_block_refs[..]).await?;
- }
- }
- Ok(())
- }
-
- fn matches_filter(entry: &Self::E, _filter: &Self::Filter) -> bool {
- !entry.deleted
- }
-}