diff options
author | Alex Auvolat <alex@adnab.me> | 2023-01-04 11:34:43 +0100 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2023-01-04 11:34:43 +0100 |
commit | 570e5e5bbb7a3eac41350db9433e28ed289b97f4 (patch) | |
tree | a7fc299ba180098be5a3bef28a39256870ce697b /src/model/s3/object_table.rs | |
parent | 6e44369cbc810b8912ca0f7f5fd293e87f10c851 (diff) | |
parent | 4eb8ca3a528dae2848141f5cc3eb607eb7d40114 (diff) | |
download | garage-570e5e5bbb7a3eac41350db9433e28ed289b97f4.tar.gz garage-570e5e5bbb7a3eac41350db9433e28ed289b97f4.zip |
Merge branch 'main' into next
Diffstat (limited to 'src/model/s3/object_table.rs')
-rw-r--r-- | src/model/s3/object_table.rs | 296 |
1 files changed, 137 insertions, 159 deletions
diff --git a/src/model/s3/object_table.rs b/src/model/s3/object_table.rs index 26ff57f6..518acc95 100644 --- a/src/model/s3/object_table.rs +++ b/src/model/s3/object_table.rs @@ -1,10 +1,8 @@ use serde::{Deserialize, Serialize}; -use std::collections::BTreeMap; use std::sync::Arc; use garage_db as db; -use garage_util::background::BackgroundRunner; use garage_util::data::*; use garage_table::crdt::*; @@ -14,25 +12,126 @@ use garage_table::*; use crate::index_counter::*; use crate::s3::version_table::*; -use crate::prev::v051::object_table as old; - pub const OBJECTS: &str = "objects"; pub const UNFINISHED_UPLOADS: &str = "unfinished_uploads"; pub const BYTES: &str = "bytes"; -/// An object -#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] -pub struct Object { - /// The bucket in which the object is stored, used as partition key - pub bucket_id: Uuid, +mod v05 { + use garage_util::data::{Hash, Uuid}; + use serde::{Deserialize, Serialize}; + use std::collections::BTreeMap; + + /// An object + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct Object { + /// The bucket in which the object is stored, used as partition key + pub bucket: String, + + /// The key at which the object is stored in its bucket, used as sorting key + pub key: String, + + /// The list of currenty stored versions of the object + pub(super) versions: Vec<ObjectVersion>, + } + + /// Informations about a version of an object + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct ObjectVersion { + /// Id of the version + pub uuid: Uuid, + /// Timestamp of when the object was created + pub timestamp: u64, + /// State of the version + pub state: ObjectVersionState, + } + + /// State of an object version + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub enum ObjectVersionState { + /// The version is being received + Uploading(ObjectVersionHeaders), + /// The version is fully received + Complete(ObjectVersionData), + /// The version uploaded containded errors or the upload was explicitly aborted + Aborted, + } + + /// Data stored in object version + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)] + pub enum ObjectVersionData { + /// The object was deleted, this Version is a tombstone to mark it as such + DeleteMarker, + /// The object is short, it's stored inlined + Inline(ObjectVersionMeta, #[serde(with = "serde_bytes")] Vec<u8>), + /// The object is not short, Hash of first block is stored here, next segments hashes are + /// stored in the version table + FirstBlock(ObjectVersionMeta, Hash), + } + + /// Metadata about the object version + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)] + pub struct ObjectVersionMeta { + /// Headers to send to the client + pub headers: ObjectVersionHeaders, + /// Size of the object + pub size: u64, + /// etag of the object + pub etag: String, + } - /// The key at which the object is stored in its bucket, used as sorting key - pub key: String, + /// Additional headers for an object + #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)] + pub struct ObjectVersionHeaders { + /// Content type of the object + pub content_type: String, + /// Any other http headers to send + pub other: BTreeMap<String, String>, + } - /// The list of currenty stored versions of the object - versions: Vec<ObjectVersion>, + impl garage_util::migrate::InitialFormat for Object {} } +mod v08 { + use garage_util::data::Uuid; + use serde::{Deserialize, Serialize}; + + use super::v05; + + pub use v05::{ + ObjectVersion, ObjectVersionData, ObjectVersionHeaders, ObjectVersionMeta, + ObjectVersionState, + }; + + /// An object + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct Object { + /// The bucket in which the object is stored, used as partition key + pub bucket_id: Uuid, + + /// The key at which the object is stored in its bucket, used as sorting key + pub key: String, + + /// The list of currenty stored versions of the object + pub(super) versions: Vec<ObjectVersion>, + } + + impl garage_util::migrate::Migrate for Object { + type Previous = v05::Object; + + fn migrate(old: v05::Object) -> Object { + use garage_util::data::blake2sum; + + Object { + bucket_id: blake2sum(old.bucket.as_bytes()), + key: old.key, + versions: old.versions, + } + } + } +} + +pub use v08::*; + impl Object { /// Initialize an Object struct from parts pub fn new(bucket_id: Uuid, key: String, versions: Vec<ObjectVersion>) -> Self { @@ -69,28 +168,6 @@ impl Object { } } -/// Informations about a version of an object -#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] -pub struct ObjectVersion { - /// Id of the version - pub uuid: Uuid, - /// Timestamp of when the object was created - pub timestamp: u64, - /// State of the version - pub state: ObjectVersionState, -} - -/// State of an object version -#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] -pub enum ObjectVersionState { - /// The version is being received - Uploading(ObjectVersionHeaders), - /// The version is fully received - Complete(ObjectVersionData), - /// The version uploaded containded errors or the upload was explicitly aborted - Aborted, -} - impl Crdt for ObjectVersionState { fn merge(&mut self, other: &Self) { use ObjectVersionState::*; @@ -112,42 +189,10 @@ impl Crdt for ObjectVersionState { } } -/// Data stored in object version -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)] -pub enum ObjectVersionData { - /// The object was deleted, this Version is a tombstone to mark it as such - DeleteMarker, - /// The object is short, it's stored inlined - Inline(ObjectVersionMeta, #[serde(with = "serde_bytes")] Vec<u8>), - /// The object is not short, Hash of first block is stored here, next segments hashes are - /// stored in the version table - FirstBlock(ObjectVersionMeta, Hash), -} - impl AutoCrdt for ObjectVersionData { const WARN_IF_DIFFERENT: bool = true; } -/// Metadata about the object version -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)] -pub struct ObjectVersionMeta { - /// Headers to send to the client - pub headers: ObjectVersionHeaders, - /// Size of the object - pub size: u64, - /// etag of the object - pub etag: String, -} - -/// Additional headers for an object -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)] -pub struct ObjectVersionHeaders { - /// Content type of the object - pub content_type: String, - /// Any other http headers to send - pub other: BTreeMap<String, String>, -} - impl ObjectVersion { fn cmp_key(&self) -> (u64, Uuid) { (self.timestamp, self.uuid) @@ -221,7 +266,6 @@ impl Crdt for Object { } pub struct ObjectTable { - pub background: Arc<BackgroundRunner>, pub version_table: Arc<Table<VersionTable, TableShardedReplication>>, pub object_counter_table: Arc<IndexCounter<Object>>, } @@ -255,34 +299,34 @@ impl TableSchema for ObjectTable { ); } - // 2. Spawn threads that propagates deletions to version table - let version_table = self.version_table.clone(); - let old = old.cloned(); - let new = new.cloned(); - - self.background.spawn(async move { - if let (Some(old_v), Some(new_v)) = (old, new) { - // Propagate deletion of old versions - for v in old_v.versions.iter() { - let newly_deleted = match new_v - .versions - .binary_search_by(|nv| nv.cmp_key().cmp(&v.cmp_key())) - { - Err(_) => true, - Ok(i) => { - new_v.versions[i].state == ObjectVersionState::Aborted - && v.state != ObjectVersionState::Aborted - } - }; - if newly_deleted { - let deleted_version = - Version::new(v.uuid, old_v.bucket_id, old_v.key.clone(), true); - version_table.insert(&deleted_version).await?; + // 2. Enqueue propagation deletions to version table + if let (Some(old_v), Some(new_v)) = (old, new) { + // Propagate deletion of old versions + for v in old_v.versions.iter() { + let newly_deleted = match new_v + .versions + .binary_search_by(|nv| nv.cmp_key().cmp(&v.cmp_key())) + { + Err(_) => true, + Ok(i) => { + new_v.versions[i].state == ObjectVersionState::Aborted + && v.state != ObjectVersionState::Aborted + } + }; + if newly_deleted { + let deleted_version = + Version::new(v.uuid, old_v.bucket_id, old_v.key.clone(), true); + let res = self.version_table.queue_insert(tx, &deleted_version); + if let Err(e) = db::unabort(res)? { + error!( + "Unable to enqueue version deletion propagation: {}. A repair will be needed.", + e + ); } } } - Ok(()) - }); + } + Ok(()) } @@ -292,11 +336,6 @@ impl TableSchema for ObjectTable { ObjectFilter::IsUploading => entry.versions.iter().any(|v| v.is_uploading()), } } - - fn try_migrate(bytes: &[u8]) -> Option<Self::E> { - let old_obj = rmp_serde::decode::from_read_ref::<_, old::Object>(bytes).ok()?; - Some(migrate_object(old_obj)) - } } impl CountedItem for Object { @@ -341,64 +380,3 @@ impl CountedItem for Object { ] } } - -// vvvvvvvv migration code, stupid stuff vvvvvvvvvvvv -// (we just want to change bucket into bucket_id by hashing it) - -fn migrate_object(o: old::Object) -> Object { - let versions = o - .versions() - .iter() - .cloned() - .map(migrate_object_version) - .collect(); - Object { - bucket_id: blake2sum(o.bucket.as_bytes()), - key: o.key, - versions, - } -} - -fn migrate_object_version(v: old::ObjectVersion) -> ObjectVersion { - ObjectVersion { - uuid: Uuid::try_from(v.uuid.as_slice()).unwrap(), - timestamp: v.timestamp, - state: match v.state { - old::ObjectVersionState::Uploading(h) => { - ObjectVersionState::Uploading(migrate_object_version_headers(h)) - } - old::ObjectVersionState::Complete(d) => { - ObjectVersionState::Complete(migrate_object_version_data(d)) - } - old::ObjectVersionState::Aborted => ObjectVersionState::Aborted, - }, - } -} - -fn migrate_object_version_headers(h: old::ObjectVersionHeaders) -> ObjectVersionHeaders { - ObjectVersionHeaders { - content_type: h.content_type, - other: h.other, - } -} - -fn migrate_object_version_data(d: old::ObjectVersionData) -> ObjectVersionData { - match d { - old::ObjectVersionData::DeleteMarker => ObjectVersionData::DeleteMarker, - old::ObjectVersionData::Inline(m, b) => { - ObjectVersionData::Inline(migrate_object_version_meta(m), b) - } - old::ObjectVersionData::FirstBlock(m, h) => ObjectVersionData::FirstBlock( - migrate_object_version_meta(m), - Hash::try_from(h.as_slice()).unwrap(), - ), - } -} - -fn migrate_object_version_meta(m: old::ObjectVersionMeta) -> ObjectVersionMeta { - ObjectVersionMeta { - headers: migrate_object_version_headers(m.headers), - size: m.size, - etag: m.etag, - } -} |