aboutsummaryrefslogtreecommitdiff
path: root/src/model/s3
diff options
context:
space:
mode:
authorAlex Auvolat <alex@adnab.me>2023-01-04 11:34:43 +0100
committerAlex Auvolat <alex@adnab.me>2023-01-04 11:34:43 +0100
commit570e5e5bbb7a3eac41350db9433e28ed289b97f4 (patch)
treea7fc299ba180098be5a3bef28a39256870ce697b /src/model/s3
parent6e44369cbc810b8912ca0f7f5fd293e87f10c851 (diff)
parent4eb8ca3a528dae2848141f5cc3eb607eb7d40114 (diff)
downloadgarage-570e5e5bbb7a3eac41350db9433e28ed289b97f4.tar.gz
garage-570e5e5bbb7a3eac41350db9433e28ed289b97f4.zip
Merge branch 'main' into next
Diffstat (limited to 'src/model/s3')
-rw-r--r--src/model/s3/block_ref_table.rs29
-rw-r--r--src/model/s3/object_table.rs296
-rw-r--r--src/model/s3/version_table.rs219
3 files changed, 271 insertions, 273 deletions
diff --git a/src/model/s3/block_ref_table.rs b/src/model/s3/block_ref_table.rs
index c7017409..7b023d87 100644
--- a/src/model/s3/block_ref_table.rs
+++ b/src/model/s3/block_ref_table.rs
@@ -1,4 +1,3 @@
-use serde::{Deserialize, Serialize};
use std::sync::Arc;
use garage_db as db;
@@ -10,19 +9,29 @@ use garage_table::*;
use garage_block::manager::*;
-#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
-pub struct BlockRef {
- /// Hash (blake2 sum) of the block, used as partition key
- pub block: Hash,
+mod v08 {
+ use garage_util::crdt;
+ use garage_util::data::{Hash, Uuid};
+ use serde::{Deserialize, Serialize};
- /// Id of the Version for the object containing this block, used as sorting key
- pub version: Uuid,
+ #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
+ pub struct BlockRef {
+ /// Hash (blake2 sum) of the block, used as partition key
+ pub block: Hash,
- // Keep track of deleted status
- /// Is the Version that contains this block deleted
- pub deleted: crdt::Bool,
+ /// Id of the Version for the object containing this block, used as sorting key
+ pub version: Uuid,
+
+ // Keep track of deleted status
+ /// Is the Version that contains this block deleted
+ pub deleted: crdt::Bool,
+ }
+
+ impl garage_util::migrate::InitialFormat for BlockRef {}
}
+pub use v08::*;
+
impl Entry<Hash, Uuid> for BlockRef {
fn partition_key(&self) -> &Hash {
&self.block
diff --git a/src/model/s3/object_table.rs b/src/model/s3/object_table.rs
index 26ff57f6..518acc95 100644
--- a/src/model/s3/object_table.rs
+++ b/src/model/s3/object_table.rs
@@ -1,10 +1,8 @@
use serde::{Deserialize, Serialize};
-use std::collections::BTreeMap;
use std::sync::Arc;
use garage_db as db;
-use garage_util::background::BackgroundRunner;
use garage_util::data::*;
use garage_table::crdt::*;
@@ -14,25 +12,126 @@ use garage_table::*;
use crate::index_counter::*;
use crate::s3::version_table::*;
-use crate::prev::v051::object_table as old;
-
pub const OBJECTS: &str = "objects";
pub const UNFINISHED_UPLOADS: &str = "unfinished_uploads";
pub const BYTES: &str = "bytes";
-/// An object
-#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
-pub struct Object {
- /// The bucket in which the object is stored, used as partition key
- pub bucket_id: Uuid,
+mod v05 {
+ use garage_util::data::{Hash, Uuid};
+ use serde::{Deserialize, Serialize};
+ use std::collections::BTreeMap;
+
+ /// An object
+ #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
+ pub struct Object {
+ /// The bucket in which the object is stored, used as partition key
+ pub bucket: String,
+
+ /// The key at which the object is stored in its bucket, used as sorting key
+ pub key: String,
+
+ /// The list of currenty stored versions of the object
+ pub(super) versions: Vec<ObjectVersion>,
+ }
+
+ /// Informations about a version of an object
+ #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
+ pub struct ObjectVersion {
+ /// Id of the version
+ pub uuid: Uuid,
+ /// Timestamp of when the object was created
+ pub timestamp: u64,
+ /// State of the version
+ pub state: ObjectVersionState,
+ }
+
+ /// State of an object version
+ #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
+ pub enum ObjectVersionState {
+ /// The version is being received
+ Uploading(ObjectVersionHeaders),
+ /// The version is fully received
+ Complete(ObjectVersionData),
+ /// The version uploaded containded errors or the upload was explicitly aborted
+ Aborted,
+ }
+
+ /// Data stored in object version
+ #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
+ pub enum ObjectVersionData {
+ /// The object was deleted, this Version is a tombstone to mark it as such
+ DeleteMarker,
+ /// The object is short, it's stored inlined
+ Inline(ObjectVersionMeta, #[serde(with = "serde_bytes")] Vec<u8>),
+ /// The object is not short, Hash of first block is stored here, next segments hashes are
+ /// stored in the version table
+ FirstBlock(ObjectVersionMeta, Hash),
+ }
+
+ /// Metadata about the object version
+ #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
+ pub struct ObjectVersionMeta {
+ /// Headers to send to the client
+ pub headers: ObjectVersionHeaders,
+ /// Size of the object
+ pub size: u64,
+ /// etag of the object
+ pub etag: String,
+ }
- /// The key at which the object is stored in its bucket, used as sorting key
- pub key: String,
+ /// Additional headers for an object
+ #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
+ pub struct ObjectVersionHeaders {
+ /// Content type of the object
+ pub content_type: String,
+ /// Any other http headers to send
+ pub other: BTreeMap<String, String>,
+ }
- /// The list of currenty stored versions of the object
- versions: Vec<ObjectVersion>,
+ impl garage_util::migrate::InitialFormat for Object {}
}
+mod v08 {
+ use garage_util::data::Uuid;
+ use serde::{Deserialize, Serialize};
+
+ use super::v05;
+
+ pub use v05::{
+ ObjectVersion, ObjectVersionData, ObjectVersionHeaders, ObjectVersionMeta,
+ ObjectVersionState,
+ };
+
+ /// An object
+ #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
+ pub struct Object {
+ /// The bucket in which the object is stored, used as partition key
+ pub bucket_id: Uuid,
+
+ /// The key at which the object is stored in its bucket, used as sorting key
+ pub key: String,
+
+ /// The list of currenty stored versions of the object
+ pub(super) versions: Vec<ObjectVersion>,
+ }
+
+ impl garage_util::migrate::Migrate for Object {
+ type Previous = v05::Object;
+
+ fn migrate(old: v05::Object) -> Object {
+ use garage_util::data::blake2sum;
+
+ Object {
+ bucket_id: blake2sum(old.bucket.as_bytes()),
+ key: old.key,
+ versions: old.versions,
+ }
+ }
+ }
+}
+
+pub use v08::*;
+
impl Object {
/// Initialize an Object struct from parts
pub fn new(bucket_id: Uuid, key: String, versions: Vec<ObjectVersion>) -> Self {
@@ -69,28 +168,6 @@ impl Object {
}
}
-/// Informations about a version of an object
-#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
-pub struct ObjectVersion {
- /// Id of the version
- pub uuid: Uuid,
- /// Timestamp of when the object was created
- pub timestamp: u64,
- /// State of the version
- pub state: ObjectVersionState,
-}
-
-/// State of an object version
-#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
-pub enum ObjectVersionState {
- /// The version is being received
- Uploading(ObjectVersionHeaders),
- /// The version is fully received
- Complete(ObjectVersionData),
- /// The version uploaded containded errors or the upload was explicitly aborted
- Aborted,
-}
-
impl Crdt for ObjectVersionState {
fn merge(&mut self, other: &Self) {
use ObjectVersionState::*;
@@ -112,42 +189,10 @@ impl Crdt for ObjectVersionState {
}
}
-/// Data stored in object version
-#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
-pub enum ObjectVersionData {
- /// The object was deleted, this Version is a tombstone to mark it as such
- DeleteMarker,
- /// The object is short, it's stored inlined
- Inline(ObjectVersionMeta, #[serde(with = "serde_bytes")] Vec<u8>),
- /// The object is not short, Hash of first block is stored here, next segments hashes are
- /// stored in the version table
- FirstBlock(ObjectVersionMeta, Hash),
-}
-
impl AutoCrdt for ObjectVersionData {
const WARN_IF_DIFFERENT: bool = true;
}
-/// Metadata about the object version
-#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
-pub struct ObjectVersionMeta {
- /// Headers to send to the client
- pub headers: ObjectVersionHeaders,
- /// Size of the object
- pub size: u64,
- /// etag of the object
- pub etag: String,
-}
-
-/// Additional headers for an object
-#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
-pub struct ObjectVersionHeaders {
- /// Content type of the object
- pub content_type: String,
- /// Any other http headers to send
- pub other: BTreeMap<String, String>,
-}
-
impl ObjectVersion {
fn cmp_key(&self) -> (u64, Uuid) {
(self.timestamp, self.uuid)
@@ -221,7 +266,6 @@ impl Crdt for Object {
}
pub struct ObjectTable {
- pub background: Arc<BackgroundRunner>,
pub version_table: Arc<Table<VersionTable, TableShardedReplication>>,
pub object_counter_table: Arc<IndexCounter<Object>>,
}
@@ -255,34 +299,34 @@ impl TableSchema for ObjectTable {
);
}
- // 2. Spawn threads that propagates deletions to version table
- let version_table = self.version_table.clone();
- let old = old.cloned();
- let new = new.cloned();
-
- self.background.spawn(async move {
- if let (Some(old_v), Some(new_v)) = (old, new) {
- // Propagate deletion of old versions
- for v in old_v.versions.iter() {
- let newly_deleted = match new_v
- .versions
- .binary_search_by(|nv| nv.cmp_key().cmp(&v.cmp_key()))
- {
- Err(_) => true,
- Ok(i) => {
- new_v.versions[i].state == ObjectVersionState::Aborted
- && v.state != ObjectVersionState::Aborted
- }
- };
- if newly_deleted {
- let deleted_version =
- Version::new(v.uuid, old_v.bucket_id, old_v.key.clone(), true);
- version_table.insert(&deleted_version).await?;
+ // 2. Enqueue propagation deletions to version table
+ if let (Some(old_v), Some(new_v)) = (old, new) {
+ // Propagate deletion of old versions
+ for v in old_v.versions.iter() {
+ let newly_deleted = match new_v
+ .versions
+ .binary_search_by(|nv| nv.cmp_key().cmp(&v.cmp_key()))
+ {
+ Err(_) => true,
+ Ok(i) => {
+ new_v.versions[i].state == ObjectVersionState::Aborted
+ && v.state != ObjectVersionState::Aborted
+ }
+ };
+ if newly_deleted {
+ let deleted_version =
+ Version::new(v.uuid, old_v.bucket_id, old_v.key.clone(), true);
+ let res = self.version_table.queue_insert(tx, &deleted_version);
+ if let Err(e) = db::unabort(res)? {
+ error!(
+ "Unable to enqueue version deletion propagation: {}. A repair will be needed.",
+ e
+ );
}
}
}
- Ok(())
- });
+ }
+
Ok(())
}
@@ -292,11 +336,6 @@ impl TableSchema for ObjectTable {
ObjectFilter::IsUploading => entry.versions.iter().any(|v| v.is_uploading()),
}
}
-
- fn try_migrate(bytes: &[u8]) -> Option<Self::E> {
- let old_obj = rmp_serde::decode::from_read_ref::<_, old::Object>(bytes).ok()?;
- Some(migrate_object(old_obj))
- }
}
impl CountedItem for Object {
@@ -341,64 +380,3 @@ impl CountedItem for Object {
]
}
}
-
-// vvvvvvvv migration code, stupid stuff vvvvvvvvvvvv
-// (we just want to change bucket into bucket_id by hashing it)
-
-fn migrate_object(o: old::Object) -> Object {
- let versions = o
- .versions()
- .iter()
- .cloned()
- .map(migrate_object_version)
- .collect();
- Object {
- bucket_id: blake2sum(o.bucket.as_bytes()),
- key: o.key,
- versions,
- }
-}
-
-fn migrate_object_version(v: old::ObjectVersion) -> ObjectVersion {
- ObjectVersion {
- uuid: Uuid::try_from(v.uuid.as_slice()).unwrap(),
- timestamp: v.timestamp,
- state: match v.state {
- old::ObjectVersionState::Uploading(h) => {
- ObjectVersionState::Uploading(migrate_object_version_headers(h))
- }
- old::ObjectVersionState::Complete(d) => {
- ObjectVersionState::Complete(migrate_object_version_data(d))
- }
- old::ObjectVersionState::Aborted => ObjectVersionState::Aborted,
- },
- }
-}
-
-fn migrate_object_version_headers(h: old::ObjectVersionHeaders) -> ObjectVersionHeaders {
- ObjectVersionHeaders {
- content_type: h.content_type,
- other: h.other,
- }
-}
-
-fn migrate_object_version_data(d: old::ObjectVersionData) -> ObjectVersionData {
- match d {
- old::ObjectVersionData::DeleteMarker => ObjectVersionData::DeleteMarker,
- old::ObjectVersionData::Inline(m, b) => {
- ObjectVersionData::Inline(migrate_object_version_meta(m), b)
- }
- old::ObjectVersionData::FirstBlock(m, h) => ObjectVersionData::FirstBlock(
- migrate_object_version_meta(m),
- Hash::try_from(h.as_slice()).unwrap(),
- ),
- }
-}
-
-fn migrate_object_version_meta(m: old::ObjectVersionMeta) -> ObjectVersionMeta {
- ObjectVersionMeta {
- headers: migrate_object_version_headers(m.headers),
- size: m.size,
- etag: m.etag,
- }
-}
diff --git a/src/model/s3/version_table.rs b/src/model/s3/version_table.rs
index 6bc2ecd1..6edc83f4 100644
--- a/src/model/s3/version_table.rs
+++ b/src/model/s3/version_table.rs
@@ -1,9 +1,7 @@
-use serde::{Deserialize, Serialize};
use std::sync::Arc;
use garage_db as db;
-use garage_util::background::BackgroundRunner;
use garage_util::data::*;
use garage_table::crdt::*;
@@ -12,32 +10,108 @@ use garage_table::*;
use crate::s3::block_ref_table::*;
-use crate::prev::v051::version_table as old;
-
-/// A version of an object
-#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
-pub struct Version {
- /// UUID of the version, used as partition key
- pub uuid: Uuid,
-
- // Actual data: the blocks for this version
- // In the case of a multipart upload, also store the etags
- // of individual parts and check them when doing CompleteMultipartUpload
- /// Is this version deleted
- pub deleted: crdt::Bool,
- /// list of blocks of data composing the version
- pub blocks: crdt::Map<VersionBlockKey, VersionBlock>,
- /// Etag of each part in case of a multipart upload, empty otherwise
- pub parts_etags: crdt::Map<u64, String>,
-
- // Back link to bucket+key so that we can figure if
- // this was deleted later on
- /// Bucket in which the related object is stored
- pub bucket_id: Uuid,
- /// Key in which the related object is stored
- pub key: String,
+mod v05 {
+ use garage_util::crdt;
+ use garage_util::data::{Hash, Uuid};
+ use serde::{Deserialize, Serialize};
+
+ /// A version of an object
+ #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
+ pub struct Version {
+ /// UUID of the version, used as partition key
+ pub uuid: Uuid,
+
+ // Actual data: the blocks for this version
+ // In the case of a multipart upload, also store the etags
+ // of individual parts and check them when doing CompleteMultipartUpload
+ /// Is this version deleted
+ pub deleted: crdt::Bool,
+ /// list of blocks of data composing the version
+ pub blocks: crdt::Map<VersionBlockKey, VersionBlock>,
+ /// Etag of each part in case of a multipart upload, empty otherwise
+ pub parts_etags: crdt::Map<u64, String>,
+
+ // Back link to bucket+key so that we can figure if
+ // this was deleted later on
+ /// Bucket in which the related object is stored
+ pub bucket: String,
+ /// Key in which the related object is stored
+ pub key: String,
+ }
+
+ #[derive(PartialEq, Eq, Clone, Copy, Debug, Serialize, Deserialize)]
+ pub struct VersionBlockKey {
+ /// Number of the part
+ pub part_number: u64,
+ /// Offset of this sub-segment in its part
+ pub offset: u64,
+ }
+
+ /// Informations about a single block
+ #[derive(PartialEq, Eq, Ord, PartialOrd, Clone, Copy, Debug, Serialize, Deserialize)]
+ pub struct VersionBlock {
+ /// Blake2 sum of the block
+ pub hash: Hash,
+ /// Size of the block
+ pub size: u64,
+ }
+
+ impl garage_util::migrate::InitialFormat for Version {}
}
+mod v08 {
+ use garage_util::crdt;
+ use garage_util::data::Uuid;
+ use serde::{Deserialize, Serialize};
+
+ use super::v05;
+
+ /// A version of an object
+ #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
+ pub struct Version {
+ /// UUID of the version, used as partition key
+ pub uuid: Uuid,
+
+ // Actual data: the blocks for this version
+ // In the case of a multipart upload, also store the etags
+ // of individual parts and check them when doing CompleteMultipartUpload
+ /// Is this version deleted
+ pub deleted: crdt::Bool,
+ /// list of blocks of data composing the version
+ pub blocks: crdt::Map<VersionBlockKey, VersionBlock>,
+ /// Etag of each part in case of a multipart upload, empty otherwise
+ pub parts_etags: crdt::Map<u64, String>,
+
+ // Back link to bucket+key so that we can figure if
+ // this was deleted later on
+ /// Bucket in which the related object is stored
+ pub bucket_id: Uuid,
+ /// Key in which the related object is stored
+ pub key: String,
+ }
+
+ pub use v05::{VersionBlock, VersionBlockKey};
+
+ impl garage_util::migrate::Migrate for Version {
+ type Previous = v05::Version;
+
+ fn migrate(old: v05::Version) -> Version {
+ use garage_util::data::blake2sum;
+
+ Version {
+ uuid: old.uuid,
+ deleted: old.deleted,
+ blocks: old.blocks,
+ parts_etags: old.parts_etags,
+ bucket_id: blake2sum(old.bucket.as_bytes()),
+ key: old.key,
+ }
+ }
+ }
+}
+
+pub use v08::*;
+
impl Version {
pub fn new(uuid: Uuid, bucket_id: Uuid, key: String, deleted: bool) -> Self {
Self {
@@ -65,14 +139,6 @@ impl Version {
}
}
-#[derive(PartialEq, Eq, Clone, Copy, Debug, Serialize, Deserialize)]
-pub struct VersionBlockKey {
- /// Number of the part
- pub part_number: u64,
- /// Offset of this sub-segment in its part
- pub offset: u64,
-}
-
impl Ord for VersionBlockKey {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.part_number
@@ -87,15 +153,6 @@ impl PartialOrd for VersionBlockKey {
}
}
-/// Informations about a single block
-#[derive(PartialEq, Eq, Ord, PartialOrd, Clone, Copy, Debug, Serialize, Deserialize)]
-pub struct VersionBlock {
- /// Blake2 sum of the block
- pub hash: Hash,
- /// Size of the block
- pub size: u64,
-}
-
impl AutoCrdt for VersionBlock {
const WARN_IF_DIFFERENT: bool = true;
}
@@ -127,7 +184,6 @@ impl Crdt for Version {
}
pub struct VersionTable {
- pub background: Arc<BackgroundRunner>,
pub block_ref_table: Arc<Table<BlockRefTable, TableShardedReplication>>,
}
@@ -141,33 +197,26 @@ impl TableSchema for VersionTable {
fn updated(
&self,
- _tx: &mut db::Transaction,
+ tx: &mut db::Transaction,
old: Option<&Self::E>,
new: Option<&Self::E>,
) -> db::TxOpResult<()> {
- let block_ref_table = self.block_ref_table.clone();
- let old = old.cloned();
- let new = new.cloned();
-
- self.background.spawn(async move {
- if let (Some(old_v), Some(new_v)) = (old, new) {
- // Propagate deletion of version blocks
- if new_v.deleted.get() && !old_v.deleted.get() {
- let deleted_block_refs = old_v
- .blocks
- .items()
- .iter()
- .map(|(_k, vb)| BlockRef {
- block: vb.hash,
- version: old_v.uuid,
- deleted: true.into(),
- })
- .collect::<Vec<_>>();
- block_ref_table.insert_many(&deleted_block_refs[..]).await?;
+ if let (Some(old_v), Some(new_v)) = (old, new) {
+ // Propagate deletion of version blocks
+ if new_v.deleted.get() && !old_v.deleted.get() {
+ let deleted_block_refs = old_v.blocks.items().iter().map(|(_k, vb)| BlockRef {
+ block: vb.hash,
+ version: old_v.uuid,
+ deleted: true.into(),
+ });
+ for block_ref in deleted_block_refs {
+ let res = self.block_ref_table.queue_insert(tx, &block_ref);
+ if let Err(e) = db::unabort(res)? {
+ error!("Unable to enqueue block ref deletion propagation: {}. A repair will be needed.", e);
+ }
}
}
- Ok(())
- });
+ }
Ok(())
}
@@ -175,42 +224,4 @@ impl TableSchema for VersionTable {
fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool {
filter.apply(entry.deleted.get())
}
-
- fn try_migrate(bytes: &[u8]) -> Option<Self::E> {
- let old = rmp_serde::decode::from_read_ref::<_, old::Version>(bytes).ok()?;
-
- let blocks = old
- .blocks
- .items()
- .iter()
- .map(|(k, v)| {
- (
- VersionBlockKey {
- part_number: k.part_number,
- offset: k.offset,
- },
- VersionBlock {
- hash: Hash::try_from(v.hash.as_slice()).unwrap(),
- size: v.size,
- },
- )
- })
- .collect::<crdt::Map<_, _>>();
-
- let parts_etags = old
- .parts_etags
- .items()
- .iter()
- .map(|(k, v)| (*k, v.clone()))
- .collect::<crdt::Map<_, _>>();
-
- Some(Version {
- uuid: Hash::try_from(old.uuid.as_slice()).unwrap(),
- deleted: crdt::Bool::new(old.deleted.get()),
- blocks,
- parts_etags,
- bucket_id: blake2sum(old.bucket.as_bytes()),
- key: old.key,
- })
- }
}