1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
|
use std::sync::Arc;
use garage_db as db;
use garage_util::data::*;
use garage_util::error::*;
use garage_table::crdt::*;
use garage_table::replication::TableShardedReplication;
use garage_table::*;
use crate::s3::block_ref_table::*;
mod v08 {
use garage_util::crdt;
use garage_util::data::{Hash, Uuid};
use serde::{Deserialize, Serialize};
/// A version of an object
#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
pub struct Version {
/// UUID of the version, used as partition key
pub uuid: Uuid,
// Actual data: the blocks for this version
// In the case of a multipart upload, also store the etags
// of individual parts and check them when doing CompleteMultipartUpload
/// Is this version deleted
pub deleted: crdt::Bool,
/// list of blocks of data composing the version
pub blocks: crdt::Map<VersionBlockKey, VersionBlock>,
/// Etag of each part in case of a multipart upload, empty otherwise
pub parts_etags: crdt::Map<u64, String>,
// Back link to bucket+key so that we can figure if
// this was deleted later on
/// Bucket in which the related object is stored
pub bucket_id: Uuid,
/// Key in which the related object is stored
pub key: String,
}
#[derive(PartialEq, Eq, Clone, Copy, Debug, Serialize, Deserialize)]
pub struct VersionBlockKey {
/// Number of the part
pub part_number: u64,
/// Offset of this sub-segment in its part as sent by the client
/// (before any kind of compression or encryption)
pub offset: u64,
}
/// Informations about a single block
#[derive(PartialEq, Eq, Ord, PartialOrd, Clone, Copy, Debug, Serialize, Deserialize)]
pub struct VersionBlock {
/// Blake2 sum of the block
pub hash: Hash,
/// Size of the block, before any kind of compression or encryption
pub size: u64,
}
impl garage_util::migrate::InitialFormat for Version {}
}
pub(crate) mod v09 {
use garage_util::crdt;
use garage_util::data::Uuid;
use serde::{Deserialize, Serialize};
use super::v08;
pub use v08::{VersionBlock, VersionBlockKey};
/// A version of an object
#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
pub struct Version {
/// UUID of the version, used as partition key
pub uuid: Uuid,
// Actual data: the blocks for this version
// In the case of a multipart upload, also store the etags
// of individual parts and check them when doing CompleteMultipartUpload
/// Is this version deleted
pub deleted: crdt::Bool,
/// list of blocks of data composing the version
pub blocks: crdt::Map<VersionBlockKey, VersionBlock>,
// Back link to owner of this version (either an object or a multipart
// upload), used to find whether it has been deleted and this version
// should in turn be deleted (see versions repair procedure)
pub backlink: VersionBacklink,
}
#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
pub enum VersionBacklink {
Object {
/// Bucket in which the related object is stored
bucket_id: Uuid,
/// Key in which the related object is stored
key: String,
},
MultipartUpload {
upload_id: Uuid,
},
}
impl garage_util::migrate::Migrate for Version {
const VERSION_MARKER: &'static [u8] = b"G09s3v";
type Previous = v08::Version;
fn migrate(old: v08::Version) -> Version {
Version {
uuid: old.uuid,
deleted: old.deleted,
blocks: old.blocks,
backlink: VersionBacklink::Object {
bucket_id: old.bucket_id,
key: old.key,
},
}
}
}
}
pub use v09::*;
impl Version {
pub fn new(uuid: Uuid, backlink: VersionBacklink, deleted: bool) -> Self {
Self {
uuid,
deleted: deleted.into(),
blocks: crdt::Map::new(),
backlink,
}
}
pub fn has_part_number(&self, part_number: u64) -> bool {
self.blocks
.items()
.binary_search_by(|(k, _)| k.part_number.cmp(&part_number))
.is_ok()
}
pub fn n_parts(&self) -> Result<u64, Error> {
Ok(self
.blocks
.items()
.last()
.ok_or_message("version has no parts")?
.0
.part_number)
}
}
impl Ord for VersionBlockKey {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.part_number
.cmp(&other.part_number)
.then(self.offset.cmp(&other.offset))
}
}
impl PartialOrd for VersionBlockKey {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl AutoCrdt for VersionBlock {
const WARN_IF_DIFFERENT: bool = true;
}
impl Entry<Uuid, EmptyKey> for Version {
fn partition_key(&self) -> &Uuid {
&self.uuid
}
fn sort_key(&self) -> &EmptyKey {
&EmptyKey
}
fn is_tombstone(&self) -> bool {
self.deleted.get()
}
}
impl Crdt for Version {
fn merge(&mut self, other: &Self) {
self.deleted.merge(&other.deleted);
if self.deleted.get() {
self.blocks.clear();
} else {
self.blocks.merge(&other.blocks);
}
}
}
pub struct VersionTable {
pub block_ref_table: Arc<Table<BlockRefTable, TableShardedReplication>>,
}
impl TableSchema for VersionTable {
const TABLE_NAME: &'static str = "version";
type P = Uuid;
type S = EmptyKey;
type E = Version;
type Filter = DeletedFilter;
fn updated(
&self,
tx: &mut db::Transaction,
old: Option<&Self::E>,
new: Option<&Self::E>,
) -> db::TxOpResult<()> {
if let (Some(old_v), Some(new_v)) = (old, new) {
// Propagate deletion of version blocks
if new_v.deleted.get() && !old_v.deleted.get() {
let deleted_block_refs = old_v.blocks.items().iter().map(|(_k, vb)| BlockRef {
block: vb.hash,
version: old_v.uuid,
deleted: true.into(),
});
for block_ref in deleted_block_refs {
let res = self.block_ref_table.queue_insert(tx, &block_ref);
if let Err(e) = db::unabort(res)? {
error!("Unable to enqueue block ref deletion propagation: {}. A repair will be needed.", e);
}
}
}
}
Ok(())
}
fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool {
filter.apply(entry.deleted.get())
}
}
|