aboutsummaryrefslogtreecommitdiff
path: root/src/model/s3/version_table.rs
blob: b4662a557d354380093b15cc4846ba55911af30f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
use std::sync::Arc;

use garage_db as db;

use garage_util::data::*;
use garage_util::error::*;

use garage_table::crdt::*;
use garage_table::replication::TableShardedReplication;
use garage_table::*;

use crate::s3::block_ref_table::*;

/// First serialized layout of [`Version`] defined in this file.
///
/// It is the start of the migration chain: `v08::Version` declares this
/// type as its `Previous` format.
mod v05 {
	use garage_util::crdt;
	use garage_util::data::{Hash, Uuid};
	use serde::{Deserialize, Serialize};

	/// A version of an object
	#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
	pub struct Version {
		/// UUID of the version, used as partition key
		pub uuid: Uuid,

		// Actual data: the blocks for this version
		// In the case of a multipart upload, also store the etags
		// of individual parts and check them when doing CompleteMultipartUpload
		/// Is this version deleted
		pub deleted: crdt::Bool,
		/// List of blocks of data composing the version
		pub blocks: crdt::Map<VersionBlockKey, VersionBlock>,
		/// Etag of each part in case of a multipart upload, empty otherwise
		pub parts_etags: crdt::Map<u64, String>,

		// Back link to bucket+key so that we can figure out if
		// this was deleted later on
		/// Bucket in which the related object is stored
		pub bucket: String,
		/// Key in which the related object is stored
		pub key: String,
	}

	/// Key of a block inside a version: the part it belongs to and its
	/// offset within that part.
	///
	/// `Ord`/`PartialOrd` are implemented by hand at the bottom of this file
	/// (part number first, then offset) rather than derived here.
	#[derive(PartialEq, Eq, Clone, Copy, Debug, Serialize, Deserialize)]
	pub struct VersionBlockKey {
		/// Number of the part
		pub part_number: u64,
		/// Offset of this sub-segment in its part as sent by the client
		/// (before any kind of compression or encryption)
		pub offset: u64,
	}

	/// Information about a single block
	#[derive(PartialEq, Eq, Ord, PartialOrd, Clone, Copy, Debug, Serialize, Deserialize)]
	pub struct VersionBlock {
		/// Blake2 sum of the block
		pub hash: Hash,
		/// Size of the block, before any kind of compression or encryption
		pub size: u64,
	}

	// This is the oldest format in the chain, so it has no predecessor to
	// migrate from and only needs the `InitialFormat` marker.
	impl garage_util::migrate::InitialFormat for Version {}
}

/// Second serialized layout of [`Version`].
///
/// Compared to `v05`, the back link stores a bucket identifier
/// (`bucket_id: Uuid`) instead of a bucket name (`bucket: String`).
mod v08 {
	use garage_util::crdt;
	use garage_util::data::Uuid;
	use serde::{Deserialize, Serialize};

	use super::v05;

	pub use v05::{VersionBlock, VersionBlockKey};

	/// A version of an object
	#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
	pub struct Version {
		/// UUID of the version, used as partition key
		pub uuid: Uuid,

		// Payload of the version: its blocks and, for multipart uploads,
		// the etags of the individual parts (checked again when handling
		// CompleteMultipartUpload).
		/// Is this version deleted
		pub deleted: crdt::Bool,
		/// List of blocks of data composing the version
		pub blocks: crdt::Map<VersionBlockKey, VersionBlock>,
		/// Etag of each part in case of a multipart upload, empty otherwise
		pub parts_etags: crdt::Map<u64, String>,

		// Back link to bucket + key, so that we can later find out whether
		// the owning object has been deleted.
		/// Identifier of the bucket in which the related object is stored
		pub bucket_id: Uuid,
		/// Key in which the related object is stored
		pub key: String,
	}

	impl garage_util::migrate::Migrate for Version {
		type Previous = v05::Version;

		/// Converts a `v05` entry into the `v08` layout.
		///
		/// Every field is carried over unchanged except the bucket: the new
		/// `bucket_id` is the Blake2 sum of the bytes of the old bucket name.
		fn migrate(old: v05::Version) -> Version {
			use garage_util::data::blake2sum;

			let v05::Version {
				uuid,
				deleted,
				blocks,
				parts_etags,
				bucket,
				key,
			} = old;
			let bucket_id = blake2sum(bucket.as_bytes());

			Version {
				uuid,
				deleted,
				blocks,
				parts_etags,
				bucket_id,
				key,
			}
		}
	}
}

/// Current serialized layout of [`Version`] (re-exported at file level).
///
/// Compared to `v08`, the per-part etags are gone and the bucket/key back
/// link is generalised into a [`VersionBacklink`].
pub(crate) mod v09 {
	use garage_util::crdt;
	use garage_util::data::Uuid;
	use serde::{Deserialize, Serialize};

	use super::v08;

	pub use v08::{VersionBlock, VersionBlockKey};

	/// A version of an object
	#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
	pub struct Version {
		/// UUID of the version, used as partition key
		pub uuid: Uuid,

		// Payload of the version: the blocks that compose it.
		/// Is this version deleted
		pub deleted: crdt::Bool,
		/// List of blocks of data composing the version
		pub blocks: crdt::Map<VersionBlockKey, VersionBlock>,

		/// Back link to the owner of this version (either an object or a
		/// multipart upload), used to find whether the owner has been deleted
		/// and this version should in turn be deleted (see versions repair
		/// procedure)
		pub backlink: VersionBacklink,
	}

	/// Owner of a [`Version`]: either a stored object or a multipart upload.
	#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
	pub enum VersionBacklink {
		/// The version belongs to an object
		Object {
			/// Bucket in which the related object is stored
			bucket_id: Uuid,
			/// Key in which the related object is stored
			key: String,
		},
		/// The version belongs to a multipart upload
		MultipartUpload {
			/// Identifier of the owning multipart upload
			upload_id: Uuid,
		},
	}

	impl garage_util::migrate::Migrate for Version {
		// NOTE(review): presumably a tag used to recognise this format in
		// serialized data -- confirm against `garage_util::migrate::Migrate`.
		const VERSION_MARKER: &'static [u8] = b"G09s3v";

		type Previous = v08::Version;

		/// Converts a `v08` entry into the `v09` layout.
		///
		/// The old bucket/key pair becomes a `VersionBacklink::Object`;
		/// `parts_etags` has no counterpart in this layout and is not
		/// carried over.
		fn migrate(old: v08::Version) -> Version {
			let v08::Version {
				uuid,
				deleted,
				blocks,
				bucket_id,
				key,
				..
			} = old;

			Version {
				uuid,
				deleted,
				blocks,
				backlink: VersionBacklink::Object { bucket_id, key },
			}
		}
	}
}

pub use v09::*;

impl Version {
	/// Builds a version with the given identifier and owner and an empty
	/// block list. `deleted` sets the initial value of the deletion flag.
	pub fn new(uuid: Uuid, backlink: VersionBacklink, deleted: bool) -> Self {
		let blocks = crdt::Map::new();
		Self {
			uuid,
			deleted: deleted.into(),
			blocks,
			backlink,
		}
	}

	/// Returns `true` if at least one block of this version belongs to part
	/// `part_number`.
	///
	/// The lookup is a binary search on the part number only, so it relies
	/// on `self.blocks.items()` being ordered by part number.
	pub fn has_part_number(&self, part_number: u64) -> bool {
		let lookup = self
			.blocks
			.items()
			.binary_search_by(|(key, _)| key.part_number.cmp(&part_number));
		matches!(lookup, Ok(_))
	}

	/// Returns the part number of the last block in `self.blocks.items()`.
	///
	/// # Errors
	///
	/// Fails with the message "version has no parts" when the version holds
	/// no block at all.
	pub fn n_parts(&self) -> Result<u64, Error> {
		let (last_key, _) = self
			.blocks
			.items()
			.last()
			.ok_or_message("version has no parts")?;
		Ok(last_key.part_number)
	}
}

impl Ord for VersionBlockKey {
	/// Total order on block keys: by part number first, then by offset
	/// within the part (lexicographic order on the pair).
	fn cmp(&self, other: &Self) -> std::cmp::Ordering {
		let lhs = (self.part_number, self.offset);
		let rhs = (other.part_number, other.offset);
		lhs.cmp(&rhs)
	}
}

impl PartialOrd for VersionBlockKey {
	/// Always `Some`: delegates to the total order given by `Ord::cmp`.
	fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
		let ordering = Ord::cmp(self, other);
		Some(ordering)
	}
}

// Block descriptors use the `AutoCrdt` merge behaviour.
// NOTE(review): `WARN_IF_DIFFERENT = true` presumably makes the merge emit a
// warning when two differing values meet for the same key -- confirm against
// the `AutoCrdt` trait definition.
impl AutoCrdt for VersionBlock {
	const WARN_IF_DIFFERENT: bool = true;
}

// Table entry definition for `Version`: entries are keyed by their UUID only.
impl Entry<Uuid, EmptyKey> for Version {
	/// The partition key is the version's own UUID.
	fn partition_key(&self) -> &Uuid {
		&self.uuid
	}
	/// There is no sort key: every entry uses the unit-like `EmptyKey`.
	fn sort_key(&self) -> &EmptyKey {
		&EmptyKey
	}
	/// An entry is reported as a tombstone once its `deleted` flag is set.
	fn is_tombstone(&self) -> bool {
		self.deleted.get()
	}
}

impl Crdt for Version {
	/// Merges `other` into `self`.
	///
	/// The deletion flags are merged first. If the result is "deleted", the
	/// block list is emptied and `other`'s blocks are ignored; otherwise the
	/// two block maps are merged.
	fn merge(&mut self, other: &Self) {
		self.deleted.merge(&other.deleted);

		// A deleted version keeps no blocks at all.
		if self.deleted.get() {
			self.blocks.clear();
			return;
		}

		self.blocks.merge(&other.blocks);
	}
}

/// Schema type of the `version` table (see its `TableSchema` impl).
pub struct VersionTable {
	/// Handle to the block reference table; `updated` enqueues deleted
	/// block references into it when a version becomes deleted.
	pub block_ref_table: Arc<Table<BlockRefTable, TableShardedReplication>>,
}

impl TableSchema for VersionTable {
	const TABLE_NAME: &'static str = "version";

	type P = Uuid;
	type S = EmptyKey;
	type E = Version;
	type Filter = DeletedFilter;

	/// Reacts to a change of a version entry, given its previous and new
	/// values (either may be absent).
	///
	/// When both values exist and the entry goes from "not deleted" to
	/// "deleted", a deleted `BlockRef` is enqueued in the block reference
	/// table for every block of the previous value. In all other cases
	/// nothing is done.
	///
	/// # Errors
	///
	/// The outer error returned by `db::unabort` is propagated; the inner
	/// error of a failed enqueue is only logged.
	fn updated(
		&self,
		tx: &mut db::Transaction,
		old: Option<&Self::E>,
		new: Option<&Self::E>,
	) -> db::TxOpResult<()> {
		// Nothing to propagate unless we have both sides of the update.
		let (old_v, new_v) = match (old, new) {
			(Some(prev), Some(next)) => (prev, next),
			_ => return Ok(()),
		};

		// Only act on the transition from live to deleted.
		if !new_v.deleted.get() || old_v.deleted.get() {
			return Ok(());
		}

		// Propagate deletion of version blocks
		for (_key, vb) in old_v.blocks.items().iter() {
			let block_ref = BlockRef {
				block: vb.hash,
				version: old_v.uuid,
				deleted: true.into(),
			};
			let res = self.block_ref_table.queue_insert(tx, &block_ref);
			if let Err(e) = db::unabort(res)? {
				error!("Unable to enqueue block ref deletion propagation: {}. A repair will be needed.", e);
			}
		}

		Ok(())
	}

	/// Applies `filter` to the entry's deletion flag.
	fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool {
		let is_deleted = entry.deleted.get();
		filter.apply(is_deleted)
	}
}