// path: root/src/model/s3/version_table.rs
// blob: 0cfaa954723410206e8f0577ccbcf32daf5173bd
use serde::{Deserialize, Serialize};
use std::sync::Arc;

use garage_db as db;

use garage_util::background::BackgroundRunner;
use garage_util::data::*;

use garage_table::crdt::*;
use garage_table::replication::TableShardedReplication;
use garage_table::*;

use crate::s3::block_ref_table::*;

use crate::prev::v051::version_table as old;

/// A version of an object
///
/// A version is identified by its own `uuid` (which is also its partition
/// key in the table) and lists the data blocks that make up its content.
/// Once `deleted` is set, merging empties `blocks` and `parts_etags`
/// (see the `Crdt` implementation for `Version` in this file).
#[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)]
pub struct Version {
	// NOTE(review): this struct derives Serialize/Deserialize; the field order
	// is presumably part of the stored format — confirm before reordering.

	/// UUID of the version, used as partition key
	pub uuid: Uuid,

	// Actual data: the blocks for this version.
	// In the case of a multipart upload, the etags of the individual parts
	// are stored as well, so that they can be checked when doing
	// CompleteMultipartUpload.
	/// Is this version deleted
	pub deleted: crdt::Bool,
	/// List of blocks of data composing the version, keyed by
	/// (part number, offset within the part)
	pub blocks: crdt::Map<VersionBlockKey, VersionBlock>,
	/// Etag of each part in case of a multipart upload, empty otherwise
	/// (keyed by part number)
	pub parts_etags: crdt::Map<u64, String>,

	// Back link to bucket+key so that we can figure out whether
	// this was deleted later on.
	/// Bucket in which the related object is stored
	pub bucket_id: Uuid,
	/// Key in which the related object is stored
	pub key: String,
}

impl Version {
	/// Creates a version with the given identity and deletion state.
	///
	/// The new version starts with no blocks and no part etags.
	pub fn new(uuid: Uuid, bucket_id: Uuid, key: String, deleted: bool) -> Self {
		let blocks = crdt::Map::new();
		let parts_etags = crdt::Map::new();
		Self {
			uuid,
			deleted: crdt::Bool::from(deleted),
			blocks,
			parts_etags,
			bucket_id,
			key,
		}
	}

	/// Tells whether `part_number` is known to this version, either because
	/// an etag is recorded for it or because at least one block belongs to it.
	pub fn has_part_number(&self, part_number: u64) -> bool {
		// Both item lists are sorted, so a binary search is enough. Blocks are
		// ordered by (part_number, offset); searching on the part number alone
		// therefore still respects the ordering.
		let etag_known = self
			.parts_etags
			.items()
			.binary_search_by_key(&part_number, |(part, _)| *part)
			.is_ok();

		etag_known
			|| self
				.blocks
				.items()
				.binary_search_by_key(&part_number, |(block_key, _)| block_key.part_number)
				.is_ok()
	}
}

/// Key locating a block inside a version: the part it belongs to and its
/// offset within that part.
///
/// Keys are ordered by `part_number` first, then by `offset` (see the `Ord`
/// implementation in this file).
#[derive(PartialEq, Eq, Clone, Copy, Debug, Serialize, Deserialize)]
pub struct VersionBlockKey {
	/// Number of the part
	pub part_number: u64,
	/// Offset of this sub-segment in its part
	pub offset: u64,
}

impl Ord for VersionBlockKey {
	/// Total order on block keys: by part number, ties broken by offset.
	fn cmp(&self, other: &Self) -> std::cmp::Ordering {
		// Tuples compare lexicographically, which is exactly
		// "part number first, then offset".
		let lhs = (self.part_number, self.offset);
		let rhs = (other.part_number, other.offset);
		lhs.cmp(&rhs)
	}
}

impl PartialOrd for VersionBlockKey {
	/// Always `Some`: delegates to the total order defined by `Ord`, so the
	/// two orderings cannot disagree.
	fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
		Some(Ord::cmp(self, other))
	}
}

/// Information about a single block referenced by a version
///
/// `Ord`/`PartialOrd` are derived, so values compare by `hash` first and
/// then by `size` (declaration order of the fields).
#[derive(PartialEq, Eq, Ord, PartialOrd, Clone, Copy, Debug, Serialize, Deserialize)]
pub struct VersionBlock {
	/// Blake2 sum of the block
	pub hash: Hash,
	/// Size of the block
	pub size: u64,
}

// Opts `VersionBlock` into the `AutoCrdt` helper trait from `garage_table::crdt`.
// NOTE(review): `WARN_IF_DIFFERENT = true` presumably makes the automatic
// merge emit a warning when two differing values meet for the same key —
// confirm against the `AutoCrdt` definition.
impl AutoCrdt for VersionBlock {
	const WARN_IF_DIFFERENT: bool = true;
}

// Table entry implementation: a version is partitioned by its own UUID and
// has no sort key.
impl Entry<Uuid, EmptyKey> for Version {
	/// Partition key: the UUID of the version itself.
	fn partition_key(&self) -> &Uuid {
		&self.uuid
	}
	/// Versions have no sort key; this always returns the `EmptyKey` value.
	fn sort_key(&self) -> &EmptyKey {
		&EmptyKey
	}
	/// A version is a tombstone as soon as its `deleted` flag is set.
	fn is_tombstone(&self) -> bool {
		self.deleted.get()
	}
}

impl Crdt for Version {
	/// Merges `other` into `self`.
	///
	/// The deletion flag is merged first. If the result is "deleted", the
	/// block list and the part etags are emptied and nothing from `other` is
	/// taken; otherwise both maps are merged with their counterparts.
	fn merge(&mut self, other: &Self) {
		self.deleted.merge(&other.deleted);

		// A deleted version carries no data: drop whatever we had and stop.
		if self.deleted.get() {
			self.blocks.clear();
			self.parts_etags.clear();
			return;
		}

		self.blocks.merge(&other.blocks);
		self.parts_etags.merge(&other.parts_etags);
	}
}

/// Schema object for the `version` table.
///
/// Holds the handles needed when a version changes; see the `TableSchema`
/// implementation in this file.
pub struct VersionTable {
	/// Handle to the background task runner.
	// NOTE(review): not used by the code visible in this file — confirm
	// whether it is still needed.
	pub background: Arc<BackgroundRunner>,
	/// Block reference table; deleted block references are queued into it
	/// when a version becomes deleted.
	pub block_ref_table: Arc<Table<BlockRefTable, TableShardedReplication>>,
}

impl TableSchema for VersionTable {
	const TABLE_NAME: &'static str = "version";

	type P = Uuid;
	type S = EmptyKey;
	type E = Version;
	type Filter = DeletedFilter;

	/// Hook invoked with the previous and the new state of an entry.
	///
	/// When an existing, non-deleted version becomes deleted, a deleted
	/// `BlockRef` is queued in the block reference table for every block the
	/// old version listed. A failure to enqueue that `db::unabort` hands back
	/// as a plain error is logged and does not fail the call; anything else is
	/// propagated with `?`.
	fn updated(
		&self,
		tx: &mut db::Transaction,
		old: Option<&Self::E>,
		new: Option<&Self::E>,
	) -> db::TxOpResult<()> {
		// Only a change between two existing states can be a deletion.
		let (previous, current) = match (old, new) {
			(Some(previous), Some(current)) => (previous, current),
			_ => return Ok(()),
		};

		// Nothing to propagate unless this update is what marks the version deleted.
		let newly_deleted = current.deleted.get() && !previous.deleted.get();
		if !newly_deleted {
			return Ok(());
		}

		// Propagate deletion of version blocks
		for (_key, block) in previous.blocks.items().iter() {
			let block_ref = BlockRef {
				block: block.hash,
				version: previous.uuid,
				deleted: true.into(),
			};
			let outcome = self.block_ref_table.queue_insert(tx, &block_ref);
			if let Err(e) = db::unabort(outcome)? {
				error!("Unable to enqueue block ref deletion propagation: {}. A repair will be needed.", e);
			}
		}

		Ok(())
	}

	/// Applies the deleted-ness filter to the entry's `deleted` flag.
	fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool {
		let is_deleted = entry.deleted.get();
		filter.apply(is_deleted)
	}

	/// Attempts to read `bytes` as a version in the previous (v0.5.1) format
	/// and convert it to the current format.
	///
	/// Returns `None` when the bytes do not decode as an old version.
	///
	/// # Panics
	///
	/// Panics if a block hash or the version UUID of the decoded old version
	/// cannot be converted by `Hash::try_from`.
	fn try_migrate(bytes: &[u8]) -> Option<Self::E> {
		let legacy = rmp_serde::decode::from_read_ref::<_, old::Version>(bytes).ok()?;

		// Blocks are converted one to one; only the hash changes type.
		let blocks = legacy
			.blocks
			.items()
			.iter()
			.map(|(old_key, old_block)| {
				let key = VersionBlockKey {
					part_number: old_key.part_number,
					offset: old_key.offset,
				};
				let block = VersionBlock {
					hash: Hash::try_from(old_block.hash.as_slice()).unwrap(),
					size: old_block.size,
				};
				(key, block)
			})
			.collect::<crdt::Map<_, _>>();

		// Part etags are copied unchanged.
		let parts_etags = legacy
			.parts_etags
			.items()
			.iter()
			.map(|(part, etag)| (*part, etag.clone()))
			.collect::<crdt::Map<_, _>>();

		let uuid = Hash::try_from(legacy.uuid.as_slice()).unwrap();
		let deleted = crdt::Bool::new(legacy.deleted.get());
		// The new bucket id is the blake2 sum of the old `bucket` field's bytes.
		let bucket_id = blake2sum(legacy.bucket.as_bytes());

		Some(Version {
			uuid,
			deleted,
			blocks,
			parts_etags,
			bucket_id,
			key: legacy.key,
		})
	}
}