path: root/doc/talks/2024-01-12-seed/talk.tex
blob: e7b4e2c270b32242167167779b4b36f4158ae766 (plain) (tree)

% \usepackage[frenchb]{babel}



\setbeamertemplate{itemize item}{\color{ListOrange}$\blacktriangleright$}

\setbeamercolor{normal text}{fg=verygrey}




\subtitle{a lightweight and robust geo-distributed data storage system}
\author{Alex Auvolat, Deuxfleurs}
\date{SEED webinar, 2024-01-12}


	%\frametitle{Who I am}
			\adjincludegraphics[width=.4\linewidth, valign=t]{assets/alex.jpg}
			\textbf{Alex Auvolat}\\
			Member of Deuxfleurs, lead developer of Garage

			\adjincludegraphics[width=.6\linewidth, valign=t]{../../logo/garage-notext.png}
			A self-hosted alternative to S3 for object storage

			\adjincludegraphics[width=.5\linewidth, valign=t]{assets/deuxfleurs.pdf}
			A non-profit self-hosting collective,\\
			member of the CHATONS network
			\adjincludegraphics[width=.7\linewidth, valign=t]{assets/logo_chatons.png}


	\frametitle{Stable vs Resilient}

		\textbf{Building a ``stable'' system:}

		Enterprise-grade systems typically employ:
			\item RAID
			\item Redundant power grid + UPS
			\item Redundant Internet connections
			\item Low-latency links
			\item \dots
		$\to$ costly, only worth it at DC scale\\
		$\to$ still a risk of DC-level incidents\dots
		\textbf{Building a \underline{resilient} system:}

		An alternative, cheaper way:
			\item Commodity hardware \\(e.g.\ old desktop PCs)
			\item Commodity Internet \\(e.g.\ FTTB, FTTH) and power grid
			\item \textbf{Geographical redundancy} \\(multi-site replication)

	\frametitle{Example: our infrastructure at Deuxfleurs}

	\frametitle{Object storage: simpler than file systems}

		Only two operations:
			\item Put an object at a key
			\item Retrieve an object from its key
		{\footnotesize (and a few others)}

		Sufficient for many applications!


	\frametitle{The data model of object storage}
	Object storage is basically a key-value store:

			\textbf{Key: file path + name} & \textbf{Value: file data + metadata} \\
			\texttt{index.html} &
				\texttt{Content-Type: text/html; charset=utf-8} \newline
				\texttt{Content-Length: 24929} \newline
				\texttt{<binary blob>} \\ 
			\texttt{img/logo.svg} &
				\texttt{Content-Type: image/svg+xml} \newline
				\texttt{Content-Length: 13429} \newline
				\texttt{<binary blob>} \\ 
			\texttt{download/index.html} &
				\texttt{Content-Type: text/html; charset=utf-8} \newline
				\texttt{Content-Length: 26563} \newline
				\texttt{<binary blob>} \\ 

	\frametitle{Implementation: consensus vs weak consistency}

		\textbf{Consensus-based systems:}
			\item \textbf{Leader-based:} a leader is elected to coordinate
				all reads and writes
			\item Allows for \textbf{sequential reasoning}:
				program as if running on a single machine
			\item Serializability is one of the \\
				\textbf{strongest consistency guarantees}
			\item \textbf{Costly}, the leader is a bottleneck;
				leader elections on failure take time
	\begin{minipage}{7cm} \visible<2->{
		\textbf{Weakly consistent systems:}
			\item \textbf{Nodes are equivalent}, any node
				can originate a read or write operation
			\item \textbf{Operations must be independent},
				conflicts are resolved after the fact
			\item Strongest achievable consistency:\\
				\textbf{read-after-write consistency}\\(using quorums)
			\item \textbf{Fast}, no single bottleneck;\\
				works transparently with offline nodes
	} \end{minipage}

	\frametitle{Why avoid consensus?}
	Consensus can be implemented reasonably well in practice, so why avoid it?
		\item \textbf{Software complexity:} Raft and Paxos are complex beasts;\\
			harder to prove, harder to reason about
		\item \textbf{Performance issues:}
				\item Taking a decision may take an \textbf{arbitrary number of steps} (in adverse scenarios)
				\item The leader is a \textbf{bottleneck} for all requests;\\
					even in leaderless approaches, \textbf{all nodes must process all operations in order}
				\item Particularly \textbf{sensitive to higher latency} between nodes

	\frametitle{Objective: the right level of consistency for Garage}

	\underline{Constraints:} slow network (geographical distance), node unavailability/crashes\\
	\underline{Objective:} maximize availability, maintain an \emph{appropriate level of consistency}\\
		\item<2-> \textbf{Weak consistency for most things}\\
			\underline{Example:} \texttt{PutObject}\\
			If two clients write the same
			object at the same time, one of the two is implicitly overwritten.
			No need to coordinate, use a \emph{last-writer-wins register}.
		\item<3-> \textbf{Stronger consistency only when necessary}\\
			\underline{Example:} \texttt{CreateBucket}\\
			A bucket is a reserved name in a shared namespace;
			two clients should be prevented from both creating the same bucket
			(\emph{mutual exclusion}).

	\frametitle{The possibility of \emph{leaderless consensus}}
	Currently, Garage \emph{only has weak consistency}. It is fast, but \texttt{CreateBucket} is broken!

		Leaderless consensus (Antoniadis et al., 2023) alleviates issues with Raft and Paxos:
			\item \textbf{No leader.} All nodes participate equally at each time step,
				and different nodes can be unavailable at different times without issues.
				\\ \vspace{.5em} $\to$ better tolerance to high latency (removes the bottleneck issue)
				\\ $\to$ tolerates crashes transparently
			\item \textbf{Simpler formalization.} The algorithm is very simple to express and to analyze in mathematical terms.
		One of the possible subjects for this PhD:
		\\$\to$ \emph{integration of leaderless consensus in Garage} + testing + perf eval, etc.

		\texttt{\#garage:deuxfleurs.fr} on Matrix



