% Beamer slide deck: "Garage -- a lightweight and robust geo-distributed
% data storage system" (SEED webinar, 2024-01-12).
%
% Layout: preamble (packages, theme, colours) -> section-divider template ->
% title metadata -> frames.  One construct per line, so that `%' comments
% only affect the line they are written on.

% Keep compiling on errors instead of stopping for interactive input.
\nonstopmode
\documentclass[aspectratio=169]{beamer}

% ---- Packages -------------------------------------------------------------
\usepackage[utf8]{inputenc}
% \usepackage[frenchb]{babel}
\usepackage{amsmath}
\usepackage{mathtools}
\usepackage{breqn}
\usepackage{multirow}
\usetheme{boxes}
\usepackage{graphicx}
\usepackage{import}
\usepackage{adjustbox}	% provides \adjincludegraphics (valign=t) used on the intro frame
%\useoutertheme[footline=authortitle,subsection=false]{miniframes}
%\useoutertheme[footline=authorinstitute,subsection=false]{miniframes}
\useoutertheme{infolines}
% Empty the headline template and hide the navigation symbols.
\setbeamertemplate{headline}{}

\beamertemplatenavigationsymbolsempty

% ---- Colours --------------------------------------------------------------
% Orange for the title and frame titles.
\definecolor{TitleOrange}{RGB}{255,137,0}
\setbeamercolor{title}{fg=TitleOrange}
\setbeamercolor{frametitle}{fg=TitleOrange}

% Orange triangle as the itemize bullet.
\definecolor{ListOrange}{RGB}{255,145,5}
\setbeamertemplate{itemize item}{\color{ListOrange}$\blacktriangleright$}

% Dark grey body text.
\definecolor{verygrey}{RGB}{70,70,70}
\setbeamercolor{normal text}{fg=verygrey}

\usepackage{tabu}
\usepackage{multicol}
\usepackage{vwcol}
\usepackage{stmaryrd}

% normalem: keep \emph as italics even though ulem is loaded.
\usepackage[normalem]{ulem}

% ---- Section divider ------------------------------------------------------
% Frame inserted at the beginning of every \section: the section name,
% vertically centred, in a rounded shadowed box using the title colours.
\AtBeginSection[]{
	\begin{frame}
		\vfill
		\centering
		\begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title}
			\usebeamerfont{title}\insertsectionhead\par%
		\end{beamercolorbox}
		\vfill
	\end{frame}
}

% ---- Title metadata -------------------------------------------------------
\title{Garage}
\subtitle{a lightweight and robust geo-distributed data storage system}
\author{Alex Auvolat, Deuxfleurs}
\date{SEED webinar, 2024-01-12}

\begin{document}

% \begin{frame}
% 	\centering
% 	\includegraphics[width=.3\linewidth]{../../sticker/Garage.png}
% 	\vspace{1em}
%
% 	{\large\bf Alex Auvolat, Deuxfleurs Association}
% 	\vspace{1em}
%
% 	\url{https://garagehq.deuxfleurs.fr/}
%
% 	%Matrix channel: \texttt{\#garage:deuxfleurs.fr}
% \end{frame}

% ---- Frame: speaker / project / organisation ------------------------------
% Three rows, each a 20% / 60% / 20% column layout: picture, text, and
% either a spacer or a second logo.
\begin{frame}
	%\frametitle{Who I am}

	\begin{columns}[t]
		\begin{column}{.2\textwidth}
			\centering
			\adjincludegraphics[width=.4\linewidth, valign=t]{../assets/alex.jpg}
		\end{column}
		\begin{column}{.6\textwidth}
			\textbf{Alex Auvolat}\\
			Member of Deuxfleurs, lead developer of Garage
		\end{column}
		\begin{column}{.2\textwidth}
			~
		\end{column}
	\end{columns}
	\vspace{.5em}

	\begin{columns}[t]
		\begin{column}{.2\textwidth}
			\centering
			\adjincludegraphics[width=.6\linewidth, valign=t]{../../logo/garage-notext.png}
		\end{column}
		\begin{column}{.6\textwidth}
			% NOTE(review): the leading \\ presumably pushes the text down one
			% line to line it up with the logo -- confirm it does not raise
			% "There's no line here to end" with the Beamer version in use.
			\\\textbf{Garage}\\
			A self-hosted alternative to S3 for object storage
		\end{column}
		\begin{column}{.2\textwidth}
			~
		\end{column}
	\end{columns}
	\vspace{2em}

	\begin{columns}[t]
		\begin{column}{.2\textwidth}
			\centering
			\adjincludegraphics[width=.5\linewidth, valign=t]{../assets/deuxfleurs.pdf}
		\end{column}
		\begin{column}{.6\textwidth}
			\textbf{Deuxfleurs}\\
			A non-profit self-hosting collective,\\
			member of the CHATONS network
		\end{column}
		\begin{column}{.2\textwidth}
			\centering
			\adjincludegraphics[width=.7\linewidth, valign=t]{../assets/logo_chatons.png}
		\end{column}
	\end{columns}
\end{frame}

% ---- Frame: stable vs resilient -------------------------------------------
% Two 7cm minipages side by side; they must stay in the same paragraph
% (no blank line between them) to remain on one row.
\begin{frame}
	\frametitle{Stable vs Resilient}

	\hspace{1em}
	\begin{minipage}{7cm}
		\textbf{Building a ``stable'' system:}
		\vspace{1em}

		Enterprise-grade systems typically employ:
		\vspace{1em}
		\begin{itemize}
			\item RAID
			\item Redundant power grid + UPS
			\item Redundant Internet connections
			\item Low-latency links
			\item \dots
		\end{itemize}
		\vspace{1em}
		$\to$ costly, only worth it at DC scale\\
		$\to$ still risk of DC-level incident\dots
	\end{minipage}
	\hfill
	\begin{minipage}{7cm}
		\textbf{Building a \underline{resilient} system:}
		\vspace{1em}

		An alternative, cheaper way:
		\vspace{1em}
		\begin{itemize}
			\item Commodity hardware \\(e.g.\ old desktop PCs)
			\vspace{.5em}
			\item Commodity Internet \\(e.g.\ FTTB, FTTH) and power grid
			\vspace{.5em}
			\item \textbf{Geographical redundancy} \\(multi-site replication)
		\end{itemize}
		\vspace{1.5em}
	\end{minipage}
	\hspace{1em}
\end{frame}

% ---- Frame: Deuxfleurs infrastructure -------------------------------------
% Three overlays, one image each (two photos, then the infrastructure map).
\begin{frame}
	\frametitle{Example: our infrastructure at Deuxfleurs}

	\only<1>{
		\begin{center}
			\includegraphics[width=.8\linewidth]{../assets/neptune.jpg}
		\end{center}
	}
	\only<2>{
		\begin{center}
			\includegraphics[width=.8\linewidth]{../assets/atuin.jpg}
		\end{center}
	}
	\only<3>{
		\begin{center}
			\includegraphics[width=.8\linewidth]{../assets/inframap_jdll2023.pdf}
		\end{center}
	}
\end{frame}

% ---- Frame: object storage in two operations ------------------------------
% Left minipage: the two operations; right minipage: product logos.
\begin{frame}
	\frametitle{Object storage: simpler than file systems}

	\begin{minipage}{6cm}
		Only two operations:
		\vspace{1em}
		\begin{itemize}
			\item Put an object at a key
			\vspace{1em}
			\item Retrieve an object from its key
		\end{itemize}
		\vspace{1em}
		{\footnotesize (and a few others)}

		\vspace{1em}
		Sufficient for many applications!
	\end{minipage}
	\hfill
	\begin{minipage}{8cm}
		\begin{center}
			\vspace{2em}
			\includegraphics[height=6em]{../2020-12-02_wide-team/img/Amazon-S3.jpg}
			\hspace{2em}
			\includegraphics[height=5em]{../assets/minio.png}

			\vspace{2em}
			\includegraphics[height=6em]{../../logo/garage_hires_crop.png}
		\end{center}
		\vspace{1em}
	\end{minipage}
\end{frame}

% ---- Frame: data model ----------------------------------------------------
% Example key/value table; \newline breaks lines inside the p{8cm} column.
\begin{frame}
	\frametitle{The data model of object storage}

	Object storage is basically a key-value store:
	\vspace{1em}
	\begin{center}
		\begin{tabular}{|l|p{8cm}|}
			\hline
			\textbf{Key: file path + name} & \textbf{Value: file data + metadata} \\
			\hline
			\hline
			\texttt{index.html} &
				\texttt{Content-Type: text/html; charset=utf-8} \newline
				\texttt{Content-Length: 24929} \newline
				\texttt{<binary blob>} \\
			\hline
			% image/svg+xml is the registered media type for SVG.
			\texttt{img/logo.svg} &
				\texttt{Content-Type: image/svg+xml} \newline
				\texttt{Content-Length: 13429} \newline
				\texttt{<binary blob>} \\
			\hline
			\texttt{download/index.html} &
				\texttt{Content-Type: text/html; charset=utf-8} \newline
				\texttt{Content-Length: 26563} \newline
				\texttt{<binary blob>} \\
			\hline
		\end{tabular}
	\end{center}
\end{frame}

% ---- Frame: consensus vs weak consistency ---------------------------------
% Left column is visible from the first overlay, right column from the second.
\begin{frame}
	\frametitle{Implementation: consensus vs weak consistency}

	\hspace{1em}
	\begin{minipage}{7cm}
		\textbf{Consensus-based systems:}
		\vspace{1em}
		\begin{itemize}
			\item \textbf{Leader-based:} a leader is elected to coordinate
				all reads and writes
			\vspace{1em}
			\item Allows for \textbf{sequential reasoning}:
				program as if running on a single machine
			\vspace{1em}
			\item Serializability is one of the \\
				\textbf{strongest consistency guarantees}
			\vspace{1em}
			\item \textbf{Costly}, the leader is a bottleneck;
				leader elections on failure take time
		\end{itemize}
	\end{minipage}
	\hfill
	\begin{minipage}{7cm}
		\visible<2->{
			\textbf{Weakly consistent systems:}
			\vspace{1em}
			\begin{itemize}
				\item \textbf{Nodes are equivalent}, any node
					can originate a read or write operation
				\vspace{1em}
				\item \textbf{Operations must be independent},
					conflicts are resolved after the fact
				\vspace{1em}
				\item Strongest achievable consistency:\\
					\textbf{read-after-write consistency}\\(using quorums)
				\vspace{1em}
				\item \textbf{Fast}, no single bottleneck;\\
					works transparently with offline nodes
			\end{itemize}
		}
	\end{minipage}
	\hspace{1em}
\end{frame}

% ---- Frame: why avoid consensus -------------------------------------------
\begin{frame}
	\frametitle{Why avoid consensus?}

	Consensus can be implemented reasonably well in practice, so why avoid it?
	\vspace{2em}
	\begin{itemize}
		\item \textbf{Software complexity:} RAFT and PAXOS are complex beasts;\\
			harder to prove, harder to reason about
		\vspace{1.5em}
		\item \textbf{Performance issues:}
		\vspace{1em}
		\begin{itemize}
			\item Taking a decision may take an \textbf{arbitrary number of steps} (in adverse scenarios)
			\vspace{1em}
			\item The leader is a \textbf{bottleneck} for all requests;\\
				even in leaderless approaches, \textbf{all nodes must process all operations in order}
			\vspace{1em}
			\item Particularly \textbf{sensitive to higher latency} between nodes
		\end{itemize}
	\end{itemize}
\end{frame}

% ---- Frame: consistency objective -----------------------------------------
% Item 1 is uncovered on overlay 2, item 2 on overlay 3.
\begin{frame}
	\frametitle{Objective: the right level of consistency for Garage}

	\underline{Constraints:} slow network (geographical distance), node unavailability/crashes\\
	\underline{Objective:} maximize availability, maintain an \emph{appropriate level of consistency}\\
	\vspace{1em}
	\begin{enumerate}
		\item<2-> \textbf{Weak consistency for most things}\\
			\vspace{1em}
			\underline{Example:} \texttt{PutObject}\\
			\vspace{.5em}
			If two clients write the same object at the same time, one of the two is implicitly overwritten.
			No need to coordinate, use a \emph{last-writer-wins register}.
			\vspace{1em}
		\item<3-> \textbf{Stronger consistency only when necessary}\\
			\vspace{1em}
			\underline{Example:} \texttt{CreateBucket}\\
			\vspace{.5em}
			A bucket is a reserved name in a shared namespace,
			two clients should be prevented from both creating the same bucket (\emph{mutual exclusion}).
	\end{enumerate}
\end{frame}

% ---- Frame: leaderless consensus ------------------------------------------
% The argument is revealed on overlay 2, the PhD subject on overlay 3.
\begin{frame}
	\frametitle{The possibility of \emph{leaderless consensus}}

	Currently, Garage \emph{only has weak consistency}. It is fast, but \texttt{CreateBucket} is broken!

	\visible<2->{
		\vspace{1em}
		Leaderless consensus (Antoniadis et al., 2023) alleviates issues with RAFT and PAXOS:
		\vspace{1em}
		\begin{itemize}
			\item \textbf{No leader.} All nodes participate equally at each time step,
				and different nodes can be unavailable at different times without issues.
				\\ \vspace{.5em}
				$\to$ better tolerance to high latency (removes the bottleneck issue)
				\\
				$\to$ tolerates crashes transparently
			\vspace{1em}
			\item \textbf{Simpler formalization.} The algorithm is very simple to express and to analyze in mathematical terms.
		\end{itemize}
	}

	\visible<3->{
		\vspace{1em}
		One of the possible subjects for this PhD:
		\\$\to$ \emph{integration of leaderless consensus in Garage} + testing + perf eval, etc.
	}
\end{frame}

% ---- Frame: contact -------------------------------------------------------
\begin{frame}
	\begin{center}
		\includegraphics[width=.25\linewidth]{../../logo/garage_hires.png}\\
		\vspace{-1em}
		\url{https://garagehq.deuxfleurs.fr/}\\
		\url{mailto:garagehq@deuxfleurs.fr}\\
		\texttt{\#garage:deuxfleurs.fr} on Matrix

		\vspace{1.5em}
		\includegraphics[width=.06\linewidth]{../assets/rust_logo.png}
		\includegraphics[width=.13\linewidth]{../assets/AGPLv3_Logo.png}
	\end{center}
\end{frame}

\end{document}

%% vim: set ts=4 sw=4 tw=0 noet spelllang=en :