aboutsummaryrefslogtreecommitdiff
path: root/doc/talks/2024-01-12-seed/talk.tex
diff options
context:
space:
mode:
authorAlex Auvolat <alex@adnab.me>2024-01-11 11:40:44 +0100
committerAlex Auvolat <alex@adnab.me>2024-01-11 11:40:44 +0100
commit60f0bd03b68c43fb1181497b028c91edfb1efbb1 (patch)
tree92b8515d2fedf947683b79ec8ef7347b631a262d /doc/talks/2024-01-12-seed/talk.tex
parenta8b0e01f88b947bc34c05d818d51860b4d171967 (diff)
downloadgarage-60f0bd03b68c43fb1181497b028c91edfb1efbb1.tar.gz
garage-60f0bd03b68c43fb1181497b028c91edfb1efbb1.zip
doc: add talk for SEED webinarprez-seed-webinar-202401
Diffstat (limited to 'doc/talks/2024-01-12-seed/talk.tex')
-rw-r--r--doc/talks/2024-01-12-seed/talk.tex370
1 files changed, 370 insertions, 0 deletions
diff --git a/doc/talks/2024-01-12-seed/talk.tex b/doc/talks/2024-01-12-seed/talk.tex
new file mode 100644
index 00000000..e7b4e2c2
--- /dev/null
+++ b/doc/talks/2024-01-12-seed/talk.tex
@@ -0,0 +1,370 @@
+\nonstopmode
+\documentclass[aspectratio=169]{beamer}
+\usepackage[utf8]{inputenc}
+% \usepackage[frenchb]{babel}
+\usepackage{amsmath}
+\usepackage{mathtools}
+\usepackage{breqn}
+\usepackage{multirow}
+\usetheme{boxes}
+\usepackage{graphicx}
+\usepackage{import}
+\usepackage{adjustbox}
+%\useoutertheme[footline=authortitle,subsection=false]{miniframes}
+%\useoutertheme[footline=authorinstitute,subsection=false]{miniframes}
+\useoutertheme{infolines}
+\setbeamertemplate{headline}{}
+
+\beamertemplatenavigationsymbolsempty
+
+\definecolor{TitleOrange}{RGB}{255,137,0}
+\setbeamercolor{title}{fg=TitleOrange}
+\setbeamercolor{frametitle}{fg=TitleOrange}
+
+\definecolor{ListOrange}{RGB}{255,145,5}
+\setbeamertemplate{itemize item}{\color{ListOrange}$\blacktriangleright$}
+
+\definecolor{verygrey}{RGB}{70,70,70}
+\setbeamercolor{normal text}{fg=verygrey}
+
+
+\usepackage{tabu}
+\usepackage{multicol}
+\usepackage{vwcol}
+\usepackage{stmaryrd}
+\usepackage{graphicx}
+
+\usepackage[normalem]{ulem}
+
+\AtBeginSection[]{
+ \begin{frame}
+ \vfill
+ \centering
+ \begin{beamercolorbox}[sep=8pt,center,shadow=true,rounded=true]{title}
+ \usebeamerfont{title}\insertsectionhead\par%
+ \end{beamercolorbox}
+ \vfill
+ \end{frame}
+}
+
+\title{Garage}
+\subtitle{a lightweight and robust geo-distributed data storage system}
+\author{Alex Auvolat, Deuxfleurs}
+\date{SEED webinar, 2024-01-12}
+
+\begin{document}
+
+% \begin{frame}
+% \centering
+% \includegraphics[width=.3\linewidth]{../../sticker/Garage.png}
+% \vspace{1em}
+%
+% {\large\bf Alex Auvolat, Deuxfleurs Association}
+% \vspace{1em}
+%
+% \url{https://garagehq.deuxfleurs.fr/}
+%
+% %Matrix channel: \texttt{\#garage:deuxfleurs.fr}
+% \end{frame}
+
+\begin{frame}
+ %\frametitle{Who I am}
+ \begin{columns}[t]
+ \begin{column}{.2\textwidth}
+ \centering
+ \adjincludegraphics[width=.4\linewidth, valign=t]{assets/alex.jpg}
+ \end{column}
+ \begin{column}{.6\textwidth}
+ \textbf{Alex Auvolat}\\
+ Member of Deuxfleurs, lead developer of Garage
+ \end{column}
+ \begin{column}{.2\textwidth}
+ ~
+ \end{column}
+ \end{columns}
+ \vspace{.5em}
+
+ \begin{columns}[t]
+ \begin{column}{.2\textwidth}
+ \centering
+ \adjincludegraphics[width=.6\linewidth, valign=t]{../../logo/garage-notext.png}
+ \end{column}
+ \begin{column}{.6\textwidth}
+ \\\textbf{Garage}\\
+ A self-hosted alternative to S3 for object storage
+ \end{column}
+ \begin{column}{.2\textwidth}
+ ~
+ \end{column}
+ \end{columns}
+ \vspace{2em}
+
+ \begin{columns}[t]
+ \begin{column}{.2\textwidth}
+ \centering
+ \adjincludegraphics[width=.5\linewidth, valign=t]{assets/deuxfleurs.pdf}
+ \end{column}
+ \begin{column}{.6\textwidth}
+ \textbf{Deuxfleurs}\\
+ A non-profit self-hosting collective,\\
+ member of the CHATONS network
+ \end{column}
+ \begin{column}{.2\textwidth}
+ \centering
+ \adjincludegraphics[width=.7\linewidth, valign=t]{assets/logo_chatons.png}
+ \end{column}
+ \end{columns}
+
+\end{frame}
+
+\begin{frame}
+ \frametitle{Stable vs Resilient}
+
+ \hspace{1em}
+ \begin{minipage}{7cm}
+ \textbf{Building a "stable" system:}
+ \vspace{1em}
+
+ Enterprise-grade systems typically employ:
+ \vspace{1em}
+ \begin{itemize}
+ \item RAID
+ \item Redundant power grid + UPS
+ \item Redundant Internet connections
+ \item Low-latency links
+ \item ...
+ \end{itemize}
+ \vspace{1em}
+ $\to$ costly, only worth at DC scale\\
+ $\to$ still risk of DC-level incident...
+ \end{minipage}
+ \hfill
+ \begin{minipage}{7cm}
+ \textbf{Building a \underline{resilient} system:}
+ \vspace{1em}
+
+ An alternative, cheaper way:
+ \vspace{1em}
+ \begin{itemize}
+ \item Commodity hardware \\(e.g. old desktop PCs)
+ \vspace{.5em}
+ \item Commodity Internet \\(e.g. FTTB, FTTH) and power grid
+ \vspace{.5em}
+ \item \textbf{Geographical redundancy} \\(multi-site replication)
+ \end{itemize}
+ \vspace{1.5em}
+ \end{minipage}
+ \hspace{1em}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Example: our infrastructure at Deuxfleurs}
+ \only<1>{
+ \begin{center}
+ \includegraphics[width=.8\linewidth]{assets/neptune.jpg}
+ \end{center}
+ }
+ \only<2>{
+ \begin{center}
+ \includegraphics[width=.8\linewidth]{assets/atuin.jpg}
+ \end{center}
+ }
+ \only<3>{
+ \begin{center}
+ \includegraphics[width=.8\linewidth]{assets/inframap_jdll2023.pdf}
+ \end{center}
+ }
+\end{frame}
+
+\begin{frame}
+ \frametitle{Object storage: simpler than file systems}
+
+ \begin{minipage}{6cm}
+ Only two operations:
+ \vspace{1em}
+ \begin{itemize}
+ \item Put an object at a key
+ \vspace{1em}
+ \item Retrieve an object from its key
+ \end{itemize}
+ \vspace{1em}
+ {\footnotesize (and a few others)}
+
+ \vspace{1em}
+ Sufficient for many applications!
+ \end{minipage}
+ \hfill
+ \begin{minipage}{8cm}
+ \begin{center}
+ \vspace{2em}
+ \includegraphics[height=6em]{../2020-12-02_wide-team/img/Amazon-S3.jpg}
+ \hspace{2em}
+ \includegraphics[height=5em]{assets/minio.png}
+
+ \vspace{2em}
+ \includegraphics[height=6em]{../../logo/garage_hires_crop.png}
+ \end{center}
+ \vspace{1em}
+ \end{minipage}
+\end{frame}
+
+
+\begin{frame}
+ \frametitle{The data model of object storage}
+ Object storage is basically a key-value store:
+ \vspace{1em}
+
+ \begin{center}
+ \begin{tabular}{|l|p{8cm}|}
+ \hline
+ \textbf{Key: file path + name} & \textbf{Value: file data + metadata} \\
+ \hline
+ \hline
+ \texttt{index.html} &
+ \texttt{Content-Type: text/html; charset=utf-8} \newline
+ \texttt{Content-Length: 24929} \newline
+ \texttt{<binary blob>} \\
+ \hline
+ \texttt{img/logo.svg} &
+ \texttt{Content-Type: text/svg+xml} \newline
+ \texttt{Content-Length: 13429} \newline
+ \texttt{<binary blob>} \\
+ \hline
+ \texttt{download/index.html} &
+ \texttt{Content-Type: text/html; charset=utf-8} \newline
+ \texttt{Content-Length: 26563} \newline
+ \texttt{<binary blob>} \\
+ \hline
+ \end{tabular}
+ \end{center}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Implementation: consensus vs weak consistency}
+
+ \hspace{1em}
+ \begin{minipage}{7cm}
+ \textbf{Consensus-based systems:}
+ \vspace{1em}
+ \begin{itemize}
+ \item \textbf{Leader-based:} a leader is elected to coordinate
+ all reads and writes
+ \vspace{1em}
+ \item Allows for \textbf{sequential reasoning}:
+ program as if running on a single machine
+ \vspace{1em}
+ \item Serializability is one of the \\
+ \textbf{strongest consistency guarantees}
+ \vspace{1em}
+ \item \textbf{Costly}, the leader is a bottleneck;
+ leader elections on failure take time
+ \end{itemize}
+ \end{minipage}
+ \hfill
+ \begin{minipage}{7cm} \visible<2->{
+ \textbf{Weakly consistent systems:}
+ \vspace{1em}
+ \begin{itemize}
+ \item \textbf{Nodes are equivalent}, any node
+ can originate a read or write operation
+ \vspace{1em}
+ \item \textbf{Operations must be independent},
+ conflicts are resolved after the fact
+ \vspace{1em}
+ \item Strongest achievable consistency:\\
+ \textbf{read-after-write consistency}\\(using quorums)
+ \vspace{1em}
+ \item \textbf{Fast}, no single bottleneck;\\
+ works transparently with offline nodes
+ \end{itemize}
+ } \end{minipage}
+ \hspace{1em}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Why avoid consensus?}
+ Consensus can be implemented reasonably well in practice, so why avoid it?
+ \vspace{2em}
+ \begin{itemize}
+ \item \textbf{Software complexity:} RAFT and PAXOS are complex beasts;\\
+ harder to prove, harder to reason about
+ \vspace{1.5em}
+ \item \textbf{Performance issues:}
+ \vspace{1em}
+ \begin{itemize}
+ \item Taking a decision may take an \textbf{arbitrary number of steps} (in adverse scenarios)
+ \vspace{1em}
+ \item The leader is a \textbf{bottleneck} for all requests;\\
+ even in leaderless approaches, \textbf{all nodes must process all operations in order}
+ \vspace{1em}
+ \item Particularly \textbf{sensitive to higher latency} between nodes
+ \end{itemize}
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Objective: the right level of consistency for Garage}
+
+ \underline{Constraints:} slow network (geographical distance), node unavailability/crashes\\
+ \underline{Objective:} maximize availability, maintain an \emph{appropriate level of consistency}\\
+ \vspace{1em}
+ \begin{enumerate}
+ \item<2-> \textbf{Weak consistency for most things}\\
+ \vspace{1em}
+ \underline{Example:} \texttt{PutObject}\\
+ \vspace{.5em}
+ If two clients write the same
+ object at the same time, one of the two is implicitly overwritten.
+ No need to coordinate, use a \emph{last-writer-wins register}.
+ \vspace{1em}
+ \item<3-> \textbf{Stronger consistency only when necessary}\\
+ \vspace{1em}
+ \underline{Example:} \texttt{CreateBucket}\\
+ \vspace{.5em}
+ A bucket is a reserved name in a shared namespace,
+ two clients should be prevented from both creating the same bucket
+ (\emph{mutual exclusion}).
+ \end{enumerate}
+\end{frame}
+
+\begin{frame}
+ \frametitle{The possibility of \emph{leaderless consensus}}
+ Currently, Garage \emph{only has weak consistency}. Is fast, but \texttt{CreateBucket} is broken!
+
+ \visible<2->{
+ \vspace{1em}
+ Leaderless consensus (Antoniadis et al., 2023) alleviates issues with RAFT and PAXOS:
+ \vspace{1em}
+ \begin{itemize}
+ \item \textbf{No leader.} All nodes participate equally at each time step,
+ and different nodes can be unavailable at different times without issues.
+ \\ \vspace{.5em} $\to$ better tolerance to the high latency (remove bottleneck issue)
+ \\ $\to$ tolerates crash transparently
+ \vspace{1em}
+ \item \textbf{Simpler formalization.} The algorithm is very simple to express and to analyze in mathematical terms.
+ \end{itemize}
+ }
+ \visible<3->{
+ \vspace{1em}
+ One of the possible subjects for this PhD:
+ \\$\to$ \emph{integration of leaderless consensus in Garage} + testing + perf eval, etc.
+ }
+\end{frame}
+
+\begin{frame}
+ \begin{center}
+ \includegraphics[width=.25\linewidth]{../../logo/garage_hires.png}\\
+ \vspace{-1em}
+ \url{https://garagehq.deuxfleurs.fr/}\\
+ \url{mailto:garagehq@deuxfleurs.fr}\\
+ \texttt{\#garage:deuxfleurs.fr} on Matrix
+
+ \vspace{1.5em}
+ \includegraphics[width=.06\linewidth]{assets/rust_logo.png}
+ \includegraphics[width=.13\linewidth]{assets/AGPLv3_Logo.png}
+ \end{center}
+\end{frame}
+
+\end{document}
+
+%% vim: set ts=4 sw=4 tw=0 noet spelllang=en :