aboutsummaryrefslogtreecommitdiff
path: root/doc/talks/2023-01-18-tocatta
diff options
context:
space:
mode:
authorAlex Auvolat <alex@adnab.me>2023-01-12 16:27:02 +0100
committerAlex Auvolat <alex@adnab.me>2023-01-12 16:27:02 +0100
commitfe850f62c908492d2c3cbe4c55c3cf3b3d097de0 (patch)
tree468a071ea00206ee6a700cb3a953032f9b8313e0 /doc/talks/2023-01-18-tocatta
parent7416ba97ef8e5f9592c32dae6caaf46c1dbd7610 (diff)
downloadgarage-fe850f62c908492d2c3cbe4c55c3cf3b3d097de0.tar.gz
garage-fe850f62c908492d2c3cbe4c55c3cf3b3d097de0.zip
Talk 2023-01-18: some WIP talking about consensus
Diffstat (limited to 'doc/talks/2023-01-18-tocatta')
-rw-r--r--doc/talks/2023-01-18-tocatta/Makefile4
-rw-r--r--doc/talks/2023-01-18-tocatta/assets/consensus.svg137
-rw-r--r--doc/talks/2023-01-18-tocatta/assets/garage.drawio.pdfbin26098 -> 0 bytes
-rw-r--r--doc/talks/2023-01-18-tocatta/assets/garage.drawio.pngbin13463 -> 0 bytes
-rw-r--r--doc/talks/2023-01-18-tocatta/talk.pdfbin2572439 -> 2594434 bytes
-rw-r--r--doc/talks/2023-01-18-tocatta/talk.tex187
6 files changed, 311 insertions, 17 deletions
diff --git a/doc/talks/2023-01-18-tocatta/Makefile b/doc/talks/2023-01-18-tocatta/Makefile
index 4d600178..4a967d24 100644
--- a/doc/talks/2023-01-18-tocatta/Makefile
+++ b/doc/talks/2023-01-18-tocatta/Makefile
@@ -3,6 +3,7 @@ ASSETS=assets/consistent_hashing_1.pdf \
assets/consistent_hashing_3.pdf \
assets/consistent_hashing_4.pdf \
assets/garage_tables.pdf \
+ assets/consensus.pdf_tex \
assets/deuxfleurs.pdf
talk.pdf: talk.tex $(ASSETS)
@@ -10,3 +11,6 @@ talk.pdf: talk.tex $(ASSETS)
assets/%.pdf: assets/%.svg
inkscape -D -z --file=$^ --export-pdf=$@
+
+assets/%.pdf_tex: assets/%.svg
+ inkscape -D -z --file=$^ --export-pdf=$@ --export-latex
diff --git a/doc/talks/2023-01-18-tocatta/assets/consensus.svg b/doc/talks/2023-01-18-tocatta/assets/consensus.svg
new file mode 100644
index 00000000..8321e383
--- /dev/null
+++ b/doc/talks/2023-01-18-tocatta/assets/consensus.svg
@@ -0,0 +1,137 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+ width="800"
+ height="300"
+ viewBox="0 0 211.66666 79.374999"
+ version="1.1"
+ id="svg5"
+ inkscape:version="1.2.2 (b0a8486541, 2022-12-01)"
+ sodipodi:docname="consensus.svg"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:svg="http://www.w3.org/2000/svg">
+ <sodipodi:namedview
+ id="namedview7"
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1.0"
+ inkscape:showpageshadow="2"
+ inkscape:pageopacity="0.0"
+ inkscape:pagecheckerboard="0"
+ inkscape:deskcolor="#d1d1d1"
+ inkscape:document-units="mm"
+ showgrid="false"
+ inkscape:zoom="1.4734708"
+ inkscape:cx="310.49139"
+ inkscape:cy="179.1688"
+ inkscape:window-width="1920"
+ inkscape:window-height="999"
+ inkscape:window-x="0"
+ inkscape:window-y="0"
+ inkscape:window-maximized="1"
+ inkscape:current-layer="layer1" />
+ <defs
+ id="defs2">
+ <marker
+ style="overflow:visible"
+ id="Arrow2"
+ refX="0"
+ refY="0"
+ orient="auto-start-reverse"
+ inkscape:stockid="Arrow2"
+ markerWidth="7.6999998"
+ markerHeight="5.5999999"
+ viewBox="0 0 7.7 5.6"
+ inkscape:isstock="true"
+ inkscape:collect="always"
+ preserveAspectRatio="xMidYMid">
+ <path
+ transform="scale(0.7)"
+ d="M -2,-4 9,0 -2,4 c 2,-2.33 2,-5.66 0,-8 z"
+ style="fill:context-stroke;fill-rule:evenodd;stroke:none"
+ id="arrow2L" />
+ </marker>
+ </defs>
+ <g
+ inkscape:label="Layer 1"
+ inkscape:groupmode="layer"
+ id="layer1">
+ <g
+ id="g1218"
+ transform="translate(-8.9161476,-12.502301)">
+ <circle
+ style="fill:#ffffff;stroke:#000000;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
+ id="path111"
+ cx="38.904896"
+ cy="37.936272"
+ r="13.474442" />
+ <text
+ xml:space="preserve"
+ style="font-size:8.46667px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle;stroke-width:0.264583;fill:#000000"
+ x="38.879501"
+ y="40.908073"
+ id="text1105"><tspan
+ sodipodi:role="line"
+ id="tspan1103"
+ style="stroke-width:0.264583;fill:#000000"
+ x="38.879501"
+ y="40.908073">$\bot$</tspan></text>
+ </g>
+ <g
+ id="g1218-3"
+ transform="translate(127.41938,-12.502301)">
+ <circle
+ style="fill:#ffffff;stroke:#000000;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
+ id="path111-5"
+ cx="38.904896"
+ cy="37.936272"
+ r="13.474442" />
+ <text
+ xml:space="preserve"
+ style="font-size:8.46667px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle;stroke-width:0.264583;fill:#000000"
+ x="38.879501"
+ y="40.908073"
+ id="text1105-6"><tspan
+ sodipodi:role="line"
+ id="tspan1103-2"
+ style="stroke-width:0.264583;fill:#000000"
+ x="38.879501"
+ y="40.908073">$x$</tspan></text>
+ </g>
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow2)"
+ d="M 44.289635,25.433971 H 145.90576"
+ id="path1414"
+ sodipodi:nodetypes="cc" />
+ <text
+ xml:space="preserve"
+ style="font-size:8.46667px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle;stroke-width:0.264583;fill:#000000"
+ x="92.729836"
+ y="21.781803"
+ id="text2092"><tspan
+ sodipodi:role="line"
+ id="tspan2090"
+ style="stroke-width:0.264583;fill:#000000"
+ x="92.729836"
+ y="21.781803">$propose(x) / x$</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:8.46667px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle;stroke-width:0.264583;fill:#000000"
+ x="166.29887"
+ y="69.89299"
+ id="text2092-9"><tspan
+ sodipodi:role="line"
+ id="tspan2090-1"
+ style="stroke-width:0.264583;fill:#000000"
+ x="166.29887"
+ y="69.89299">$propose(y) / x$</tspan></text>
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow2)"
+ d="m 155.82329,35.899857 c -8.35129,12.319651 0.54055,24.640898 11.72797,24.072085 8.65403,-0.440005 18.59818,-11.705963 11.8146,-20.570891"
+ id="path2150"
+ sodipodi:nodetypes="csc" />
+ </g>
+</svg>
diff --git a/doc/talks/2023-01-18-tocatta/assets/garage.drawio.pdf b/doc/talks/2023-01-18-tocatta/assets/garage.drawio.pdf
deleted file mode 100644
index a54a163c..00000000
--- a/doc/talks/2023-01-18-tocatta/assets/garage.drawio.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/talks/2023-01-18-tocatta/assets/garage.drawio.png b/doc/talks/2023-01-18-tocatta/assets/garage.drawio.png
deleted file mode 100644
index 386dd862..00000000
--- a/doc/talks/2023-01-18-tocatta/assets/garage.drawio.png
+++ /dev/null
Binary files differ
diff --git a/doc/talks/2023-01-18-tocatta/talk.pdf b/doc/talks/2023-01-18-tocatta/talk.pdf
index 5acb9198..ba9bde3d 100644
--- a/doc/talks/2023-01-18-tocatta/talk.pdf
+++ b/doc/talks/2023-01-18-tocatta/talk.pdf
Binary files differ
diff --git a/doc/talks/2023-01-18-tocatta/talk.tex b/doc/talks/2023-01-18-tocatta/talk.tex
index 566f56ec..ac9b4077 100644
--- a/doc/talks/2023-01-18-tocatta/talk.tex
+++ b/doc/talks/2023-01-18-tocatta/talk.tex
@@ -8,6 +8,7 @@
\usepackage{multirow}
\usetheme{boxes}
\usepackage{graphicx}
+\usepackage{import}
\usepackage{adjustbox}
%\useoutertheme[footline=authortitle,subsection=false]{miniframes}
%\useoutertheme[footline=authorinstitute,subsection=false]{miniframes}
@@ -479,33 +480,185 @@
\section{Problem 2: ensuring consistency}
-%\begin{frame}
-% \frametitle{Garage's architecture}
-% \begin{center}
-% \includegraphics[width=.35\linewidth]{assets/garage.drawio.pdf}
-% \end{center}
-%\end{frame}
+\begin{frame}
+ \frametitle{Consensus vs weak consistency}
+
+ \hspace{1em}
+ \begin{minipage}{7cm}
+ \textbf{Consensus-based systems:}
+ \vspace{1em}
+ \begin{itemize}
+ \item \textbf{Leader-based:} a leader is elected to coordinate
+ all reads and writes
+ \vspace{1em}
+ \item \textbf{Linearizability} of all operations\\
+ (strongest consistency guarantee)
+ \vspace{1em}
+ \item \textbf{Replicated state machines} that can implement
+ any sequential specification
+ \vspace{1em}
+ \item \textbf{Costly}, the leader is a bottleneck;
+ leader elections on failure take time
+ \end{itemize}
+ \end{minipage}
+ \hfill
+ \begin{minipage}{7cm} \visible<2->{
+ \textbf{Weakly consistent systems:}
+ \vspace{1em}
+ \begin{itemize}
+ \item \textbf{Nodes are equivalent}, any node
+ can originate a read or write operation
+ \vspace{1em}
+ \item \textbf{Read-after-write consistency} with quorums,
+ eventual consistency without
+ \vspace{1em}
+ \item \textbf{Operations have to commute}, i.e.~we
+ can only implement CRDTs
+ \vspace{1em}
+ \item \textbf{Fast}, no node is a bottleneck;\\
+ works the same with offline nodes
+ \end{itemize}
+ } \end{minipage}
+ \hspace{1em}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Consensus vs weak consistency}
+ \begin{center}
+ \textbf{The same objects cannot be implemented in both models.}
+ \end{center}
+ \vspace{2em}
+
+ \hspace{1em}
+ \begin{minipage}{7cm}
+ \underline{Consensus-based systems:}
+
+ \vspace{1em}
+
+ \textbf{Any sequential specification}\\~
+ \end{minipage}
+ \hfill
+ \begin{minipage}{7cm}
+ \underline{Weakly consistent systems:}
+
+ \vspace{1em}
+
+ \textbf{CRDTs only}\\(conflict-free replicated data types)
+ \end{minipage}
+ \hspace{1em}
+
+ \vspace{3em}
+ \begin{center}
+ Part of the complexity is \textbf{reported to the consumer of the API}
+ \end{center}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Consensus vs weak consistency}
+ \begin{center}
+ \textbf{From a theoretical point of view:}\\
+
+ \end{center}
+ \vspace{2em}
+
+ \hspace{1em}
+ \begin{minipage}{6.5cm}
+ \underline{Consensus-based systems:}
+
+ \vspace{1em}
+
+ Require \textbf{additionnal assumptions} such as a fault detector or a strong RNG\\~
+ \end{minipage}
+ \hfill
+ \begin{minipage}{6.5cm}
+ \underline{Weakly consistent systems:}
+
+ \vspace{1em}
+
+ Can be implemented in \textbf{any asynchronous message passing distributed system}
+ \end{minipage}
+ \hspace{1em}
+
+ \vspace{3em}
+ \begin{center}
+ They represent \textbf{different classes of computational capability}
+ \end{center}
+\end{frame}
\begin{frame}
- \frametitle{Garage is \emph{coordination-free}:}
+ \frametitle{Understanding the power of consensus}
+ \textbf{Consensus:} an API with a single operation, $propose(x)$
+ \begin{enumerate}
+ \item nodes all call $propose(x)$ with their proposed value;
+ \item nodes all receive the same value as a return value, which is one of the proposed values
+ \end{enumerate}
+ \vspace{1em}
+
+ \visible<2->{
+ \textbf{Equivalent to} a distributed algorithm that gives a total order on all requests
+ }
+ \vspace{1em}
+
+ \visible<3->{
+ \textbf{Implemented by} this simple replicated state machine:
+ \vspace{.5em}
+ \begin{figure}
+ \centering
+ \def\svgwidth{.5\textwidth}
+ \large
+ \import{assets/}{consensus.pdf_tex}
+ \end{figure}
+ \vspace{1em}
+ }
+\end{frame}
+
+\begin{frame}
+ \frametitle{Can my object be implemented without consensus?}
+ \underline{Given the specification of an API:}
+ \vspace{2em}
\begin{itemize}
- \item No Raft or Paxos
- \vspace{1em}
- \item Internal data types are CRDTs
+ \item \textbf{Using this API, we can implement the consensus object} (the $propose$ function)\\
+ $\to$ the API is equivalent to consensus/total ordering of messages\\
+ $\to$ the API cannot be implemented in a weakly consistent system
+ \vspace{2em}
+ \item \textbf{This API can be implemented using only weak primitives}\\
+ (e.g. a bunch of atomic registers)\\
+ $\to$ the API is strictly weaker than consensus\\
+ $\to$ we can implement it in Garage!
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Why avoid consensus?}
+ Consensus can be implemented reasonably well in practice, so why avoid it?
+ \vspace{2em}
+ \begin{itemize}
+ \item \textbf{Software complexity:} RAFT and PAXOS are complex beasts;\\
+ harder to prove, harder to reason about
+ \vspace{1.5em}
+ \item \textbf{Performance issues:}
\vspace{1em}
- \item All nodes are equivalent (no master/leader/index node)
+ \begin{itemize}
+ \item The leader is a \textbf{bottleneck} for all requests
+ \vspace{1em}
+ \item Particularly \textbf{sensitive to higher latency} between nodes
+ \end{itemize}
\end{itemize}
- \vspace{2em}
- $\to$ less sensitive to higher latencies between nodes
\end{frame}
\begin{frame}
- \frametitle{Consistency model}
+ \frametitle{What can we implement without consensus?}
\begin{itemize}
- \item Not ACID (not required by S3 spec) / not linearizable
+ \item Any \textbf{conflict-free replicated data type} (CRDT)
+ \vspace{1em}
+ \item Non-transactional key-value stores such as S3 are equivalent to a simple CRDT:\\
+ a \textbf{last-writer-wins registry}
+ \vspace{1em}
+ \item \textbf{Read-after-write consistency} can be implemented
+ using quorums on read and write operations
\vspace{1em}
- \item \textbf{Read-after-write consistency}\\
- {\footnotesize (stronger than eventual consistency)}
+ \item \textbf{Monotonicity of reads} can be implemented with repair-on-read\\
+ (makes reads more costly, not implemented in Garage)
\end{itemize}
\end{frame}