From 74e50eddddf319ce1a32a9b57b3825ea40db3a6c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 19 Oct 2023 14:34:19 +0200 Subject: jepsen: refactoring --- script/jepsen.garage/README.md | 33 +++++++++++++++++++++++ script/jepsen.garage/src/jepsen/garage.clj | 8 +++--- script/jepsen.garage/src/jepsen/garage/daemon.clj | 2 +- script/jepsen.garage/src/jepsen/garage/reg.clj | 27 +++++++++---------- script/jepsen.garage/src/jepsen/garage/set.clj | 30 +++++++++++++-------- 5 files changed, 70 insertions(+), 30 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 1bba32ec..5cb98e4d 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -31,6 +31,39 @@ lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrenc lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrency 50 --workload set2 ``` +## Results + +**Register linear, without timestamp patch** + +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg --ops-per-key 100` + +Results: fails with a simple clock-scramble nemesis. + +Explanation: without the timestamp patch, nodes will create objects using their +local clock only as a timestamp, so the ordering will be all over the place if +clocks are scrambled. + +**Register linear, with timestamp patch** + +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg --ops-per-key 100 -I` + +Results: + +- No failure with clock-scramble nemesis +- Fails with clock-scramble nemesis + partition nemesis + +Explanation: S3 objects are not meant to behave like linearizable registers. TODO explain using a counter-example + +**Read-after-write CRDT register model**: TODO: determine the expected semantics of such a register, code a checker and show that results are correct + +**Set, basic test** + +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload set1 --ops-per-key 100` + +Results: + +- ListObjects returns objects not within prefix???? + ## License Copyright © 2023 Alex Auvolat diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 891fdf12..c8865248 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -56,12 +56,12 @@ (gen/stagger (/ (:rate opts))) (gen/nemesis (cycle [(gen/sleep 5) - ;{:type :info, :f :partition-start} - ;(gen/sleep 5) + {:type :info, :f :partition-start} + (gen/sleep 5) {:type :info, :f :clock-scramble} (gen/sleep 5) - ;{:type :info, :f :partition-stop} - ;(gen/sleep 5) + {:type :info, :f :partition-stop} + (gen/sleep 5) {:type :info, :f :clock-scramble}])) (gen/time-limit (:time-limit opts))) (gen/log "Healing cluster") diff --git a/script/jepsen.garage/src/jepsen/garage/daemon.clj b/script/jepsen.garage/src/jepsen/garage/daemon.clj index 2dcfa94f..f924dcd6 100644 --- a/script/jepsen.garage/src/jepsen/garage/daemon.clj +++ b/script/jepsen.garage/src/jepsen/garage/daemon.clj @@ -43,7 +43,7 @@ "rpc_bind_addr = \"0.0.0.0:3901\"\n" "rpc_public_addr = \"" node ":3901\"\n" "db_engine = \"lmdb\"\n" - "replication_mode = \"3\"\n" + "replication_mode = \"2\"\n" "data_dir = \"" data-dir "\"\n" "metadata_dir = \"" meta-dir "\"\n" "[s3_api]\n" diff --git a/script/jepsen.garage/src/jepsen/garage/reg.clj b/script/jepsen.garage/src/jepsen/garage/reg.clj index d5404c6b..f3d5cec5 100644 --- a/script/jepsen.garage/src/jepsen/garage/reg.clj +++ b/script/jepsen.garage/src/jepsen/garage/reg.clj @@ -9,6 +9,7 @@ [generator :as gen] [independent :as independent] [nemesis :as nemesis] + [util :as util] [tests :as tests]] [jepsen.checker.timeline :as timeline] [jepsen.control.util :as cu] @@ -25,33 +26,31 @@ (defrecord RegClient [creds] client/Client (open! [this test node] - (let [creds (grg/creds node)] - (info node "s3 credentials:" creds) - (assoc this :creds creds))) + (assoc this :creds (grg/creds node))) (setup! [this test]) (invoke! [this test op] (let [[k v] (:value op)] (case (:f op) :read - (try+ + (util/timeout + 10000 + (assoc op :type :fail, :error ::timeout) (let [value (s3/get (:creds this) k)] - (assoc op :type :ok, :value (independent/tuple k value))) - (catch (re-find #"Unavailable" (.getMessage %)) ex - (assoc op :type :fail, :error [:s3-error (.getMessage ex)]))) + (assoc op :type :ok, :value (independent/tuple k value)))) :write - (try+ + (util/timeout + 10000 + (assoc op :type :info, :error ::timeout) (do (s3/put (:creds this) k v) - (assoc op :type :ok)) - (catch (re-find #"Unavailable" (.getMessage %)) ex - (assoc op :type :fail, :error [:s3-error (.getMessage ex)])))))) + (assoc op :type :ok)))))) (teardown! [this test]) (close! [this test])) (defn workload "Tests linearizable reads and writes" [opts] - {:client (client/timeout 10 (RegClient. nil)) + {:client (RegClient. nil) :checker (independent/checker (checker/compose {:linear (checker/linearizable @@ -59,8 +58,8 @@ :algorithm :linear}) :timeline (timeline/html)})) :generator (independent/concurrent-generator - (/ (:concurrency opts) 10) ; divide threads in 10 groups - (range) ; working on 10 keys + 10 + (range) (fn [k] (->> (gen/mix [op-get op-put op-del]) diff --git a/script/jepsen.garage/src/jepsen/garage/set.clj b/script/jepsen.garage/src/jepsen/garage/set.clj index 6bbc1ee0..ff597095 100644 --- a/script/jepsen.garage/src/jepsen/garage/set.clj +++ b/script/jepsen.garage/src/jepsen/garage/set.clj @@ -11,6 +11,7 @@ [generator :as gen] [independent :as independent] [nemesis :as nemesis] + [util :as util] [tests :as tests]] [jepsen.checker.timeline :as timeline] [jepsen.control.util :as cu] @@ -26,23 +27,29 @@ (defrecord SetClient [creds] client/Client (open! [this test node] - (let [creds (grg/creds node)] - (info node "s3 credentials:" creds) - (assoc this :creds creds))) + (assoc this :creds (grg/creds node))) (setup! [this test]) (invoke! [this test op] (let [[k v] (:value op) prefix (str "set" k "/")] (case (:f op) :add - (do - (s3/put (:creds this) (str prefix v) "present") - (assoc op :type :ok)) + (util/timeout + 10000 + (assoc op :type :info, :error ::timeout) + (do + (s3/put (:creds this) (str prefix v) "present") + (assoc op :type :ok))) :read - (let [items (s3/list (:creds this) prefix) - items-stripped (map (fn [o] (str/replace-first o prefix "")) items) - items-set (set (map read-string items-stripped))] - (assoc op :type :ok, :value (independent/tuple k items-set)))))) + (util/timeout + 10000 + (assoc op :type :fail, :error ::timeout) + (let [items (s3/list (:creds this) prefix) + items-stripped (map (fn [o] + (assert (str/starts-with? o prefix)) + (str/replace-first o prefix "")) items) + items-set (set (map parse-long items-stripped))] + (assoc op :type :ok, :value (independent/tuple k items-set))))))) (teardown! [this test]) (close! [this test])) @@ -110,6 +117,7 @@ 10 (range) (fn [k] - (gen/mix [op-add-rand100 op-read])))}) + (->> (gen/mix [op-add-rand100 op-read]) + (gen/limit (:ops-per-key opts)))))}) -- cgit v1.2.3