From b3bf16ee27b061a3a091022e718b2994365f945c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 18 Oct 2023 17:51:34 +0200 Subject: make jepsen test more robust: handle errors and timeouts, fixed access key --- script/jepsen.garage/src/jepsen/garage.clj | 17 ++++++++++++----- script/jepsen.garage/src/jepsen/garage/daemon.clj | 23 ++++++++++------------- script/jepsen.garage/src/jepsen/garage/reg.clj | 22 ++++++++++++++-------- 3 files changed, 36 insertions(+), 26 deletions(-) (limited to 'script/jepsen.garage/src') diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 5816512b..891fdf12 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -41,7 +41,7 @@ [opts] (let [workload ((get workloads (:workload opts)) opts) garage-version (if (:increasing-timestamps opts) - "03490d41d58576d7b3bcf977b2726d72a3a66ada" + "d146cdd5b66ca1d3ed65ce93ca42c6db22defc09" "v0.9.0")] (merge tests/noop-test opts @@ -56,16 +56,23 @@ (gen/stagger (/ (:rate opts))) (gen/nemesis (cycle [(gen/sleep 5) - {:type :info, :f :start} + ;{:type :info, :f :partition-start} + ;(gen/sleep 5) + {:type :info, :f :clock-scramble} (gen/sleep 5) - {:type :info, :f :stop}])) + ;{:type :info, :f :partition-stop} + ;(gen/sleep 5) + {:type :info, :f :clock-scramble}])) (gen/time-limit (:time-limit opts))) (gen/log "Healing cluster") - (gen/nemesis (gen/once {:type :info, :f :stop})) + (gen/nemesis (gen/once {:type :info, :f :partition-stop})) (gen/log "Waiting for recovery") (gen/sleep 10) (gen/clients (:final-generator workload))) - :nemesis (nemesis/partition-random-halves) + :nemesis (nemesis/compose + {{:partition-start :start + :partition-stop :stop} (nemesis/partition-random-halves) + {:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)}) :checker (checker/compose {:perf (checker/perf) :workload (:checker workload)}) diff --git a/script/jepsen.garage/src/jepsen/garage/daemon.clj b/script/jepsen.garage/src/jepsen/garage/daemon.clj index 81163521..2dcfa94f 100644 --- a/script/jepsen.garage/src/jepsen/garage/daemon.clj +++ b/script/jepsen.garage/src/jepsen/garage/daemon.clj @@ -15,7 +15,8 @@ (def pidfile (str base-dir "/garage.pid")) (def admin-token "icanhazadmin") -(def access-key "jepsen") +(def access-key-id "GK8bfb6a51286071c6c9cd8bc3") +(def secret-access-key "b0be95f71c1c6f16858a9edf395078b75c12ecb6b1c03385c4ae92076e4994a3") (def bucket-name "jepsen") ; THE GARAGE DB @@ -78,10 +79,10 @@ (c/trace (c/exec binary :layout :apply :--version 1) (info node "garage status:" (c/exec binary :status)) - (c/exec binary :key :create access-key) + (c/exec binary :key :import access-key-id secret-access-key :--yes) (c/exec binary :bucket :create bucket-name) - (c/exec binary :bucket :allow :--read :--write bucket-name :--key access-key) - (info node "key info: " (c/exec binary :key :info access-key)))) + (c/exec binary :bucket :allow :--read :--write bucket-name :--key access-key-id) + (info node "key info: " (c/exec binary :key :info access-key-id)))) (defn db "Garage DB for a particular version" @@ -122,13 +123,9 @@ (defn creds "Obtain Garage credentials for node" [node] - (let [key-info (c/on node (c/exec binary :key :info access-key :--show-secret)) - [_ ak sk] (re-matches - #"(?s).*Key ID: (.*)\nSecret key: (.*)\nCan create.*" - key-info)] - {:access-key ak - :secret-key sk - :endpoint (str "http://" node ":3900") - :bucket bucket-name - :client-config {:path-style-access-enabled true}})) + {:access-key access-key-id + :secret-key secret-access-key + :endpoint (str "http://" node ":3900") + :bucket bucket-name + :client-config {:path-style-access-enabled true}}) diff --git a/script/jepsen.garage/src/jepsen/garage/reg.clj b/script/jepsen.garage/src/jepsen/garage/reg.clj index 3a2c1d55..d5404c6b 100644 --- a/script/jepsen.garage/src/jepsen/garage/reg.clj +++ b/script/jepsen.garage/src/jepsen/garage/reg.clj @@ -33,19 +33,25 @@ (let [[k v] (:value op)] (case (:f op) :read - (let [value (s3/get (:creds this) k)] - (assoc op :type :ok, :value (independent/tuple k value))) + (try+ + (let [value (s3/get (:creds this) k)] + (assoc op :type :ok, :value (independent/tuple k value))) + (catch (re-find #"Unavailable" (.getMessage %)) ex + (assoc op :type :fail, :error [:s3-error (.getMessage ex)]))) :write - (do - (s3/put (:creds this) k v) - (assoc op :type :ok))))) + (try+ + (do + (s3/put (:creds this) k v) + (assoc op :type :ok)) + (catch (re-find #"Unavailable" (.getMessage %)) ex + (assoc op :type :fail, :error [:s3-error (.getMessage ex)])))))) (teardown! [this test]) (close! [this test])) (defn workload "Tests linearizable reads and writes" [opts] - {:client (RegClient. nil) + {:client (client/timeout 10 (RegClient. nil)) :checker (independent/checker (checker/compose {:linear (checker/linearizable @@ -53,8 +59,8 @@ :algorithm :linear}) :timeline (timeline/html)})) :generator (independent/concurrent-generator - 10 - (range) + (/ (:concurrency opts) 10) ; divide threads in 10 groups + (range) ; working on 10 keys (fn [k] (->> (gen/mix [op-get op-put op-del]) -- cgit v1.2.3