diff options
author | Alex Auvolat <alex@adnab.me> | 2023-10-20 15:48:37 +0200 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2023-10-20 15:48:37 +0200 |
commit | 654775308ed03abd68941b07b2ad367a4de5c57f (patch) | |
tree | 70c947029aba6b7fe40f286be2335b2db322c587 /script/jepsen.garage/src/jepsen | |
parent | f5b09727815523a1bd4ba5f62d892b2b45b5bed6 (diff) | |
download | garage-654775308ed03abd68941b07b2ad367a4de5c57f.tar.gz garage-654775308ed03abd68941b07b2ad367a4de5c57f.zip |
jepsen: add cluster reconfiguration nemesis
Diffstat (limited to 'script/jepsen.garage/src/jepsen')
-rw-r--r-- | script/jepsen.garage/src/jepsen/garage.clj | 27 | ||||
-rw-r--r-- | script/jepsen.garage/src/jepsen/garage/nemesis.clj | 91 |
2 files changed, 104 insertions, 14 deletions
diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index a566d9be..65a92a76 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -10,6 +10,7 @@ [jepsen.os.debian :as debian] [jepsen.garage [daemon :as grg] + [nemesis :as grgNemesis] [reg :as reg] [set :as set]])) @@ -20,6 +21,11 @@ "set1" set/workload1 "set2" set/workload2}) +(def scenari + "A map of scenari to the associated nemesis" + {"cp" grgNemesis/scenario-cp + "r" grgNemesis/scenario-r}) + (def patches "A map of patch names to Garage builds" {"default" "v0.9.0" @@ -31,6 +37,9 @@ [["-p" "--patch NAME" "Garage patch to use" :default "default" :validate [patches (cli/one-of patches)]] + ["-s" "--scenario NAME" "Nemesis scenario to run" + :default "cp" + :validate [scenari (cli/one-of scenari)]] ["-r" "--rate HZ" "Approximate number of requests per second, per thread." :default 10 :parse-fn read-string @@ -40,7 +49,7 @@ :parse-fn parse-long :validate [pos? "Must be a positive integer."]] ["-w" "--workload NAME" "Workload of test to run" - :default "reg" + :default "reg1" :validate [workloads (cli/one-of workloads)]]]) (defn garage-test @@ -48,6 +57,7 @@ :concurrency, ...), constructs a test map." [opts] (let [workload ((get workloads (:workload opts)) opts) + scenario ((get scenari (:scenario opts)) opts) garage-version (get patches (:patch opts))] (merge tests/noop-test opts @@ -60,25 +70,14 @@ (->> (:generator workload) (gen/stagger (/ (:rate opts))) - (gen/nemesis - (cycle [(gen/sleep 5) - {:type :info, :f :partition-start} - (gen/sleep 5) - {:type :info, :f :clock-scramble} - (gen/sleep 5) - {:type :info, :f :partition-stop} - (gen/sleep 5) - {:type :info, :f :clock-scramble}])) + (gen/nemesis (:generator scenario)) (gen/time-limit (:time-limit opts))) (gen/log "Healing cluster") (gen/nemesis (gen/once {:type :info, :f :partition-stop})) (gen/log "Waiting for recovery") (gen/sleep 10) (gen/clients (:final-generator workload))) - :nemesis (nemesis/compose - {{:partition-start :start - :partition-stop :stop} (nemesis/partition-random-halves) - {:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)}) + :nemesis (:nemesis scenario) :checker (checker/compose {:perf (checker/perf) :workload (:checker workload)}) diff --git a/script/jepsen.garage/src/jepsen/garage/nemesis.clj b/script/jepsen.garage/src/jepsen/garage/nemesis.clj new file mode 100644 index 00000000..7cd9306e --- /dev/null +++ b/script/jepsen.garage/src/jepsen/garage/nemesis.clj @@ -0,0 +1,91 @@ +(ns jepsen.garage.nemesis + (:require [clojure.tools.logging :refer :all] + [jepsen [control :as c] + [core :as jepsen] + [generator :as gen] + [nemesis :as nemesis]] + [jepsen.garage.daemon :as grg] + [jepsen.control.util :as cu])) + +(defn configure-present! + "Configure node to be active in new cluster layout" + [test node] + (info "configure-present!" node) + (let [node-id (c/on node (c/exec grg/binary :node :id :-q))] + (c/on + (jepsen/primary test) + (c/exec grg/binary :layout :assign (subs node-id 0 16) :-c :1G)))) + +(defn configure-absent! + "Configure node to be active in new cluster layout" + [test node] + (info "configure-absent!" node) + (let [node-id (c/on node (c/exec grg/binary :node :id :-q))] + (c/on + (jepsen/primary test) + (c/exec grg/binary :layout :assign (subs node-id 0 16) :-g)))) + +(defn finalize-config! + "Apply the proposed cluster layout" + [test] + (let [layout-show (c/on (jepsen/primary test) (c/exec grg/binary :layout :show)) + [_ layout-next-version] (re-find #"apply --version (\d+)\n" layout-show)] + (info "layout show: " layout-show "; next-version: " layout-next-version) + (c/on (jepsen/primary test) + (c/exec grg/binary :layout :apply :--version layout-next-version)))) + +(defn reconfigure-subset + "Reconfigure cluster with only a subset of nodes" + [cnt] + (reify nemesis/Nemesis + (setup! [this test] this) + + (invoke! [this test op] op + (case (:f op) + :start + (let [[keep-nodes remove-nodes] + (->> (:nodes test) + shuffle + (split-at cnt))] + (info "layout split: keep " keep-nodes ", remove " remove-nodes) + (run! #(configure-present! test %) keep-nodes) + (run! #(configure-absent! test %) remove-nodes) + (finalize-config! test) + (assoc op :value keep-nodes)) + :stop + (do + (info "layout un-split: all nodes=" (:nodes test)) + (run! #(configure-present! test %) (:nodes test)) + (finalize-config! test) + (assoc op :value (:nodes test))))) + + (teardown! [this test] this))) + +(defn scenario-cp + "Clock scramble + parittion scenario" + [opts] + {:generator (cycle [(gen/sleep 5) + {:type :info, :f :partition-start} + (gen/sleep 5) + {:type :info, :f :clock-scramble} + (gen/sleep 5) + {:type :info, :f :partition-stop} + (gen/sleep 5) + {:type :info, :f :clock-scramble}]) + :nemesis (nemesis/compose + {{:partition-start :start + :partition-stop :stop} (nemesis/partition-random-halves) + {:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})}) + +(defn scenario-r + "Cluster reconfiguration scenario" + [opts] + {:generator (cycle [(gen/sleep 5) + {:type :info, :f :reconfigure-start} + (gen/sleep 5) + {:type :info, :f :reconfigure-start} + (gen/sleep 5) + {:type :info, :f :reconfigure-stop}]) + :nemesis (nemesis/compose + {{:reconfigure-start :start + :reconfigure-stop :stop} (reconfigure-subset 3)})}) |