From 654775308ed03abd68941b07b2ad367a4de5c57f Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 20 Oct 2023 15:48:37 +0200 Subject: jepsen: add cluster reconfiguration nemesis --- script/jepsen.garage/Vagrantfile | 1 + script/jepsen.garage/nodes.vagrant | 1 + script/jepsen.garage/src/jepsen/garage.clj | 27 ++++--- script/jepsen.garage/src/jepsen/garage/nemesis.clj | 91 ++++++++++++++++++++++ 4 files changed, 106 insertions(+), 14 deletions(-) create mode 100644 script/jepsen.garage/src/jepsen/garage/nemesis.clj diff --git a/script/jepsen.garage/Vagrantfile b/script/jepsen.garage/Vagrantfile index d0e545d3..1125bccf 100644 --- a/script/jepsen.garage/Vagrantfile +++ b/script/jepsen.garage/Vagrantfile @@ -27,4 +27,5 @@ Vagrant.configure("2") do |config| config.vm.define "n3" do |config| vm(config, "n3", "192.168.56.23") end config.vm.define "n4" do |config| vm(config, "n4", "192.168.56.24") end config.vm.define "n5" do |config| vm(config, "n5", "192.168.56.25") end + config.vm.define "n6" do |config| vm(config, "n6", "192.168.56.26") end end diff --git a/script/jepsen.garage/nodes.vagrant b/script/jepsen.garage/nodes.vagrant index df7c4622..3f7e2b42 100644 --- a/script/jepsen.garage/nodes.vagrant +++ b/script/jepsen.garage/nodes.vagrant @@ -3,3 +3,4 @@ 192.168.56.23 192.168.56.24 192.168.56.25 +192.168.56.26 diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index a566d9be..65a92a76 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -10,6 +10,7 @@ [jepsen.os.debian :as debian] [jepsen.garage [daemon :as grg] + [nemesis :as grgNemesis] [reg :as reg] [set :as set]])) @@ -20,6 +21,11 @@ "set1" set/workload1 "set2" set/workload2}) +(def scenari + "A map of scenari to the associated nemesis" + {"cp" grgNemesis/scenario-cp + "r" grgNemesis/scenario-r}) + (def patches "A map of patch names to Garage builds" {"default" "v0.9.0" @@ -31,6 +37,9 @@ [["-p" "--patch NAME" "Garage patch to use" :default "default" :validate [patches (cli/one-of patches)]] + ["-s" "--scenario NAME" "Nemesis scenario to run" + :default "cp" + :validate [scenari (cli/one-of scenari)]] ["-r" "--rate HZ" "Approximate number of requests per second, per thread." :default 10 :parse-fn read-string @@ -40,7 +49,7 @@ :parse-fn parse-long :validate [pos? "Must be a positive integer."]] ["-w" "--workload NAME" "Workload of test to run" - :default "reg" + :default "reg1" :validate [workloads (cli/one-of workloads)]]]) (defn garage-test @@ -48,6 +57,7 @@ :concurrency, ...), constructs a test map." [opts] (let [workload ((get workloads (:workload opts)) opts) + scenario ((get scenari (:scenario opts)) opts) garage-version (get patches (:patch opts))] (merge tests/noop-test opts @@ -60,25 +70,14 @@ (->> (:generator workload) (gen/stagger (/ (:rate opts))) - (gen/nemesis - (cycle [(gen/sleep 5) - {:type :info, :f :partition-start} - (gen/sleep 5) - {:type :info, :f :clock-scramble} - (gen/sleep 5) - {:type :info, :f :partition-stop} - (gen/sleep 5) - {:type :info, :f :clock-scramble}])) + (gen/nemesis (:generator scenario)) (gen/time-limit (:time-limit opts))) (gen/log "Healing cluster") (gen/nemesis (gen/once {:type :info, :f :partition-stop})) (gen/log "Waiting for recovery") (gen/sleep 10) (gen/clients (:final-generator workload))) - :nemesis (nemesis/compose - {{:partition-start :start - :partition-stop :stop} (nemesis/partition-random-halves) - {:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)}) + :nemesis (:nemesis scenario) :checker (checker/compose {:perf (checker/perf) :workload (:checker workload)}) diff --git a/script/jepsen.garage/src/jepsen/garage/nemesis.clj b/script/jepsen.garage/src/jepsen/garage/nemesis.clj new file mode 100644 index 00000000..7cd9306e --- /dev/null +++ b/script/jepsen.garage/src/jepsen/garage/nemesis.clj @@ -0,0 +1,91 @@ +(ns jepsen.garage.nemesis + (:require [clojure.tools.logging :refer :all] + [jepsen [control :as c] + [core :as jepsen] + [generator :as gen] + [nemesis :as nemesis]] + [jepsen.garage.daemon :as grg] + [jepsen.control.util :as cu])) + +(defn configure-present! + "Configure node to be active in new cluster layout" + [test node] + (info "configure-present!" node) + (let [node-id (c/on node (c/exec grg/binary :node :id :-q))] + (c/on + (jepsen/primary test) + (c/exec grg/binary :layout :assign (subs node-id 0 16) :-c :1G)))) + +(defn configure-absent! + "Configure node to be active in new cluster layout" + [test node] + (info "configure-absent!" node) + (let [node-id (c/on node (c/exec grg/binary :node :id :-q))] + (c/on + (jepsen/primary test) + (c/exec grg/binary :layout :assign (subs node-id 0 16) :-g)))) + +(defn finalize-config! + "Apply the proposed cluster layout" + [test] + (let [layout-show (c/on (jepsen/primary test) (c/exec grg/binary :layout :show)) + [_ layout-next-version] (re-find #"apply --version (\d+)\n" layout-show)] + (info "layout show: " layout-show "; next-version: " layout-next-version) + (c/on (jepsen/primary test) + (c/exec grg/binary :layout :apply :--version layout-next-version)))) + +(defn reconfigure-subset + "Reconfigure cluster with only a subset of nodes" + [cnt] + (reify nemesis/Nemesis + (setup! [this test] this) + + (invoke! [this test op] op + (case (:f op) + :start + (let [[keep-nodes remove-nodes] + (->> (:nodes test) + shuffle + (split-at cnt))] + (info "layout split: keep " keep-nodes ", remove " remove-nodes) + (run! #(configure-present! test %) keep-nodes) + (run! #(configure-absent! test %) remove-nodes) + (finalize-config! test) + (assoc op :value keep-nodes)) + :stop + (do + (info "layout un-split: all nodes=" (:nodes test)) + (run! #(configure-present! test %) (:nodes test)) + (finalize-config! test) + (assoc op :value (:nodes test))))) + + (teardown! [this test] this))) + +(defn scenario-cp + "Clock scramble + parittion scenario" + [opts] + {:generator (cycle [(gen/sleep 5) + {:type :info, :f :partition-start} + (gen/sleep 5) + {:type :info, :f :clock-scramble} + (gen/sleep 5) + {:type :info, :f :partition-stop} + (gen/sleep 5) + {:type :info, :f :clock-scramble}]) + :nemesis (nemesis/compose + {{:partition-start :start + :partition-stop :stop} (nemesis/partition-random-halves) + {:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})}) + +(defn scenario-r + "Cluster reconfiguration scenario" + [opts] + {:generator (cycle [(gen/sleep 5) + {:type :info, :f :reconfigure-start} + (gen/sleep 5) + {:type :info, :f :reconfigure-start} + (gen/sleep 5) + {:type :info, :f :reconfigure-stop}]) + :nemesis (nemesis/compose + {{:reconfigure-start :start + :reconfigure-stop :stop} (reconfigure-subset 3)})}) -- cgit v1.2.3