aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Auvolat <alex@adnab.me>2023-10-20 15:48:37 +0200
committerAlex Auvolat <alex@adnab.me>2023-10-20 15:48:37 +0200
commit654775308ed03abd68941b07b2ad367a4de5c57f (patch)
tree70c947029aba6b7fe40f286be2335b2db322c587
parentf5b09727815523a1bd4ba5f62d892b2b45b5bed6 (diff)
downloadgarage-654775308ed03abd68941b07b2ad367a4de5c57f.tar.gz
garage-654775308ed03abd68941b07b2ad367a4de5c57f.zip
jepsen: add cluster reconfiguration nemesis
-rw-r--r--script/jepsen.garage/Vagrantfile1
-rw-r--r--script/jepsen.garage/nodes.vagrant1
-rw-r--r--script/jepsen.garage/src/jepsen/garage.clj27
-rw-r--r--script/jepsen.garage/src/jepsen/garage/nemesis.clj91
4 files changed, 106 insertions, 14 deletions
diff --git a/script/jepsen.garage/Vagrantfile b/script/jepsen.garage/Vagrantfile
index d0e545d3..1125bccf 100644
--- a/script/jepsen.garage/Vagrantfile
+++ b/script/jepsen.garage/Vagrantfile
@@ -27,4 +27,5 @@ Vagrant.configure("2") do |config|
config.vm.define "n3" do |config| vm(config, "n3", "192.168.56.23") end
config.vm.define "n4" do |config| vm(config, "n4", "192.168.56.24") end
config.vm.define "n5" do |config| vm(config, "n5", "192.168.56.25") end
+ config.vm.define "n6" do |config| vm(config, "n6", "192.168.56.26") end
end
diff --git a/script/jepsen.garage/nodes.vagrant b/script/jepsen.garage/nodes.vagrant
index df7c4622..3f7e2b42 100644
--- a/script/jepsen.garage/nodes.vagrant
+++ b/script/jepsen.garage/nodes.vagrant
@@ -3,3 +3,4 @@
192.168.56.23
192.168.56.24
192.168.56.25
+192.168.56.26
diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj
index a566d9be..65a92a76 100644
--- a/script/jepsen.garage/src/jepsen/garage.clj
+++ b/script/jepsen.garage/src/jepsen/garage.clj
@@ -10,6 +10,7 @@
[jepsen.os.debian :as debian]
[jepsen.garage
[daemon :as grg]
+ [nemesis :as grgNemesis]
[reg :as reg]
[set :as set]]))
@@ -20,6 +21,11 @@
"set1" set/workload1
"set2" set/workload2})
+(def scenari
+ "A map of scenario names to the associated nemesis scenarios"
+ {"cp" grgNemesis/scenario-cp
+ "r" grgNemesis/scenario-r})
+
(def patches
"A map of patch names to Garage builds"
{"default" "v0.9.0"
@@ -31,6 +37,9 @@
[["-p" "--patch NAME" "Garage patch to use"
:default "default"
:validate [patches (cli/one-of patches)]]
+ ["-s" "--scenario NAME" "Nemesis scenario to run"
+ :default "cp"
+ :validate [scenari (cli/one-of scenari)]]
["-r" "--rate HZ" "Approximate number of requests per second, per thread."
:default 10
:parse-fn read-string
@@ -40,7 +49,7 @@
:parse-fn parse-long
:validate [pos? "Must be a positive integer."]]
["-w" "--workload NAME" "Workload of test to run"
- :default "reg"
+ :default "reg1"
:validate [workloads (cli/one-of workloads)]]])
(defn garage-test
@@ -48,6 +57,7 @@
:concurrency, ...), constructs a test map."
[opts]
(let [workload ((get workloads (:workload opts)) opts)
+ scenario ((get scenari (:scenario opts)) opts)
garage-version (get patches (:patch opts))]
(merge tests/noop-test
opts
@@ -60,25 +70,14 @@
(->>
(:generator workload)
(gen/stagger (/ (:rate opts)))
- (gen/nemesis
- (cycle [(gen/sleep 5)
- {:type :info, :f :partition-start}
- (gen/sleep 5)
- {:type :info, :f :clock-scramble}
- (gen/sleep 5)
- {:type :info, :f :partition-stop}
- (gen/sleep 5)
- {:type :info, :f :clock-scramble}]))
+ (gen/nemesis (:generator scenario))
(gen/time-limit (:time-limit opts)))
(gen/log "Healing cluster")
(gen/nemesis (gen/once {:type :info, :f :partition-stop}))
(gen/log "Waiting for recovery")
(gen/sleep 10)
(gen/clients (:final-generator workload)))
- :nemesis (nemesis/compose
- {{:partition-start :start
- :partition-stop :stop} (nemesis/partition-random-halves)
- {:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})
+ :nemesis (:nemesis scenario)
:checker (checker/compose
{:perf (checker/perf)
:workload (:checker workload)})
diff --git a/script/jepsen.garage/src/jepsen/garage/nemesis.clj b/script/jepsen.garage/src/jepsen/garage/nemesis.clj
new file mode 100644
index 00000000..7cd9306e
--- /dev/null
+++ b/script/jepsen.garage/src/jepsen/garage/nemesis.clj
@@ -0,0 +1,91 @@
+(ns jepsen.garage.nemesis
+ (:require [clojure.tools.logging :refer :all]
+ [jepsen [control :as c]
+ [core :as jepsen]
+ [generator :as gen]
+ [nemesis :as nemesis]]
+ [jepsen.garage.daemon :as grg]
+ [jepsen.control.util :as cu]))
+
+(defn configure-present!
+ "Configure node to be active in new cluster layout"
+ [test node]
+ (info "configure-present!" node)
+ (let [node-id (c/on node (c/exec grg/binary :node :id :-q))]
+ (c/on
+ (jepsen/primary test)
+ (c/exec grg/binary :layout :assign (subs node-id 0 16) :-c :1G))))
+
+(defn configure-absent!
+ "Configure node to be absent (gateway-only, no storage capacity) in new cluster layout"
+ [test node]
+ (info "configure-absent!" node)
+ (let [node-id (c/on node (c/exec grg/binary :node :id :-q))]
+ (c/on
+ (jepsen/primary test)
+ (c/exec grg/binary :layout :assign (subs node-id 0 16) :-g))))
+
+(defn finalize-config!
+ "Apply the proposed cluster layout"
+ [test]
+ (let [layout-show (c/on (jepsen/primary test) (c/exec grg/binary :layout :show))
+ [_ layout-next-version] (re-find #"apply --version (\d+)\n" layout-show)]
+ (info "layout show: " layout-show "; next-version: " layout-next-version)
+ (c/on (jepsen/primary test)
+ (c/exec grg/binary :layout :apply :--version layout-next-version))))
+
+(defn reconfigure-subset
+ "Reconfigure cluster with only a subset of nodes"
+ [cnt]
+ (reify nemesis/Nemesis
+ (setup! [this test] this)
+
+ (invoke! [this test op] op
+ (case (:f op)
+ :start
+ (let [[keep-nodes remove-nodes]
+ (->> (:nodes test)
+ shuffle
+ (split-at cnt))]
+ (info "layout split: keep " keep-nodes ", remove " remove-nodes)
+ (run! #(configure-present! test %) keep-nodes)
+ (run! #(configure-absent! test %) remove-nodes)
+ (finalize-config! test)
+ (assoc op :value keep-nodes))
+ :stop
+ (do
+ (info "layout un-split: all nodes=" (:nodes test))
+ (run! #(configure-present! test %) (:nodes test))
+ (finalize-config! test)
+ (assoc op :value (:nodes test)))))
+
+ (teardown! [this test] this)))
+
+(defn scenario-cp
+ "Clock scramble + partition scenario"
+ [opts]
+ {:generator (cycle [(gen/sleep 5)
+ {:type :info, :f :partition-start}
+ (gen/sleep 5)
+ {:type :info, :f :clock-scramble}
+ (gen/sleep 5)
+ {:type :info, :f :partition-stop}
+ (gen/sleep 5)
+ {:type :info, :f :clock-scramble}])
+ :nemesis (nemesis/compose
+ {{:partition-start :start
+ :partition-stop :stop} (nemesis/partition-random-halves)
+ {:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})})
+
+(defn scenario-r
+ "Cluster reconfiguration scenario"
+ [opts]
+ {:generator (cycle [(gen/sleep 5)
+ {:type :info, :f :reconfigure-start}
+ (gen/sleep 5)
+ {:type :info, :f :reconfigure-start}
+ (gen/sleep 5)
+ {:type :info, :f :reconfigure-stop}])
+ :nemesis (nemesis/compose
+ {{:reconfigure-start :start
+ :reconfigure-stop :stop} (reconfigure-subset 3)})})