about summary refs log tree commit diff
diff options
context:
space:
mode:
author Alex Auvolat <alex@adnab.me> 2023-10-24 15:44:05 +0200
committer Alex Auvolat <alex@adnab.me> 2023-10-24 15:44:05 +0200
commit d13bde5e26098313e789dd3793368a635cf1cc16 (patch)
tree f82094e98a310992c62790f9e0c935fa9455c33b
parent d2c365767b0a4cb70dcbb1d20b75f41e0f9c20c8 (diff)
download garage-d13bde5e26098313e789dd3793368a635cf1cc16.tar.gz
garage-d13bde5e26098313e789dd3793368a635cf1cc16.zip
jepsen: set1 and set2 don't fail anymore ??
-rw-r--r-- script/jepsen.garage/README.md 20
-rw-r--r-- script/jepsen.garage/src/jepsen/garage.clj 3
-rw-r--r-- script/jepsen.garage/src/jepsen/garage/nemesis.clj 64
3 files changed, 47 insertions, 40 deletions
diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md
index 06379d25..e1dc6953 100644
--- a/script/jepsen.garage/README.md
+++ b/script/jepsen.garage/README.md
@@ -69,9 +69,9 @@ Results with timestamp patch (`--patch tsfix2`):
- No failures with clock-scramble nemesis + partition nemesis (`--scenario cp`).
This proves that `tsfix2` (PR#543) does improve consistency.
-- **Fails with layout reconfiguration nemesis** (`--scenario r`)
- (TODO: note down the run id of a failed run)
- (TODO: test more and investigate).
+- **Fails with layout reconfiguration nemesis** (`--scenario r`).
+ Example of a failed run: `garage reg2/20231024T120806.899+0200`.
+ TODO: investigate.
This is the failure mode we are looking for and trying to fix for NLnet task 3.
@@ -83,12 +83,11 @@ Results:
- For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run
-- Failures were not yet achieved with only the layout reconfiguration nemesis, although they should be.
+- Does not seem to fail with only the layout reconfiguration nemesis (>20 runs), although theoretically it could
-- **Fails with partition + layout reconfiguration nemesis** (`--scenario pr`)
- (TODO: note down the run id of a failed run)
- (TODO: test more and investigate).
- This is the failure mode we are looking for and trying to fix for NLnet task 3.
+- Does not seem to fail with the layout reconfiguration + partition nemesis (<10 runs), although theoretically it could
+
+TODO: make it fail!!!
### Set, continuous test (interspersed reads and writes)
@@ -99,10 +98,9 @@ Results:
- For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run
-- Failures were not yet achieved with only the layout reconfiguration nemesis, although they should be.
+- Does not seem to fail with the clock scrambler + partition + layout reconfiguration nemesis (>10 runs), although theoretically it could
-- TODO: failures should be achieved with `--scenario pr`? Even with 4 or 5 consecutive test runs, no failures were achieved, why?
- (TODO: note down the run id of a failed run)
+TODO: make it fail!!!
## Investigating (and fixing) errors
diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj
index 6d64a1b8..a67399e0 100644
--- a/script/jepsen.garage/src/jepsen/garage.clj
+++ b/script/jepsen.garage/src/jepsen/garage.clj
@@ -26,7 +26,8 @@
{"c" grgNemesis/scenario-c
"cp" grgNemesis/scenario-cp
"r" grgNemesis/scenario-r
- "pr" grgNemesis/scenario-pr})
+ "pr" grgNemesis/scenario-pr
+ "cpr" grgNemesis/scenario-cpr})
(def patches
"A map of patch names to Garage builds"
diff --git a/script/jepsen.garage/src/jepsen/garage/nemesis.clj b/script/jepsen.garage/src/jepsen/garage/nemesis.clj
index e64bcaf1..07083038 100644
--- a/script/jepsen.garage/src/jepsen/garage/nemesis.clj
+++ b/script/jepsen.garage/src/jepsen/garage/nemesis.clj
@@ -76,30 +76,24 @@
(defn scenario-cp
"Clock scramble + partition scenario"
[opts]
- {:generator (cycle [(gen/sleep 5)
- {:type :info, :f :partition-start}
- (gen/sleep 5)
- {:type :info, :f :clock-scramble}
- (gen/sleep 5)
- {:type :info, :f :partition-stop}
- (gen/sleep 5)
- {:type :info, :f :clock-scramble}])
+ {:generator (->>
+ (gen/mix [{:type :info, :f :clock-scramble}
+ {:type :info, :f :partition-stop}
+ {:type :info, :f :partition-start}])
+ (gen/stagger 3))
:final-generator (gen/once {:type :info, :f :partition-stop})
:nemesis (nemesis/compose
- {{:partition-start :start
- :partition-stop :stop} (nemesis/partition-random-halves)
- {:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})})
+ {{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)
+ {:partition-start :start
+ :partition-stop :stop} (nemesis/partition-random-halves)})})
(defn scenario-r
"Cluster reconfiguration scenario"
[opts]
- {:generator (cycle [(gen/sleep 5)
- {:type :info, :f :reconfigure-start}
- (gen/sleep 5)
- {:type :info, :f :reconfigure-start}
- (gen/sleep 5)
- {:type :info, :f :reconfigure-stop}])
- :final-generator (gen/once {:type :info, :f :reconfigure-stop})
+ {:generator (->>
+ (gen/mix [{:type :info, :f :reconfigure-start}
+ {:type :info, :f :reconfigure-stop}])
+ (gen/stagger 3))
:nemesis (nemesis/compose
{{:reconfigure-start :start
:reconfigure-stop :stop} (reconfigure-subset 3)})})
@@ -107,19 +101,33 @@
(defn scenario-pr
"Partition + cluster reconfiguration scenario"
[opts]
- {:generator (cycle [(gen/sleep 3)
- {:type :info, :f :reconfigure-start}
- (gen/sleep 3)
- {:type :info, :f :partition-start}
- (gen/sleep 3)
- {:type :info, :f :reconfigure-start}
- (gen/sleep 3)
- {:type :info, :f :partition-stop}
- (gen/sleep 3)
- {:type :info, :f :reconfigure-stop}])
+ {:generator (->>
+ (gen/mix [{:type :info, :f :partition-start}
+ {:type :info, :f :partition-stop}
+ {:type :info, :f :reconfigure-start}
+ {:type :info, :f :reconfigure-stop}])
+ (gen/stagger 3))
:final-generator (gen/once {:type :info, :f :partition-stop})
:nemesis (nemesis/compose
{{:partition-start :start
:partition-stop :stop} (nemesis/partition-random-halves)
{:reconfigure-start :start
:reconfigure-stop :stop} (reconfigure-subset 3)})})
+
+(defn scenario-cpr
+ "Clock scramble + partition + cluster reconfiguration scenario"
+ [opts]
+ {:generator (->>
+ (gen/mix [{:type :info, :f :clock-scramble}
+ {:type :info, :f :partition-start}
+ {:type :info, :f :partition-stop}
+ {:type :info, :f :reconfigure-start}
+ {:type :info, :f :reconfigure-stop}])
+ (gen/stagger 3))
+ :final-generator (gen/once {:type :info, :f :partition-stop})
+ :nemesis (nemesis/compose
+ {{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)
+ {:partition-start :start
+ :partition-stop :stop} (nemesis/partition-random-halves)
+ {:reconfigure-start :start
+ :reconfigure-stop :stop} (reconfigure-subset 3)})})