From da8b1707489b70c25395ee49383ecbbd8c9f9404 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 19 Oct 2023 16:45:24 +0200 Subject: jepsen: investigating listobjects error --- script/jepsen.garage/README.md | 11 ++++++++++- script/jepsen.garage/jaeger.sh | 13 +++++++++++++ script/jepsen.garage/src/jepsen/garage/daemon.clj | 7 +++++-- script/jepsen.garage/src/jepsen/garage/s3api.clj | 1 + script/jepsen.garage/src/jepsen/garage/set.clj | 17 ++++++++++------- 5 files changed, 39 insertions(+), 10 deletions(-) create mode 100644 script/jepsen.garage/jaeger.sh diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 5cb98e4d..f6fb3a59 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -62,7 +62,16 @@ Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 -- Results: -- ListObjects returns objects not within prefix???? +- ListObjects returns objects not within prefix???? -> BAD, definitely a bug, but maybe it's in the instrumentation code? + +In `store/garage set1/20231019T163358.615+0200`: + +``` +INFO [2023-10-19 16:35:20,977] clojure-agent-send-off-pool-207 - jepsen.garage.set list results for prefix set20/ : (set13/0 set13/1 set13/10 set13/11 set13/12 set13/13 set13/14 set13/15 set13/16 set13/17 set13/18 set13/19 set13/2 set13/20 set13/21 set13/22 set13/23 set13/24 set13/25 set13/26 set13/27 set13/28 set13/29 set13/3 set13/30 set13/31 set13/32 set13/33 set13/34 set13/35 set13/36 set13/37 set13/38 set13/39 set13/4 set13/40 set13/41 set13/42 set13/43 set13/44 set13/45 set13/46 set13/47 set13/48 set13/49 set13/5 set13/50 set13/51 set13/52 set13/53 set13/54 set13/55 set13/56 set13/57 set13/58 set13/59 set13/6 set13/60 set13/61 set13/62 set13/63 set13/64 set13/65 set13/66 set13/67 set13/68 set13/69 set13/7 set13/70 set13/71 set13/72 set13/73 set13/74 set13/75 set13/76 set13/77 set13/78 set13/79 set13/8 set13/80 set13/81 set13/82 set13/83 set13/84 set13/85 set13/86 set13/87 set13/88 set13/89 set13/9 set13/90 set13/91 set13/92 set13/93 set13/94 set13/95 set13/96 set13/97 set13/98 set13/99) (node: http://192.168.56.25:3900 ) + +``` + +- Sometimes ListObjects returns an empty list???? -> BAD, quorums should ensure this doesn't happen ## License diff --git a/script/jepsen.garage/jaeger.sh b/script/jepsen.garage/jaeger.sh new file mode 100644 index 00000000..7f67b61b --- /dev/null +++ b/script/jepsen.garage/jaeger.sh @@ -0,0 +1,13 @@ +docker stop jaeger +docker rm jaeger + +# UI is on localhost:16686 +# otel-grpc collector is on localhost:4317 +# otel-http collector is on localhost:4318 + +docker run -d --name jaeger \ + -e COLLECTOR_OTLP_ENABLED=true \ + -p 4317:4317 \ + -p 4318:4318 \ + -p 16686:16686 \ + jaegertracing/all-in-one:1.50 diff --git a/script/jepsen.garage/src/jepsen/garage/daemon.clj b/script/jepsen.garage/src/jepsen/garage/daemon.clj index f924dcd6..7c581ba1 100644 --- a/script/jepsen.garage/src/jepsen/garage/daemon.clj +++ b/script/jepsen.garage/src/jepsen/garage/daemon.clj @@ -53,7 +53,8 @@ "api_bind_addr = \"0.0.0.0:3902\"\n" "[admin]\n" "api_bind_addr = \"0.0.0.0:3903\"\n" - "admin_token = \"" admin-token "\"\n") + "admin_token = \"" admin-token "\"\n" + "trace_sink = \"http://192.168.56.1:4317\"\n") "/etc/garage.toml")))) (defn connect-node! @@ -94,7 +95,8 @@ (cu/start-daemon! {:logfile logfile :pidfile pidfile - :chdir base-dir} + :chdir base-dir + :env {:RUST_LOG "garage=debug,garage_api=trace"}} binary :server) (c/exec :sleep 3) @@ -113,6 +115,7 @@ (info node "tearing down garage" version) (c/su (cu/stop-daemon! binary pidfile) + (c/exec :rm :-rf logfile) (c/exec :rm :-rf data-dir) (c/exec :rm :-rf meta-dir))) diff --git a/script/jepsen.garage/src/jepsen/garage/s3api.clj b/script/jepsen.garage/src/jepsen/garage/s3api.clj index 239d5423..977de7dc 100644 --- a/script/jepsen.garage/src/jepsen/garage/s3api.clj +++ b/script/jepsen.garage/src/jepsen/garage/s3api.clj @@ -42,6 +42,7 @@ new-object-summaries (:object-summaries list-result) new-objects (map (fn [d] (:key d)) new-object-summaries) objects (concat new-objects accum)] + (info (:endpoint creds) "ListObjectsV2 prefix(" prefix "), ct(" ct "): " new-objects) (if (:truncated? list-result) (list-inner (:next-continuation-token list-result) objects) objects))) diff --git a/script/jepsen.garage/src/jepsen/garage/set.clj b/script/jepsen.garage/src/jepsen/garage/set.clj index ff597095..8a1ab83f 100644 --- a/script/jepsen.garage/src/jepsen/garage/set.clj +++ b/script/jepsen.garage/src/jepsen/garage/set.clj @@ -44,12 +44,13 @@ (util/timeout 10000 (assoc op :type :fail, :error ::timeout) - (let [items (s3/list (:creds this) prefix) - items-stripped (map (fn [o] + (let [items (s3/list (:creds this) prefix)] + (info "list results for prefix" prefix ":" items " (node:" (:endpoint (:creds this)) ")") + (let [items-stripped (map (fn [o] (assert (str/starts-with? o prefix)) (str/replace-first o prefix "")) items) - items-set (set (map parse-long items-stripped))] - (assoc op :type :ok, :value (independent/tuple k items-set))))))) + items-set (set (map parse-long items-stripped))] + (assoc op :type :ok, :value (independent/tuple k items-set)))))))) (teardown! [this test]) (close! [this test])) @@ -100,9 +101,11 @@ (->> (range) (map (fn [x] {:type :invoke, :f :add, :value x})) (gen/limit (:ops-per-key opts))))) - :final-generator (independent/sequential-generator - (range 100) - (fn [k] (gen/once op-read)))}) + :final-generator (gen/phases + (independent/sequential-generator + (range 100) + (fn [k] (gen/once op-read))) + (gen/sleep 5))}) (defn workload2 "Tests insertions and deletions" -- cgit v1.2.3