From 7011b71fbd782e199417ce9afa44a8c220885b4a Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 18 Apr 2023 12:14:13 +0200 Subject: jepsen: wip --- script/jepsen.garage/.envrc | 1 + script/jepsen.garage/.gitignore | 15 ++ script/jepsen.garage/CHANGELOG.md | 24 ++ script/jepsen.garage/LICENSE | 280 +++++++++++++++++++++++ script/jepsen.garage/README.md | 22 ++ script/jepsen.garage/doc/intro.md | 3 + script/jepsen.garage/flake.nix | 22 ++ script/jepsen.garage/garage-cluster.nix | 64 ++++++ script/jepsen.garage/project.clj | 9 + script/jepsen.garage/shell.nix | 6 + script/jepsen.garage/src/jepsen/garage.clj | 18 ++ script/jepsen.garage/test/jepsen/garage_test.clj | 7 + 12 files changed, 471 insertions(+) create mode 100644 script/jepsen.garage/.envrc create mode 100644 script/jepsen.garage/.gitignore create mode 100644 script/jepsen.garage/CHANGELOG.md create mode 100644 script/jepsen.garage/LICENSE create mode 100644 script/jepsen.garage/README.md create mode 100644 script/jepsen.garage/doc/intro.md create mode 100644 script/jepsen.garage/flake.nix create mode 100644 script/jepsen.garage/garage-cluster.nix create mode 100644 script/jepsen.garage/project.clj create mode 100644 script/jepsen.garage/shell.nix create mode 100644 script/jepsen.garage/src/jepsen/garage.clj create mode 100644 script/jepsen.garage/test/jepsen/garage_test.clj diff --git a/script/jepsen.garage/.envrc b/script/jepsen.garage/.envrc new file mode 100644 index 00000000..1d953f4b --- /dev/null +++ b/script/jepsen.garage/.envrc @@ -0,0 +1 @@ +use nix diff --git a/script/jepsen.garage/.gitignore b/script/jepsen.garage/.gitignore new file mode 100644 index 00000000..44e41749 --- /dev/null +++ b/script/jepsen.garage/.gitignore @@ -0,0 +1,15 @@ +/target +/classes +/checkouts +profiles.clj +pom.xml +pom.xml.asc +*.jar +*.class +/.lein-* +/.nrepl-port +/.prepl-port +.hgignore +.hg/ +.direnv +/store diff --git a/script/jepsen.garage/CHANGELOG.md b/script/jepsen.garage/CHANGELOG.md new file mode 100644 
index 00000000..c1abdb4d --- /dev/null +++ b/script/jepsen.garage/CHANGELOG.md @@ -0,0 +1,24 @@ +# Change Log +All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/). + +## [Unreleased] +### Changed +- Add a new arity to `make-widget-async` to provide a different widget shape. + +## [0.1.1] - 2023-04-18 +### Changed +- Documentation on how to make the widgets. + +### Removed +- `make-widget-sync` - we're all async, all the time. + +### Fixed +- Fixed widget maker to keep working when daylight savings switches over. + +## 0.1.0 - 2023-04-18 +### Added +- Files from the new template. +- Widget maker public API - `make-widget-sync`. + +[Unreleased]: https://sourcehost.site/your-name/jepsen.garage/compare/0.1.1...HEAD +[0.1.1]: https://sourcehost.site/your-name/jepsen.garage/compare/0.1.0...0.1.1 diff --git a/script/jepsen.garage/LICENSE b/script/jepsen.garage/LICENSE new file mode 100644 index 00000000..23151265 --- /dev/null +++ b/script/jepsen.garage/LICENSE @@ -0,0 +1,280 @@ +Eclipse Public License - v 2.0 + + THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE + PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION + OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + +1. DEFINITIONS + +"Contribution" means: + + a) in the case of the initial Contributor, the initial content + Distributed under this Agreement, and + + b) in the case of each subsequent Contributor: + i) changes to the Program, and + ii) additions to the Program; + where such changes and/or additions to the Program originate from + and are Distributed by that particular Contributor. A Contribution + "originates" from a Contributor if it was added to the Program by + such Contributor itself or anyone acting on such Contributor's behalf. + Contributions do not include changes or additions to the Program that + are not Modified Works. 
+ +"Contributor" means any person or entity that Distributes the Program. + +"Licensed Patents" mean patent claims licensable by a Contributor which +are necessarily infringed by the use or sale of its Contribution alone +or when combined with the Program. + +"Program" means the Contributions Distributed in accordance with this +Agreement. + +"Recipient" means anyone who receives the Program under this Agreement +or any Secondary License (as applicable), including Contributors. + +"Derivative Works" shall mean any work, whether in Source Code or other +form, that is based on (or derived from) the Program and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work of authorship. + +"Modified Works" shall mean any work in Source Code or other form that +results from an addition to, deletion from, or modification of the +contents of the Program, including, for purposes of clarity any new file +in Source Code form that contains any contents of the Program. Modified +Works shall not include works that contain only declarations, +interfaces, types, classes, structures, or files of the Program solely +in each case in order to link to, bind by name, or subclass the Program +or Modified Works thereof. + +"Distribute" means the acts of a) distributing or b) making available +in any manner that enables the transfer of a copy. + +"Source Code" means the form of a Program preferred for making +modifications, including but not limited to software source code, +documentation source, and configuration files. + +"Secondary License" means either the GNU General Public License, +Version 2.0, or any later versions of that license, including any +exceptions or additional permissions as identified by the initial +Contributor. + +2. 
GRANT OF RIGHTS + + a) Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free copyright + license to reproduce, prepare Derivative Works of, publicly display, + publicly perform, Distribute and sublicense the Contribution of such + Contributor, if any, and such Derivative Works. + + b) Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free patent + license under Licensed Patents to make, use, sell, offer to sell, + import and otherwise transfer the Contribution of such Contributor, + if any, in Source Code or other form. This patent license shall + apply to the combination of the Contribution and the Program if, at + the time the Contribution is added by the Contributor, such addition + of the Contribution causes such combination to be covered by the + Licensed Patents. The patent license shall not apply to any other + combinations which include the Contribution. No hardware per se is + licensed hereunder. + + c) Recipient understands that although each Contributor grants the + licenses to its Contributions set forth herein, no assurances are + provided by any Contributor that the Program does not infringe the + patent or other intellectual property rights of any other entity. + Each Contributor disclaims any liability to Recipient for claims + brought by any other entity based on infringement of intellectual + property rights or otherwise. As a condition to exercising the + rights and licenses granted hereunder, each Recipient hereby + assumes sole responsibility to secure any other intellectual + property rights needed, if any. For example, if a third party + patent license is required to allow Recipient to Distribute the + Program, it is Recipient's responsibility to acquire that license + before distributing the Program. 
+ + d) Each Contributor represents that to its knowledge it has + sufficient copyright rights in its Contribution, if any, to grant + the copyright license set forth in this Agreement. + + e) Notwithstanding the terms of any Secondary License, no + Contributor makes additional grants to any Recipient (other than + those set forth in this Agreement) as a result of such Recipient's + receipt of the Program under the terms of a Secondary License + (if permitted under the terms of Section 3). + +3. REQUIREMENTS + +3.1 If a Contributor Distributes the Program in any form, then: + + a) the Program must also be made available as Source Code, in + accordance with section 3.2, and the Contributor must accompany + the Program with a statement that the Source Code for the Program + is available under this Agreement, and informs Recipients how to + obtain it in a reasonable manner on or through a medium customarily + used for software exchange; and + + b) the Contributor may Distribute the Program under a license + different than this Agreement, provided that such license: + i) effectively disclaims on behalf of all other Contributors all + warranties and conditions, express and implied, including + warranties or conditions of title and non-infringement, and + implied warranties or conditions of merchantability and fitness + for a particular purpose; + + ii) effectively excludes on behalf of all other Contributors all + liability for damages, including direct, indirect, special, + incidental and consequential damages, such as lost profits; + + iii) does not attempt to limit or alter the recipients' rights + in the Source Code under section 3.2; and + + iv) requires any subsequent distribution of the Program by any + party to be under a license that satisfies the requirements + of this section 3. 
+ +3.2 When the Program is Distributed as Source Code: + + a) it must be made available under this Agreement, or if the + Program (i) is combined with other material in a separate file or + files made available under a Secondary License, and (ii) the initial + Contributor attached to the Source Code the notice described in + Exhibit A of this Agreement, then the Program may be made available + under the terms of such Secondary Licenses, and + + b) a copy of this Agreement must be included with each copy of + the Program. + +3.3 Contributors may not remove or alter any copyright, patent, +trademark, attribution notices, disclaimers of warranty, or limitations +of liability ("notices") contained within the Program from any copy of +the Program which they Distribute, provided that Contributors may add +their own appropriate notices. + +4. COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain responsibilities +with respect to end users, business partners and the like. While this +license is intended to facilitate the commercial use of the Program, +the Contributor who includes the Program in a commercial product +offering should do so in a manner which does not create potential +liability for other Contributors. Therefore, if a Contributor includes +the Program in a commercial product offering, such Contributor +("Commercial Contributor") hereby agrees to defend and indemnify every +other Contributor ("Indemnified Contributor") against any losses, +damages and costs (collectively "Losses") arising from claims, lawsuits +and other legal actions brought by a third party against the Indemnified +Contributor to the extent caused by the acts or omissions of such +Commercial Contributor in connection with its distribution of the Program +in a commercial product offering. The obligations in this section do not +apply to any claims or Losses relating to any actual or alleged +intellectual property infringement. 
In order to qualify, an Indemnified +Contributor must: a) promptly notify the Commercial Contributor in +writing of such claim, and b) allow the Commercial Contributor to control, +and cooperate with the Commercial Contributor in, the defense and any +related settlement negotiations. The Indemnified Contributor may +participate in any such claim at its own expense. + +For example, a Contributor might include the Program in a commercial +product offering, Product X. That Contributor is then a Commercial +Contributor. If that Commercial Contributor then makes performance +claims, or offers warranties related to Product X, those performance +claims and warranties are such Commercial Contributor's responsibility +alone. Under this section, the Commercial Contributor would have to +defend claims against the other Contributors related to those performance +claims and warranties, and if a court requires any other Contributor to +pay any damages as a result, the Commercial Contributor must pay +those damages. + +5. NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT +PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" +BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR +IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF +TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR +PURPOSE. Each Recipient is solely responsible for determining the +appropriateness of using and distributing the Program and assumes all +risks associated with its exercise of rights under this Agreement, +including but not limited to the risks and costs of program errors, +compliance with applicable laws, damage to or loss of data, programs +or equipment, and unavailability or interruption of operations. + +6. 
DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT +PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS +SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST +PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE +EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + +7. GENERAL + +If any provision of this Agreement is invalid or unenforceable under +applicable law, it shall not affect the validity or enforceability of +the remainder of the terms of this Agreement, and without further +action by the parties hereto, such provision shall be reformed to the +minimum extent necessary to make such provision valid and enforceable. + +If Recipient institutes patent litigation against any entity +(including a cross-claim or counterclaim in a lawsuit) alleging that the +Program itself (excluding combinations of the Program with other software +or hardware) infringes such Recipient's patent(s), then such Recipient's +rights granted under Section 2(b) shall terminate as of the date such +litigation is filed. + +All Recipient's rights under this Agreement shall terminate if it +fails to comply with any of the material terms or conditions of this +Agreement and does not cure such failure in a reasonable period of +time after becoming aware of such noncompliance. If all Recipient's +rights under this Agreement terminate, Recipient agrees to cease use +and distribution of the Program as soon as reasonably practicable. +However, Recipient's obligations under this Agreement and any licenses +granted by Recipient relating to the Program shall continue and survive. 
+ +Everyone is permitted to copy and distribute copies of this Agreement, +but in order to avoid inconsistency the Agreement is copyrighted and +may only be modified in the following manner. The Agreement Steward +reserves the right to publish new versions (including revisions) of +this Agreement from time to time. No one other than the Agreement +Steward has the right to modify this Agreement. The Eclipse Foundation +is the initial Agreement Steward. The Eclipse Foundation may assign the +responsibility to serve as the Agreement Steward to a suitable separate +entity. Each new version of the Agreement will be given a distinguishing +version number. The Program (including Contributions) may always be +Distributed subject to the version of the Agreement under which it was +received. In addition, after a new version of the Agreement is published, +Contributor may elect to Distribute the Program (including its +Contributions) under the new version. + +Except as expressly stated in Sections 2(a) and 2(b) above, Recipient +receives no rights or licenses to the intellectual property of any +Contributor under this Agreement, whether expressly, by implication, +estoppel or otherwise. All rights in the Program not expressly granted +under this Agreement are reserved. Nothing in this Agreement is intended +to be enforceable by any entity that is not a Contributor or Recipient. +No third-party beneficiary rights are created under this Agreement. + +Exhibit A - Form of Secondary Licenses Notice + +"This Source Code may also be made available under the following +Secondary Licenses when the conditions for such availability set forth +in the Eclipse Public License, v. 2.0 are satisfied: GNU General Public +License as published by the Free Software Foundation, either version 2 +of the License, or (at your option) any later version, with the GNU +Classpath Exception which is available at +https://www.gnu.org/software/classpath/license.html." 
+ + Simply including a copy of this Agreement, including this Exhibit A + is not sufficient to license the Source Code under Secondary Licenses. + + If it is not possible or desirable to put the notice in a particular + file, then You may include the notice in a location (such as a LICENSE + file in a relevant directory) where a recipient would be likely to + look for such a notice. + + You may add additional accurate notices of copyright ownership. diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md new file mode 100644 index 00000000..e1d1a555 --- /dev/null +++ b/script/jepsen.garage/README.md @@ -0,0 +1,22 @@ +# jepsen.garage + +A Clojure library designed to ... well, that part is up to you. + +## Usage + +FIXME + +## License + +Copyright © 2023 FIXME + +This program and the accompanying materials are made available under the +terms of the Eclipse Public License 2.0 which is available at +http://www.eclipse.org/legal/epl-2.0. + +This Source Code may also be made available under the following Secondary +Licenses when the conditions for such availability set forth in the Eclipse +Public License, v. 2.0 are satisfied: GNU General Public License as published by +the Free Software Foundation, either version 2 of the License, or (at your +option) any later version, with the GNU Classpath Exception which is available +at https://www.gnu.org/software/classpath/license.html. 
diff --git a/script/jepsen.garage/doc/intro.md b/script/jepsen.garage/doc/intro.md new file mode 100644 index 00000000..ee727bbc --- /dev/null +++ b/script/jepsen.garage/doc/intro.md @@ -0,0 +1,3 @@ +# Introduction to jepsen.garage + +TODO: write [great documentation](http://jacobian.org/writing/what-to-write/) diff --git a/script/jepsen.garage/flake.nix b/script/jepsen.garage/flake.nix new file mode 100644 index 00000000..76ee8ebf --- /dev/null +++ b/script/jepsen.garage/flake.nix @@ -0,0 +1,22 @@ +# Example flake.nix +{ + inputs.nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable"; + inputs.microvm.url = "github:astro/microvm.nix"; + inputs.microvm.inputs.nixpkgs.follows = "nixpkgs"; + + outputs = { self, nixpkgs, microvm }: { + # Example nixosConfigurations entry + nixosConfigurations.my-microvm = nixpkgs.lib.nixosSystem { + system = "x86_64-linux"; + modules = [ + # Include the microvm module + microvm.nixosModules.microvm + # Add more modules here + { + networking.hostName = "my-microvm"; + microvm.hypervisor = "cloud-hypervisor"; + } + ]; + }; + }; +} diff --git a/script/jepsen.garage/garage-cluster.nix b/script/jepsen.garage/garage-cluster.nix new file mode 100644 index 00000000..32fedc04 --- /dev/null +++ b/script/jepsen.garage/garage-cluster.nix @@ -0,0 +1,64 @@ +{ config, lib, pkgs, ... }: +let + unstable = import ./unstable.nix; + addressMap = + { + "n1" = { localAddress = "10.233.0.101"; hostAddress = "10.233.1.101"; }; + "n2" = { localAddress = "10.233.0.102"; hostAddress = "10.233.1.102"; }; + "n3" = { localAddress = "10.233.0.103"; hostAddress = "10.233.1.103"; }; + "n4" = { localAddress = "10.233.0.104"; hostAddress = "10.233.1.104"; }; + "n5" = { localAddress = "10.233.0.105"; hostAddress = "10.233.1.105"; }; + }; + toHostsEntry = name: { localAddress, ... 
}: "${localAddress} ${name}"; + extraHosts = + builtins.concatStringsSep "\n" + (lib.attrsets.mapAttrsToList toHostsEntry addressMap); + nodeConfig = hostName: { localAddress, hostAddress }: { + inherit localAddress hostAddress; + + ephemeral = true; + autoStart = true; + privateNetwork = true; + + config = { config, pkgs, ... }: + { + networking = { + inherit hostName extraHosts; + }; + + services.openssh = { + enable = true; + permitRootLogin = "yes"; + }; + users.users.root.initialPassword = "root"; + + system.stateVersion = "22.11"; + + services.garage = { + enable = true; + logLevel = "debug"; + settings.replication_mode = "3"; + }; + + # Workaround for nixos-container issue + # (see https://github.com/NixOS/nixpkgs/issues/67265 and + # https://github.com/NixOS/nixpkgs/pull/81371#issuecomment-605526099). + # The etcd service is of type "notify", which means that + # etcd would not be considered started until etcd is fully online; + # however, since NixOS container networking only works sometime *after* + # multi-user.target, we forgo etcd's notification entirely. 
+ systemd.services.etcd.serviceConfig.Type = lib.mkForce "exec"; + + systemd.services.etcd.serviceConfig.StandardOutput = "file:/var/log/etcd.log"; + systemd.services.etcd.serviceConfig.StandardError = "file:/var/log/etcd.log"; + + networking.firewall.allowedTCPPorts = [ 2379 2380 ]; + }; + }; +in +{ + containers = lib.attrsets.mapAttrs nodeConfig addressMap; + networking = { + inherit extraHosts; + }; +} diff --git a/script/jepsen.garage/project.clj b/script/jepsen.garage/project.clj new file mode 100644 index 00000000..ac3df57e --- /dev/null +++ b/script/jepsen.garage/project.clj @@ -0,0 +1,9 @@ +(defproject jepsen.garage "0.1.0-SNAPSHOT" + :description "Jepsen testing for Garage" + :url "https://git.deuxfleurs.fr/Deuxfleurs/garage" + :license {:name "GPLv3" + :url "https://www.gnu.org/licenses/gpl-3.0.en.html"} + :main jepsen.garage + :dependencies [[org.clojure/clojure "1.11.1"] + [jepsen "0.2.1-SNAPSHOT"]] + :repl-options {:init-ns jepsen.garage}) diff --git a/script/jepsen.garage/shell.nix b/script/jepsen.garage/shell.nix new file mode 100644 index 00000000..4f0af839 --- /dev/null +++ b/script/jepsen.garage/shell.nix @@ -0,0 +1,6 @@ +{ pkgs ? import <nixpkgs> {} }: + pkgs.mkShell { + nativeBuildInputs = [ + pkgs.leiningen + ]; +} diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj new file mode 100644 index 00000000..1351c4a7 --- /dev/null +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -0,0 +1,18 @@ +(ns jepsen.garage + (:require [jepsen.cli :as cli] + [jepsen.tests :as tests])) + +(defn garage-test + "Given an options map from the command line runner (e.g. :nodes, :ssh, + :concurrency, ...), constructs a test map." + [opts] + (merge tests/noop-test + {:pure-generators true} + opts)) + +(defn -main + "Handles command line arguments. Can either run a test, or a web server for + browsing results." + [& args] + (cli/run!
(cli/single-test-cmd {:test-fn garage-test}) + args)) diff --git a/script/jepsen.garage/test/jepsen/garage_test.clj b/script/jepsen.garage/test/jepsen/garage_test.clj new file mode 100644 index 00000000..055392a1 --- /dev/null +++ b/script/jepsen.garage/test/jepsen/garage_test.clj @@ -0,0 +1,7 @@ +(ns jepsen.garage-test + (:require [clojure.test :refer :all] + [jepsen.garage :refer :all])) + +(deftest a-test + (testing "FIXME, I fail." + (is (= 0 1)))) -- cgit v1.2.3 From 17ebb652730f69f53fe7d2b31fbf2db2075b9be7 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 18 Apr 2023 13:27:22 +0200 Subject: jepsen ssh into containers seem to work ? --- script/jepsen.garage/destroy-tap.sh | 9 +++++ script/jepsen.garage/flake.lock | 64 +++++++++++++++++++++++++++++++++ script/jepsen.garage/flake.nix | 45 ++++++++++++++++++++--- script/jepsen.garage/garage-cluster.nix | 34 ++++++++++-------- script/jepsen.garage/nodes | 5 +++ script/jepsen.garage/project.clj | 2 +- script/jepsen.garage/setup-tap.sh | 10 ++++++ 7 files changed, 150 insertions(+), 19 deletions(-) create mode 100755 script/jepsen.garage/destroy-tap.sh create mode 100644 script/jepsen.garage/flake.lock create mode 100644 script/jepsen.garage/nodes create mode 100755 script/jepsen.garage/setup-tap.sh diff --git a/script/jepsen.garage/destroy-tap.sh b/script/jepsen.garage/destroy-tap.sh new file mode 100755 index 00000000..544b3053 --- /dev/null +++ b/script/jepsen.garage/destroy-tap.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +USER=$(whoami) + +for NODE in 1 2 3 4 5; do + sudo ip link delete microvm-n$NODE +done + + diff --git a/script/jepsen.garage/flake.lock b/script/jepsen.garage/flake.lock new file mode 100644 index 00000000..63dd88f8 --- /dev/null +++ b/script/jepsen.garage/flake.lock @@ -0,0 +1,64 @@ +{ + "nodes": { + "flake-utils": { + "locked": { + "lastModified": 1678901627, + "narHash": "sha256-U02riOqrKKzwjsxc/400XnElV+UtPUQWpANPlyazjH0=", + "owner": "numtide", + "repo": "flake-utils", + "rev": 
"93a2b84fc4b70d9e089d029deacc3583435c2ed6", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "microvm": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1681747916, + "narHash": "sha256-tpWJMHWbTrFD2Nmj3Y3qYXoaTP4LFT0P0wt5zW8/aI8=", + "owner": "astro", + "repo": "microvm.nix", + "rev": "68f1b9ece0f116d5ea1d1ecaf17f7b526303df81", + "type": "github" + }, + "original": { + "owner": "astro", + "repo": "microvm.nix", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1681737997, + "narHash": "sha256-pHhjgsIkRMu80LmVe8QoKIZB6VZGRRxFmIvsC5S89k4=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "f00994e78cd39e6fc966f0c4103f908e63284780", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "microvm": "microvm", + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/script/jepsen.garage/flake.nix b/script/jepsen.garage/flake.nix index 76ee8ebf..2afa9ea1 100644 --- a/script/jepsen.garage/flake.nix +++ b/script/jepsen.garage/flake.nix @@ -4,19 +4,56 @@ inputs.microvm.url = "github:astro/microvm.nix"; inputs.microvm.inputs.nixpkgs.follows = "nixpkgs"; - outputs = { self, nixpkgs, microvm }: { - # Example nixosConfigurations entry - nixosConfigurations.my-microvm = nixpkgs.lib.nixosSystem { + outputs = { self, nixpkgs, microvm }: + with nixpkgs.lib; + let + addressMap = + { + "n1" = { ip = "10.1.0.10"; mac = "02:00:00:00:99:01"; }; + "n2" = { ip = "10.2.0.10"; mac = "02:00:00:00:99:02"; }; + "n3" = { ip = "10.3.0.10"; mac = "02:00:00:00:99:03"; }; + "n4" = { ip = "10.4.0.10"; mac = "02:00:00:00:99:04"; }; + "n5" = { ip = "10.5.0.10"; mac = "02:00:00:00:99:05"; }; + }; + toHostsEntry = name: { ip, ... 
}: "${ip} ${name}"; + extraHosts = + builtins.concatStringsSep "\n" + (attrsets.mapAttrsToList toHostsEntry addressMap); + + nodeConfig = hostName: { ip, mac }: nixosSystem { system = "x86_64-linux"; modules = [ # Include the microvm module microvm.nixosModules.microvm # Add more modules here { - networking.hostName = "my-microvm"; + networking = { + inherit hostName extraHosts; + }; + microvm.hypervisor = "cloud-hypervisor"; + microvm.interfaces = [ { + inherit mac; + type = "tap"; + id = "microvm-${hostName}"; + } ]; + + services.openssh = { + enable = true; + permitRootLogin = "yes"; + }; + users.users.root.initialPassword = "root"; + + #services.garage = { + # enable = true; + # logLevel = "debug"; + # settings.replication_mode = "3"; + #}; } ]; }; + in + { + nixosConfigurations = mapAttrs nodeConfig addressMap; }; } diff --git a/script/jepsen.garage/garage-cluster.nix b/script/jepsen.garage/garage-cluster.nix index 32fedc04..ebc73aaf 100644 --- a/script/jepsen.garage/garage-cluster.nix +++ b/script/jepsen.garage/garage-cluster.nix @@ -29,30 +29,36 @@ let services.openssh = { enable = true; permitRootLogin = "yes"; + kexAlgorithms = [ "curve25519-sha256@libssh.org" "ecdh-sha2-nistp256" "ecdh-sha2-nistp384" "ecdh-sha2-nistp521" "diffie-hellman-group-exchange-sha256" "diffie-hellman-group14-sha1" "diffie-hellman-group-exchange-sha1" "diffie-hellman-group1-sha1" ]; }; users.users.root.initialPassword = "root"; + users.users.root.openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJpaBZdYxHqMxhv2RExAOa7nkKhPBOHupMP3mYaZ73w9" + ]; system.stateVersion = "22.11"; services.garage = { enable = true; logLevel = "debug"; - settings.replication_mode = "3"; + settings = { + replication_mode = "3"; + db_engine = "lmdb"; + rpc_secret = "b597bb28ebdc90cdc4f15712733ca678cfb9a7e0311e0b9e93db9610fc3685e6"; + rpc_bind_addr = "0.0.0.0:3901"; + s3_api = { + region = "garage"; + api_bind_addr = "0.0.0.0:3900"; + }; + k2v_api.api_bind_addr = "0.0.0.0:3902"; + admin 
= { + api_bind_addr = "0.0.0.0:3903"; + admin_token = "icanhazadmin"; + }; + }; }; - # Workaround for nixos-container issue - # (see https://github.com/NixOS/nixpkgs/issues/67265 and - # https://github.com/NixOS/nixpkgs/pull/81371#issuecomment-605526099). - # The etcd service is of type "notify", which means that - # etcd would not be considered started until etcd is fully online; - # however, since NixOS container networking only works sometime *after* - # multi-user.target, we forgo etcd's notification entirely. - systemd.services.etcd.serviceConfig.Type = lib.mkForce "exec"; - - systemd.services.etcd.serviceConfig.StandardOutput = "file:/var/log/etcd.log"; - systemd.services.etcd.serviceConfig.StandardError = "file:/var/log/etcd.log"; - - networking.firewall.allowedTCPPorts = [ 2379 2380 ]; + networking.firewall.allowedTCPPorts = [ 3901 3900 3902 3903 ]; }; }; in diff --git a/script/jepsen.garage/nodes b/script/jepsen.garage/nodes new file mode 100644 index 00000000..b8f3eeb1 --- /dev/null +++ b/script/jepsen.garage/nodes @@ -0,0 +1,5 @@ +n1.containers +n2.containers +n3.containers +n4.containers +n5.containers diff --git a/script/jepsen.garage/project.clj b/script/jepsen.garage/project.clj index ac3df57e..2ab03e49 100644 --- a/script/jepsen.garage/project.clj +++ b/script/jepsen.garage/project.clj @@ -5,5 +5,5 @@ :url "https://www.gnu.org/licenses/gpl-3.0.en.html"} :main jepsen.garage :dependencies [[org.clojure/clojure "1.11.1"] - [jepsen "0.2.1-SNAPSHOT"]] + [jepsen "0.3.2-SNAPSHOT"]] :repl-options {:init-ns jepsen.garage}) diff --git a/script/jepsen.garage/setup-tap.sh b/script/jepsen.garage/setup-tap.sh new file mode 100755 index 00000000..7e09abc3 --- /dev/null +++ b/script/jepsen.garage/setup-tap.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +USER=$(whoami) + +for NODE in 1 2 3 4 5; do + sudo ip tuntap add microvm-n$NODE mode tap user $USER + sudo ip addr add dev microvm-n$NODE 10.$NODE.0.1 +done + + -- cgit v1.2.3 From 
ca4cc7e44f6143ec24be626c683ed5b14ff40295 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 18 Apr 2023 13:59:03 +0200 Subject: jepsen connects to vagrant vms --- script/jepsen.garage/.gitignore | 1 + script/jepsen.garage/Vagrantfile | 29 ++++++++++++++ script/jepsen.garage/flake.lock | 64 ------------------------------ script/jepsen.garage/flake.nix | 59 --------------------------- script/jepsen.garage/nodes | 5 --- script/jepsen.garage/nodes.containers | 5 +++ script/jepsen.garage/nodes.vagrant | 5 +++ script/jepsen.garage/shell.nix | 1 + script/jepsen.garage/src/jepsen/garage.clj | 3 +- 9 files changed, 43 insertions(+), 129 deletions(-) create mode 100644 script/jepsen.garage/Vagrantfile delete mode 100644 script/jepsen.garage/flake.lock delete mode 100644 script/jepsen.garage/flake.nix delete mode 100644 script/jepsen.garage/nodes create mode 100644 script/jepsen.garage/nodes.containers create mode 100644 script/jepsen.garage/nodes.vagrant diff --git a/script/jepsen.garage/.gitignore b/script/jepsen.garage/.gitignore index 44e41749..6eb8c209 100644 --- a/script/jepsen.garage/.gitignore +++ b/script/jepsen.garage/.gitignore @@ -13,3 +13,4 @@ pom.xml.asc .hg/ .direnv /store +.vagrant diff --git a/script/jepsen.garage/Vagrantfile b/script/jepsen.garage/Vagrantfile new file mode 100644 index 00000000..da6a277f --- /dev/null +++ b/script/jepsen.garage/Vagrantfile @@ -0,0 +1,29 @@ +# -*- mode: ruby -*- +# vi: set ft=ruby : +# + +def vm(config, hostname, ip) + config.vm.hostname = hostname + config.vm.network "private_network", ip: ip +end + +Vagrant.configure("2") do |config| + config.vm.box = "generic/alpine38" + + config.vm.provider "virtualbox" do |vb| + vb.gui = false + vb.memory = "512" + end + + config.vm.provision "shell", inline: <<-SHELL + echo "root:root" | chpasswd + apk update + apk add wget + SHELL + + config.vm.define "n1" do |config| vm(config, "n1", "192.168.56.21") end + config.vm.define "n2" do |config| vm(config, "n2", "192.168.56.22") end + 
config.vm.define "n3" do |config| vm(config, "n3", "192.168.56.23") end + config.vm.define "n4" do |config| vm(config, "n4", "192.168.56.24") end + config.vm.define "n5" do |config| vm(config, "n5", "192.168.56.25") end +end diff --git a/script/jepsen.garage/flake.lock b/script/jepsen.garage/flake.lock deleted file mode 100644 index 63dd88f8..00000000 --- a/script/jepsen.garage/flake.lock +++ /dev/null @@ -1,64 +0,0 @@ -{ - "nodes": { - "flake-utils": { - "locked": { - "lastModified": 1678901627, - "narHash": "sha256-U02riOqrKKzwjsxc/400XnElV+UtPUQWpANPlyazjH0=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "93a2b84fc4b70d9e089d029deacc3583435c2ed6", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "microvm": { - "inputs": { - "flake-utils": "flake-utils", - "nixpkgs": [ - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1681747916, - "narHash": "sha256-tpWJMHWbTrFD2Nmj3Y3qYXoaTP4LFT0P0wt5zW8/aI8=", - "owner": "astro", - "repo": "microvm.nix", - "rev": "68f1b9ece0f116d5ea1d1ecaf17f7b526303df81", - "type": "github" - }, - "original": { - "owner": "astro", - "repo": "microvm.nix", - "type": "github" - } - }, - "nixpkgs": { - "locked": { - "lastModified": 1681737997, - "narHash": "sha256-pHhjgsIkRMu80LmVe8QoKIZB6VZGRRxFmIvsC5S89k4=", - "owner": "nixos", - "repo": "nixpkgs", - "rev": "f00994e78cd39e6fc966f0c4103f908e63284780", - "type": "github" - }, - "original": { - "owner": "nixos", - "ref": "nixos-unstable", - "repo": "nixpkgs", - "type": "github" - } - }, - "root": { - "inputs": { - "microvm": "microvm", - "nixpkgs": "nixpkgs" - } - } - }, - "root": "root", - "version": 7 -} diff --git a/script/jepsen.garage/flake.nix b/script/jepsen.garage/flake.nix deleted file mode 100644 index 2afa9ea1..00000000 --- a/script/jepsen.garage/flake.nix +++ /dev/null @@ -1,59 +0,0 @@ -# Example flake.nix -{ - inputs.nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable"; - inputs.microvm.url = 
"github:astro/microvm.nix"; - inputs.microvm.inputs.nixpkgs.follows = "nixpkgs"; - - outputs = { self, nixpkgs, microvm }: - with nixpkgs.lib; - let - addressMap = - { - "n1" = { ip = "10.1.0.10"; mac = "02:00:00:00:99:01"; }; - "n2" = { ip = "10.2.0.10"; mac = "02:00:00:00:99:02"; }; - "n3" = { ip = "10.3.0.10"; mac = "02:00:00:00:99:03"; }; - "n4" = { ip = "10.4.0.10"; mac = "02:00:00:00:99:04"; }; - "n5" = { ip = "10.5.0.10"; mac = "02:00:00:00:99:05"; }; - }; - toHostsEntry = name: { ip, ... }: "${ip} ${name}"; - extraHosts = - builtins.concatStringsSep "\n" - (attrsets.mapAttrsToList toHostsEntry addressMap); - - nodeConfig = hostName: { ip, mac }: nixosSystem { - system = "x86_64-linux"; - modules = [ - # Include the microvm module - microvm.nixosModules.microvm - # Add more modules here - { - networking = { - inherit hostName extraHosts; - }; - - microvm.hypervisor = "cloud-hypervisor"; - microvm.interfaces = [ { - inherit mac; - type = "tap"; - id = "microvm-${hostName}"; - } ]; - - services.openssh = { - enable = true; - permitRootLogin = "yes"; - }; - users.users.root.initialPassword = "root"; - - #services.garage = { - # enable = true; - # logLevel = "debug"; - # settings.replication_mode = "3"; - #}; - } - ]; - }; - in - { - nixosConfigurations = mapAttrs nodeConfig addressMap; - }; -} diff --git a/script/jepsen.garage/nodes b/script/jepsen.garage/nodes deleted file mode 100644 index b8f3eeb1..00000000 --- a/script/jepsen.garage/nodes +++ /dev/null @@ -1,5 +0,0 @@ -n1.containers -n2.containers -n3.containers -n4.containers -n5.containers diff --git a/script/jepsen.garage/nodes.containers b/script/jepsen.garage/nodes.containers new file mode 100644 index 00000000..b8f3eeb1 --- /dev/null +++ b/script/jepsen.garage/nodes.containers @@ -0,0 +1,5 @@ +n1.containers +n2.containers +n3.containers +n4.containers +n5.containers diff --git a/script/jepsen.garage/nodes.vagrant b/script/jepsen.garage/nodes.vagrant new file mode 100644 index 00000000..df7c4622 --- 
/dev/null +++ b/script/jepsen.garage/nodes.vagrant @@ -0,0 +1,5 @@ +192.168.56.21 +192.168.56.22 +192.168.56.23 +192.168.56.24 +192.168.56.25 diff --git a/script/jepsen.garage/shell.nix b/script/jepsen.garage/shell.nix index 4f0af839..b0dda6d1 100644 --- a/script/jepsen.garage/shell.nix +++ b/script/jepsen.garage/shell.nix @@ -2,5 +2,6 @@ pkgs.mkShell { nativeBuildInputs = [ pkgs.leiningen + pkgs.vagrant ]; } diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 1351c4a7..6617064e 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -14,5 +14,6 @@ "Handles command line arguments. Can either run a test, or a web server for browsing results." [& args] - (cli/run! (cli/single-test-cmd {:test-fn garage-test}) + (cli/run! (merge (cli/single-test-cmd {:test-fn garage-test}) + (cli/serve-cmd)) args)) -- cgit v1.2.3 From bc11701999ececdb4571119082945cbe86f098aa Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 18 Apr 2023 16:10:07 +0200 Subject: jepsen: s3 gets and puts --- script/jepsen.garage/Vagrantfile | 6 +- script/jepsen.garage/destroy-tap.sh | 9 -- script/jepsen.garage/garage-cluster.nix | 70 --------------- script/jepsen.garage/nodes.containers | 5 -- script/jepsen.garage/project.clj | 3 +- script/jepsen.garage/setup-tap.sh | 10 --- script/jepsen.garage/src/jepsen/garage.clj | 140 ++++++++++++++++++++++++++++- 7 files changed, 141 insertions(+), 102 deletions(-) delete mode 100755 script/jepsen.garage/destroy-tap.sh delete mode 100644 script/jepsen.garage/garage-cluster.nix delete mode 100644 script/jepsen.garage/nodes.containers delete mode 100755 script/jepsen.garage/setup-tap.sh diff --git a/script/jepsen.garage/Vagrantfile b/script/jepsen.garage/Vagrantfile index da6a277f..c40c600d 100644 --- a/script/jepsen.garage/Vagrantfile +++ b/script/jepsen.garage/Vagrantfile @@ -8,7 +8,7 @@ def vm(config, hostname, ip) end Vagrant.configure("2") do |config| - 
config.vm.box = "generic/alpine38" + config.vm.box = "generic/debian10" config.vm.provider "virtualbox" do |vb| vb.gui = false @@ -17,8 +17,8 @@ Vagrant.configure("2") do |config| config.vm.provision "shell", inline: <<-SHELL echo "root:root" | chpasswd - apk update - apk add wget + mkdir -p /root/.ssh + echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJpaBZdYxHqMxhv2RExAOa7nkKhPBOHupMP3mYaZ73w9 lx@lindy" >> /root/.ssh/authorized_keys SHELL config.vm.define "n1" do |config| vm(config, "n1", "192.168.56.21") end diff --git a/script/jepsen.garage/destroy-tap.sh b/script/jepsen.garage/destroy-tap.sh deleted file mode 100755 index 544b3053..00000000 --- a/script/jepsen.garage/destroy-tap.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash - -USER=$(whoami) - -for NODE in 1 2 3 4 5; do - sudo ip link delete microvm-n$NODE -done - - diff --git a/script/jepsen.garage/garage-cluster.nix b/script/jepsen.garage/garage-cluster.nix deleted file mode 100644 index ebc73aaf..00000000 --- a/script/jepsen.garage/garage-cluster.nix +++ /dev/null @@ -1,70 +0,0 @@ -{ config, lib, pkgs, ... }: -let - unstable = import ./unstable.nix; - addressMap = - { - "n1" = { localAddress = "10.233.0.101"; hostAddress = "10.233.1.101"; }; - "n2" = { localAddress = "10.233.0.102"; hostAddress = "10.233.1.102"; }; - "n3" = { localAddress = "10.233.0.103"; hostAddress = "10.233.1.103"; }; - "n4" = { localAddress = "10.233.0.104"; hostAddress = "10.233.1.104"; }; - "n5" = { localAddress = "10.233.0.105"; hostAddress = "10.233.1.105"; }; - }; - toHostsEntry = name: { localAddress, ... }: "${localAddress} ${name}"; - extraHosts = - builtins.concatStringsSep "\n" - (lib.attrsets.mapAttrsToList toHostsEntry addressMap); - nodeConfig = hostName: { localAddress, hostAddress }: { - inherit localAddress hostAddress; - - ephemeral = true; - autoStart = true; - privateNetwork = true; - - config = { config, pkgs, ... 
}: - { - networking = { - inherit hostName extraHosts; - }; - - services.openssh = { - enable = true; - permitRootLogin = "yes"; - kexAlgorithms = [ "curve25519-sha256@libssh.org" "ecdh-sha2-nistp256" "ecdh-sha2-nistp384" "ecdh-sha2-nistp521" "diffie-hellman-group-exchange-sha256" "diffie-hellman-group14-sha1" "diffie-hellman-group-exchange-sha1" "diffie-hellman-group1-sha1" ]; - }; - users.users.root.initialPassword = "root"; - users.users.root.openssh.authorizedKeys.keys = [ - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJpaBZdYxHqMxhv2RExAOa7nkKhPBOHupMP3mYaZ73w9" - ]; - - system.stateVersion = "22.11"; - - services.garage = { - enable = true; - logLevel = "debug"; - settings = { - replication_mode = "3"; - db_engine = "lmdb"; - rpc_secret = "b597bb28ebdc90cdc4f15712733ca678cfb9a7e0311e0b9e93db9610fc3685e6"; - rpc_bind_addr = "0.0.0.0:3901"; - s3_api = { - region = "garage"; - api_bind_addr = "0.0.0.0:3900"; - }; - k2v_api.api_bind_addr = "0.0.0.0:3902"; - admin = { - api_bind_addr = "0.0.0.0:3903"; - admin_token = "icanhazadmin"; - }; - }; - }; - - networking.firewall.allowedTCPPorts = [ 3901 3900 3902 3903 ]; - }; - }; -in -{ - containers = lib.attrsets.mapAttrs nodeConfig addressMap; - networking = { - inherit extraHosts; - }; -} diff --git a/script/jepsen.garage/nodes.containers b/script/jepsen.garage/nodes.containers deleted file mode 100644 index b8f3eeb1..00000000 --- a/script/jepsen.garage/nodes.containers +++ /dev/null @@ -1,5 +0,0 @@ -n1.containers -n2.containers -n3.containers -n4.containers -n5.containers diff --git a/script/jepsen.garage/project.clj b/script/jepsen.garage/project.clj index 2ab03e49..04ae4cc3 100644 --- a/script/jepsen.garage/project.clj +++ b/script/jepsen.garage/project.clj @@ -5,5 +5,6 @@ :url "https://www.gnu.org/licenses/gpl-3.0.en.html"} :main jepsen.garage :dependencies [[org.clojure/clojure "1.11.1"] - [jepsen "0.3.2-SNAPSHOT"]] + [jepsen "0.3.2-SNAPSHOT"] + [amazonica "0.3.163"]] :repl-options {:init-ns jepsen.garage}) diff --git 
a/script/jepsen.garage/setup-tap.sh b/script/jepsen.garage/setup-tap.sh deleted file mode 100755 index 7e09abc3..00000000 --- a/script/jepsen.garage/setup-tap.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash - -USER=$(whoami) - -for NODE in 1 2 3 4 5; do - sudo ip tuntap add microvm-n$NODE mode tap user $USER - sudo ip addr add dev microvm-n$NODE 10.$NODE.0.1 -done - - diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 6617064e..26c58097 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -1,14 +1,146 @@ (ns jepsen.garage - (:require [jepsen.cli :as cli] - [jepsen.tests :as tests])) + (:require [clojure.tools.logging :refer :all] + [clojure.string :as str] + [jepsen [cli :as cli] + [client :as client] + [control :as c] + [db :as db] + [generator :as gen] + [tests :as tests]] + [jepsen.control.util :as cu] + [jepsen.os.debian :as debian] + [amazonica.aws.s3 :as s3] + [amazonica.aws.s3transfer :as s3transfer])) + +(def dir "/opt/garage") +(def binary (str dir "/garage")) +(def logfile (str dir "/garage.log")) +(def pidfile (str dir "/garage.pid")) + +(def grg-admin-token "icanhazadmin") +(def grg-key "jepsen") +(def grg-bucket "jepsen") +(def grg-object "1") + +(defn db + "Garage DB for a particular version" + [version] + (reify db/DB + (setup! [_ test node] + (info node "installing garage" version) + (c/su + (c/exec :mkdir :-p dir) + (let [url (str "https://garagehq.deuxfleurs.fr/_releases/" version "/x86_64-unknown-linux-musl/garage") + cache (cu/wget! url)] + (c/exec :cp cache binary)) + (c/exec :chmod :+x binary) + (cu/write-file! 
+ (str "rpc_secret = \"0fffabe52542c2b89a56b2efb7dfd477e9dafb285c9025cbdf1de7ca21a6b372\"\n" + "rpc_bind_addr = \"0.0.0.0:3901\"\n" + "rpc_public_addr = \"" node ":3901\"\n" + "db_engine = \"lmdb\"\n" + "replication_mode = \"3\"\n" + "data_dir = \"" dir "/data\"\n" + "metadata_dir = \"" dir "/meta\"\n" + "[s3_api]\n" + "s3_region = \"us-east-1\"\n" + "api_bind_addr = \"0.0.0.0:3900\"\n" + "[k2v_api]\n" + "api_bind_addr = \"0.0.0.0:3902\"\n" + "[admin]\n" + "api_bind_addr = \"0.0.0.0:3903\"\n" + "admin_token = \"" grg-admin-token "\"\n") + "/etc/garage.toml") + (cu/start-daemon! + {:logfile logfile + :pidfile pidfile + :chdir dir} + binary + :server) + (Thread/sleep 100) + (let [node-id (c/exec binary :node :id :-q)] + (info node "node id:" node-id) + (c/on-many (:nodes test) + (c/exec binary :node :connect node-id)) + (c/exec binary :layout :assign (subs node-id 0 16) :-c 1 :-z :dc1 :-t node)) + (if (= node (first (:nodes test))) + (do + (Thread/sleep 2000) + (c/exec binary :layout :apply :--version 1) + (info node "garage status:" (c/exec binary :status)) + (c/exec binary :key :new :--name grg-key) + (c/exec binary :bucket :create grg-bucket) + (c/exec binary :bucket :allow :--read :--write grg-bucket :--key grg-key) + (info node "key info: " (c/exec binary :key :info grg-key)))))) + (teardown! [_ test node] + (info node "tearing down garage" version) + (c/su + (cu/stop-daemon! binary pidfile) + (c/exec :rm :-rf dir))) + db/LogFiles + (log-files [_ test node] + [logfile]))) + +(defn op-get [_ _] {:type :invoke, :f :get-object, :value nil}) +(defn op-put [_ _] {:type :invoke, :f :put-object, :value (str (rand-int 50))}) +(defn op-del [_ _] {:type :invoke, :f :del-object, :value nil}) + +(defrecord Client [creds] + client/Client + (open! 
[this test node] + (let [key-info (c/on node (c/exec binary :key :info grg-key)) + [_ ak sk] (re-matches + #"(?s).*Key ID: (.*)\nSecret key: (.*)\nCan create.*" + key-info) + creds {:access-key ak + :secret-key sk + :endpoint (str "http://" node ":3900") + :client-config {:path-style-access-enabled true}}] + (info node "s3 credentials:" creds) + (assoc this :creds creds))) + (setup! [this test]) + (invoke! [this test op] + (case (:f op) + :get-object + (let [value-bytes (try + (-> (s3/get-object (:creds this) grg-bucket grg-object) + :input-stream + slurp) + (catch Exception e nil))] + (assoc op :type :ok, :value value-bytes)) + :put-object + (let [some-bytes (.getBytes (:value op) "UTF-8") + bytes-stream (java.io.ByteArrayInputStream. some-bytes)] + (s3/put-object (:creds this) + :bucket-name grg-bucket + :key grg-object + :input-stream bytes-stream + :metadata {:content-length (count some-bytes)}) + (assoc op :type :ok)) + :del-object + (do + (s3/delete-object (:creds this) + :bucket-name grg-bucket + :key grg-object) + (assoc op :type :ok, :value nil)))) + (teardown! [this test]) + (close! [this test])) (defn garage-test "Given an options map from the command line runner (e.g. :nodes, :ssh, :concurrency, ...), constructs a test map." [opts] (merge tests/noop-test - {:pure-generators true} - opts)) + opts + {:pure-generators true + :name "garage" + :os debian/os + :db (db "v0.8.2") + :client (Client. nil) + :generator (->> (gen/mix [op-get op-put]) + (gen/stagger 1) + (gen/nemesis nil) + (gen/time-limit 15))})) (defn -main "Handles command line arguments. 
Can either run a test, or a web server for -- cgit v1.2.3 From 70c1d3db46b3bbdfad55b61c29e346c629380a32 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 18 Apr 2023 16:19:35 +0200 Subject: better match exceptions --- script/jepsen.garage/src/jepsen/garage.clj | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 26c58097..9d6f891e 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -9,6 +9,7 @@ [tests :as tests]] [jepsen.control.util :as cu] [jepsen.os.debian :as debian] + [slingshot.slingshot :refer [try+]] [amazonica.aws.s3 :as s3] [amazonica.aws.s3transfer :as s3transfer])) @@ -101,13 +102,14 @@ (setup! [this test]) (invoke! [this test op] (case (:f op) - :get-object - (let [value-bytes (try - (-> (s3/get-object (:creds this) grg-bucket grg-object) - :input-stream - slurp) - (catch Exception e nil))] - (assoc op :type :ok, :value value-bytes)) + :get-object (try+ + (let [value + (-> (s3/get-object (:creds this) grg-bucket grg-object) + :input-stream + slurp)] + (assoc op :type :ok, :value value)) + (catch (re-find #"Key not found" (.getMessage %)) ex + (assoc op :type :ok, :value nil))) :put-object (let [some-bytes (.getBytes (:value op) "UTF-8") bytes-stream (java.io.ByteArrayInputStream. some-bytes)] @@ -137,10 +139,10 @@ :os debian/os :db (db "v0.8.2") :client (Client. nil) - :generator (->> (gen/mix [op-get op-put]) + :generator (->> (gen/mix [op-get op-put op-del]) (gen/stagger 1) (gen/nemesis nil) - (gen/time-limit 15))})) + (gen/time-limit 20))})) (defn -main "Handles command line arguments. Can either run a test, or a web server for -- cgit v1.2.3 From dc5245ce65e6acc4c2b1f81dfdf38fc76fe06d3f Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 18 Apr 2023 17:47:53 +0200 Subject: even without nemesis, s3 get/put/delete is not linearizable (is this normal?) 
--- script/jepsen.garage/README.md | 34 +++++++++++------ script/jepsen.garage/shell.nix | 7 ++-- script/jepsen.garage/src/jepsen/garage.clj | 60 +++++++++++++++++++----------- 3 files changed, 64 insertions(+), 37 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index e1d1a555..ed956830 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -1,22 +1,32 @@ # jepsen.garage -A Clojure library designed to ... well, that part is up to you. +Jepsen checking of Garage consistency properties. ## Usage -FIXME +Requirements: + +- vagrant +- VirtualBox, configured so that nodes can take an IP in a private network `192.168.56.0/24` +- a user that can create VirtualBox VMs +- leiningen +- gnuplot + +Set up VMs: + +``` +vagrant up +``` + +Run tests: + +``` +lein run test --nodes-file nodes.vagrant +``` ## License -Copyright © 2023 FIXME +Copyright © 2023 Alex Auvolat This program and the accompanying materials are made available under the -terms of the Eclipse Public License 2.0 which is available at -http://www.eclipse.org/legal/epl-2.0. - -This Source Code may also be made available under the following Secondary -Licenses when the conditions for such availability set forth in the Eclipse -Public License, v. 2.0 are satisfied: GNU General Public License as published by -the Free Software Foundation, either version 2 of the License, or (at your -option) any later version, with the GNU Classpath Exception which is available -at https://www.gnu.org/software/classpath/license.html. +terms of the GNU General Public License v3.0. diff --git a/script/jepsen.garage/shell.nix b/script/jepsen.garage/shell.nix index b0dda6d1..595a78d9 100644 --- a/script/jepsen.garage/shell.nix +++ b/script/jepsen.garage/shell.nix @@ -1,7 +1,8 @@ { pkgs ? 
import <nixpkgs> {} }: pkgs.mkShell { - nativeBuildInputs = [ - pkgs.leiningen - pkgs.vagrant + nativeBuildInputs = with pkgs; [ + leiningen + vagrant + gnuplot ]; } diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 9d6f891e..df3c8f7d 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -1,14 +1,18 @@ (ns jepsen.garage (:require [clojure.tools.logging :refer :all] [clojure.string :as str] - [jepsen [cli :as cli] + [jepsen [checker :as checker] + [cli :as cli] [client :as client] [control :as c] [db :as db] [generator :as gen] + [nemesis :as nemesis] [tests :as tests]] [jepsen.checker.timeline :as timeline] [jepsen.control.util :as cu] [jepsen.os.debian :as debian] + [knossos.model :as model] [slingshot.slingshot :refer [try+]] [amazonica.aws.s3 :as s3] [amazonica.aws.s3transfer :as s3transfer])) @@ -82,9 +86,9 @@ (log-files [_ test node] [logfile]))) -(defn op-get [_ _] {:type :invoke, :f :get-object, :value nil}) -(defn op-put [_ _] {:type :invoke, :f :put-object, :value (str (rand-int 50))}) -(defn op-del [_ _] {:type :invoke, :f :del-object, :value nil}) +(defn op-get [_ _] {:type :invoke, :f :read, :value nil}) +(defn op-put [_ _] {:type :invoke, :f :write, :value (str (rand-int 9))}) +(defn op-del [_ _] {:type :invoke, :f :write, :value nil}) (defrecord Client [creds] client/Client @@ -102,7 +106,7 @@ (setup! [this test]) (invoke! [this test op] (case (:f op) - :get-object (try+ + :read (try+ (let [value (-> (s3/get-object (:creds this) grg-bucket grg-object) :input-stream @@ -110,21 +114,21 @@ (assoc op :type :ok, :value value)) (catch (re-find #"Key not found" (.getMessage %)) ex (assoc op :type :ok, :value nil))) - :put-object - (let [some-bytes (.getBytes (:value op) "UTF-8") - bytes-stream (java.io.ByteArrayInputStream. 
some-bytes)] - (s3/put-object (:creds this) - :bucket-name grg-bucket - :key grg-object - :input-stream bytes-stream - :metadata {:content-length (count some-bytes)}) - (assoc op :type :ok)) - :del-object - (do - (s3/delete-object (:creds this) - :bucket-name grg-bucket - :key grg-object) - (assoc op :type :ok, :value nil)))) + :write + (if (= (:value op) nil) + (do + (s3/delete-object (:creds this) + :bucket-name grg-bucket + :key grg-object) + (assoc op :type :ok, :value nil)) + (let [some-bytes (.getBytes (:value op) "UTF-8") + bytes-stream (java.io.ByteArrayInputStream. some-bytes)] + (s3/put-object (:creds this) + :bucket-name grg-bucket + :key grg-object + :input-stream bytes-stream + :metadata {:content-length (count some-bytes)}) + (assoc op :type :ok))))) (teardown! [this test]) (close! [this test])) @@ -139,10 +143,22 @@ :os debian/os :db (db "v0.8.2") :client (Client. nil) + :nemesis (nemesis/partition-random-halves) + :checker (checker/compose + {:perf (checker/perf) + :timeline (timeline/html) + :linear (checker/linearizable + {:model (model/register) + :algorithm :linear})}) :generator (->> (gen/mix [op-get op-put op-del]) - (gen/stagger 1) + (gen/stagger 0.02) (gen/nemesis nil) - (gen/time-limit 20))})) + ; (gen/nemesis + ; (cycle [(gen/sleep 5) + ; {:type :info, :f :start} + ; (gen/sleep 5) + ; {:type :info, :f :stop}])) + (gen/time-limit (+ (:time-limit opts) 5)))})) (defn -main "Handles command line arguments. 
Can either run a test, or a web server for -- cgit v1.2.3 From 93a7132b4c5fb63966702f15ec995d805a40c8f9 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 18 Apr 2023 18:21:35 +0200 Subject: the fix for increasing timestamps does not make things linearizable --- script/jepsen.garage/src/jepsen/garage.clj | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index df3c8f7d..492dad4c 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -27,7 +27,7 @@ (def grg-bucket "jepsen") (def grg-object "1") -(defn db +(defn garage "Garage DB for a particular version" [version] (reify db/DB @@ -141,7 +141,8 @@ {:pure-generators true :name "garage" :os debian/os - :db (db "v0.8.2") + :db (garage "v0.8.2") + ; :db (garage "d39c5c6984c581e16932aaa07e3687e7b5ce266d") ; fixed for increasing timestamps :client (Client. nil) :nemesis (nemesis/partition-random-halves) :checker (checker/compose -- cgit v1.2.3 From 80d7b7d8582171d7ecd0e7745893792d10dd3038 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 19 Apr 2023 12:56:40 +0200 Subject: remove useless files --- script/jepsen.garage/CHANGELOG.md | 24 ---- script/jepsen.garage/LICENSE | 280 -------------------------------------- script/jepsen.garage/README.md | 2 +- script/jepsen.garage/doc/intro.md | 3 - script/jepsen.garage/project.clj | 4 +- 5 files changed, 3 insertions(+), 310 deletions(-) delete mode 100644 script/jepsen.garage/CHANGELOG.md delete mode 100644 script/jepsen.garage/LICENSE delete mode 100644 script/jepsen.garage/doc/intro.md diff --git a/script/jepsen.garage/CHANGELOG.md b/script/jepsen.garage/CHANGELOG.md deleted file mode 100644 index c1abdb4d..00000000 --- a/script/jepsen.garage/CHANGELOG.md +++ /dev/null @@ -1,24 +0,0 @@ -# Change Log -All notable changes to this project will be documented in this file. 
This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/). - -## [Unreleased] -### Changed -- Add a new arity to `make-widget-async` to provide a different widget shape. - -## [0.1.1] - 2023-04-18 -### Changed -- Documentation on how to make the widgets. - -### Removed -- `make-widget-sync` - we're all async, all the time. - -### Fixed -- Fixed widget maker to keep working when daylight savings switches over. - -## 0.1.0 - 2023-04-18 -### Added -- Files from the new template. -- Widget maker public API - `make-widget-sync`. - -[Unreleased]: https://sourcehost.site/your-name/jepsen.garage/compare/0.1.1...HEAD -[0.1.1]: https://sourcehost.site/your-name/jepsen.garage/compare/0.1.0...0.1.1 diff --git a/script/jepsen.garage/LICENSE b/script/jepsen.garage/LICENSE deleted file mode 100644 index 23151265..00000000 --- a/script/jepsen.garage/LICENSE +++ /dev/null @@ -1,280 +0,0 @@ -Eclipse Public License - v 2.0 - - THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE - PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION - OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. - -1. DEFINITIONS - -"Contribution" means: - - a) in the case of the initial Contributor, the initial content - Distributed under this Agreement, and - - b) in the case of each subsequent Contributor: - i) changes to the Program, and - ii) additions to the Program; - where such changes and/or additions to the Program originate from - and are Distributed by that particular Contributor. A Contribution - "originates" from a Contributor if it was added to the Program by - such Contributor itself or anyone acting on such Contributor's behalf. - Contributions do not include changes or additions to the Program that - are not Modified Works. - -"Contributor" means any person or entity that Distributes the Program. 
- -"Licensed Patents" mean patent claims licensable by a Contributor which -are necessarily infringed by the use or sale of its Contribution alone -or when combined with the Program. - -"Program" means the Contributions Distributed in accordance with this -Agreement. - -"Recipient" means anyone who receives the Program under this Agreement -or any Secondary License (as applicable), including Contributors. - -"Derivative Works" shall mean any work, whether in Source Code or other -form, that is based on (or derived from) the Program and for which the -editorial revisions, annotations, elaborations, or other modifications -represent, as a whole, an original work of authorship. - -"Modified Works" shall mean any work in Source Code or other form that -results from an addition to, deletion from, or modification of the -contents of the Program, including, for purposes of clarity any new file -in Source Code form that contains any contents of the Program. Modified -Works shall not include works that contain only declarations, -interfaces, types, classes, structures, or files of the Program solely -in each case in order to link to, bind by name, or subclass the Program -or Modified Works thereof. - -"Distribute" means the acts of a) distributing or b) making available -in any manner that enables the transfer of a copy. - -"Source Code" means the form of a Program preferred for making -modifications, including but not limited to software source code, -documentation source, and configuration files. - -"Secondary License" means either the GNU General Public License, -Version 2.0, or any later versions of that license, including any -exceptions or additional permissions as identified by the initial -Contributor. - -2. 
GRANT OF RIGHTS - - a) Subject to the terms of this Agreement, each Contributor hereby - grants Recipient a non-exclusive, worldwide, royalty-free copyright - license to reproduce, prepare Derivative Works of, publicly display, - publicly perform, Distribute and sublicense the Contribution of such - Contributor, if any, and such Derivative Works. - - b) Subject to the terms of this Agreement, each Contributor hereby - grants Recipient a non-exclusive, worldwide, royalty-free patent - license under Licensed Patents to make, use, sell, offer to sell, - import and otherwise transfer the Contribution of such Contributor, - if any, in Source Code or other form. This patent license shall - apply to the combination of the Contribution and the Program if, at - the time the Contribution is added by the Contributor, such addition - of the Contribution causes such combination to be covered by the - Licensed Patents. The patent license shall not apply to any other - combinations which include the Contribution. No hardware per se is - licensed hereunder. - - c) Recipient understands that although each Contributor grants the - licenses to its Contributions set forth herein, no assurances are - provided by any Contributor that the Program does not infringe the - patent or other intellectual property rights of any other entity. - Each Contributor disclaims any liability to Recipient for claims - brought by any other entity based on infringement of intellectual - property rights or otherwise. As a condition to exercising the - rights and licenses granted hereunder, each Recipient hereby - assumes sole responsibility to secure any other intellectual - property rights needed, if any. For example, if a third party - patent license is required to allow Recipient to Distribute the - Program, it is Recipient's responsibility to acquire that license - before distributing the Program. 
- - d) Each Contributor represents that to its knowledge it has - sufficient copyright rights in its Contribution, if any, to grant - the copyright license set forth in this Agreement. - - e) Notwithstanding the terms of any Secondary License, no - Contributor makes additional grants to any Recipient (other than - those set forth in this Agreement) as a result of such Recipient's - receipt of the Program under the terms of a Secondary License - (if permitted under the terms of Section 3). - -3. REQUIREMENTS - -3.1 If a Contributor Distributes the Program in any form, then: - - a) the Program must also be made available as Source Code, in - accordance with section 3.2, and the Contributor must accompany - the Program with a statement that the Source Code for the Program - is available under this Agreement, and informs Recipients how to - obtain it in a reasonable manner on or through a medium customarily - used for software exchange; and - - b) the Contributor may Distribute the Program under a license - different than this Agreement, provided that such license: - i) effectively disclaims on behalf of all other Contributors all - warranties and conditions, express and implied, including - warranties or conditions of title and non-infringement, and - implied warranties or conditions of merchantability and fitness - for a particular purpose; - - ii) effectively excludes on behalf of all other Contributors all - liability for damages, including direct, indirect, special, - incidental and consequential damages, such as lost profits; - - iii) does not attempt to limit or alter the recipients' rights - in the Source Code under section 3.2; and - - iv) requires any subsequent distribution of the Program by any - party to be under a license that satisfies the requirements - of this section 3. 
- -3.2 When the Program is Distributed as Source Code: - - a) it must be made available under this Agreement, or if the - Program (i) is combined with other material in a separate file or - files made available under a Secondary License, and (ii) the initial - Contributor attached to the Source Code the notice described in - Exhibit A of this Agreement, then the Program may be made available - under the terms of such Secondary Licenses, and - - b) a copy of this Agreement must be included with each copy of - the Program. - -3.3 Contributors may not remove or alter any copyright, patent, -trademark, attribution notices, disclaimers of warranty, or limitations -of liability ("notices") contained within the Program from any copy of -the Program which they Distribute, provided that Contributors may add -their own appropriate notices. - -4. COMMERCIAL DISTRIBUTION - -Commercial distributors of software may accept certain responsibilities -with respect to end users, business partners and the like. While this -license is intended to facilitate the commercial use of the Program, -the Contributor who includes the Program in a commercial product -offering should do so in a manner which does not create potential -liability for other Contributors. Therefore, if a Contributor includes -the Program in a commercial product offering, such Contributor -("Commercial Contributor") hereby agrees to defend and indemnify every -other Contributor ("Indemnified Contributor") against any losses, -damages and costs (collectively "Losses") arising from claims, lawsuits -and other legal actions brought by a third party against the Indemnified -Contributor to the extent caused by the acts or omissions of such -Commercial Contributor in connection with its distribution of the Program -in a commercial product offering. The obligations in this section do not -apply to any claims or Losses relating to any actual or alleged -intellectual property infringement. 
In order to qualify, an Indemnified -Contributor must: a) promptly notify the Commercial Contributor in -writing of such claim, and b) allow the Commercial Contributor to control, -and cooperate with the Commercial Contributor in, the defense and any -related settlement negotiations. The Indemnified Contributor may -participate in any such claim at its own expense. - -For example, a Contributor might include the Program in a commercial -product offering, Product X. That Contributor is then a Commercial -Contributor. If that Commercial Contributor then makes performance -claims, or offers warranties related to Product X, those performance -claims and warranties are such Commercial Contributor's responsibility -alone. Under this section, the Commercial Contributor would have to -defend claims against the other Contributors related to those performance -claims and warranties, and if a court requires any other Contributor to -pay any damages as a result, the Commercial Contributor must pay -those damages. - -5. NO WARRANTY - -EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT -PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" -BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR -IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF -TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR -PURPOSE. Each Recipient is solely responsible for determining the -appropriateness of using and distributing the Program and assumes all -risks associated with its exercise of rights under this Agreement, -including but not limited to the risks and costs of program errors, -compliance with applicable laws, damage to or loss of data, programs -or equipment, and unavailability or interruption of operations. - -6. 
DISCLAIMER OF LIABILITY - -EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT -PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS -SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST -PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE -EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - -7. GENERAL - -If any provision of this Agreement is invalid or unenforceable under -applicable law, it shall not affect the validity or enforceability of -the remainder of the terms of this Agreement, and without further -action by the parties hereto, such provision shall be reformed to the -minimum extent necessary to make such provision valid and enforceable. - -If Recipient institutes patent litigation against any entity -(including a cross-claim or counterclaim in a lawsuit) alleging that the -Program itself (excluding combinations of the Program with other software -or hardware) infringes such Recipient's patent(s), then such Recipient's -rights granted under Section 2(b) shall terminate as of the date such -litigation is filed. - -All Recipient's rights under this Agreement shall terminate if it -fails to comply with any of the material terms or conditions of this -Agreement and does not cure such failure in a reasonable period of -time after becoming aware of such noncompliance. If all Recipient's -rights under this Agreement terminate, Recipient agrees to cease use -and distribution of the Program as soon as reasonably practicable. -However, Recipient's obligations under this Agreement and any licenses -granted by Recipient relating to the Program shall continue and survive. 
- -Everyone is permitted to copy and distribute copies of this Agreement, -but in order to avoid inconsistency the Agreement is copyrighted and -may only be modified in the following manner. The Agreement Steward -reserves the right to publish new versions (including revisions) of -this Agreement from time to time. No one other than the Agreement -Steward has the right to modify this Agreement. The Eclipse Foundation -is the initial Agreement Steward. The Eclipse Foundation may assign the -responsibility to serve as the Agreement Steward to a suitable separate -entity. Each new version of the Agreement will be given a distinguishing -version number. The Program (including Contributions) may always be -Distributed subject to the version of the Agreement under which it was -received. In addition, after a new version of the Agreement is published, -Contributor may elect to Distribute the Program (including its -Contributions) under the new version. - -Except as expressly stated in Sections 2(a) and 2(b) above, Recipient -receives no rights or licenses to the intellectual property of any -Contributor under this Agreement, whether expressly, by implication, -estoppel or otherwise. All rights in the Program not expressly granted -under this Agreement are reserved. Nothing in this Agreement is intended -to be enforceable by any entity that is not a Contributor or Recipient. -No third-party beneficiary rights are created under this Agreement. - -Exhibit A - Form of Secondary Licenses Notice - -"This Source Code may also be made available under the following -Secondary Licenses when the conditions for such availability set forth -in the Eclipse Public License, v. 2.0 are satisfied: GNU General Public -License as published by the Free Software Foundation, either version 2 -of the License, or (at your option) any later version, with the GNU -Classpath Exception which is available at -https://www.gnu.org/software/classpath/license.html." 
- - Simply including a copy of this Agreement, including this Exhibit A - is not sufficient to license the Source Code under Secondary Licenses. - - If it is not possible or desirable to put the notice in a particular - file, then You may include the notice in a location (such as a LICENSE - file in a relevant directory) where a recipient would be likely to - look for such a notice. - - You may add additional accurate notices of copyright ownership. diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index ed956830..460f0b9e 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -29,4 +29,4 @@ lein run test --nodes-file nodes.vagrant Copyright © 2023 Alex Auvolat This program and the accompanying materials are made available under the -terms of the GNU General Public License v3.0. +terms of the GNU Affero General Public License v3.0. diff --git a/script/jepsen.garage/doc/intro.md b/script/jepsen.garage/doc/intro.md deleted file mode 100644 index ee727bbc..00000000 --- a/script/jepsen.garage/doc/intro.md +++ /dev/null @@ -1,3 +0,0 @@ -# Introduction to jepsen.garage - -TODO: write [great documentation](http://jacobian.org/writing/what-to-write/) diff --git a/script/jepsen.garage/project.clj b/script/jepsen.garage/project.clj index 04ae4cc3..20bb59c7 100644 --- a/script/jepsen.garage/project.clj +++ b/script/jepsen.garage/project.clj @@ -1,8 +1,8 @@ (defproject jepsen.garage "0.1.0-SNAPSHOT" :description "Jepsen testing for Garage" :url "https://git.deuxfleurs.fr/Deuxfleurs/garage" - :license {:name "GPLv3" - :url "https://www.gnu.org/licenses/gpl-3.0.en.html"} + :license {:name "AGPLv3" + :url "https://www.gnu.org/licenses/agpl-3.0.en.html"} :main jepsen.garage :dependencies [[org.clojure/clojure "1.11.1"] [jepsen "0.3.2-SNAPSHOT"] -- cgit v1.2.3 From eb86eaa6d2702ad5171c4fdfcbceb3dfba21833c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 19 Apr 2023 14:14:22 +0200 Subject: refactor jepsen test --- 
script/jepsen.garage/src/jepsen/garage.clj | 205 ++++++------------------- script/jepsen.garage/src/jepsen/garage/grg.clj | 125 +++++++++++++++ script/jepsen.garage/src/jepsen/garage/reg.clj | 69 +++++++++ 3 files changed, 244 insertions(+), 155 deletions(-) create mode 100644 script/jepsen.garage/src/jepsen/garage/grg.clj create mode 100644 script/jepsen.garage/src/jepsen/garage/reg.clj diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 492dad4c..cd30dab8 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -1,170 +1,65 @@ (ns jepsen.garage - (:require [clojure.tools.logging :refer :all] - [clojure.string :as str] - [jepsen [checker :as checker] - [cli :as cli] - [client :as client] - [control :as c] - [db :as db] - [generator :as gen] - [nemesis :as nemesis] - [tests :as tests]] - [jepsen.checker.timeline :as timeline] - [jepsen.control.util :as cu] - [jepsen.os.debian :as debian] - [knossos.model :as model] - [slingshot.slingshot :refer [try+]] - [amazonica.aws.s3 :as s3] - [amazonica.aws.s3transfer :as s3transfer])) + (:require + [clojure.string :as str] + [jepsen + [checker :as checker] + [cli :as cli] + [generator :as gen] + [nemesis :as nemesis] + [tests :as tests]] + [jepsen.os.debian :as debian] + [jepsen.garage + [grg :as grg] + [reg :as reg]])) -(def dir "/opt/garage") -(def binary (str dir "/garage")) -(def logfile (str dir "/garage.log")) -(def pidfile (str dir "/garage.pid")) +(def workloads + "A map of workload names to functions that construct workloads, given opts." + {"reg" reg/workload}) -(def grg-admin-token "icanhazadmin") -(def grg-key "jepsen") -(def grg-bucket "jepsen") -(def grg-object "1") - -(defn garage - "Garage DB for a particular version" - [version] - (reify db/DB - (setup! 
[_ test node] - (info node "installing garage" version) - (c/su - (c/exec :mkdir :-p dir) - (let [url (str "https://garagehq.deuxfleurs.fr/_releases/" version "/x86_64-unknown-linux-musl/garage") - cache (cu/wget! url)] - (c/exec :cp cache binary)) - (c/exec :chmod :+x binary) - (cu/write-file! - (str "rpc_secret = \"0fffabe52542c2b89a56b2efb7dfd477e9dafb285c9025cbdf1de7ca21a6b372\"\n" - "rpc_bind_addr = \"0.0.0.0:3901\"\n" - "rpc_public_addr = \"" node ":3901\"\n" - "db_engine = \"lmdb\"\n" - "replication_mode = \"3\"\n" - "data_dir = \"" dir "/data\"\n" - "metadata_dir = \"" dir "/meta\"\n" - "[s3_api]\n" - "s3_region = \"us-east-1\"\n" - "api_bind_addr = \"0.0.0.0:3900\"\n" - "[k2v_api]\n" - "api_bind_addr = \"0.0.0.0:3902\"\n" - "[admin]\n" - "api_bind_addr = \"0.0.0.0:3903\"\n" - "admin_token = \"" grg-admin-token "\"\n") - "/etc/garage.toml") - (cu/start-daemon! - {:logfile logfile - :pidfile pidfile - :chdir dir} - binary - :server) - (Thread/sleep 100) - (let [node-id (c/exec binary :node :id :-q)] - (info node "node id:" node-id) - (c/on-many (:nodes test) - (c/exec binary :node :connect node-id)) - (c/exec binary :layout :assign (subs node-id 0 16) :-c 1 :-z :dc1 :-t node)) - (if (= node (first (:nodes test))) - (do - (Thread/sleep 2000) - (c/exec binary :layout :apply :--version 1) - (info node "garage status:" (c/exec binary :status)) - (c/exec binary :key :new :--name grg-key) - (c/exec binary :bucket :create grg-bucket) - (c/exec binary :bucket :allow :--read :--write grg-bucket :--key grg-key) - (info node "key info: " (c/exec binary :key :info grg-key)))))) - (teardown! [_ test node] - (info node "tearing down garage" version) - (c/su - (cu/stop-daemon! 
binary pidfile) - (c/exec :rm :-rf dir))) - db/LogFiles - (log-files [_ test node] - [logfile]))) - -(defn op-get [_ _] {:type :invoke, :f :read, :value nil}) -(defn op-put [_ _] {:type :invoke, :f :write, :value (str (rand-int 9))}) -(defn op-del [_ _] {:type :invoke, :f :write, :value nil}) - -(defrecord Client [creds] - client/Client - (open! [this test node] - (let [key-info (c/on node (c/exec binary :key :info grg-key)) - [_ ak sk] (re-matches - #"(?s).*Key ID: (.*)\nSecret key: (.*)\nCan create.*" - key-info) - creds {:access-key ak - :secret-key sk - :endpoint (str "http://" node ":3900") - :client-config {:path-style-access-enabled true}}] - (info node "s3 credentials:" creds) - (assoc this :creds creds))) - (setup! [this test]) - (invoke! [this test op] - (case (:f op) - :read (try+ - (let [value - (-> (s3/get-object (:creds this) grg-bucket grg-object) - :input-stream - slurp)] - (assoc op :type :ok, :value value)) - (catch (re-find #"Key not found" (.getMessage %)) ex - (assoc op :type :ok, :value nil))) - :write - (if (= (:value op) nil) - (do - (s3/delete-object (:creds this) - :bucket-name grg-bucket - :key grg-object) - (assoc op :type :ok, :value nil)) - (let [some-bytes (.getBytes (:value op) "UTF-8") - bytes-stream (java.io.ByteArrayInputStream. some-bytes)] - (s3/put-object (:creds this) - :bucket-name grg-bucket - :key grg-object - :input-stream bytes-stream - :metadata {:content-length (count some-bytes)}) - (assoc op :type :ok))))) - (teardown! [this test]) - (close! [this test])) +(def cli-opts + "Additional command line options." + [["-I" "--increasing-timestamps" "Garage version with increasing timestamps on PutObject" + :default false] + ["-r" "--rate HZ" "Approximate number of requests per second, per thread." + :default 10 + :parse-fn read-string + :validate [#(and (number? %) (pos? %)) "Must be a positive number"]] + [nil "--ops-per-key NUM" "Maximum number of operations on any given key." 
+ :default 100 + :parse-fn parse-long + :validate [pos? "Must be a positive integer."]] + ["-w" "--workload NAME" "Workload of test to run" + :default "reg" + :validate [workloads (cli/one-of workloads)]]]) (defn garage-test "Given an options map from the command line runner (e.g. :nodes, :ssh, :concurrency, ...), constructs a test map." [opts] - (merge tests/noop-test - opts - {:pure-generators true - :name "garage" - :os debian/os - :db (garage "v0.8.2") - ; :db (garage "d39c5c6984c581e16932aaa07e3687e7b5ce266d") ; fixed for increasing timestamps - :client (Client. nil) - :nemesis (nemesis/partition-random-halves) - :checker (checker/compose - {:perf (checker/perf) - :timeline (timeline/html) - :linear (checker/linearizable - {:model (model/register) - :algorithm :linear})}) - :generator (->> (gen/mix [op-get op-put op-del]) - (gen/stagger 0.02) - (gen/nemesis nil) - ; (gen/nemesis - ; (cycle [(gen/sleep 5) - ; {:type :info, :f :start} - ; (gen/sleep 5) - ; {:type :info, :f :stop}])) - (gen/time-limit (+ (:time-limit opts) 5)))})) + (let [workload ((get workloads (:workload opts)) opts) + garage-version (if (:increasing-timestamps opts) + "03490d41d58576d7b3bcf977b2726d72a3a66ada" + "v0.8.2")] + (merge tests/noop-test + opts + {:pure-generators true + :name (str "garage " (name (:workload opts))) + :os debian/os + :db (grg/db garage-version) + :client (:client workload) + :generator (:generator workload) + :nemesis (nemesis/partition-random-halves) + :checker (checker/compose + {:perf (checker/perf) + :workload (:checker workload)}) + }))) + (defn -main "Handles command line arguments. Can either run a test, or a web server for browsing results." [& args] - (cli/run! (merge (cli/single-test-cmd {:test-fn garage-test}) + (cli/run! 
(merge (cli/single-test-cmd {:test-fn garage-test + :opt-spec cli-opts}) (cli/serve-cmd)) args)) diff --git a/script/jepsen.garage/src/jepsen/garage/grg.clj b/script/jepsen.garage/src/jepsen/garage/grg.clj new file mode 100644 index 00000000..2309d824 --- /dev/null +++ b/script/jepsen.garage/src/jepsen/garage/grg.clj @@ -0,0 +1,125 @@ +(ns jepsen.garage.grg + (:require [clojure.tools.logging :refer :all] + [jepsen [control :as c] + [db :as db]] + [jepsen.control.util :as cu] + [amazonica.aws.s3 :as s3] + [slingshot.slingshot :refer [try+]])) + +; CONSTANTS -- HOW GARAGE IS SET UP + +(def dir "/opt/garage") +(def binary (str dir "/garage")) +(def logfile (str dir "/garage.log")) +(def pidfile (str dir "/garage.pid")) + +(def grg-admin-token "icanhazadmin") +(def grg-key "jepsen") +(def grg-bucket "jepsen") + +; THE GARAGE DB + +(defn db + "Garage DB for a particular version" + [version] + (reify db/DB + (setup! [_ test node] + (info node "installing garage" version) + (c/su + (c/exec :mkdir :-p dir) + (let [url (str "https://garagehq.deuxfleurs.fr/_releases/" version "/x86_64-unknown-linux-musl/garage") + cache (cu/wget! url)] + (c/exec :cp cache binary)) + (c/exec :chmod :+x binary) + (cu/write-file! + (str "rpc_secret = \"0fffabe52542c2b89a56b2efb7dfd477e9dafb285c9025cbdf1de7ca21a6b372\"\n" + "rpc_bind_addr = \"0.0.0.0:3901\"\n" + "rpc_public_addr = \"" node ":3901\"\n" + "db_engine = \"lmdb\"\n" + "replication_mode = \"3\"\n" + "data_dir = \"" dir "/data\"\n" + "metadata_dir = \"" dir "/meta\"\n" + "[s3_api]\n" + "s3_region = \"us-east-1\"\n" + "api_bind_addr = \"0.0.0.0:3900\"\n" + "[k2v_api]\n" + "api_bind_addr = \"0.0.0.0:3902\"\n" + "[admin]\n" + "api_bind_addr = \"0.0.0.0:3903\"\n" + "admin_token = \"" grg-admin-token "\"\n") + "/etc/garage.toml") + (cu/start-daemon! 
+ {:logfile logfile + :pidfile pidfile + :chdir dir} + binary + :server) + (Thread/sleep 100) + (let [node-id (c/exec binary :node :id :-q)] + (info node "node id:" node-id) + (c/on-many (:nodes test) + (c/exec binary :node :connect node-id)) + (c/exec binary :layout :assign (subs node-id 0 16) :-c 1 :-z :dc1 :-t node)) + (if (= node (first (:nodes test))) + (do + (Thread/sleep 2000) + (c/exec binary :layout :apply :--version 1) + (info node "garage status:" (c/exec binary :status)) + (c/exec binary :key :new :--name grg-key) + (c/exec binary :bucket :create grg-bucket) + (c/exec binary :bucket :allow :--read :--write grg-bucket :--key grg-key) + (info node "key info: " (c/exec binary :key :info grg-key)))))) + (teardown! [_ test node] + (info node "tearing down garage" version) + (c/su + (cu/stop-daemon! binary pidfile) + (c/exec :rm :-rf dir))) + db/LogFiles + (log-files [_ test node] + [logfile]))) + +; GARAGE S3 HELPER FUNCTIONS + +(defn s3-creds + "Get S3 credentials for node" + [node] + (let [key-info (c/on node (c/exec binary :key :info grg-key)) + [_ ak sk] (re-matches + #"(?s).*Key ID: (.*)\nSecret key: (.*)\nCan create.*" + key-info)] + {:access-key ak + :secret-key sk + :endpoint (str "http://" node ":3900") + :bucket grg-bucket + :client-config {:path-style-access-enabled true}})) + +(defn s3-get + "Helper for GetObject" + [creds k] + (try+ + (-> (s3/get-object creds (:bucket creds) k) + :input-stream + slurp) + (catch (re-find #"Key not found" (.getMessage %)) ex + nil))) + +(defn s3-put + "Helper for PutObject or DeleteObject (is a delete if value is nil)" + [creds k v] + (if (= v nil) + (s3/delete-object creds + :bucket-name (:bucket creds) + :key k) + (let [some-bytes (.getBytes v "UTF-8") + bytes-stream (java.io.ByteArrayInputStream. 
some-bytes)] + (s3/put-object creds + :bucket-name (:bucket creds) + :key k + :input-stream bytes-stream + :metadata {:content-length (count some-bytes)})))) + +(defn s3-list + "Helper for ListObjects -- just lists everything in the bucket" + [creds] + (s3/list-objects-v2 creds + {:bucket-name (:bucket creds)})) diff --git a/script/jepsen.garage/src/jepsen/garage/reg.clj b/script/jepsen.garage/src/jepsen/garage/reg.clj new file mode 100644 index 00000000..557b4a99 --- /dev/null +++ b/script/jepsen.garage/src/jepsen/garage/reg.clj @@ -0,0 +1,69 @@ +(ns jepsen.garage.reg + (:require [clojure.tools.logging :refer :all] + [clojure.string :as str] + [jepsen [checker :as checker] + [cli :as cli] + [client :as client] + [control :as c] + [db :as db] + [generator :as gen] + [independent :as independent] + [nemesis :as nemesis] + [tests :as tests]] + [jepsen.checker.timeline :as timeline] + [jepsen.control.util :as cu] + [jepsen.os.debian :as debian] + [jepsen.garage.grg :as grg] + [knossos.model :as model] + [slingshot.slingshot :refer [try+]])) + +(defn op-get [_ _] {:type :invoke, :f :read, :value nil}) +(defn op-put [_ _] {:type :invoke, :f :write, :value (str (rand-int 9))}) +(defn op-del [_ _] {:type :invoke, :f :write, :value nil}) + +(defrecord RegClient [creds] + client/Client + (open! [this test node] + (let [creds (grg/s3-creds node)] + (info node "s3 credentials:" creds) + (assoc this :creds creds))) + (setup! [this test]) + (invoke! [this test op] + (let [[k v] (:value op)] + (case (:f op) + :read + (let [value (grg/s3-get (:creds this) k)] + (assoc op :type :ok, :value (independent/tuple k value))) + :write + (do + (grg/s3-put (:creds this) k v) + (assoc op :type :ok))))) + (teardown! [this test]) + (close! [this test])) + +(defn workload + "Tests linearizable reads and writes" + [opts] + {:client (RegClient. 
nil) + :checker (independent/checker + (checker/compose + {:linear (checker/linearizable + {:model (model/register) + :algorithm :linear}) + :timeline (timeline/html)})) + :generator (->> (independent/concurrent-generator + 10 + (range) + (fn [k] + (->> + (gen/mix [op-get op-put op-del]) + (gen/stagger (/ (:rate opts))) + (gen/limit (:ops-per-key opts))))) + (gen/nemesis + (cycle [(gen/sleep 5) + {:type :info, :f :start} + (gen/sleep 5) + {:type :info, :f :stop}])) + (gen/time-limit (:time-limit opts)))}) + + -- cgit v1.2.3 From 6eb26be548c08707b59473e6086f3f5eee89fe47 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 19 Apr 2023 15:27:26 +0200 Subject: Add garage set test (this one works :p) --- script/jepsen.garage/README.md | 4 +- script/jepsen.garage/src/jepsen/garage.clj | 22 ++++++++-- script/jepsen.garage/src/jepsen/garage/grg.clj | 13 +++++- script/jepsen.garage/src/jepsen/garage/reg.clj | 21 ++++------ script/jepsen.garage/src/jepsen/garage/set.clj | 56 ++++++++++++++++++++++++++ 5 files changed, 95 insertions(+), 21 deletions(-) create mode 100644 script/jepsen.garage/src/jepsen/garage/set.clj diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 460f0b9e..800dde94 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -18,10 +18,10 @@ Set up VMs: vagrant up ``` -Run tests: +Run tests (this one should fail): ``` -lein run test --nodes-file nodes.vagrant +lein run test --nodes-file nodes.vagrant --time-limit 64 --concurrency 50 --rate 50 --workload reg ``` ## License diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index cd30dab8..7f762923 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -10,11 +10,13 @@ [jepsen.os.debian :as debian] [jepsen.garage [grg :as grg] - [reg :as reg]])) + [reg :as reg] + [set :as set]])) (def workloads "A map of workload names to functions that construct 
workloads, given opts." - {"reg" reg/workload}) + {"reg" reg/workload + "set" set/workload}) (def cli-opts "Additional command line options." @@ -47,7 +49,21 @@ :os debian/os :db (grg/db garage-version) :client (:client workload) - :generator (:generator workload) + :generator (gen/phases + (->> + (:generator workload) + (gen/stagger (/ (:rate opts))) + (gen/nemesis + (cycle [(gen/sleep 5) + {:type :info, :f :start} + (gen/sleep 5) + {:type :info, :f :stop}])) + (gen/time-limit (:time-limit opts))) + (gen/log "Healing cluster") + (gen/nemesis (gen/once {:type :info, :f :stop})) + (gen/log "Waiting for recovery") + (gen/sleep 10) + (gen/clients (:final-generator workload))) :nemesis (nemesis/partition-random-halves) :checker (checker/compose {:perf (checker/perf) diff --git a/script/jepsen.garage/src/jepsen/garage/grg.clj b/script/jepsen.garage/src/jepsen/garage/grg.clj index 2309d824..2e9bcc72 100644 --- a/script/jepsen.garage/src/jepsen/garage/grg.clj +++ b/script/jepsen.garage/src/jepsen/garage/grg.clj @@ -121,5 +121,14 @@ (defn s3-list "Helper for ListObjects -- just lists everything in the bucket" [creds] - (s3/list-objects-v2 creds - {:bucket-name (:bucket creds)})) + (defn list-inner [ct accum] + (let [list-result (s3/list-objects-v2 creds + {:bucket-name (:bucket creds) + :continuation-token ct}) + new-object-summaries (:object-summaries list-result) + new-objects (map (fn [d] (:key d)) new-object-summaries) + objects (concat new-objects accum)] + (if (:truncated? 
list-result) + (list-inner (:next-continuation-token list-result) objects) + objects))) + (list-inner nil [])) diff --git a/script/jepsen.garage/src/jepsen/garage/reg.clj b/script/jepsen.garage/src/jepsen/garage/reg.clj index 557b4a99..0b370b36 100644 --- a/script/jepsen.garage/src/jepsen/garage/reg.clj +++ b/script/jepsen.garage/src/jepsen/garage/reg.clj @@ -51,19 +51,12 @@ {:model (model/register) :algorithm :linear}) :timeline (timeline/html)})) - :generator (->> (independent/concurrent-generator - 10 - (range) - (fn [k] - (->> - (gen/mix [op-get op-put op-del]) - (gen/stagger (/ (:rate opts))) - (gen/limit (:ops-per-key opts))))) - (gen/nemesis - (cycle [(gen/sleep 5) - {:type :info, :f :start} - (gen/sleep 5) - {:type :info, :f :stop}])) - (gen/time-limit (:time-limit opts)))}) + :generator (independent/concurrent-generator + 10 + (range) + (fn [k] + (->> + (gen/mix [op-get op-put op-del]) + (gen/limit (:ops-per-key opts)))))}) diff --git a/script/jepsen.garage/src/jepsen/garage/set.clj b/script/jepsen.garage/src/jepsen/garage/set.clj new file mode 100644 index 00000000..ab99e6ef --- /dev/null +++ b/script/jepsen.garage/src/jepsen/garage/set.clj @@ -0,0 +1,56 @@ +(ns jepsen.garage.set + (:require [clojure.tools.logging :refer :all] + [clojure.string :as str] + [jepsen [checker :as checker] + [cli :as cli] + [client :as client] + [control :as c] + [db :as db] + [generator :as gen] + [independent :as independent] + [nemesis :as nemesis] + [tests :as tests]] + [jepsen.checker.timeline :as timeline] + [jepsen.control.util :as cu] + [jepsen.os.debian :as debian] + [jepsen.garage.grg :as grg] + [knossos.model :as model] + [slingshot.slingshot :refer [try+]])) + +(defn op-add [_ _] {:type :invoke, :f :add, :value (rand-int 100000)}) +(defn op-read [_ _] {:type :invoke, :f :read, :value nil}) + +(defrecord SetClient [creds] + client/Client + (open! 
[this test node] + (let [creds (grg/s3-creds node)] + (info node "s3 credentials:" creds) + (assoc this :creds creds))) + (setup! [this test]) + (invoke! [this test op] + (case (:f op) + :add + (do + (grg/s3-put (:creds this) (str (:value op)) "present") + (assoc op :type :ok)) + :read + (let [items (grg/s3-list (:creds this))] + (assoc op :type :ok, :value (set (map read-string items)))))) + (teardown! [this test]) + (close! [this test])) + +(defn workload + "Tests insertions and deletions" + [opts] + {:client (SetClient. nil) + :checker (checker/compose + {:set (checker/set) + :timeline (timeline/html)}) + ; :generator (gen/mix [op-add op-read]) + ; :generator (->> (range) + ; (map (fn [x] {:type :invoke, :f :add, :value x}))) + :generator (gen/mix [op-read + (->> (range) (map (fn [x] {:type :invoke, :f :add, :value x})))]) + :final-generator (gen/once op-read)}) + + -- cgit v1.2.3 From 0bb1577ae12b2dde313e036399f252266bc04d0a Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 19 Apr 2023 15:59:30 +0200 Subject: two set workloads with different checkers --- script/jepsen.garage/src/jepsen/garage.clj | 3 ++- script/jepsen.garage/src/jepsen/garage/grg.clj | 3 ++- script/jepsen.garage/src/jepsen/garage/set.clj | 20 ++++++++++++++------ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 7f762923..dbaf265b 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -16,7 +16,8 @@ (def workloads "A map of workload names to functions that construct workloads, given opts." {"reg" reg/workload - "set" set/workload}) + "set1" set/workload1 + "set2" set/workload2}) (def cli-opts "Additional command line options." 
diff --git a/script/jepsen.garage/src/jepsen/garage/grg.clj b/script/jepsen.garage/src/jepsen/garage/grg.clj index 2e9bcc72..4597fb14 100644 --- a/script/jepsen.garage/src/jepsen/garage/grg.clj +++ b/script/jepsen.garage/src/jepsen/garage/grg.clj @@ -120,10 +120,11 @@ (defn s3-list "Helper for ListObjects -- just lists everything in the bucket" - [creds] + [creds prefix] (defn list-inner [ct accum] (let [list-result (s3/list-objects-v2 creds {:bucket-name (:bucket creds) + :prefix prefix :continuation-token ct}) new-object-summaries (:object-summaries list-result) new-objects (map (fn [d] (:key d)) new-object-summaries) diff --git a/script/jepsen.garage/src/jepsen/garage/set.clj b/script/jepsen.garage/src/jepsen/garage/set.clj index ab99e6ef..f8c92580 100644 --- a/script/jepsen.garage/src/jepsen/garage/set.clj +++ b/script/jepsen.garage/src/jepsen/garage/set.clj @@ -34,12 +34,12 @@ (grg/s3-put (:creds this) (str (:value op)) "present") (assoc op :type :ok)) :read - (let [items (grg/s3-list (:creds this))] + (let [items (grg/s3-list (:creds this) "")] (assoc op :type :ok, :value (set (map read-string items)))))) (teardown! [this test]) (close! [this test])) -(defn workload +(defn workload1 "Tests insertions and deletions" [opts] {:client (SetClient. nil) @@ -47,10 +47,18 @@ {:set (checker/set) :timeline (timeline/html)}) ; :generator (gen/mix [op-add op-read]) - ; :generator (->> (range) - ; (map (fn [x] {:type :invoke, :f :add, :value x}))) - :generator (gen/mix [op-read - (->> (range) (map (fn [x] {:type :invoke, :f :add, :value x})))]) + :generator (->> (range) + (map (fn [x] {:type :invoke, :f :add, :value x}))) :final-generator (gen/once op-read)}) +(defn workload2 + "Tests insertions and deletions" + [opts] + {:client (SetClient. nil) + :checker (checker/compose + {:set (checker/set-full {:linearizable? 
false}) + :timeline (timeline/html)}) + :generator (gen/mix [op-read + (->> (range) (map (fn [x] {:type :invoke, :f :add, :value x})))])}) + -- cgit v1.2.3 From 55eb4e87c42bf0da88186eb5b2fe1fbbbdf9ed43 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 19 Apr 2023 16:16:34 +0200 Subject: set tests with independant tests together --- script/jepsen.garage/README.md | 7 ++++ script/jepsen.garage/src/jepsen/garage/set.clj | 55 ++++++++++++++++---------- 2 files changed, 42 insertions(+), 20 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 800dde94..1bba32ec 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -24,6 +24,13 @@ Run tests (this one should fail): lein run test --nodes-file nodes.vagrant --time-limit 64 --concurrency 50 --rate 50 --workload reg ``` +These ones are working: + +``` +lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrency 50 --workload set1 +lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrency 50 --workload set2 +``` + ## License Copyright © 2023 Alex Auvolat diff --git a/script/jepsen.garage/src/jepsen/garage/set.clj b/script/jepsen.garage/src/jepsen/garage/set.clj index f8c92580..156493bc 100644 --- a/script/jepsen.garage/src/jepsen/garage/set.clj +++ b/script/jepsen.garage/src/jepsen/garage/set.clj @@ -28,14 +28,18 @@ (assoc this :creds creds))) (setup! [this test]) (invoke! 
[this test op] - (case (:f op) - :add - (do - (grg/s3-put (:creds this) (str (:value op)) "present") - (assoc op :type :ok)) - :read - (let [items (grg/s3-list (:creds this) "")] - (assoc op :type :ok, :value (set (map read-string items)))))) + (let [[k v] (:value op) + prefix (str "set" k "/")] + (case (:f op) + :add + (do + (grg/s3-put (:creds this) (str prefix v) "present") + (assoc op :type :ok)) + :read + (let [items (grg/s3-list (:creds this) prefix) + items-stripped (map (fn [o] (str/replace-first o prefix "")) items) + items-set (set (map read-string items-stripped))] + (assoc op :type :ok, :value (independent/tuple k items-set)))))) (teardown! [this test]) (close! [this test])) @@ -43,22 +47,33 @@ "Tests insertions and deletions" [opts] {:client (SetClient. nil) - :checker (checker/compose - {:set (checker/set) - :timeline (timeline/html)}) - ; :generator (gen/mix [op-add op-read]) - :generator (->> (range) - (map (fn [x] {:type :invoke, :f :add, :value x}))) - :final-generator (gen/once op-read)}) + :checker (independent/checker + (checker/compose + {:set (checker/set) + :timeline (timeline/html)})) + :generator (independent/concurrent-generator + 10 + (range 100) + (fn [k] + (->> + (gen/mix [op-add]) + (gen/limit (:ops-per-key opts))))) + :final-generator (independent/sequential-generator + (range 100) + (fn [k] (gen/once op-read)))}) (defn workload2 "Tests insertions and deletions" [opts] {:client (SetClient. nil) - :checker (checker/compose - {:set (checker/set-full {:linearizable? false}) - :timeline (timeline/html)}) - :generator (gen/mix [op-read - (->> (range) (map (fn [x] {:type :invoke, :f :add, :value x})))])}) + :checker (independent/checker + (checker/compose + {:set (checker/set-full {:linearizable? 
false}) + :timeline (timeline/html)})) + :generator (independent/concurrent-generator + 10 + (range 100) + (fn [k] + (gen/mix [op-add op-read])))}) -- cgit v1.2.3 From 18ee8efb5f7bfcd64bf1a1336ffd6f54574aa79f Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 19 Apr 2023 22:02:03 +0200 Subject: Check read-after-write property for sets --- script/jepsen.garage/src/jepsen/garage/set.clj | 48 +++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/script/jepsen.garage/src/jepsen/garage/set.clj b/script/jepsen.garage/src/jepsen/garage/set.clj index 156493bc..9b21d50e 100644 --- a/script/jepsen.garage/src/jepsen/garage/set.clj +++ b/script/jepsen.garage/src/jepsen/garage/set.clj @@ -1,10 +1,12 @@ (ns jepsen.garage.set (:require [clojure.tools.logging :refer :all] [clojure.string :as str] + [clojure.set :as set] [jepsen [checker :as checker] [cli :as cli] [client :as client] [control :as c] + [checker :as checker] [db :as db] [generator :as gen] [independent :as independent] @@ -17,7 +19,7 @@ [knossos.model :as model] [slingshot.slingshot :refer [try+]])) -(defn op-add [_ _] {:type :invoke, :f :add, :value (rand-int 100000)}) +(defn op-add-rand100 [_ _] {:type :invoke, :f :add, :value (rand-int 100)}) (defn op-read [_ _] {:type :invoke, :f :read, :value nil}) (defrecord SetClient [creds] @@ -43,6 +45,41 @@ (teardown! [this test]) (close! 
[this test])) +(defn set-read-after-write + "Read-after-Write checker for set operations" + [] + (reify checker/Checker + (check [this test history opts] + (let [init {:add-started #{} + :add-done #{} + :read-must-contain {} + :missed #{} + :unexpected #{}} + final (reduce + (fn [state op] + (info "state:" state) + (info "operation:" op) + (case [(:type op) (:f op)] + ([:invoke :add]) + (assoc state :add-started (conj (:add-started state) (:value op))) + ([:ok :add]) + (assoc state :add-done (conj (:add-done state) (:value op))) + ([:invoke :read]) + (assoc-in state [:read-must-contain (:process op)] (:add-done state)) + ([:ok :read]) + (let [read-must-contain (get (:process op) (:read-must-contain state)) + new-missed (set/difference read-must-contain (:value op)) + new-unexpected (set/difference (:value op) (:add-started state))] + (assoc state + :read-must-contain (dissoc (:read-must-contain state) (:process op)) + :missed (set/union (:missed state) new-missed), + :unexpected (set/union (:unexpected state) new-unexpected))) + state)) + init history) + valid? (and (empty? (:missed final)) (empty? (:unexpected final)))] + (info "final state:" final) + (assoc final :valid? valid?))))) + (defn workload1 "Tests insertions and deletions" [opts] @@ -55,8 +92,8 @@ 10 (range 100) (fn [k] - (->> - (gen/mix [op-add]) + (->> (range) + (map (fn [x] {:type :invoke, :f :add, :value x})) (gen/limit (:ops-per-key opts))))) :final-generator (independent/sequential-generator (range 100) @@ -68,12 +105,13 @@ {:client (SetClient. nil) :checker (independent/checker (checker/compose - {:set (checker/set-full {:linearizable? false}) + {:set-full (checker/set-full {:linearizable? 
false}) + :set-read-after-write (set-read-after-write) :timeline (timeline/html)})) :generator (independent/concurrent-generator 10 (range 100) (fn [k] - (gen/mix [op-add op-read])))}) + (gen/mix [op-add-rand100 op-read])))}) -- cgit v1.2.3 From 9ec4cca3340cfd03deb55cd58d6b354630e7eb5f Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 19 Apr 2023 22:17:20 +0200 Subject: reformatting --- script/jepsen.garage/shell.nix | 14 +++++++------- script/jepsen.garage/src/jepsen/garage/set.clj | 5 +---- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/script/jepsen.garage/shell.nix b/script/jepsen.garage/shell.nix index 595a78d9..1f2514c6 100644 --- a/script/jepsen.garage/shell.nix +++ b/script/jepsen.garage/shell.nix @@ -1,8 +1,8 @@ -{ pkgs ? import {} }: - pkgs.mkShell { - nativeBuildInputs = with pkgs; [ - leiningen - vagrant - gnuplot - ]; +{ pkgs ? import { } }: +pkgs.mkShell { + nativeBuildInputs = with pkgs; [ + leiningen + vagrant + gnuplot + ]; } diff --git a/script/jepsen.garage/src/jepsen/garage/set.clj b/script/jepsen.garage/src/jepsen/garage/set.clj index 9b21d50e..5f76d1ac 100644 --- a/script/jepsen.garage/src/jepsen/garage/set.clj +++ b/script/jepsen.garage/src/jepsen/garage/set.clj @@ -57,8 +57,6 @@ :unexpected #{}} final (reduce (fn [state op] - (info "state:" state) - (info "operation:" op) (case [(:type op) (:f op)] ([:invoke :add]) (assoc state :add-started (conj (:add-started state) (:value op))) @@ -77,7 +75,6 @@ state)) init history) valid? (and (empty? (:missed final)) (empty? (:unexpected final)))] - (info "final state:" final) (assoc final :valid? 
valid?))))) (defn workload1 @@ -110,7 +107,7 @@ :timeline (timeline/html)})) :generator (independent/concurrent-generator 10 - (range 100) + (range) (fn [k] (gen/mix [op-add-rand100 op-read])))}) -- cgit v1.2.3 From ef5ca86dfc2678e8fc16b99daffeac40093945ae Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 18 Oct 2023 14:01:18 +0200 Subject: jepsen: update to garage 0.9.0 --- script/jepsen.garage/src/jepsen/garage.clj | 2 +- script/jepsen.garage/src/jepsen/garage/grg.clj | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index dbaf265b..754ddf7d 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -42,7 +42,7 @@ (let [workload ((get workloads (:workload opts)) opts) garage-version (if (:increasing-timestamps opts) "03490d41d58576d7b3bcf977b2726d72a3a66ada" - "v0.8.2")] + "v0.9.0")] (merge tests/noop-test opts {:pure-generators true diff --git a/script/jepsen.garage/src/jepsen/garage/grg.clj b/script/jepsen.garage/src/jepsen/garage/grg.clj index 4597fb14..31f20852 100644 --- a/script/jepsen.garage/src/jepsen/garage/grg.clj +++ b/script/jepsen.garage/src/jepsen/garage/grg.clj @@ -9,6 +9,8 @@ ; CONSTANTS -- HOW GARAGE IS SET UP (def dir "/opt/garage") +(def data-dir (str dir "/data")) +(def meta-dir (str dir "/meta")) (def binary (str dir "/garage")) (def logfile (str dir "/garage.log")) (def pidfile (str dir "/garage.pid")) @@ -54,18 +56,19 @@ :chdir dir} binary :server) - (Thread/sleep 100) + (info node "garage daemon started") + (c/exec :sleep 5) (let [node-id (c/exec binary :node :id :-q)] (info node "node id:" node-id) (c/on-many (:nodes test) (c/exec binary :node :connect node-id)) - (c/exec binary :layout :assign (subs node-id 0 16) :-c 1 :-z :dc1 :-t node)) + (c/exec binary :layout :assign (subs node-id 0 16) :-c :1G :-z :dc1 :-t node)) (if (= node (first (:nodes test))) (do - 
(Thread/sleep 2000) + (c/exec :sleep 5) (c/exec binary :layout :apply :--version 1) (info node "garage status:" (c/exec binary :status)) - (c/exec binary :key :new :--name grg-key) + (c/exec binary :key :create grg-key) (c/exec binary :bucket :create grg-bucket) (c/exec binary :bucket :allow :--read :--write grg-bucket :--key grg-key) (info node "key info: " (c/exec binary :key :info grg-key)))))) @@ -73,7 +76,8 @@ (info node "tearing down garage" version) (c/su (cu/stop-daemon! binary pidfile) - (c/exec :rm :-rf dir))) + (c/exec :rm :-rf data-dir) + (c/exec :rm :-rf meta-dir))) db/LogFiles (log-files [_ test node] [logfile]))) -- cgit v1.2.3 From 012ade5d4b81cc623f45042a1d3ca5c3ddafa569 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 18 Oct 2023 14:06:32 +0200 Subject: jepsen: update jepsen and fix garage key info --- script/jepsen.garage/project.clj | 2 +- script/jepsen.garage/src/jepsen/garage/grg.clj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/script/jepsen.garage/project.clj b/script/jepsen.garage/project.clj index 20bb59c7..59d45484 100644 --- a/script/jepsen.garage/project.clj +++ b/script/jepsen.garage/project.clj @@ -5,6 +5,6 @@ :url "https://www.gnu.org/licenses/agpl-3.0.en.html"} :main jepsen.garage :dependencies [[org.clojure/clojure "1.11.1"] - [jepsen "0.3.2-SNAPSHOT"] + [jepsen "0.3.3-SNAPSHOT"] [amazonica "0.3.163"]] :repl-options {:init-ns jepsen.garage}) diff --git a/script/jepsen.garage/src/jepsen/garage/grg.clj b/script/jepsen.garage/src/jepsen/garage/grg.clj index 31f20852..4347b1a7 100644 --- a/script/jepsen.garage/src/jepsen/garage/grg.clj +++ b/script/jepsen.garage/src/jepsen/garage/grg.clj @@ -87,7 +87,7 @@ (defn s3-creds "Get S3 credentials for node" [node] - (let [key-info (c/on node (c/exec binary :key :info grg-key)) + (let [key-info (c/on node (c/exec binary :key :info grg-key :--show-secret)) [_ ak sk] (re-matches #"(?s).*Key ID: (.*)\nSecret key: (.*)\nCan create.*" key-info)] -- cgit v1.2.3 From 
84d43501ce6dfd645d844ada0fcbff932d6fea76 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 18 Oct 2023 15:34:12 +0200 Subject: refactor jepsen setup logic --- script/jepsen.garage/shell.nix | 12 ++- script/jepsen.garage/src/jepsen/garage/grg.clj | 131 ++++++++++++++++--------- 2 files changed, 96 insertions(+), 47 deletions(-) diff --git a/script/jepsen.garage/shell.nix b/script/jepsen.garage/shell.nix index 1f2514c6..01e4c845 100644 --- a/script/jepsen.garage/shell.nix +++ b/script/jepsen.garage/shell.nix @@ -1,8 +1,18 @@ -{ pkgs ? import { } }: +{ pkgs ? import { + overlays = [ + (self: super: { + jdk = super.jdk11; + jre = super.jre11; + }) + ]; +} }: pkgs.mkShell { nativeBuildInputs = with pkgs; [ leiningen + jdk + jna vagrant gnuplot + graphviz ]; } diff --git a/script/jepsen.garage/src/jepsen/garage/grg.clj b/script/jepsen.garage/src/jepsen/garage/grg.clj index 4347b1a7..f6075762 100644 --- a/script/jepsen.garage/src/jepsen/garage/grg.clj +++ b/script/jepsen.garage/src/jepsen/garage/grg.clj @@ -1,6 +1,7 @@ (ns jepsen.garage.grg (:require [clojure.tools.logging :refer :all] [jepsen [control :as c] + [core :as jepsen] [db :as db]] [jepsen.control.util :as cu] [amazonica.aws.s3 :as s3] @@ -21,63 +22,101 @@ ; THE GARAGE DB +(defn install! + "Download and install Garage" + [node version] + (c/su + (c/trace + (info node "installing garage" version) + (c/exec :mkdir :-p dir) + (let [url (str "https://garagehq.deuxfleurs.fr/_releases/" version "/x86_64-unknown-linux-musl/garage") + cache (cu/cached-wget! url)] + (c/exec :cp cache binary)) + (c/exec :chmod :+x binary)))) + +(defn configure! + "Configure Garage" + [node] + (c/su + (c/trace + (cu/write-file! 
+ (str "rpc_secret = \"0fffabe52542c2b89a56b2efb7dfd477e9dafb285c9025cbdf1de7ca21a6b372\"\n" + "rpc_bind_addr = \"0.0.0.0:3901\"\n" + "rpc_public_addr = \"" node ":3901\"\n" + "db_engine = \"lmdb\"\n" + "replication_mode = \"3\"\n" + "data_dir = \"" dir "/data\"\n" + "metadata_dir = \"" dir "/meta\"\n" + "[s3_api]\n" + "s3_region = \"us-east-1\"\n" + "api_bind_addr = \"0.0.0.0:3900\"\n" + "[k2v_api]\n" + "api_bind_addr = \"0.0.0.0:3902\"\n" + "[admin]\n" + "api_bind_addr = \"0.0.0.0:3903\"\n" + "admin_token = \"" grg-admin-token "\"\n") + "/etc/garage.toml")))) + +(defn connect-node! + "Connect a Garage node to the rest of the cluster" + [test node] + (c/trace + (let [node-id (c/exec binary :node :id :-q)] + (info node "node id:" node-id) + (c/on-many (:nodes test) + (c/exec binary :node :connect node-id))))) + +(defn configure-node! + "Configure a Garage node to be part of a cluster layout" + [test node] + (c/trace + (let [node-id (c/exec binary :node :id :-q)] + (c/on (jepsen/primary test) + (c/exec binary :layout :assign (subs node-id 0 16) :-c :1G :-z :dc1 :-t node))))) + +(defn finalize-config! + "Apply the layout and create a key/bucket pair in the cluster" + [node] + (c/trace + (c/exec binary :layout :apply :--version 1) + (info node "garage status:" (c/exec binary :status)) + (c/exec binary :key :create grg-key) + (c/exec binary :bucket :create grg-bucket) + (c/exec binary :bucket :allow :--read :--write grg-bucket :--key grg-key) + (info node "key info: " (c/exec binary :key :info grg-key)))) + (defn db "Garage DB for a particular version" [version] (reify db/DB (setup! [_ test node] - (info node "installing garage" version) - (c/su - (c/exec :mkdir :-p dir) - (let [url (str "https://garagehq.deuxfleurs.fr/_releases/" version "/x86_64-unknown-linux-musl/garage") - cache (cu/wget! url)] - (c/exec :cp cache binary)) - (c/exec :chmod :+x binary) - (cu/write-file! 
- (str "rpc_secret = \"0fffabe52542c2b89a56b2efb7dfd477e9dafb285c9025cbdf1de7ca21a6b372\"\n" - "rpc_bind_addr = \"0.0.0.0:3901\"\n" - "rpc_public_addr = \"" node ":3901\"\n" - "db_engine = \"lmdb\"\n" - "replication_mode = \"3\"\n" - "data_dir = \"" dir "/data\"\n" - "metadata_dir = \"" dir "/meta\"\n" - "[s3_api]\n" - "s3_region = \"us-east-1\"\n" - "api_bind_addr = \"0.0.0.0:3900\"\n" - "[k2v_api]\n" - "api_bind_addr = \"0.0.0.0:3902\"\n" - "[admin]\n" - "api_bind_addr = \"0.0.0.0:3903\"\n" - "admin_token = \"" grg-admin-token "\"\n") - "/etc/garage.toml") - (cu/start-daemon! - {:logfile logfile - :pidfile pidfile - :chdir dir} - binary - :server) - (info node "garage daemon started") - (c/exec :sleep 5) - (let [node-id (c/exec binary :node :id :-q)] - (info node "node id:" node-id) - (c/on-many (:nodes test) - (c/exec binary :node :connect node-id)) - (c/exec binary :layout :assign (subs node-id 0 16) :-c :1G :-z :dc1 :-t node)) - (if (= node (first (:nodes test))) - (do - (c/exec :sleep 5) - (c/exec binary :layout :apply :--version 1) - (info node "garage status:" (c/exec binary :status)) - (c/exec binary :key :create grg-key) - (c/exec binary :bucket :create grg-bucket) - (c/exec binary :bucket :allow :--read :--write grg-bucket :--key grg-key) - (info node "key info: " (c/exec binary :key :info grg-key)))))) + (install! node version) + (configure! node) + (cu/start-daemon! + {:logfile logfile + :pidfile pidfile + :chdir dir} + binary + :server) + (c/exec :sleep 3) + + (jepsen/synchronize test) + (connect-node! test node) + + (jepsen/synchronize test) + (configure-node! test node) + + (jepsen/synchronize test) + (when (= node (jepsen/primary test)) + (finalize-config! node))) + (teardown! [_ test node] (info node "tearing down garage" version) (c/su (cu/stop-daemon! 
binary pidfile) (c/exec :rm :-rf data-dir) (c/exec :rm :-rf meta-dir))) + db/LogFiles (log-files [_ test node] [logfile]))) -- cgit v1.2.3 From ddd3de7fcef2a2e05805766085c156d7fde9e12e Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 18 Oct 2023 16:30:45 +0200 Subject: refactor jepsen code --- script/jepsen.garage/src/jepsen/garage.clj | 2 +- script/jepsen.garage/src/jepsen/garage/daemon.clj | 134 ++++++++++++++++ script/jepsen.garage/src/jepsen/garage/grg.clj | 178 ---------------------- script/jepsen.garage/src/jepsen/garage/reg.clj | 9 +- script/jepsen.garage/src/jepsen/garage/s3api.clj | 48 ++++++ script/jepsen.garage/src/jepsen/garage/set.clj | 9 +- 6 files changed, 193 insertions(+), 187 deletions(-) create mode 100644 script/jepsen.garage/src/jepsen/garage/daemon.clj delete mode 100644 script/jepsen.garage/src/jepsen/garage/grg.clj create mode 100644 script/jepsen.garage/src/jepsen/garage/s3api.clj diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 754ddf7d..5816512b 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -9,7 +9,7 @@ [tests :as tests]] [jepsen.os.debian :as debian] [jepsen.garage - [grg :as grg] + [daemon :as grg] [reg :as reg] [set :as set]])) diff --git a/script/jepsen.garage/src/jepsen/garage/daemon.clj b/script/jepsen.garage/src/jepsen/garage/daemon.clj new file mode 100644 index 00000000..81163521 --- /dev/null +++ b/script/jepsen.garage/src/jepsen/garage/daemon.clj @@ -0,0 +1,134 @@ +(ns jepsen.garage.daemon + (:require [clojure.tools.logging :refer :all] + [jepsen [control :as c] + [core :as jepsen] + [db :as db]] + [jepsen.control.util :as cu])) + +; CONSTANTS -- HOW GARAGE IS SET UP + +(def base-dir "/opt/garage") +(def data-dir (str base-dir "/data")) +(def meta-dir (str base-dir "/meta")) +(def binary (str base-dir "/garage")) +(def logfile (str base-dir "/garage.log")) +(def pidfile (str base-dir "/garage.pid")) + 
+(def admin-token "icanhazadmin") +(def access-key "jepsen") +(def bucket-name "jepsen") + +; THE GARAGE DB + +(defn install! + "Download and install Garage" + [node version] + (c/su + (c/trace + (info node "installing garage" version) + (c/exec :mkdir :-p base-dir) + (let [url (str "https://garagehq.deuxfleurs.fr/_releases/" version "/x86_64-unknown-linux-musl/garage") + cache (cu/cached-wget! url)] + (c/exec :cp cache binary)) + (c/exec :chmod :+x binary)))) + +(defn configure! + "Configure Garage" + [node] + (c/su + (c/trace + (cu/write-file! + (str "rpc_secret = \"0fffabe52542c2b89a56b2efb7dfd477e9dafb285c9025cbdf1de7ca21a6b372\"\n" + "rpc_bind_addr = \"0.0.0.0:3901\"\n" + "rpc_public_addr = \"" node ":3901\"\n" + "db_engine = \"lmdb\"\n" + "replication_mode = \"3\"\n" + "data_dir = \"" data-dir "\"\n" + "metadata_dir = \"" meta-dir "\"\n" + "[s3_api]\n" + "s3_region = \"us-east-1\"\n" + "api_bind_addr = \"0.0.0.0:3900\"\n" + "[k2v_api]\n" + "api_bind_addr = \"0.0.0.0:3902\"\n" + "[admin]\n" + "api_bind_addr = \"0.0.0.0:3903\"\n" + "admin_token = \"" admin-token "\"\n") + "/etc/garage.toml")))) + +(defn connect-node! + "Connect a Garage node to the rest of the cluster" + [test node] + (c/trace + (let [node-id (c/exec binary :node :id :-q)] + (info node "node id:" node-id) + (c/on-many (:nodes test) + (c/exec binary :node :connect node-id))))) + +(defn configure-node! + "Configure a Garage node to be part of a cluster layout" + [test node] + (c/trace + (let [node-id (c/exec binary :node :id :-q)] + (c/on (jepsen/primary test) + (c/exec binary :layout :assign (subs node-id 0 16) :-c :1G :-z :dc1 :-t node))))) + +(defn finalize-config! 
+ "Apply the layout and create a key/bucket pair in the cluster" + [node] + (c/trace + (c/exec binary :layout :apply :--version 1) + (info node "garage status:" (c/exec binary :status)) + (c/exec binary :key :create access-key) + (c/exec binary :bucket :create bucket-name) + (c/exec binary :bucket :allow :--read :--write bucket-name :--key access-key) + (info node "key info: " (c/exec binary :key :info access-key)))) + +(defn db + "Garage DB for a particular version" + [version] + (reify db/DB + (setup! [_ test node] + (install! node version) + (configure! node) + (cu/start-daemon! + {:logfile logfile + :pidfile pidfile + :chdir base-dir} + binary + :server) + (c/exec :sleep 3) + + (jepsen/synchronize test) + (connect-node! test node) + + (jepsen/synchronize test) + (configure-node! test node) + + (jepsen/synchronize test) + (when (= node (jepsen/primary test)) + (finalize-config! node))) + + (teardown! [_ test node] + (info node "tearing down garage" version) + (c/su + (cu/stop-daemon! 
binary pidfile) + (c/exec :rm :-rf data-dir) + (c/exec :rm :-rf meta-dir))) + + db/LogFiles + (log-files [_ test node] + [logfile]))) + +(defn creds + "Obtain Garage credentials for node" + [node] + (let [key-info (c/on node (c/exec binary :key :info access-key :--show-secret)) + [_ ak sk] (re-matches + #"(?s).*Key ID: (.*)\nSecret key: (.*)\nCan create.*" + key-info)] + {:access-key ak + :secret-key sk + :endpoint (str "http://" node ":3900") + :bucket bucket-name + :client-config {:path-style-access-enabled true}})) + diff --git a/script/jepsen.garage/src/jepsen/garage/grg.clj b/script/jepsen.garage/src/jepsen/garage/grg.clj deleted file mode 100644 index f6075762..00000000 --- a/script/jepsen.garage/src/jepsen/garage/grg.clj +++ /dev/null @@ -1,178 +0,0 @@ -(ns jepsen.garage.grg - (:require [clojure.tools.logging :refer :all] - [jepsen [control :as c] - [core :as jepsen] - [db :as db]] - [jepsen.control.util :as cu] - [amazonica.aws.s3 :as s3] - [slingshot.slingshot :refer [try+]])) - -; CONSTANTS -- HOW GARAGE IS SET UP - -(def dir "/opt/garage") -(def data-dir (str dir "/data")) -(def meta-dir (str dir "/meta")) -(def binary (str dir "/garage")) -(def logfile (str dir "/garage.log")) -(def pidfile (str dir "/garage.pid")) - -(def grg-admin-token "icanhazadmin") -(def grg-key "jepsen") -(def grg-bucket "jepsen") - -; THE GARAGE DB - -(defn install! - "Download and install Garage" - [node version] - (c/su - (c/trace - (info node "installing garage" version) - (c/exec :mkdir :-p dir) - (let [url (str "https://garagehq.deuxfleurs.fr/_releases/" version "/x86_64-unknown-linux-musl/garage") - cache (cu/cached-wget! url)] - (c/exec :cp cache binary)) - (c/exec :chmod :+x binary)))) - -(defn configure! - "Configure Garage" - [node] - (c/su - (c/trace - (cu/write-file! 
- (str "rpc_secret = \"0fffabe52542c2b89a56b2efb7dfd477e9dafb285c9025cbdf1de7ca21a6b372\"\n" - "rpc_bind_addr = \"0.0.0.0:3901\"\n" - "rpc_public_addr = \"" node ":3901\"\n" - "db_engine = \"lmdb\"\n" - "replication_mode = \"3\"\n" - "data_dir = \"" dir "/data\"\n" - "metadata_dir = \"" dir "/meta\"\n" - "[s3_api]\n" - "s3_region = \"us-east-1\"\n" - "api_bind_addr = \"0.0.0.0:3900\"\n" - "[k2v_api]\n" - "api_bind_addr = \"0.0.0.0:3902\"\n" - "[admin]\n" - "api_bind_addr = \"0.0.0.0:3903\"\n" - "admin_token = \"" grg-admin-token "\"\n") - "/etc/garage.toml")))) - -(defn connect-node! - "Connect a Garage node to the rest of the cluster" - [test node] - (c/trace - (let [node-id (c/exec binary :node :id :-q)] - (info node "node id:" node-id) - (c/on-many (:nodes test) - (c/exec binary :node :connect node-id))))) - -(defn configure-node! - "Configure a Garage node to be part of a cluster layout" - [test node] - (c/trace - (let [node-id (c/exec binary :node :id :-q)] - (c/on (jepsen/primary test) - (c/exec binary :layout :assign (subs node-id 0 16) :-c :1G :-z :dc1 :-t node))))) - -(defn finalize-config! - "Apply the layout and create a key/bucket pair in the cluster" - [node] - (c/trace - (c/exec binary :layout :apply :--version 1) - (info node "garage status:" (c/exec binary :status)) - (c/exec binary :key :create grg-key) - (c/exec binary :bucket :create grg-bucket) - (c/exec binary :bucket :allow :--read :--write grg-bucket :--key grg-key) - (info node "key info: " (c/exec binary :key :info grg-key)))) - -(defn db - "Garage DB for a particular version" - [version] - (reify db/DB - (setup! [_ test node] - (install! node version) - (configure! node) - (cu/start-daemon! - {:logfile logfile - :pidfile pidfile - :chdir dir} - binary - :server) - (c/exec :sleep 3) - - (jepsen/synchronize test) - (connect-node! test node) - - (jepsen/synchronize test) - (configure-node! test node) - - (jepsen/synchronize test) - (when (= node (jepsen/primary test)) - (finalize-config! 
node))) - - (teardown! [_ test node] - (info node "tearing down garage" version) - (c/su - (cu/stop-daemon! binary pidfile) - (c/exec :rm :-rf data-dir) - (c/exec :rm :-rf meta-dir))) - - db/LogFiles - (log-files [_ test node] - [logfile]))) - -; GARAGE S3 HELPER FUNCTIONS - -(defn s3-creds - "Get S3 credentials for node" - [node] - (let [key-info (c/on node (c/exec binary :key :info grg-key :--show-secret)) - [_ ak sk] (re-matches - #"(?s).*Key ID: (.*)\nSecret key: (.*)\nCan create.*" - key-info)] - {:access-key ak - :secret-key sk - :endpoint (str "http://" node ":3900") - :bucket grg-bucket - :client-config {:path-style-access-enabled true}})) - -(defn s3-get - "Helper for GetObject" - [creds k] - (try+ - (-> (s3/get-object creds (:bucket creds) k) - :input-stream - slurp) - (catch (re-find #"Key not found" (.getMessage %)) ex - nil))) - -(defn s3-put - "Helper for PutObject or DeleteObject (is a delete if value is nil)" - [creds k v] - (if (= v nil) - (s3/delete-object creds - :bucket-name (:bucket creds) - :key k) - (let [some-bytes (.getBytes v "UTF-8") - bytes-stream (java.io.ByteArrayInputStream. some-bytes)] - (s3/put-object creds - :bucket-name (:bucket creds) - :key k - :input-stream bytes-stream - :metadata {:content-length (count some-bytes)})))) - -(defn s3-list - "Helper for ListObjects -- just lists everything in the bucket" - [creds prefix] - (defn list-inner [ct accum] - (let [list-result (s3/list-objects-v2 creds - {:bucket-name (:bucket creds) - :prefix prefix - :continuation-token ct}) - new-object-summaries (:object-summaries list-result) - new-objects (map (fn [d] (:key d)) new-object-summaries) - objects (concat new-objects accum)] - (if (:truncated? 
list-result) - (list-inner (:next-continuation-token list-result) objects) - objects))) - (list-inner nil [])) diff --git a/script/jepsen.garage/src/jepsen/garage/reg.clj b/script/jepsen.garage/src/jepsen/garage/reg.clj index 0b370b36..3a2c1d55 100644 --- a/script/jepsen.garage/src/jepsen/garage/reg.clj +++ b/script/jepsen.garage/src/jepsen/garage/reg.clj @@ -13,7 +13,8 @@ [jepsen.checker.timeline :as timeline] [jepsen.control.util :as cu] [jepsen.os.debian :as debian] - [jepsen.garage.grg :as grg] + [jepsen.garage.daemon :as grg] + [jepsen.garage.s3api :as s3] [knossos.model :as model] [slingshot.slingshot :refer [try+]])) @@ -24,7 +25,7 @@ (defrecord RegClient [creds] client/Client (open! [this test node] - (let [creds (grg/s3-creds node)] + (let [creds (grg/creds node)] (info node "s3 credentials:" creds) (assoc this :creds creds))) (setup! [this test]) @@ -32,11 +33,11 @@ (let [[k v] (:value op)] (case (:f op) :read - (let [value (grg/s3-get (:creds this) k)] + (let [value (s3/get (:creds this) k)] (assoc op :type :ok, :value (independent/tuple k value))) :write (do - (grg/s3-put (:creds this) k v) + (s3/put (:creds this) k v) (assoc op :type :ok))))) (teardown! [this test]) (close! 
[this test])) diff --git a/script/jepsen.garage/src/jepsen/garage/s3api.clj b/script/jepsen.garage/src/jepsen/garage/s3api.clj new file mode 100644 index 00000000..239d5423 --- /dev/null +++ b/script/jepsen.garage/src/jepsen/garage/s3api.clj @@ -0,0 +1,48 @@ +(ns jepsen.garage.s3api + (:require [clojure.tools.logging :refer :all] + [jepsen [control :as c]] + [amazonica.aws.s3 :as s3] + [slingshot.slingshot :refer [try+]])) + +; GARAGE S3 HELPER FUNCTIONS + +(defn get + "Helper for GetObject" + [creds k] + (try+ + (-> (s3/get-object creds (:bucket creds) k) + :input-stream + slurp) + (catch (re-find #"Key not found" (.getMessage %)) ex + nil))) + +(defn put + "Helper for PutObject or DeleteObject (is a delete if value is nil)" + [creds k v] + (if (= v nil) + (s3/delete-object creds + :bucket-name (:bucket creds) + :key k) + (let [some-bytes (.getBytes v "UTF-8") + bytes-stream (java.io.ByteArrayInputStream. some-bytes)] + (s3/put-object creds + :bucket-name (:bucket creds) + :key k + :input-stream bytes-stream + :metadata {:content-length (count some-bytes)})))) + +(defn list + "Helper for ListObjects -- just lists everything in the bucket" + [creds prefix] + (defn list-inner [ct accum] + (let [list-result (s3/list-objects-v2 creds + {:bucket-name (:bucket creds) + :prefix prefix + :continuation-token ct}) + new-object-summaries (:object-summaries list-result) + new-objects (map (fn [d] (:key d)) new-object-summaries) + objects (concat new-objects accum)] + (if (:truncated? 
list-result) + (list-inner (:next-continuation-token list-result) objects) + objects))) + (list-inner nil [])) diff --git a/script/jepsen.garage/src/jepsen/garage/set.clj b/script/jepsen.garage/src/jepsen/garage/set.clj index 5f76d1ac..6bbc1ee0 100644 --- a/script/jepsen.garage/src/jepsen/garage/set.clj +++ b/script/jepsen.garage/src/jepsen/garage/set.clj @@ -15,7 +15,8 @@ [jepsen.checker.timeline :as timeline] [jepsen.control.util :as cu] [jepsen.os.debian :as debian] - [jepsen.garage.grg :as grg] + [jepsen.garage.daemon :as grg] + [jepsen.garage.s3api :as s3] [knossos.model :as model] [slingshot.slingshot :refer [try+]])) @@ -25,7 +26,7 @@ (defrecord SetClient [creds] client/Client (open! [this test node] - (let [creds (grg/s3-creds node)] + (let [creds (grg/creds node)] (info node "s3 credentials:" creds) (assoc this :creds creds))) (setup! [this test]) @@ -35,10 +36,10 @@ (case (:f op) :add (do - (grg/s3-put (:creds this) (str prefix v) "present") + (s3/put (:creds this) (str prefix v) "present") (assoc op :type :ok)) :read - (let [items (grg/s3-list (:creds this) prefix) + (let [items (s3/list (:creds this) prefix) items-stripped (map (fn [o] (str/replace-first o prefix "")) items) items-set (set (map read-string items-stripped))] (assoc op :type :ok, :value (independent/tuple k items-set)))))) -- cgit v1.2.3 From b3bf16ee27b061a3a091022e718b2994365f945c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 18 Oct 2023 17:51:34 +0200 Subject: make jepsen test more robust: handle errors and timeouts, fixed access key --- script/jepsen.garage/src/jepsen/garage.clj | 17 ++++++++++++----- script/jepsen.garage/src/jepsen/garage/daemon.clj | 23 ++++++++++------------- script/jepsen.garage/src/jepsen/garage/reg.clj | 22 ++++++++++++++-------- 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 5816512b..891fdf12 100644 --- 
a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -41,7 +41,7 @@ [opts] (let [workload ((get workloads (:workload opts)) opts) garage-version (if (:increasing-timestamps opts) - "03490d41d58576d7b3bcf977b2726d72a3a66ada" + "d146cdd5b66ca1d3ed65ce93ca42c6db22defc09" "v0.9.0")] (merge tests/noop-test opts @@ -56,16 +56,23 @@ (gen/stagger (/ (:rate opts))) (gen/nemesis (cycle [(gen/sleep 5) - {:type :info, :f :start} + ;{:type :info, :f :partition-start} + ;(gen/sleep 5) + {:type :info, :f :clock-scramble} (gen/sleep 5) - {:type :info, :f :stop}])) + ;{:type :info, :f :partition-stop} + ;(gen/sleep 5) + {:type :info, :f :clock-scramble}])) (gen/time-limit (:time-limit opts))) (gen/log "Healing cluster") - (gen/nemesis (gen/once {:type :info, :f :stop})) + (gen/nemesis (gen/once {:type :info, :f :partition-stop})) (gen/log "Waiting for recovery") (gen/sleep 10) (gen/clients (:final-generator workload))) - :nemesis (nemesis/partition-random-halves) + :nemesis (nemesis/compose + {{:partition-start :start + :partition-stop :stop} (nemesis/partition-random-halves) + {:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)}) :checker (checker/compose {:perf (checker/perf) :workload (:checker workload)}) diff --git a/script/jepsen.garage/src/jepsen/garage/daemon.clj b/script/jepsen.garage/src/jepsen/garage/daemon.clj index 81163521..2dcfa94f 100644 --- a/script/jepsen.garage/src/jepsen/garage/daemon.clj +++ b/script/jepsen.garage/src/jepsen/garage/daemon.clj @@ -15,7 +15,8 @@ (def pidfile (str base-dir "/garage.pid")) (def admin-token "icanhazadmin") -(def access-key "jepsen") +(def access-key-id "GK8bfb6a51286071c6c9cd8bc3") +(def secret-access-key "b0be95f71c1c6f16858a9edf395078b75c12ecb6b1c03385c4ae92076e4994a3") (def bucket-name "jepsen") ; THE GARAGE DB @@ -78,10 +79,10 @@ (c/trace (c/exec binary :layout :apply :--version 1) (info node "garage status:" (c/exec binary :status)) - (c/exec binary :key :create access-key) + 
(c/exec binary :key :import access-key-id secret-access-key :--yes) (c/exec binary :bucket :create bucket-name) - (c/exec binary :bucket :allow :--read :--write bucket-name :--key access-key) - (info node "key info: " (c/exec binary :key :info access-key)))) + (c/exec binary :bucket :allow :--read :--write bucket-name :--key access-key-id) + (info node "key info: " (c/exec binary :key :info access-key-id)))) (defn db "Garage DB for a particular version" @@ -122,13 +123,9 @@ (defn creds "Obtain Garage credentials for node" [node] - (let [key-info (c/on node (c/exec binary :key :info access-key :--show-secret)) - [_ ak sk] (re-matches - #"(?s).*Key ID: (.*)\nSecret key: (.*)\nCan create.*" - key-info)] - {:access-key ak - :secret-key sk - :endpoint (str "http://" node ":3900") - :bucket bucket-name - :client-config {:path-style-access-enabled true}})) + {:access-key access-key-id + :secret-key secret-access-key + :endpoint (str "http://" node ":3900") + :bucket bucket-name + :client-config {:path-style-access-enabled true}}) diff --git a/script/jepsen.garage/src/jepsen/garage/reg.clj b/script/jepsen.garage/src/jepsen/garage/reg.clj index 3a2c1d55..d5404c6b 100644 --- a/script/jepsen.garage/src/jepsen/garage/reg.clj +++ b/script/jepsen.garage/src/jepsen/garage/reg.clj @@ -33,19 +33,25 @@ (let [[k v] (:value op)] (case (:f op) :read - (let [value (s3/get (:creds this) k)] - (assoc op :type :ok, :value (independent/tuple k value))) + (try+ + (let [value (s3/get (:creds this) k)] + (assoc op :type :ok, :value (independent/tuple k value))) + (catch (re-find #"Unavailable" (.getMessage %)) ex + (assoc op :type :fail, :error [:s3-error (.getMessage ex)]))) :write - (do - (s3/put (:creds this) k v) - (assoc op :type :ok))))) + (try+ + (do + (s3/put (:creds this) k v) + (assoc op :type :ok)) + (catch (re-find #"Unavailable" (.getMessage %)) ex + (assoc op :type :fail, :error [:s3-error (.getMessage ex)])))))) (teardown! [this test]) (close! 
[this test])) (defn workload "Tests linearizable reads and writes" [opts] - {:client (RegClient. nil) + {:client (client/timeout 10 (RegClient. nil)) :checker (independent/checker (checker/compose {:linear (checker/linearizable @@ -53,8 +59,8 @@ :algorithm :linear}) :timeline (timeline/html)})) :generator (independent/concurrent-generator - 10 - (range) + (/ (:concurrency opts) 10) ; divide threads in 10 groups + (range) ; working on 10 keys (fn [k] (->> (gen/mix [op-get op-put op-del]) -- cgit v1.2.3 From 74e50eddddf319ce1a32a9b57b3825ea40db3a6c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 19 Oct 2023 14:34:19 +0200 Subject: jepsen: refactoring --- script/jepsen.garage/README.md | 33 +++++++++++++++++++++++ script/jepsen.garage/src/jepsen/garage.clj | 8 +++--- script/jepsen.garage/src/jepsen/garage/daemon.clj | 2 +- script/jepsen.garage/src/jepsen/garage/reg.clj | 27 +++++++++---------- script/jepsen.garage/src/jepsen/garage/set.clj | 30 +++++++++++++-------- 5 files changed, 70 insertions(+), 30 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 1bba32ec..5cb98e4d 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -31,6 +31,39 @@ lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrenc lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrency 50 --workload set2 ``` +## Results + +**Register linear, without timestamp patch** + +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg --ops-per-key 100` + +Results: fails with a simple clock-scramble nemesis. + +Explanation: without the timestamp patch, nodes will create objects using their +local clock only as a timestamp, so the ordering will be all over the place if +clocks are scrambled. 
+ +**Register linear, with timestamp patch** + +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg --ops-per-key 100 -I` + +Results: + +- No failure with clock-scramble nemesis +- Fails with clock-scramble nemesis + partition nemesis + +Explanation: S3 objects are not meant to behave like linearizable registers. TODO explain using a counter-example + +**Read-after-write CRDT register model**: TODO: determine the expected semantics of such a register, code a checker and show that results are correct + +**Set, basic test** + +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload set1 --ops-per-key 100` + +Results: + +- ListObjects returns objects not within prefix???? + ## License Copyright © 2023 Alex Auvolat diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 891fdf12..c8865248 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -56,12 +56,12 @@ (gen/stagger (/ (:rate opts))) (gen/nemesis (cycle [(gen/sleep 5) - ;{:type :info, :f :partition-start} - ;(gen/sleep 5) + {:type :info, :f :partition-start} + (gen/sleep 5) {:type :info, :f :clock-scramble} (gen/sleep 5) - ;{:type :info, :f :partition-stop} - ;(gen/sleep 5) + {:type :info, :f :partition-stop} + (gen/sleep 5) {:type :info, :f :clock-scramble}])) (gen/time-limit (:time-limit opts))) (gen/log "Healing cluster") diff --git a/script/jepsen.garage/src/jepsen/garage/daemon.clj b/script/jepsen.garage/src/jepsen/garage/daemon.clj index 2dcfa94f..f924dcd6 100644 --- a/script/jepsen.garage/src/jepsen/garage/daemon.clj +++ b/script/jepsen.garage/src/jepsen/garage/daemon.clj @@ -43,7 +43,7 @@ "rpc_bind_addr = \"0.0.0.0:3901\"\n" "rpc_public_addr = \"" node ":3901\"\n" "db_engine = \"lmdb\"\n" - "replication_mode = \"3\"\n" + "replication_mode = \"2\"\n" "data_dir = \"" data-dir "\"\n" "metadata_dir = 
\"" meta-dir "\"\n" "[s3_api]\n" diff --git a/script/jepsen.garage/src/jepsen/garage/reg.clj b/script/jepsen.garage/src/jepsen/garage/reg.clj index d5404c6b..f3d5cec5 100644 --- a/script/jepsen.garage/src/jepsen/garage/reg.clj +++ b/script/jepsen.garage/src/jepsen/garage/reg.clj @@ -9,6 +9,7 @@ [generator :as gen] [independent :as independent] [nemesis :as nemesis] + [util :as util] [tests :as tests]] [jepsen.checker.timeline :as timeline] [jepsen.control.util :as cu] @@ -25,33 +26,31 @@ (defrecord RegClient [creds] client/Client (open! [this test node] - (let [creds (grg/creds node)] - (info node "s3 credentials:" creds) - (assoc this :creds creds))) + (assoc this :creds (grg/creds node))) (setup! [this test]) (invoke! [this test op] (let [[k v] (:value op)] (case (:f op) :read - (try+ + (util/timeout + 10000 + (assoc op :type :fail, :error ::timeout) (let [value (s3/get (:creds this) k)] - (assoc op :type :ok, :value (independent/tuple k value))) - (catch (re-find #"Unavailable" (.getMessage %)) ex - (assoc op :type :fail, :error [:s3-error (.getMessage ex)]))) + (assoc op :type :ok, :value (independent/tuple k value)))) :write - (try+ + (util/timeout + 10000 + (assoc op :type :info, :error ::timeout) (do (s3/put (:creds this) k v) - (assoc op :type :ok)) - (catch (re-find #"Unavailable" (.getMessage %)) ex - (assoc op :type :fail, :error [:s3-error (.getMessage ex)])))))) + (assoc op :type :ok)))))) (teardown! [this test]) (close! [this test])) (defn workload "Tests linearizable reads and writes" [opts] - {:client (client/timeout 10 (RegClient. nil)) + {:client (RegClient. 
nil) :checker (independent/checker (checker/compose {:linear (checker/linearizable @@ -59,8 +58,8 @@ :algorithm :linear}) :timeline (timeline/html)})) :generator (independent/concurrent-generator - (/ (:concurrency opts) 10) ; divide threads in 10 groups - (range) ; working on 10 keys + 10 + (range) (fn [k] (->> (gen/mix [op-get op-put op-del]) diff --git a/script/jepsen.garage/src/jepsen/garage/set.clj b/script/jepsen.garage/src/jepsen/garage/set.clj index 6bbc1ee0..ff597095 100644 --- a/script/jepsen.garage/src/jepsen/garage/set.clj +++ b/script/jepsen.garage/src/jepsen/garage/set.clj @@ -11,6 +11,7 @@ [generator :as gen] [independent :as independent] [nemesis :as nemesis] + [util :as util] [tests :as tests]] [jepsen.checker.timeline :as timeline] [jepsen.control.util :as cu] @@ -26,23 +27,29 @@ (defrecord SetClient [creds] client/Client (open! [this test node] - (let [creds (grg/creds node)] - (info node "s3 credentials:" creds) - (assoc this :creds creds))) + (assoc this :creds (grg/creds node))) (setup! [this test]) (invoke! [this test op] (let [[k v] (:value op) prefix (str "set" k "/")] (case (:f op) :add - (do - (s3/put (:creds this) (str prefix v) "present") - (assoc op :type :ok)) + (util/timeout + 10000 + (assoc op :type :info, :error ::timeout) + (do + (s3/put (:creds this) (str prefix v) "present") + (assoc op :type :ok))) :read - (let [items (s3/list (:creds this) prefix) - items-stripped (map (fn [o] (str/replace-first o prefix "")) items) - items-set (set (map read-string items-stripped))] - (assoc op :type :ok, :value (independent/tuple k items-set)))))) + (util/timeout + 10000 + (assoc op :type :fail, :error ::timeout) + (let [items (s3/list (:creds this) prefix) + items-stripped (map (fn [o] + (assert (str/starts-with? o prefix)) + (str/replace-first o prefix "")) items) + items-set (set (map parse-long items-stripped))] + (assoc op :type :ok, :value (independent/tuple k items-set))))))) (teardown! [this test]) (close! 
[this test])) @@ -110,6 +117,7 @@ 10 (range) (fn [k] - (gen/mix [op-add-rand100 op-read])))}) + (->> (gen/mix [op-add-rand100 op-read]) + (gen/limit (:ops-per-key opts)))))}) -- cgit v1.2.3 From da8b1707489b70c25395ee49383ecbbd8c9f9404 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 19 Oct 2023 16:45:24 +0200 Subject: jepsen: investigating listobjects error --- script/jepsen.garage/README.md | 11 ++++++++++- script/jepsen.garage/jaeger.sh | 13 +++++++++++++ script/jepsen.garage/src/jepsen/garage/daemon.clj | 7 +++++-- script/jepsen.garage/src/jepsen/garage/s3api.clj | 1 + script/jepsen.garage/src/jepsen/garage/set.clj | 17 ++++++++++------- 5 files changed, 39 insertions(+), 10 deletions(-) create mode 100644 script/jepsen.garage/jaeger.sh diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 5cb98e4d..f6fb3a59 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -62,7 +62,16 @@ Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 -- Results: -- ListObjects returns objects not within prefix???? +- ListObjects returns objects not within prefix???? -> BAD, definitely a bug, but maybe it's in the instrumentation code? 
+ +In `store/garage set1/20231019T163358.615+0200`: + +``` +INFO [2023-10-19 16:35:20,977] clojure-agent-send-off-pool-207 - jepsen.garage.set list results for prefix set20/ : (set13/0 set13/1 set13/10 set13/11 set13/12 set13/13 set13/14 set13/15 set13/16 set13/17 set13/18 set13/19 set13/2 set13/20 set13/21 set13/22 set13/23 set13/24 set13/25 set13/26 set13/27 set13/28 set13/29 set13/3 set13/30 set13/31 set13/32 set13/33 set13/34 set13/35 set13/36 set13/37 set13/38 set13/39 set13/4 set13/40 set13/41 set13/42 set13/43 set13/44 set13/45 set13/46 set13/47 set13/48 set13/49 set13/5 set13/50 set13/51 set13/52 set13/53 set13/54 set13/55 set13/56 set13/57 set13/58 set13/59 set13/6 set13/60 set13/61 set13/62 set13/63 set13/64 set13/65 set13/66 set13/67 set13/68 set13/69 set13/7 set13/70 set13/71 set13/72 set13/73 set13/74 set13/75 set13/76 set13/77 set13/78 set13/79 set13/8 set13/80 set13/81 set13/82 set13/83 set13/84 set13/85 set13/86 set13/87 set13/88 set13/89 set13/9 set13/90 set13/91 set13/92 set13/93 set13/94 set13/95 set13/96 set13/97 set13/98 set13/99) (node: http://192.168.56.25:3900 ) + +``` + +- Sometimes ListObjects returns an empty list???? 
-> BAD, quorums should ensure this doesn't happen ## License diff --git a/script/jepsen.garage/jaeger.sh b/script/jepsen.garage/jaeger.sh new file mode 100644 index 00000000..7f67b61b --- /dev/null +++ b/script/jepsen.garage/jaeger.sh @@ -0,0 +1,13 @@ +docker stop jaeger +docker rm jaeger + +# UI is on localhost:16686 +# otel-grpc collector is on localhost:4317 +# otel-http collector is on localhost:4318 + +docker run -d --name jaeger \ + -e COLLECTOR_OTLP_ENABLED=true \ + -p 4317:4317 \ + -p 4318:4318 \ + -p 16686:16686 \ + jaegertracing/all-in-one:1.50 diff --git a/script/jepsen.garage/src/jepsen/garage/daemon.clj b/script/jepsen.garage/src/jepsen/garage/daemon.clj index f924dcd6..7c581ba1 100644 --- a/script/jepsen.garage/src/jepsen/garage/daemon.clj +++ b/script/jepsen.garage/src/jepsen/garage/daemon.clj @@ -53,7 +53,8 @@ "api_bind_addr = \"0.0.0.0:3902\"\n" "[admin]\n" "api_bind_addr = \"0.0.0.0:3903\"\n" - "admin_token = \"" admin-token "\"\n") + "admin_token = \"" admin-token "\"\n" + "trace_sink = \"http://192.168.56.1:4317\"\n") "/etc/garage.toml")))) (defn connect-node! @@ -94,7 +95,8 @@ (cu/start-daemon! {:logfile logfile :pidfile pidfile - :chdir base-dir} + :chdir base-dir + :env {:RUST_LOG "garage=debug,garage_api=trace"}} binary :server) (c/exec :sleep 3) @@ -113,6 +115,7 @@ (info node "tearing down garage" version) (c/su (cu/stop-daemon! binary pidfile) + (c/exec :rm :-rf logfile) (c/exec :rm :-rf data-dir) (c/exec :rm :-rf meta-dir))) diff --git a/script/jepsen.garage/src/jepsen/garage/s3api.clj b/script/jepsen.garage/src/jepsen/garage/s3api.clj index 239d5423..977de7dc 100644 --- a/script/jepsen.garage/src/jepsen/garage/s3api.clj +++ b/script/jepsen.garage/src/jepsen/garage/s3api.clj @@ -42,6 +42,7 @@ new-object-summaries (:object-summaries list-result) new-objects (map (fn [d] (:key d)) new-object-summaries) objects (concat new-objects accum)] + (info (:endpoint creds) "ListObjectsV2 prefix(" prefix "), ct(" ct "): " new-objects) (if (:truncated? 
list-result) (list-inner (:next-continuation-token list-result) objects) objects))) diff --git a/script/jepsen.garage/src/jepsen/garage/set.clj b/script/jepsen.garage/src/jepsen/garage/set.clj index ff597095..8a1ab83f 100644 --- a/script/jepsen.garage/src/jepsen/garage/set.clj +++ b/script/jepsen.garage/src/jepsen/garage/set.clj @@ -44,12 +44,13 @@ (util/timeout 10000 (assoc op :type :fail, :error ::timeout) - (let [items (s3/list (:creds this) prefix) - items-stripped (map (fn [o] + (let [items (s3/list (:creds this) prefix)] + (info "list results for prefix" prefix ":" items " (node:" (:endpoint (:creds this)) ")") + (let [items-stripped (map (fn [o] (assert (str/starts-with? o prefix)) (str/replace-first o prefix "")) items) - items-set (set (map parse-long items-stripped))] - (assoc op :type :ok, :value (independent/tuple k items-set))))))) + items-set (set (map parse-long items-stripped))] + (assoc op :type :ok, :value (independent/tuple k items-set)))))))) (teardown! [this test]) (close! [this test])) @@ -100,9 +101,11 @@ (->> (range) (map (fn [x] {:type :invoke, :f :add, :value x})) (gen/limit (:ops-per-key opts))))) - :final-generator (independent/sequential-generator - (range 100) - (fn [k] (gen/once op-read)))}) + :final-generator (gen/phases + (independent/sequential-generator + (range 100) + (fn [k] (gen/once op-read))) + (gen/sleep 5))}) (defn workload2 "Tests insertions and deletions" -- cgit v1.2.3 From ef662822c9e48ff7cfd9300590617e089c0a9498 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 19 Oct 2023 23:40:55 +0200 Subject: jepsen: fix the list-objects call (?) 
--- script/jepsen.garage/README.md | 37 +++++++++++++++++++----- script/jepsen.garage/src/jepsen/garage/s3api.clj | 27 ++++++++--------- script/jepsen.garage/src/jepsen/garage/set.clj | 16 +++++----- 3 files changed, 52 insertions(+), 28 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index f6fb3a59..8dcd3766 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -33,7 +33,7 @@ lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrenc ## Results -**Register linear, without timestamp patch** +### Register linear, without timestamp patch Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg --ops-per-key 100` @@ -43,7 +43,7 @@ Explanation: without the timestamp patch, nodes will create objects using their local clock only as a timestamp, so the ordering will be all over the place if clocks are scrambled. -**Register linear, with timestamp patch** +### Register linear, with timestamp patch Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg --ops-per-key 100 -I` @@ -54,24 +54,45 @@ Results: Explanation: S3 objects are not meant to behave like linearizable registers. 
TODO explain using a counter-example -**Read-after-write CRDT register model**: TODO: determine the expected semantics of such a register, code a checker and show that results are correct +### Read-after-write CRDT register model -**Set, basic test** +TODO: determine the expected semantics of such a register, code a checker and show that results are correct -Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload set1 --ops-per-key 100` +### Set, basic test (write some items, then read) + +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 100 --workload set1 --ops-per-key 100` Results: -- ListObjects returns objects not within prefix???? -> BAD, definitely a bug, but maybe it's in the instrumentation code? +- For now, no failures with clock-scramble nemesis + partition nemesis + +### Set, continuous test (interspersed reads and writes) + +TODO + +TODO: nemesis that reconfigures the cluster with a different subset of nodes, to have requests that occur during a resync period. + + +## Investigating (and fixing) wierd behavior + +### Segfaults + +They are due to the download being interrupted in the middle (^C during first launch on clean VMs), the `garage` binary is truncated. +Add `:force?` to the `cached-wget!` call in `daemon.clj` to re-download the binary. 
+ +### In `jepsen.garage`: prefix wierdness In `store/garage set1/20231019T163358.615+0200`: ``` INFO [2023-10-19 16:35:20,977] clojure-agent-send-off-pool-207 - jepsen.garage.set list results for prefix set20/ : (set13/0 set13/1 set13/10 set13/11 set13/12 set13/13 set13/14 set13/15 set13/16 set13/17 set13/18 set13/19 set13/2 set13/20 set13/21 set13/22 set13/23 set13/24 set13/25 set13/26 set13/27 set13/28 set13/29 set13/3 set13/30 set13/31 set13/32 set13/33 set13/34 set13/35 set13/36 set13/37 set13/38 set13/39 set13/4 set13/40 set13/41 set13/42 set13/43 set13/44 set13/45 set13/46 set13/47 set13/48 set13/49 set13/5 set13/50 set13/51 set13/52 set13/53 set13/54 set13/55 set13/56 set13/57 set13/58 set13/59 set13/6 set13/60 set13/61 set13/62 set13/63 set13/64 set13/65 set13/66 set13/67 set13/68 set13/69 set13/7 set13/70 set13/71 set13/72 set13/73 set13/74 set13/75 set13/76 set13/77 set13/78 set13/79 set13/8 set13/80 set13/81 set13/82 set13/83 set13/84 set13/85 set13/86 set13/87 set13/88 set13/89 set13/9 set13/90 set13/91 set13/92 set13/93 set13/94 set13/95 set13/96 set13/97 set13/98 set13/99) (node: http://192.168.56.25:3900 ) - ``` -- Sometimes ListObjects returns an empty list???? -> BAD, quorums should ensure this doesn't happen +After inspecting, the actual S3 call made was with prefix "set13/", so at least this is not an error in Garage itself but in the jepsen code. + +Finally found out that this was due to closures not correctly capturing their context in the list function in s3api.clj (wtf clojure?) +Not sure exactly where it came from but it seems to have been fixed by making list-inner a separate function and not a sub-function, +and passing all values that were previously in the context (creds and prefix) as additional arguments. 
## License diff --git a/script/jepsen.garage/src/jepsen/garage/s3api.clj b/script/jepsen.garage/src/jepsen/garage/s3api.clj index 977de7dc..4f292ac0 100644 --- a/script/jepsen.garage/src/jepsen/garage/s3api.clj +++ b/script/jepsen.garage/src/jepsen/garage/s3api.clj @@ -31,19 +31,20 @@ :input-stream bytes-stream :metadata {:content-length (count some-bytes)})))) +(defn list-inner [creds prefix ct accum] + (let [list-result (s3/list-objects-v2 creds + {:bucket-name (:bucket creds) + :prefix prefix + :continuation-token ct}) + new-object-summaries (:object-summaries list-result) + new-objects (map (fn [d] (:key d)) new-object-summaries) + objects (concat new-objects accum)] + (info (:endpoint creds) "ListObjectsV2 prefix(" prefix "), ct(" ct "): " new-objects) + (if (:truncated? list-result) + (list-inner creds prefix (:next-continuation-token list-result) objects) + objects))) (defn list "Helper for ListObjects -- just lists everything in the bucket" [creds prefix] - (defn list-inner [ct accum] - (let [list-result (s3/list-objects-v2 creds - {:bucket-name (:bucket creds) - :prefix prefix - :continuation-token ct}) - new-object-summaries (:object-summaries list-result) - new-objects (map (fn [d] (:key d)) new-object-summaries) - objects (concat new-objects accum)] - (info (:endpoint creds) "ListObjectsV2 prefix(" prefix "), ct(" ct "): " new-objects) - (if (:truncated? 
list-result) - (list-inner (:next-continuation-token list-result) objects) - objects))) - (list-inner nil [])) + (info "in s3/list creds:" creds ", prefix:" prefix) + (list-inner creds prefix nil [])) diff --git a/script/jepsen.garage/src/jepsen/garage/set.clj b/script/jepsen.garage/src/jepsen/garage/set.clj index 8a1ab83f..c5387462 100644 --- a/script/jepsen.garage/src/jepsen/garage/set.clj +++ b/script/jepsen.garage/src/jepsen/garage/set.clj @@ -44,13 +44,15 @@ (util/timeout 10000 (assoc op :type :fail, :error ::timeout) - (let [items (s3/list (:creds this) prefix)] - (info "list results for prefix" prefix ":" items " (node:" (:endpoint (:creds this)) ")") - (let [items-stripped (map (fn [o] - (assert (str/starts-with? o prefix)) - (str/replace-first o prefix "")) items) - items-set (set (map parse-long items-stripped))] - (assoc op :type :ok, :value (independent/tuple k items-set)))))))) + (do + (info "call s3/list creds: " (:creds this) ", prefix:" prefix) + (let [items (s3/list (:creds this) prefix)] + (info "list results for prefix" prefix ":" items " (node:" (:endpoint (:creds this)) ")") + (let [items-stripped (map (fn [o] + (assert (str/starts-with? o prefix)) + (str/replace-first o prefix "")) items) + items-set (set (map parse-long items-stripped))] + (assoc op :type :ok, :value (independent/tuple k items-set))))))))) (teardown! [this test]) (close! 
[this test])) -- cgit v1.2.3 From 4ba18ce9cca1b828edcf3f8c8770d49c75ed3083 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 20 Oct 2023 12:13:11 +0200 Subject: jepsen: wip checker for register-like behavior --- script/jepsen.garage/src/jepsen/garage/reg.clj | 66 ++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 4 deletions(-) diff --git a/script/jepsen.garage/src/jepsen/garage/reg.clj b/script/jepsen.garage/src/jepsen/garage/reg.clj index f3d5cec5..b5bf28ff 100644 --- a/script/jepsen.garage/src/jepsen/garage/reg.clj +++ b/script/jepsen.garage/src/jepsen/garage/reg.clj @@ -1,6 +1,7 @@ (ns jepsen.garage.reg (:require [clojure.tools.logging :refer :all] [clojure.string :as str] + [clojure.set :as set] [jepsen [checker :as checker] [cli :as cli] [client :as client] @@ -20,7 +21,7 @@ [slingshot.slingshot :refer [try+]])) (defn op-get [_ _] {:type :invoke, :f :read, :value nil}) -(defn op-put [_ _] {:type :invoke, :f :write, :value (str (rand-int 9))}) +(defn op-put [_ _] {:type :invoke, :f :write, :value (str (rand-int 99))}) (defn op-del [_ _] {:type :invoke, :f :write, :value nil}) (defrecord RegClient [creds] @@ -47,15 +48,72 @@ (teardown! [this test]) (close! 
[this test])) +(defn reg-read-after-write + "Read-after-Write checker for register operations" + [] + (reify checker/Checker + (check [this test history opts] + (let [init {:put-values {-1 nil} + :put-done #{-1} + :put-in-progress {} + :read-can-contain {} + :bad-reads #{}} + final (reduce + (fn [state op] + (let [current-values (set/union + (set (map (fn [idx] (get (:put-values state) idx)) (:put-done state))) + (set (map (fn [[_ [idx _]]] (get (:put-values state) idx)) (:put-in-progress state)))) + read-can-contain (reduce + (fn [rcc [idx v]] (assoc rcc idx (set/union current-values v))) + {} (:read-can-contain state))] + (info "--------") + (info "state: " state) + (info "current-values: " current-values) + (info "read-can-contain: " read-can-contain) + (info "op: " op) + (case [(:type op) (:f op)] + ([:invoke :write]) + (assoc state + :read-can-contain read-can-contain + :put-values (assoc (:put-values state) (:index op) (:value op)) + :put-in-progress (assoc (:put-in-progress state) (:process op) [(:index op) (:put-done state)])) + ([:ok :write]) + (let [[index overwrites] (get (:put-in-progress state) (:process op))] + (assoc state + :read-can-contain read-can-contain + :put-in-progress (dissoc (:put-in-progress state) (:process op)) + :put-done + (conj + (set/difference (:put-done state) overwrites) + index))) + ([:invoke :read]) + (assoc state + :read-can-contain (assoc read-can-contain (:process op) current-values)) + ([:ok :read]) + (let [this-read-can-contain (get read-can-contain (:process op)) + bad-reads (if (contains? this-read-can-contain (:value op)) + (:bad-reads state) + (conj (:bad-reads state) [(:process op) (:index op) (:value op) this-read-can-contain]))] + (info "this-read-can-contain: " this-read-can-contain) + (assoc state + :read-can-contain (dissoc read-can-contain (:process op)) + :bad-reads bad-reads)) + state))) + init history) + valid? (empty? (:bad-reads final))] + (assoc final :valid? 
valid?))))) + (defn workload "Tests linearizable reads and writes" [opts] {:client (RegClient. nil) :checker (independent/checker (checker/compose - {:linear (checker/linearizable - {:model (model/register) - :algorithm :linear}) + {:reg-read-after-write (reg-read-after-write) + ; linear test is desactivated, indeed Garage is not linear + ;:linear (checker/linearizable + ; {:model (model/register) + ; :algorithm :linear}) :timeline (timeline/html)})) :generator (independent/concurrent-generator 10 -- cgit v1.2.3 From 4b93ce179a3777c8461f3b5843dc3802bddc739c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 20 Oct 2023 12:56:45 +0200 Subject: jepsen: errors in reg2 workload under investigation --- script/jepsen.garage/README.md | 25 +++++++++++++++------ script/jepsen.garage/src/jepsen/garage.clj | 3 ++- script/jepsen.garage/src/jepsen/garage/reg.clj | 30 +++++++++++++++++--------- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 8dcd3766..762901fe 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -35,7 +35,7 @@ lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrenc ### Register linear, without timestamp patch -Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg --ops-per-key 100` +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg1 --ops-per-key 100` Results: fails with a simple clock-scramble nemesis. @@ -45,7 +45,7 @@ clocks are scrambled. 
### Register linear, with timestamp patch -Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg --ops-per-key 100 -I` +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg1 --ops-per-key 100 -I` Results: @@ -54,9 +54,23 @@ Results: Explanation: S3 objects are not meant to behave like linearizable registers. TODO explain using a counter-example -### Read-after-write CRDT register model +### Read-after-write CRDT register model, without timestamp patch + +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 100 --workload reg2 --ops-per-key 100` + +Results: fails with a simple clock-scramble nemesis. + +Explanation: old values are not overwritten correctly when their timestamps are in the future. + +### Read-after-write CRDT register model, with timestamp patch + +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 100 --workload reg2 --ops-per-key 100 -I` + +Results: + +- Failures with clock-scramble nemesis + partition nemesis ???? TODO INVESTIGATE +- TODO: layout reconfiguration nemesis -TODO: determine the expected semantics of such a register, code a checker and show that results are correct ### Set, basic test (write some items, then read) @@ -65,13 +79,12 @@ Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 - Results: - For now, no failures with clock-scramble nemesis + partition nemesis +- TODO: layout reconfiguration nemesis ### Set, continuous test (interspersed reads and writes) TODO -TODO: nemesis that reconfigures the cluster with a different subset of nodes, to have requests that occur during a resync period. 
- ## Investigating (and fixing) wierd behavior diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index c8865248..be192a7f 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -15,7 +15,8 @@ (def workloads "A map of workload names to functions that construct workloads, given opts." - {"reg" reg/workload + {"reg1" reg/workload1 + "reg2" reg/workload2 "set1" set/workload1 "set2" set/workload2}) diff --git a/script/jepsen.garage/src/jepsen/garage/reg.clj b/script/jepsen.garage/src/jepsen/garage/reg.clj index b5bf28ff..6772abfe 100644 --- a/script/jepsen.garage/src/jepsen/garage/reg.clj +++ b/script/jepsen.garage/src/jepsen/garage/reg.clj @@ -103,18 +103,10 @@ valid? (empty? (:bad-reads final))] (assoc final :valid? valid?))))) -(defn workload - "Tests linearizable reads and writes" +(defn workload-common + "Common parts of workload" [opts] {:client (RegClient. nil) - :checker (independent/checker - (checker/compose - {:reg-read-after-write (reg-read-after-write) - ; linear test is desactivated, indeed Garage is not linear - ;:linear (checker/linearizable - ; {:model (model/register) - ; :algorithm :linear}) - :timeline (timeline/html)})) :generator (independent/concurrent-generator 10 (range) @@ -123,4 +115,22 @@ (gen/mix [op-get op-put op-del]) (gen/limit (:ops-per-key opts)))))}) +(defn workload1 + "Tests linearizable reads and writes" + [opts] + (assoc (workload-common opts) + :checker (independent/checker + (checker/compose + {:linear (checker/linearizable + {:model (model/register) + :algorithm :linear}) + :timeline (timeline/html)})))) +(defn workload2 + "Tests CRDT reads and writes" + [opts] + (assoc (workload-common opts) + :checker (independent/checker + (checker/compose + {:reg-read-after-write (reg-read-after-write) + :timeline (timeline/html)})))) -- cgit v1.2.3 From d148b83d4f440dc79b2ed08eaa171aca0e2037b0 Mon Sep 17 00:00:00 2001 From: Alex Auvolat 
Date: Fri, 20 Oct 2023 13:36:48 +0200 Subject: jepsen: reg2 failure seems to happen only with deleteobject --- script/jepsen.garage/README.md | 20 +++++++++++++++++++- script/jepsen.garage/src/jepsen/garage.clj | 14 +++++++++----- script/jepsen.garage/src/jepsen/garage/reg.clj | 3 ++- 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 762901fe..da6f0b77 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -69,6 +69,8 @@ Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 - Results: - Failures with clock-scramble nemesis + partition nemesis ???? TODO INVESTIGATE + -> the issue seems to be only after DeleteObject (deletions are not always taken into account), + the issue does not appear if we are using only PutObject with an actual object content - TODO: layout reconfiguration nemesis @@ -86,7 +88,7 @@ Results: TODO -## Investigating (and fixing) wierd behavior +## Investigating (and fixing) errors ### Segfaults @@ -107,6 +109,22 @@ Finally found out that this was due to closures not correctly capturing their co Not sure exactly where it came from but it seems to have been fixed by making list-inner a separate function and not a sub-function, and passing all values that were previously in the context (creds and prefix) as additional arguments. +### `reg2` test inconsistency, even with timestamp fix + +The reg2 test is our custom checker for CRDT read-after-write on individual object keys, acting as registers which can be updated. +The test fails without the timestamp fix, which is expected as the clock scrambler will prevent nodes from having a correct ordering of objects. 
+ +With the timestamp fix, the happened-before relationship should at least be respected, meaning that when a PutObject call starts +after another PutObject call has ended, the second call should overwrite the value of the first call, and that value should not be +readable by future GetObject calls. +However, we observed inconsistencies even with the timestamp fix. + +The inconsistencies seemed to always happen after writing a nil value, which translates to a DeleteObject call +instead of a PutObject. By removing the possibility of writing nil values, therefore only doing +PutObject calls, the issue disappears. There is therefore an issue to fix in DeleteObject. + + + ## License Copyright © 2023 Alex Auvolat diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index be192a7f..ce02b7f7 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -20,10 +20,16 @@ "set1" set/workload1 "set2" set/workload2}) +(def patches + "A map of patch names to Garage builds" + {"default" "v0.9.0" + "tsfix1" "d146cdd5b66ca1d3ed65ce93ca42c6db22defc09"}) + (def cli-opts "Additional command line options." - [["-I" "--increasing-timestamps" "Garage version with increasing timestamps on PutObject" - :default false] + [["-p" "--patch NAME" "Garage patch to use" + :default "default" + :validate [patches (cli/one-of patches)]] ["-r" "--rate HZ" "Approximate number of requests per second, per thread." :default 10 :parse-fn read-string @@ -41,7 +47,7 @@ :concurrency, ...), constructs a test map." 
[opts] (let [workload ((get workloads (:workload opts)) opts) - garage-version (if (:increasing-timestamps opts) - "d146cdd5b66ca1d3ed65ce93ca42c6db22defc09" - "v0.9.0")] + garage-version (get patches (:patch opts))] (merge tests/noop-test opts {:pure-generators true diff --git a/script/jepsen.garage/src/jepsen/garage/reg.clj b/script/jepsen.garage/src/jepsen/garage/reg.clj index 6772abfe..ecc96590 100644 --- a/script/jepsen.garage/src/jepsen/garage/reg.clj +++ b/script/jepsen.garage/src/jepsen/garage/reg.clj @@ -112,7 +112,8 @@ (range) (fn [k] (->> - (gen/mix [op-get op-put op-del]) + ; (gen/mix [op-get op-put op-del]) + (gen/mix [op-get op-put]) (gen/limit (:ops-per-key opts)))))}) (defn workload1 -- cgit v1.2.3 From f5b09727815523a1bd4ba5f62d892b2b45b5bed6 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 20 Oct 2023 15:00:10 +0200 Subject: jepsen: register crdt read-after-write is fixed with deleteobject patch --- script/jepsen.garage/README.md | 11 +++++------ script/jepsen.garage/Vagrantfile | 1 + script/jepsen.garage/src/jepsen/garage.clj | 3 ++- script/jepsen.garage/src/jepsen/garage/reg.clj | 3 +-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index da6f0b77..4c3c70b3 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -45,7 +45,7 @@ clocks are scrambled. ### Register linear, with timestamp patch -Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg1 --ops-per-key 100 -I` +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg1 --ops-per-key 100 --patch tsfix1` Results: @@ -62,15 +62,13 @@ Results: fails with a simple clock-scramble nemesis. Explanation: old values are not overwritten correctly when their timestamps are in the future. 
-### Read-after-write CRDT register model, with timestamp patch +### Read-after-write CRDT register model, with timestamp patch (v2 with DeleteObject fix as well) -Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 100 --workload reg2 --ops-per-key 100 -I` +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 100 --workload reg2 --ops-per-key 100 --patch tsfix2` Results: -- Failures with clock-scramble nemesis + partition nemesis ???? TODO INVESTIGATE - -> the issue seems to be only after DeleteObject (deletions are not always taken into account), - the issue does not appear if we are using only PutObject with an actual object content +- No failures with clock-scramble nemesis + partition nemesis - TODO: layout reconfiguration nemesis @@ -123,6 +121,7 @@ The inconsistencies seemed to always happenned after writing a nil value, which instead of a PutObject. By removing the possibility of writing nil values, therefore only doing PutObject calls, the issue disappears. There is therefore an issue to fix in DeleteObject. 
+The issue in DeleteObject seems to have been fixed by commit `c82d91c6bccf307186332b6c5c6fc0b128b1b2b1` ## License diff --git a/script/jepsen.garage/Vagrantfile b/script/jepsen.garage/Vagrantfile index c40c600d..d0e545d3 100644 --- a/script/jepsen.garage/Vagrantfile +++ b/script/jepsen.garage/Vagrantfile @@ -13,6 +13,7 @@ Vagrant.configure("2") do |config| config.vm.provider "virtualbox" do |vb| vb.gui = false vb.memory = "512" + vb.customize ["modifyvm", :id, "--vram=12"] end config.vm.provision "shell", inline: <<-SHELL diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index ce02b7f7..a566d9be 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -23,7 +23,8 @@ (def patches "A map of patch names to Garage builds" {"default" "v0.9.0" - "tsfix1" "d146cdd5b66ca1d3ed65ce93ca42c6db22defc09"}) + "tsfix1" "d146cdd5b66ca1d3ed65ce93ca42c6db22defc09" + "tsfix2" "c82d91c6bccf307186332b6c5c6fc0b128b1b2b1"}) (def cli-opts "Additional command line options." 
diff --git a/script/jepsen.garage/src/jepsen/garage/reg.clj b/script/jepsen.garage/src/jepsen/garage/reg.clj index ecc96590..6772abfe 100644 --- a/script/jepsen.garage/src/jepsen/garage/reg.clj +++ b/script/jepsen.garage/src/jepsen/garage/reg.clj @@ -112,8 +112,7 @@ (range) (fn [k] (->> - ; (gen/mix [op-get op-put op-del]) - (gen/mix [op-get op-put]) + (gen/mix [op-get op-put op-del]) (gen/limit (:ops-per-key opts)))))}) (defn workload1 -- cgit v1.2.3 From 654775308ed03abd68941b07b2ad367a4de5c57f Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 20 Oct 2023 15:48:37 +0200 Subject: jepsen: add cluster reconfiguration nemesis --- script/jepsen.garage/Vagrantfile | 1 + script/jepsen.garage/nodes.vagrant | 1 + script/jepsen.garage/src/jepsen/garage.clj | 27 ++++--- script/jepsen.garage/src/jepsen/garage/nemesis.clj | 91 ++++++++++++++++++++++ 4 files changed, 106 insertions(+), 14 deletions(-) create mode 100644 script/jepsen.garage/src/jepsen/garage/nemesis.clj diff --git a/script/jepsen.garage/Vagrantfile b/script/jepsen.garage/Vagrantfile index d0e545d3..1125bccf 100644 --- a/script/jepsen.garage/Vagrantfile +++ b/script/jepsen.garage/Vagrantfile @@ -27,4 +27,5 @@ Vagrant.configure("2") do |config| config.vm.define "n3" do |config| vm(config, "n3", "192.168.56.23") end config.vm.define "n4" do |config| vm(config, "n4", "192.168.56.24") end config.vm.define "n5" do |config| vm(config, "n5", "192.168.56.25") end + config.vm.define "n6" do |config| vm(config, "n6", "192.168.56.26") end end diff --git a/script/jepsen.garage/nodes.vagrant b/script/jepsen.garage/nodes.vagrant index df7c4622..3f7e2b42 100644 --- a/script/jepsen.garage/nodes.vagrant +++ b/script/jepsen.garage/nodes.vagrant @@ -3,3 +3,4 @@ 192.168.56.23 192.168.56.24 192.168.56.25 +192.168.56.26 diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index a566d9be..65a92a76 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ 
b/script/jepsen.garage/src/jepsen/garage.clj @@ -10,6 +10,7 @@ [jepsen.os.debian :as debian] [jepsen.garage [daemon :as grg] + [nemesis :as grgNemesis] [reg :as reg] [set :as set]])) @@ -20,6 +21,11 @@ "set1" set/workload1 "set2" set/workload2}) +(def scenari + "A map of scenari to the associated nemesis" + {"cp" grgNemesis/scenario-cp + "r" grgNemesis/scenario-r}) + (def patches "A map of patch names to Garage builds" {"default" "v0.9.0" @@ -31,6 +37,9 @@ [["-p" "--patch NAME" "Garage patch to use" :default "default" :validate [patches (cli/one-of patches)]] + ["-s" "--scenario NAME" "Nemesis scenario to run" + :default "cp" + :validate [scenari (cli/one-of scenari)]] ["-r" "--rate HZ" "Approximate number of requests per second, per thread." :default 10 :parse-fn read-string @@ -40,7 +49,7 @@ :parse-fn parse-long :validate [pos? "Must be a positive integer."]] ["-w" "--workload NAME" "Workload of test to run" - :default "reg" + :default "reg1" :validate [workloads (cli/one-of workloads)]]]) (defn garage-test @@ -48,6 +57,7 @@ :concurrency, ...), constructs a test map." 
[opts] (let [workload ((get workloads (:workload opts)) opts) + scenario ((get scenari (:scenario opts)) opts) garage-version (get patches (:patch opts))] (merge tests/noop-test opts @@ -60,25 +70,14 @@ (->> (:generator workload) (gen/stagger (/ (:rate opts))) - (gen/nemesis - (cycle [(gen/sleep 5) - {:type :info, :f :partition-start} - (gen/sleep 5) - {:type :info, :f :clock-scramble} - (gen/sleep 5) - {:type :info, :f :partition-stop} - (gen/sleep 5) - {:type :info, :f :clock-scramble}])) + (gen/nemesis (:generator scenario)) (gen/time-limit (:time-limit opts))) (gen/log "Healing cluster") (gen/nemesis (gen/once {:type :info, :f :partition-stop})) (gen/log "Waiting for recovery") (gen/sleep 10) (gen/clients (:final-generator workload))) - :nemesis (nemesis/compose - {{:partition-start :start - :partition-stop :stop} (nemesis/partition-random-halves) - {:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)}) + :nemesis (:nemesis scenario) :checker (checker/compose {:perf (checker/perf) :workload (:checker workload)}) diff --git a/script/jepsen.garage/src/jepsen/garage/nemesis.clj b/script/jepsen.garage/src/jepsen/garage/nemesis.clj new file mode 100644 index 00000000..7cd9306e --- /dev/null +++ b/script/jepsen.garage/src/jepsen/garage/nemesis.clj @@ -0,0 +1,91 @@ +(ns jepsen.garage.nemesis + (:require [clojure.tools.logging :refer :all] + [jepsen [control :as c] + [core :as jepsen] + [generator :as gen] + [nemesis :as nemesis]] + [jepsen.garage.daemon :as grg] + [jepsen.control.util :as cu])) + +(defn configure-present! + "Configure node to be active in new cluster layout" + [test node] + (info "configure-present!" node) + (let [node-id (c/on node (c/exec grg/binary :node :id :-q))] + (c/on + (jepsen/primary test) + (c/exec grg/binary :layout :assign (subs node-id 0 16) :-c :1G)))) + +(defn configure-absent! + "Configure node to be active in new cluster layout" + [test node] + (info "configure-absent!" 
node) + (let [node-id (c/on node (c/exec grg/binary :node :id :-q))] + (c/on + (jepsen/primary test) + (c/exec grg/binary :layout :assign (subs node-id 0 16) :-g)))) + +(defn finalize-config! + "Apply the proposed cluster layout" + [test] + (let [layout-show (c/on (jepsen/primary test) (c/exec grg/binary :layout :show)) + [_ layout-next-version] (re-find #"apply --version (\d+)\n" layout-show)] + (info "layout show: " layout-show "; next-version: " layout-next-version) + (c/on (jepsen/primary test) + (c/exec grg/binary :layout :apply :--version layout-next-version)))) + +(defn reconfigure-subset + "Reconfigure cluster with only a subset of nodes" + [cnt] + (reify nemesis/Nemesis + (setup! [this test] this) + + (invoke! [this test op] op + (case (:f op) + :start + (let [[keep-nodes remove-nodes] + (->> (:nodes test) + shuffle + (split-at cnt))] + (info "layout split: keep " keep-nodes ", remove " remove-nodes) + (run! #(configure-present! test %) keep-nodes) + (run! #(configure-absent! test %) remove-nodes) + (finalize-config! test) + (assoc op :value keep-nodes)) + :stop + (do + (info "layout un-split: all nodes=" (:nodes test)) + (run! #(configure-present! test %) (:nodes test)) + (finalize-config! test) + (assoc op :value (:nodes test))))) + + (teardown! 
[this test] this))) + +(defn scenario-cp + "Clock scramble + parittion scenario" + [opts] + {:generator (cycle [(gen/sleep 5) + {:type :info, :f :partition-start} + (gen/sleep 5) + {:type :info, :f :clock-scramble} + (gen/sleep 5) + {:type :info, :f :partition-stop} + (gen/sleep 5) + {:type :info, :f :clock-scramble}]) + :nemesis (nemesis/compose + {{:partition-start :start + :partition-stop :stop} (nemesis/partition-random-halves) + {:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})}) + +(defn scenario-r + "Cluster reconfiguration scenario" + [opts] + {:generator (cycle [(gen/sleep 5) + {:type :info, :f :reconfigure-start} + (gen/sleep 5) + {:type :info, :f :reconfigure-start} + (gen/sleep 5) + {:type :info, :f :reconfigure-stop}]) + :nemesis (nemesis/compose + {{:reconfigure-start :start + :reconfigure-stop :stop} (reconfigure-subset 3)})}) -- cgit v1.2.3 From 9030c1eef8f4c7c3435835f15e6b236497f93323 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 20 Oct 2023 15:53:46 +0200 Subject: jepsen: code path for nemesis final generator --- script/jepsen.garage/src/jepsen/garage.clj | 2 +- script/jepsen.garage/src/jepsen/garage/nemesis.clj | 44 +++++++++++----------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 65a92a76..ace3e66a 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -73,7 +73,7 @@ (gen/nemesis (:generator scenario)) (gen/time-limit (:time-limit opts))) (gen/log "Healing cluster") - (gen/nemesis (gen/once {:type :info, :f :partition-stop})) + (gen/nemesis (:final-generator scenario)) (gen/log "Waiting for recovery") (gen/sleep 10) (gen/clients (:final-generator workload))) diff --git a/script/jepsen.garage/src/jepsen/garage/nemesis.clj b/script/jepsen.garage/src/jepsen/garage/nemesis.clj index 7cd9306e..9edfe418 100644 --- a/script/jepsen.garage/src/jepsen/garage/nemesis.clj 
+++ b/script/jepsen.garage/src/jepsen/garage/nemesis.clj @@ -64,28 +64,30 @@ (defn scenario-cp "Clock scramble + parittion scenario" [opts] - {:generator (cycle [(gen/sleep 5) - {:type :info, :f :partition-start} - (gen/sleep 5) - {:type :info, :f :clock-scramble} - (gen/sleep 5) - {:type :info, :f :partition-stop} - (gen/sleep 5) - {:type :info, :f :clock-scramble}]) - :nemesis (nemesis/compose - {{:partition-start :start - :partition-stop :stop} (nemesis/partition-random-halves) - {:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})}) + {:generator (cycle [(gen/sleep 5) + {:type :info, :f :partition-start} + (gen/sleep 5) + {:type :info, :f :clock-scramble} + (gen/sleep 5) + {:type :info, :f :partition-stop} + (gen/sleep 5) + {:type :info, :f :clock-scramble}]) + :final-generator (gen/once {:type :info, :f :partition-stop}) + :nemesis (nemesis/compose + {{:partition-start :start + :partition-stop :stop} (nemesis/partition-random-halves) + {:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})}) (defn scenario-r "Cluster reconfiguration scenario" [opts] - {:generator (cycle [(gen/sleep 5) - {:type :info, :f :reconfigure-start} - (gen/sleep 5) - {:type :info, :f :reconfigure-start} - (gen/sleep 5) - {:type :info, :f :reconfigure-stop}]) - :nemesis (nemesis/compose - {{:reconfigure-start :start - :reconfigure-stop :stop} (reconfigure-subset 3)})}) + {:generator (cycle [(gen/sleep 5) + {:type :info, :f :reconfigure-start} + (gen/sleep 5) + {:type :info, :f :reconfigure-start} + (gen/sleep 5) + {:type :info, :f :reconfigure-stop}]) + :final-generator (gen/once {:type :info, :f :reconfigure-stop}) + :nemesis (nemesis/compose + {{:reconfigure-start :start + :reconfigure-stop :stop} (reconfigure-subset 3)})}) -- cgit v1.2.3 From fb6c9a1243bd561d2a0de6b49c8debf37d566473 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 20 Oct 2023 15:55:09 +0200 Subject: jepsen: update readme --- script/jepsen.garage/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 4c3c70b3..684bce87 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -69,7 +69,7 @@ Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 - Results: - No failures with clock-scramble nemesis + partition nemesis -- TODO: layout reconfiguration nemesis +- Fails with layout reconfiguration nemesis (TODO: test more and investigate) ### Set, basic test (write some items, then read) @@ -79,7 +79,7 @@ Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 - Results: - For now, no failures with clock-scramble nemesis + partition nemesis -- TODO: layout reconfiguration nemesis +- TODO: layout reconfiguration nemesis (does not fail yet! but it should) ### Set, continuous test (interspersed reads and writes) -- cgit v1.2.3 From d2c365767b0a4cb70dcbb1d20b75f41e0f9c20c8 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 24 Oct 2023 11:39:45 +0200 Subject: jepsen: more testing --- script/jepsen.garage/README.md | 71 ++++++++++++++-------- script/jepsen.garage/src/jepsen/garage.clj | 6 +- script/jepsen.garage/src/jepsen/garage/nemesis.clj | 34 ++++++++++- script/jepsen.garage/src/jepsen/garage/s3api.clj | 2 - script/jepsen.garage/src/jepsen/garage/set.clj | 6 +- 5 files changed, 84 insertions(+), 35 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 684bce87..06379d25 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -35,55 +35,74 @@ lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrenc ### Register linear, without timestamp patch -Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg1 --ops-per-key 100` +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 20 --workload reg1 --ops-per-key 100` -Results: 
fails with a simple clock-scramble nemesis. +Results without timestamp patch: -Explanation: without the timestamp patch, nodes will create objects using their -local clock only as a timestamp, so the ordering will be all over the place if -clocks are scrambled. +- Fails with a simple clock-scramble nemesis (`--scenario c`). + Explanation: without the timestamp patch, nodes will create objects using their + local clock only as a timestamp, so the ordering will be all over the place if + clocks are scrambled. -### Register linear, with timestamp patch +Results with timestamp patch (`--patch tsfix2`): -Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg1 --ops-per-key 100 --patch tsfix1` +- No failure with clock-scramble nemesis -Results: +- Fails with clock-scramble nemesis + partition nemesis (`--scenario cp`). -- No failure with clock-scramble nemesis -- Fails with clock-scramble nemesis + partition nemesis +**This test is expected to fail.** +Indeed, S3 objects are not meant to behave like linearizable registers. +TODO explain using a counter-example -Explanation: S3 objects are not meant to behave like linearizable registers. TODO explain using a counter-example -### Read-after-write CRDT register model, without timestamp patch +### Read-after-write CRDT register model Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 100 --workload reg2 --ops-per-key 100` -Results: fails with a simple clock-scramble nemesis. +Results without timestamp patch: -Explanation: old values are not overwritten correctly when their timestamps are in the future. +- Fails with a simple clock-scramble nemesis (`--scenario c`). + Explanation: old values are not overwritten correctly when their timestamps are in the future. 
-### Read-after-write CRDT register model, with timestamp patch (v2 with DeleteObject fix as well) +Results with timestamp patch (`--patch tsfix2`): -Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 100 --workload reg2 --ops-per-key 100 --patch tsfix2` +- No failures with clock-scramble nemesis + partition nemesis (`--scenario cp`). + This proves that `tsfix2` (PR#543) does improve consistency. -Results: - -- No failures with clock-scramble nemesis + partition nemesis -- Fails with layout reconfiguration nemesis (TODO: test more and investigate) +- **Fails with layout reconfiguration nemesis** (`--scenario r`) + (TODO: note down the run id of a failed run) + (TODO: test more and investigate). + This is the failure mode we are looking for and trying to fix for NLnet task 3. ### Set, basic test (write some items, then read) -Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 100 --workload set1 --ops-per-key 100` +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 100 --workload set1 --ops-per-key 100 --patch tsfix2` Results: -- For now, no failures with clock-scramble nemesis + partition nemesis -- TODO: layout reconfiguration nemesis (does not fail yet! but it should) +- For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run + +- Failures were not yet achieved with only the layout reconfiguration nemesis, although they should be. + +- **Fails with partition + layout reconfiguration nemesis** (`--scenario pr`) + (TODO: note down the run id of a failed run) + (TODO: test more and investigate). + This is the failure mode we are looking for and trying to fix for NLnet task 3. 
+ ### Set, continuous test (interspersed reads and writes) -TODO +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 100 --workload set2 --ops-per-key 100 --patch tsfix2` + +Results: + +- For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run + +- Failures were not yet achieved with only the layout reconfiguration nemesis, although they should be. + +- TODO: failures should be achieved with `--scenario pr`? Even with 4 or 5 consecutive test runs, no failures were achieved, why? + (TODO: note down the run id of a failed run) ## Investigating (and fixing) errors @@ -112,7 +131,7 @@ and passing all values that were previously in the context (creds and prefix) as The reg2 test is our custom checker for CRDT read-after-write on individual object keys, acting as registers which can be updated. The test fails without the timestamp fix, which is expected as the clock scrambler will prevent nodes from having a correct ordering of objects. -With the timestamp fix, the happenned-before relationship should at least be respected, meaning that when a PutObject call starts +With the timestamp fix (`--patch tsfix1`), the happenned-before relationship should at least be respected, meaning that when a PutObject call starts after another PutObject call has ended, the second call should overwrite the value of the first call, and that value should not be readable by future GetObject calls. However, we observed inconsistencies even with the timestamp fix. @@ -121,7 +140,7 @@ The inconsistencies seemed to always happenned after writing a nil value, which instead of a PutObject. By removing the possibility of writing nil values, therefore only doing PutObject calls, the issue disappears. There is therefore an issue to fix in DeleteObject. 
-The issue in DeleteObject seems to have been fixed by commit `c82d91c6bccf307186332b6c5c6fc0b128b1b2b1` +The issue in DeleteObject seems to have been fixed by commit `c82d91c6bccf307186332b6c5c6fc0b128b1b2b1`, which can be used using `--patch tsfix2`. ## License diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index ace3e66a..6d64a1b8 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -23,8 +23,10 @@ (def scenari "A map of scenari to the associated nemesis" - {"cp" grgNemesis/scenario-cp - "r" grgNemesis/scenario-r}) + {"c" grgNemesis/scenario-c + "cp" grgNemesis/scenario-cp + "r" grgNemesis/scenario-r + "pr" grgNemesis/scenario-pr}) (def patches "A map of patch names to Garage builds" diff --git a/script/jepsen.garage/src/jepsen/garage/nemesis.clj b/script/jepsen.garage/src/jepsen/garage/nemesis.clj index 9edfe418..e64bcaf1 100644 --- a/script/jepsen.garage/src/jepsen/garage/nemesis.clj +++ b/script/jepsen.garage/src/jepsen/garage/nemesis.clj @@ -7,6 +7,8 @@ [jepsen.garage.daemon :as grg] [jepsen.control.util :as cu])) +; ---- reconfiguration nemesis ---- + (defn configure-present! "Configure node to be active in new cluster layout" [test node] @@ -61,8 +63,18 @@ (teardown! 
[this test] this))) +; ---- nemesis scenari ---- + +(defn scenario-c + "Clock scramble scenario" + [opts] + {:generator (cycle [(gen/sleep 5) + {:type :info, :f :clock-scramble}]) + :nemesis (nemesis/compose + {{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})}) + (defn scenario-cp - "Clock scramble + parittion scenario" + "Clock scramble + partition scenario" [opts] {:generator (cycle [(gen/sleep 5) {:type :info, :f :partition-start} @@ -91,3 +103,23 @@ :nemesis (nemesis/compose {{:reconfigure-start :start :reconfigure-stop :stop} (reconfigure-subset 3)})}) + +(defn scenario-pr + "Partition + cluster reconfiguration scenario" + [opts] + {:generator (cycle [(gen/sleep 3) + {:type :info, :f :reconfigure-start} + (gen/sleep 3) + {:type :info, :f :partition-start} + (gen/sleep 3) + {:type :info, :f :reconfigure-start} + (gen/sleep 3) + {:type :info, :f :partition-stop} + (gen/sleep 3) + {:type :info, :f :reconfigure-stop}]) + :final-generator (gen/once {:type :info, :f :partition-stop}) + :nemesis (nemesis/compose + {{:partition-start :start + :partition-stop :stop} (nemesis/partition-random-halves) + {:reconfigure-start :start + :reconfigure-stop :stop} (reconfigure-subset 3)})}) diff --git a/script/jepsen.garage/src/jepsen/garage/s3api.clj b/script/jepsen.garage/src/jepsen/garage/s3api.clj index 4f292ac0..531e0157 100644 --- a/script/jepsen.garage/src/jepsen/garage/s3api.clj +++ b/script/jepsen.garage/src/jepsen/garage/s3api.clj @@ -39,12 +39,10 @@ new-object-summaries (:object-summaries list-result) new-objects (map (fn [d] (:key d)) new-object-summaries) objects (concat new-objects accum)] - (info (:endpoint creds) "ListObjectsV2 prefix(" prefix "), ct(" ct "): " new-objects) (if (:truncated? 
list-result) (list-inner creds prefix (:next-continuation-token list-result) objects) objects))) (defn list "Helper for ListObjects -- just lists everything in the bucket" [creds prefix] - (info "in s3/list creds:" creds ", prefix:" prefix) (list-inner creds prefix nil [])) diff --git a/script/jepsen.garage/src/jepsen/garage/set.clj b/script/jepsen.garage/src/jepsen/garage/set.clj index c5387462..f625e672 100644 --- a/script/jepsen.garage/src/jepsen/garage/set.clj +++ b/script/jepsen.garage/src/jepsen/garage/set.clj @@ -45,9 +45,7 @@ 10000 (assoc op :type :fail, :error ::timeout) (do - (info "call s3/list creds: " (:creds this) ", prefix:" prefix) (let [items (s3/list (:creds this) prefix)] - (info "list results for prefix" prefix ":" items " (node:" (:endpoint (:creds this)) ")") (let [items-stripped (map (fn [o] (assert (str/starts-with? o prefix)) (str/replace-first o prefix "")) items) @@ -115,8 +113,8 @@ {:client (SetClient. nil) :checker (independent/checker (checker/compose - {:set-full (checker/set-full {:linearizable? false}) - :set-read-after-write (set-read-after-write) + {:set-read-after-write (set-read-after-write) + ; :set-full (checker/set-full {:linearizable? false}) :timeline (timeline/html)})) :generator (independent/concurrent-generator 10 -- cgit v1.2.3 From d13bde5e26098313e789dd3793368a635cf1cc16 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 24 Oct 2023 15:44:05 +0200 Subject: jepsen: set1 and set2 don't fail anymore ?? 
--- script/jepsen.garage/README.md | 20 +++---- script/jepsen.garage/src/jepsen/garage.clj | 3 +- script/jepsen.garage/src/jepsen/garage/nemesis.clj | 64 ++++++++++++---------- 3 files changed, 47 insertions(+), 40 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 06379d25..e1dc6953 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -69,9 +69,9 @@ Results with timestamp patch (`--patch tsfix2`): - No failures with clock-scramble nemesis + partition nemesis (`--scenario cp`). This proves that `tsfix2` (PR#543) does improve consistency. -- **Fails with layout reconfiguration nemesis** (`--scenario r`) - (TODO: note down the run id of a failed run) - (TODO: test more and investigate). +- **Fails with layout reconfiguration nemesis** (`--scenario r`). + Example of a failed run: `garage reg2/20231024T120806.899+0200`. + TODO: investigate. This is the failure mode we are looking for and trying to fix for NLnet task 3. @@ -83,12 +83,11 @@ Results: - For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run -- Failures were not yet achieved with only the layout reconfiguration nemesis, although they should be. +- Does not seem to fail with only the layout reconfiguation nemesis (>20 runs), although theoretically it could -- **Fails with partition + layout reconfiguration nemesis** (`--scenario pr`) - (TODO: note down the run id of a failed run) - (TODO: test more and investigate). - This is the failure mode we are looking for and trying to fix for NLnet task 3. +- Does not seem to fail with the layout reconfiguation + partition nemesis (<10 runs), although theoretically it could + +TODO: make it fail!!! 
### Set, continuous test (interspersed reads and writes) @@ -99,10 +98,9 @@ Results: - For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run -- Failures were not yet achieved with only the layout reconfiguration nemesis, although they should be. +- Does not seem to fail with the clock scrambler + partition + layout reconfiguation nemesis (>10 runs), although theoretically it could -- TODO: failures should be achieved with `--scenario pr`? Even with 4 or 5 consecutive test runs, no failures were achieved, why? - (TODO: note down the run id of a failed run) +TODO: make it fail!!! ## Investigating (and fixing) errors diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 6d64a1b8..a67399e0 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -26,7 +26,8 @@ {"c" grgNemesis/scenario-c "cp" grgNemesis/scenario-cp "r" grgNemesis/scenario-r - "pr" grgNemesis/scenario-pr}) + "pr" grgNemesis/scenario-pr + "cpr" grgNemesis/scenario-cpr}) (def patches "A map of patch names to Garage builds" diff --git a/script/jepsen.garage/src/jepsen/garage/nemesis.clj b/script/jepsen.garage/src/jepsen/garage/nemesis.clj index e64bcaf1..07083038 100644 --- a/script/jepsen.garage/src/jepsen/garage/nemesis.clj +++ b/script/jepsen.garage/src/jepsen/garage/nemesis.clj @@ -76,30 +76,24 @@ (defn scenario-cp "Clock scramble + partition scenario" [opts] - {:generator (cycle [(gen/sleep 5) - {:type :info, :f :partition-start} - (gen/sleep 5) - {:type :info, :f :clock-scramble} - (gen/sleep 5) - {:type :info, :f :partition-stop} - (gen/sleep 5) - {:type :info, :f :clock-scramble}]) + {:generator (->> + (gen/mix [{:type :info, :f :clock-scramble} + {:type :info, :f :partition-stop} + {:type :info, :f :partition-start}]) + (gen/stagger 3)) :final-generator (gen/once {:type :info, :f :partition-stop}) :nemesis (nemesis/compose - {{:partition-start :start - 
:partition-stop :stop} (nemesis/partition-random-halves) - {:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})}) + {{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0) + {:partition-start :start + :partition-stop :stop} (nemesis/partition-random-halves)})}) (defn scenario-r "Cluster reconfiguration scenario" [opts] - {:generator (cycle [(gen/sleep 5) - {:type :info, :f :reconfigure-start} - (gen/sleep 5) - {:type :info, :f :reconfigure-start} - (gen/sleep 5) - {:type :info, :f :reconfigure-stop}]) - :final-generator (gen/once {:type :info, :f :reconfigure-stop}) + {:generator (->> + (gen/mix [{:type :info, :f :reconfigure-start} + {:type :info, :f :reconfigure-stop}]) + (gen/stagger 3)) :nemesis (nemesis/compose {{:reconfigure-start :start :reconfigure-stop :stop} (reconfigure-subset 3)})}) @@ -107,19 +101,33 @@ (defn scenario-pr "Partition + cluster reconfiguration scenario" [opts] - {:generator (cycle [(gen/sleep 3) - {:type :info, :f :reconfigure-start} - (gen/sleep 3) - {:type :info, :f :partition-start} - (gen/sleep 3) - {:type :info, :f :reconfigure-start} - (gen/sleep 3) - {:type :info, :f :partition-stop} - (gen/sleep 3) - {:type :info, :f :reconfigure-stop}]) + {:generator (->> + (gen/mix [{:type :info, :f :partition-start} + {:type :info, :f :partition-stop} + {:type :info, :f :reconfigure-start} + {:type :info, :f :reconfigure-stop}]) + (gen/stagger 3)) :final-generator (gen/once {:type :info, :f :partition-stop}) :nemesis (nemesis/compose {{:partition-start :start :partition-stop :stop} (nemesis/partition-random-halves) {:reconfigure-start :start :reconfigure-stop :stop} (reconfigure-subset 3)})}) + +(defn scenario-cpr + "Clock scramble + partition + cluster reconfiguration scenario" + [opts] + {:generator (->> + (gen/mix [{:type :info, :f :clock-scramble} + {:type :info, :f :partition-start} + {:type :info, :f :partition-stop} + {:type :info, :f :reconfigure-start} + {:type :info, :f :reconfigure-stop}]) + (gen/stagger 3)) + 
:final-generator (gen/once {:type :info, :f :partition-stop}) + :nemesis (nemesis/compose + {{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0) + {:partition-start :start + :partition-stop :stop} (nemesis/partition-random-halves) + {:reconfigure-start :start + :reconfigure-stop :stop} (reconfigure-subset 3)})}) -- cgit v1.2.3 From d7ab2c639e330cb3214d83bf98107573ef41f6db Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 24 Oct 2023 16:39:50 +0200 Subject: jepsen: fix nemesis to actually generate many operations --- script/jepsen.garage/src/jepsen/garage/nemesis.clj | 57 +++++++++++++--------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/script/jepsen.garage/src/jepsen/garage/nemesis.clj b/script/jepsen.garage/src/jepsen/garage/nemesis.clj index 07083038..6a2e1935 100644 --- a/script/jepsen.garage/src/jepsen/garage/nemesis.clj +++ b/script/jepsen.garage/src/jepsen/garage/nemesis.clj @@ -32,9 +32,12 @@ [test] (let [layout-show (c/on (jepsen/primary test) (c/exec grg/binary :layout :show)) [_ layout-next-version] (re-find #"apply --version (\d+)\n" layout-show)] - (info "layout show: " layout-show "; next-version: " layout-next-version) - (c/on (jepsen/primary test) - (c/exec grg/binary :layout :apply :--version layout-next-version)))) + (if layout-next-version + (do + (info "layout show: " layout-show "; next-version: " layout-next-version) + (c/on (jepsen/primary test) + (c/exec grg/binary :layout :apply :--version layout-next-version))) + (info "no layout changes to apply")))) (defn reconfigure-subset "Reconfigure cluster with only a subset of nodes" @@ -65,22 +68,28 @@ ; ---- nemesis scenari ---- +(defn nemesis-op + "A generator for a single nemesis operation" + [op] + (fn [_ _] {:type :info, :f op})) + (defn scenario-c "Clock scramble scenario" [opts] - {:generator (cycle [(gen/sleep 5) - {:type :info, :f :clock-scramble}]) + {:generator (->> + (nemesis-op :clock-scramble) + (gen/stagger 5)) :nemesis (nemesis/compose - 
{{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})}) + {{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})}) (defn scenario-cp "Clock scramble + partition scenario" [opts] {:generator (->> - (gen/mix [{:type :info, :f :clock-scramble} - {:type :info, :f :partition-stop} - {:type :info, :f :partition-start}]) - (gen/stagger 3)) + (gen/mix [(nemesis-op :clock-scramble) + (nemesis-op :partition-stop) + (nemesis-op :partition-start)]) + (gen/stagger 5)) :final-generator (gen/once {:type :info, :f :partition-stop}) :nemesis (nemesis/compose {{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0) @@ -91,9 +100,9 @@ "Cluster reconfiguration scenario" [opts] {:generator (->> - (gen/mix [{:type :info, :f :reconfigure-start} - {:type :info, :f :reconfigure-stop}]) - (gen/stagger 3)) + (gen/mix [(nemesis-op :reconfigure-start) + (nemesis-op :reconfigure-stop)]) + (gen/stagger 5)) :nemesis (nemesis/compose {{:reconfigure-start :start :reconfigure-stop :stop} (reconfigure-subset 3)})}) @@ -102,11 +111,11 @@ "Partition + cluster reconfiguration scenario" [opts] {:generator (->> - (gen/mix [{:type :info, :f :partition-start} - {:type :info, :f :partition-stop} - {:type :info, :f :reconfigure-start} - {:type :info, :f :reconfigure-stop}]) - (gen/stagger 3)) + (gen/mix [(nemesis-op :partition-start) + (nemesis-op :partition-stop) + (nemesis-op :reconfigure-start) + (nemesis-op :reconfigure-stop)]) + (gen/stagger 5)) :final-generator (gen/once {:type :info, :f :partition-stop}) :nemesis (nemesis/compose {{:partition-start :start @@ -118,12 +127,12 @@ "Clock scramble + partition + cluster reconfiguration scenario" [opts] {:generator (->> - (gen/mix [{:type :info, :f :clock-scramble} - {:type :info, :f :partition-start} - {:type :info, :f :partition-stop} - {:type :info, :f :reconfigure-start} - {:type :info, :f :reconfigure-stop}]) - (gen/stagger 3)) + (gen/mix [(nemesis-op :clock-scramble) + (nemesis-op :partition-start) + (nemesis-op :partition-stop) + 
(nemesis-op :reconfigure-start) + (nemesis-op :reconfigure-stop)]) + (gen/stagger 5)) :final-generator (gen/once {:type :info, :f :partition-stop}) :nemesis (nemesis/compose {{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0) -- cgit v1.2.3 From 4fa2646a75ed9b4823bf36ae6218a18cca11c471 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 24 Oct 2023 17:45:22 +0200 Subject: jepsen: got a failure with set1 --- script/jepsen.garage/README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index e1dc6953..5d407b6a 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -77,17 +77,16 @@ Results with timestamp patch (`--patch tsfix2`): ### Set, basic test (write some items, then read) -Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 100 --workload set1 --ops-per-key 100 --patch tsfix2` +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 200 --concurrency 200 --workload set1 --ops-per-key 100 --patch tsfix2` Results: - For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run -- Does not seem to fail with only the layout reconfiguation nemesis (>20 runs), although theoretically it could +- Does not seem to fail with only the layout reconfiguation nemesis (<10 runs), although theoretically it could -- Does not seem to fail with the layout reconfiguation + partition nemesis (<10 runs), although theoretically it could - -TODO: make it fail!!! +- **Fails with the partition + layout reconfiguration nemesis** (`--scenario pr`). + EXample of a failed run: `garage set1/20231024T172214.488+0200` (1 failure in 4 runs). 
### Set, continuous test (interspersed reads and writes) -- cgit v1.2.3 From db921cc05f8bcfccd0d0ba1d90b6dcd77f06dcdd Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 25 Oct 2023 11:41:34 +0200 Subject: jepsen: reconfigure nemesis + add db nemesis --- script/jepsen.garage/README.md | 2 + script/jepsen.garage/src/jepsen/garage.clj | 14 ++- script/jepsen.garage/src/jepsen/garage/daemon.clj | 18 +++ script/jepsen.garage/src/jepsen/garage/nemesis.clj | 121 ++++++++++----------- script/jepsen.garage/src/jepsen/garage/reg.clj | 37 ++++--- script/jepsen.garage/src/jepsen/garage/set.clj | 49 +++++---- 6 files changed, 134 insertions(+), 107 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 5d407b6a..ced8ebb5 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -97,6 +97,8 @@ Results: - For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run +- Does not seem to fail with partition + layout reconfiguration nemesis (>100 runs) + - Does not seem to fail with the clock scrambler + partition + layout reconfiguation nemesis (>10 runs), although theoretically it could TODO: make it fail!!! diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index a67399e0..3fe527a6 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -27,7 +27,8 @@ "cp" grgNemesis/scenario-cp "r" grgNemesis/scenario-r "pr" grgNemesis/scenario-pr - "cpr" grgNemesis/scenario-cpr}) + "cpr" grgNemesis/scenario-cpr + "dpr" grgNemesis/scenario-dpr}) (def patches "A map of patch names to Garage builds" @@ -59,15 +60,16 @@ "Given an options map from the command line runner (e.g. :nodes, :ssh, :concurrency, ...), constructs a test map." 
[opts] - (let [workload ((get workloads (:workload opts)) opts) - scenario ((get scenari (:scenario opts)) opts) - garage-version (get patches (:patch opts))] + (let [garage-version (get patches (:patch opts)) + db (grg/db garage-version) + workload ((get workloads (:workload opts)) opts) + scenario ((get scenari (:scenario opts)) (assoc opts :db db))] (merge tests/noop-test opts {:pure-generators true :name (str "garage " (name (:workload opts))) :os debian/os - :db (grg/db garage-version) + :db db :client (:client workload) :generator (gen/phases (->> @@ -82,7 +84,7 @@ (gen/clients (:final-generator workload))) :nemesis (:nemesis scenario) :checker (checker/compose - {:perf (checker/perf) + {:perf (checker/perf (:perf scenario)) :workload (:checker workload)}) }))) diff --git a/script/jepsen.garage/src/jepsen/garage/daemon.clj b/script/jepsen.garage/src/jepsen/garage/daemon.clj index 7c581ba1..d407dd29 100644 --- a/script/jepsen.garage/src/jepsen/garage/daemon.clj +++ b/script/jepsen.garage/src/jepsen/garage/daemon.clj @@ -119,6 +119,24 @@ (c/exec :rm :-rf data-dir) (c/exec :rm :-rf meta-dir))) + db/Pause + (pause! [_ test node] + (cu/grepkill! :stop binary)) + (resume! [_ test node] + (cu/grepkill! :cont binary)) + + db/Kill + (kill! [_ test node] + (cu/stop-daemon! binary pidfile)) + (start! [_ test node] + (cu/start-daemon! 
+ {:logfile logfile + :pidfile pidfile + :chdir base-dir + :env {:RUST_LOG "garage=debug,garage_api=trace"}} + binary + :server)) + db/LogFiles (log-files [_ test node] [logfile]))) diff --git a/script/jepsen.garage/src/jepsen/garage/nemesis.clj b/script/jepsen.garage/src/jepsen/garage/nemesis.clj index 6a2e1935..0222e463 100644 --- a/script/jepsen.garage/src/jepsen/garage/nemesis.clj +++ b/script/jepsen.garage/src/jepsen/garage/nemesis.clj @@ -4,6 +4,7 @@ [core :as jepsen] [generator :as gen] [nemesis :as nemesis]] + [jepsen.nemesis.combined :as combined] [jepsen.garage.daemon :as grg] [jepsen.control.util :as cu])) @@ -11,21 +12,23 @@ (defn configure-present! "Configure node to be active in new cluster layout" - [test node] - (info "configure-present!" node) - (let [node-id (c/on node (c/exec grg/binary :node :id :-q))] - (c/on - (jepsen/primary test) - (c/exec grg/binary :layout :assign (subs node-id 0 16) :-c :1G)))) + [test nodes] + (info "configure-present!" nodes) + (let [node-ids (c/on-many nodes (c/exec grg/binary :node :id :-q)) + node-id-strs (map (fn [[_ v]] (subs v 0 16)) node-ids)] + (c/on + (jepsen/primary test) + (apply c/exec (concat [grg/binary :layout :assign :-c :1G] node-id-strs))))) (defn configure-absent! - "Configure node to be active in new cluster layout" - [test node] - (info "configure-absent!" node) - (let [node-id (c/on node (c/exec grg/binary :node :id :-q))] - (c/on - (jepsen/primary test) - (c/exec grg/binary :layout :assign (subs node-id 0 16) :-g)))) + "Configure nodes to be active in new cluster layout" + [test nodes] + (info "configure-absent!" nodes) + (let [node-ids (c/on-many nodes (c/exec grg/binary :node :id :-q)) + node-id-strs (map (fn [[_ v]] (subs v 0 16)) node-ids)] + (c/on + (jepsen/primary test) + (apply c/exec (concat [grg/binary :layout :assign :-g] node-id-strs))))) (defn finalize-config! 
"Apply the proposed cluster layout" @@ -53,14 +56,14 @@ shuffle (split-at cnt))] (info "layout split: keep " keep-nodes ", remove " remove-nodes) - (run! #(configure-present! test %) keep-nodes) - (run! #(configure-absent! test %) remove-nodes) + (configure-present! test keep-nodes) + (configure-absent! test remove-nodes) (finalize-config! test) (assoc op :value keep-nodes)) :stop (do (info "layout un-split: all nodes=" (:nodes test)) - (run! #(configure-present! test %) (:nodes test)) + (configure-present! test (:nodes test)) (finalize-config! test) (assoc op :value (:nodes test))))) @@ -73,70 +76,58 @@ [op] (fn [_ _] {:type :info, :f op})) -(defn scenario-c - "Clock scramble scenario" +(defn reconfiguration-package + "Cluster reconfiguration nemesis package" [opts] {:generator (->> - (nemesis-op :clock-scramble) - (gen/stagger 5)) + (gen/mix [(nemesis-op :reconfigure-start) + (nemesis-op :reconfigure-stop)]) + (gen/stagger (:interval opts 5))) + :final-generator {:type :info, :f :reconfigure-stop} :nemesis (nemesis/compose - {{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})}) + {{:reconfigure-start :start + :reconfigure-stop :stop} (reconfigure-subset 3)}) + :perf #{{:name "reconfigure" + :start #{:reconfigure-start} + :stop #{:reconfigur-stop} + :color "#A197E9"}}}) + +(defn scenario-c + "Clock modifying scenario" + [opts] + (combined/clock-package {:db (:db opts), :interval 1, :faults #{:clock}})) (defn scenario-cp - "Clock scramble + partition scenario" + "Clock modifying + partition scenario" [opts] - {:generator (->> - (gen/mix [(nemesis-op :clock-scramble) - (nemesis-op :partition-stop) - (nemesis-op :partition-start)]) - (gen/stagger 5)) - :final-generator (gen/once {:type :info, :f :partition-stop}) - :nemesis (nemesis/compose - {{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0) - {:partition-start :start - :partition-stop :stop} (nemesis/partition-random-halves)})}) + (combined/compose-packages + [(combined/clock-package {:db (:db 
opts), :interval 1, :faults #{:clock}}) + (combined/partition-package {:db (:db opts), :interval 1, :faults #{:partition}})])) (defn scenario-r "Cluster reconfiguration scenario" [opts] - {:generator (->> - (gen/mix [(nemesis-op :reconfigure-start) - (nemesis-op :reconfigure-stop)]) - (gen/stagger 5)) - :nemesis (nemesis/compose - {{:reconfigure-start :start - :reconfigure-stop :stop} (reconfigure-subset 3)})}) + (reconfiguration-package {:interval 1})) (defn scenario-pr "Partition + cluster reconfiguration scenario" [opts] - {:generator (->> - (gen/mix [(nemesis-op :partition-start) - (nemesis-op :partition-stop) - (nemesis-op :reconfigure-start) - (nemesis-op :reconfigure-stop)]) - (gen/stagger 5)) - :final-generator (gen/once {:type :info, :f :partition-stop}) - :nemesis (nemesis/compose - {{:partition-start :start - :partition-stop :stop} (nemesis/partition-random-halves) - {:reconfigure-start :start - :reconfigure-stop :stop} (reconfigure-subset 3)})}) + (combined/compose-packages + [(combined/partition-package {:db (:db opts), :interval 1, :faults #{:partition}}) + (reconfiguration-package {:interval 1})])) (defn scenario-cpr "Clock scramble + partition + cluster reconfiguration scenario" [opts] - {:generator (->> - (gen/mix [(nemesis-op :clock-scramble) - (nemesis-op :partition-start) - (nemesis-op :partition-stop) - (nemesis-op :reconfigure-start) - (nemesis-op :reconfigure-stop)]) - (gen/stagger 5)) - :final-generator (gen/once {:type :info, :f :partition-stop}) - :nemesis (nemesis/compose - {{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0) - {:partition-start :start - :partition-stop :stop} (nemesis/partition-random-halves) - {:reconfigure-start :start - :reconfigure-stop :stop} (reconfigure-subset 3)})}) + (combined/compose-packages + [(combined/clock-package {:db (:db opts), :interval 1, :faults #{:clock}}) + (combined/partition-package {:db (:db opts), :interval 1, :faults #{:partition}}) + (reconfiguration-package {:interval 1})])) + +(defn 
scenario-dpr + "Db + partition + cluster reconfiguration scenario" + [opts] + (combined/compose-packages + [(combined/db-package {:db (:db opts), :interval 1, :faults #{:db :pause :kill}}) + (combined/partition-package {:db (:db opts), :interval 1, :faults #{:partition}}) + (reconfiguration-package {:interval 1})])) diff --git a/script/jepsen.garage/src/jepsen/garage/reg.clj b/script/jepsen.garage/src/jepsen/garage/reg.clj index 6772abfe..39708c0b 100644 --- a/script/jepsen.garage/src/jepsen/garage/reg.clj +++ b/script/jepsen.garage/src/jepsen/garage/reg.clj @@ -30,21 +30,28 @@ (assoc this :creds (grg/creds node))) (setup! [this test]) (invoke! [this test op] - (let [[k v] (:value op)] - (case (:f op) - :read - (util/timeout - 10000 - (assoc op :type :fail, :error ::timeout) - (let [value (s3/get (:creds this) k)] - (assoc op :type :ok, :value (independent/tuple k value)))) - :write - (util/timeout - 10000 - (assoc op :type :info, :error ::timeout) - (do - (s3/put (:creds this) k v) - (assoc op :type :ok)))))) + (try+ + (let [[k v] (:value op)] + (case (:f op) + :read + (util/timeout + 10000 + (assoc op :type :fail, :error ::timeout) + (let [value (s3/get (:creds this) k)] + (assoc op :type :ok, :value (independent/tuple k value)))) + :write + (util/timeout + 10000 + (assoc op :type :info, :error ::timeout) + (do + (s3/put (:creds this) k v) + (assoc op :type :ok))))) + (catch (re-find #"Unavailable" (.getMessage %)) ex + (assoc op :type :info, :error ::unavailable)) + (catch (re-find #"Broken pipe" (.getMessage %)) ex + (assoc op :type :info, :error ::broken-pipe)) + (catch (re-find #"Connection refused" (.getMessage %)) ex + (assoc op :type :info, :error ::connection-refused)))) (teardown! [this test]) (close! 
[this test])) diff --git a/script/jepsen.garage/src/jepsen/garage/set.clj b/script/jepsen.garage/src/jepsen/garage/set.clj index f625e672..670c73f2 100644 --- a/script/jepsen.garage/src/jepsen/garage/set.clj +++ b/script/jepsen.garage/src/jepsen/garage/set.clj @@ -30,27 +30,34 @@ (assoc this :creds (grg/creds node))) (setup! [this test]) (invoke! [this test op] - (let [[k v] (:value op) - prefix (str "set" k "/")] - (case (:f op) - :add - (util/timeout - 10000 - (assoc op :type :info, :error ::timeout) - (do - (s3/put (:creds this) (str prefix v) "present") - (assoc op :type :ok))) - :read - (util/timeout - 10000 - (assoc op :type :fail, :error ::timeout) - (do - (let [items (s3/list (:creds this) prefix)] - (let [items-stripped (map (fn [o] - (assert (str/starts-with? o prefix)) - (str/replace-first o prefix "")) items) - items-set (set (map parse-long items-stripped))] - (assoc op :type :ok, :value (independent/tuple k items-set))))))))) + (try+ + (let [[k v] (:value op) + prefix (str "set" k "/")] + (case (:f op) + :add + (util/timeout + 10000 + (assoc op :type :info, :error ::timeout) + (do + (s3/put (:creds this) (str prefix v) "present") + (assoc op :type :ok))) + :read + (util/timeout + 10000 + (assoc op :type :fail, :error ::timeout) + (do + (let [items (s3/list (:creds this) prefix)] + (let [items-stripped (map (fn [o] + (assert (str/starts-with? o prefix)) + (str/replace-first o prefix "")) items) + items-set (set (map parse-long items-stripped))] + (assoc op :type :ok, :value (independent/tuple k items-set)))))))) + (catch (re-find #"Unavailable" (.getMessage %)) ex + (assoc op :type :info, :error ::unavailable)) + (catch (re-find #"Broken pipe" (.getMessage %)) ex + (assoc op :type :info, :error ::broken-pipe)) + (catch (re-find #"Connection refused" (.getMessage %)) ex + (assoc op :type :info, :error ::connection-refused)))) (teardown! [this test]) (close! 
[this test])) -- cgit v1.2.3 From cfbfa09d24727e83bc042764dad2751e944fc939 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 25 Oct 2023 11:50:16 +0200 Subject: jepsen: fix set2 test omg finally this is so stupid --- script/jepsen.garage/src/jepsen/garage/set.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/jepsen.garage/src/jepsen/garage/set.clj b/script/jepsen.garage/src/jepsen/garage/set.clj index 670c73f2..a73b8efc 100644 --- a/script/jepsen.garage/src/jepsen/garage/set.clj +++ b/script/jepsen.garage/src/jepsen/garage/set.clj @@ -81,7 +81,7 @@ ([:invoke :read]) (assoc-in state [:read-must-contain (:process op)] (:add-done state)) ([:ok :read]) - (let [read-must-contain (get (:process op) (:read-must-contain state)) + (let [read-must-contain (get (:read-must-contain state) (:process op)) new-missed (set/difference read-must-contain (:value op)) new-unexpected (set/difference (:value op) (:add-started state))] (assoc state -- cgit v1.2.3 From fd85010a403775bbb18030ae2d9d3689b34f3e8a Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 25 Oct 2023 12:13:27 +0200 Subject: jepsen: failures with set2 test in --scenario r --- script/jepsen.garage/README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index ced8ebb5..5e50a0f4 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -86,7 +86,7 @@ Results: - Does not seem to fail with only the layout reconfiguation nemesis (<10 runs), although theoretically it could - **Fails with the partition + layout reconfiguration nemesis** (`--scenario pr`). - EXample of a failed run: `garage set1/20231024T172214.488+0200` (1 failure in 4 runs). + Example of a failed run: `garage set1/20231024T172214.488+0200` (1 failure in 4 runs). 
### Set, continuous test (interspersed reads and writes) @@ -97,11 +97,10 @@ Results: - For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run -- Does not seem to fail with partition + layout reconfiguration nemesis (>100 runs) - -- Does not seem to fail with the clock scrambler + partition + layout reconfiguation nemesis (>10 runs), although theoretically it could - -TODO: make it fail!!! +- **Fails with layout reconfiguration nemesis** (`--scenario r`). + Example of a failed run: `garage set2/20231025T115033.553+0200` (2 failures in 2 runs). + TODO: investigate. + This is the failure mode we are looking for and trying to fix for NLnet task 3. ## Investigating (and fixing) errors -- cgit v1.2.3 From 9df7fa0bcd8b00dee5926fe7778853d857b5636d Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 25 Oct 2023 14:04:39 +0200 Subject: jepsen: use 7 nodes --- script/jepsen.garage/README.md | 2 ++ script/jepsen.garage/Vagrantfile | 1 + script/jepsen.garage/nodes.vagrant | 1 + 3 files changed, 4 insertions(+) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 5e50a0f4..0d647c72 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -87,6 +87,8 @@ Results: - **Fails with the partition + layout reconfiguration nemesis** (`--scenario pr`). Example of a failed run: `garage set1/20231024T172214.488+0200` (1 failure in 4 runs). + TODO: investigate. + This is the failure mode we are looking for and trying to fix for NLnet task 3. 
### Set, continuous test (interspersed reads and writes) diff --git a/script/jepsen.garage/Vagrantfile b/script/jepsen.garage/Vagrantfile index 1125bccf..4d02397d 100644 --- a/script/jepsen.garage/Vagrantfile +++ b/script/jepsen.garage/Vagrantfile @@ -28,4 +28,5 @@ Vagrant.configure("2") do |config| config.vm.define "n4" do |config| vm(config, "n4", "192.168.56.24") end config.vm.define "n5" do |config| vm(config, "n5", "192.168.56.25") end config.vm.define "n6" do |config| vm(config, "n6", "192.168.56.26") end + config.vm.define "n7" do |config| vm(config, "n7", "192.168.56.27") end end diff --git a/script/jepsen.garage/nodes.vagrant b/script/jepsen.garage/nodes.vagrant index 3f7e2b42..9e5694e6 100644 --- a/script/jepsen.garage/nodes.vagrant +++ b/script/jepsen.garage/nodes.vagrant @@ -4,3 +4,4 @@ 192.168.56.24 192.168.56.25 192.168.56.26 +192.168.56.27 -- cgit v1.2.3 From 5b1f50be65c251a1dc0a4358c706c409f17a82c0 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 25 Oct 2023 14:43:24 +0200 Subject: jepsen: testing --- script/jepsen.garage/README.md | 6 +++--- script/jepsen.garage/src/jepsen/garage.clj | 1 + script/jepsen.garage/src/jepsen/garage/nemesis.clj | 9 +++++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 0d647c72..464da4da 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -97,10 +97,10 @@ Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 - Results: -- For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run +- No failures with clock-scramble nemesis + db nemesis + partition nemesis (`--scenario cdp`) (0 failures in 10 runs). -- **Fails with layout reconfiguration nemesis** (`--scenario r`). - Example of a failed run: `garage set2/20231025T115033.553+0200` (2 failures in 2 runs). +- **Fails with just layout reconfiguration nemesis** (`--scenario r`). 
+ Example of a failed run: `garage set2/20231025T141940.198+0200` (10 failures in 10 runs). TODO: investigate. This is the failure mode we are looking for and trying to fix for NLnet task 3. diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 3fe527a6..17363c9d 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -28,6 +28,7 @@ "r" grgNemesis/scenario-r "pr" grgNemesis/scenario-pr "cpr" grgNemesis/scenario-cpr + "cdp" grgNemesis/scenario-cdp "dpr" grgNemesis/scenario-dpr}) (def patches diff --git a/script/jepsen.garage/src/jepsen/garage/nemesis.clj b/script/jepsen.garage/src/jepsen/garage/nemesis.clj index 0222e463..dfce0255 100644 --- a/script/jepsen.garage/src/jepsen/garage/nemesis.clj +++ b/script/jepsen.garage/src/jepsen/garage/nemesis.clj @@ -124,6 +124,14 @@ (combined/partition-package {:db (:db opts), :interval 1, :faults #{:partition}}) (reconfiguration-package {:interval 1})])) +(defn scenario-cdp + "Clock modifying + db + partition scenario" + [opts] + (combined/compose-packages + [(combined/clock-package {:db (:db opts), :interval 1, :faults #{:clock}}) + (combined/db-package {:db (:db opts), :interval 1, :faults #{:db :pause :kill}}) + (combined/partition-package {:db (:db opts), :interval 1, :faults #{:partition}})])) + (defn scenario-dpr "Db + partition + cluster reconfiguration scenario" [opts] @@ -131,3 +139,4 @@ [(combined/db-package {:db (:db opts), :interval 1, :faults #{:db :pause :kill}}) (combined/partition-package {:db (:db opts), :interval 1, :faults #{:partition}}) (reconfiguration-package {:interval 1})])) + -- cgit v1.2.3 From 18e58111593d5bf978d8c603e0dc2633ffcd91c4 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 16 Nov 2023 12:57:21 +0100 Subject: jepsen: add patch and use more complete names --- script/jepsen.garage/src/jepsen/garage.clj | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 17363c9d..48659d6c 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -35,7 +35,8 @@ "A map of patch names to Garage builds" {"default" "v0.9.0" "tsfix1" "d146cdd5b66ca1d3ed65ce93ca42c6db22defc09" - "tsfix2" "c82d91c6bccf307186332b6c5c6fc0b128b1b2b1"}) + "tsfix2" "c82d91c6bccf307186332b6c5c6fc0b128b1b2b1" + "task3a" "d4df03424f1c7f3cc1eaba9e16d2e1d049131b97"}) (def cli-opts "Additional command line options." @@ -68,7 +69,7 @@ (merge tests/noop-test opts {:pure-generators true - :name (str "garage " (name (:workload opts))) + :name (str "garage " (name (:workload opts)) " " (name (:scenario opts)) " " (name (:patch opts))) :os debian/os :db db :client (:client workload) -- cgit v1.2.3 From 92dd2bbe15357a24eb68a3d3d6220c4758bb81a7 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 16 Nov 2023 18:09:13 +0100 Subject: jepsen: nlnet task3a seems to fix things --- script/jepsen.garage/README.md | 10 ++++++++-- script/jepsen.garage/src/jepsen/garage.clj | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 464da4da..f7479a3d 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -71,9 +71,12 @@ Results with timestamp patch (`--patch tsfix2`): - **Fails with layout reconfiguration nemesis** (`--scenario r`). Example of a failed run: `garage reg2/20231024T120806.899+0200`. - TODO: investigate. This is the failure mode we are looking for and trying to fix for NLnet task 3. +- Changes brought by NLnet task 3 code (commit 707442f5de): + no failures with `--scenario r` (0 of 10 runs), `--scenario pr` (0 of 10 runs), + `--scenario cpr` (0 of 10 runs) and `--scenario dpr` (0 of 10 runs). 
+ ### Set, basic test (write some items, then read) @@ -101,9 +104,12 @@ Results: - **Fails with just layout reconfiguration nemesis** (`--scenario r`). Example of a failed run: `garage set2/20231025T141940.198+0200` (10 failures in 10 runs). - TODO: investigate. This is the failure mode we are looking for and trying to fix for NLnet task 3. +- Changes brought by NLnet task 3 code (commit 707442f5de): + no failures with `--scenario r` (0 of 10 runs), `--scenario pr` (0 of 10 runs). + `--scenario cpr` (0 of 10 runs) and `--scenario dpr` (0 of 10 runs). + ## Investigating (and fixing) errors diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 48659d6c..174e8df0 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -36,7 +36,7 @@ {"default" "v0.9.0" "tsfix1" "d146cdd5b66ca1d3ed65ce93ca42c6db22defc09" "tsfix2" "c82d91c6bccf307186332b6c5c6fc0b128b1b2b1" - "task3a" "d4df03424f1c7f3cc1eaba9e16d2e1d049131b97"}) + "task3a" "707442f5de416fdbed4681a33b739f0a787b7834"}) (def cli-opts "Additional command line options." 
-- cgit v1.2.3 From fa9247f11b89c960dffe82d6bf990ed4335788e3 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 14 Dec 2023 16:23:48 +0100 Subject: jepsen: updated results, confirming that task3 works --- script/jepsen.garage/.gitignore | 1 + script/jepsen.garage/README.md | 55 ++++++++++++--------- script/jepsen.garage/Vagrantfile | 8 +++ script/jepsen.garage/all_tests_1.sh | 18 +++++++ script/jepsen.garage/all_tests_2.sh | 16 ++++++ script/jepsen.garage/nodes2.vagrant | 7 +++ .../jepsen.garage/results/Results-2023-11-16.png | Bin 0 -> 1517471 bytes .../results/Results-2023-12-13-task3c.png | Bin 0 -> 1075969 bytes .../results/Results-2023-12-13-tsfix2.png | Bin 0 -> 1136760 bytes .../results/Results-2023-12-14-task3-set1.png | Bin 0 -> 1075655 bytes script/jepsen.garage/src/jepsen/garage.clj | 10 ++-- script/jepsen.garage/src/jepsen/garage/set.clj | 12 +++-- 12 files changed, 96 insertions(+), 31 deletions(-) create mode 100755 script/jepsen.garage/all_tests_1.sh create mode 100755 script/jepsen.garage/all_tests_2.sh create mode 100644 script/jepsen.garage/nodes2.vagrant create mode 100644 script/jepsen.garage/results/Results-2023-11-16.png create mode 100644 script/jepsen.garage/results/Results-2023-12-13-task3c.png create mode 100644 script/jepsen.garage/results/Results-2023-12-13-tsfix2.png create mode 100644 script/jepsen.garage/results/Results-2023-12-14-task3-set1.png diff --git a/script/jepsen.garage/.gitignore b/script/jepsen.garage/.gitignore index 6eb8c209..31842a96 100644 --- a/script/jepsen.garage/.gitignore +++ b/script/jepsen.garage/.gitignore @@ -13,4 +13,5 @@ pom.xml.asc .hg/ .direnv /store +/store.* .vagrant diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index f7479a3d..50c7eb38 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -7,29 +7,19 @@ Jepsen checking of Garage consistency properties. 
Requirements: - vagrant -- VirtualBox, configured so that nodes can take an IP in a private network `192.168.56.0/24` +- VirtualBox, configured so that nodes can take an IP in a private network `192.168.56.0/24` (it's the default) - a user that can create VirtualBox VMs - leiningen - gnuplot -Set up VMs: +Set up VMs before running tests: ``` vagrant up ``` -Run tests (this one should fail): +Run tests: see commands below. -``` -lein run test --nodes-file nodes.vagrant --time-limit 64 --concurrency 50 --rate 50 --workload reg -``` - -These ones are working: - -``` -lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrency 50 --workload set1 -lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrency 50 --workload set2 -``` ## Results @@ -73,16 +63,19 @@ Results with timestamp patch (`--patch tsfix2`): Example of a failed run: `garage reg2/20231024T120806.899+0200`. This is the failure mode we are looking for and trying to fix for NLnet task 3. -- Changes brought by NLnet task 3 code (commit 707442f5de): - no failures with `--scenario r` (0 of 10 runs), `--scenario pr` (0 of 10 runs), +Results with NLnet task 3 code (commit 707442f5de, `--patch task3a`): + +- No failures with `--scenario r` (0 of 10 runs), `--scenario pr` (0 of 10 runs), `--scenario cpr` (0 of 10 runs) and `--scenario dpr` (0 of 10 runs). +- Same with `--patch task3c` (commit `0041b013`, the final version). 
+ ### Set, basic test (write some items, then read) -Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 200 --concurrency 200 --workload set1 --ops-per-key 100 --patch tsfix2` +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 200 --concurrency 200 --workload set1 --ops-per-key 100` -Results: +Results without NLnet task3 code (`--patch tsfix2`): - For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run @@ -90,15 +83,22 @@ Results: - **Fails with the partition + layout reconfiguration nemesis** (`--scenario pr`). Example of a failed run: `garage set1/20231024T172214.488+0200` (1 failure in 4 runs). - TODO: investigate. This is the failure mode we are looking for and trying to fix for NLnet task 3. +Results with NLnet task 3 code (commit 707442f5de, `--patch task3a`): + +- The tests are buggy and often result in an "unknown" validity status, which + is caused by some requests not returning results during network partitions or + other nemesis-induced broken cluster states. However, when the tests were + able to finish, there were no failures with scenarios `r`, `pr`, `cpr`, + `dpr`. + ### Set, continuous test (interspersed reads and writes) -Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 100 --workload set2 --ops-per-key 100 --patch tsfix2` +Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 100 --workload set2 --ops-per-key 100` -Results: +Results without NLnet task3 code (`--patch tsfix2`): - No failures with clock-scramble nemesis + db nemesis + partition nemesis (`--scenario cdp`) (0 failures in 10 runs). @@ -106,17 +106,26 @@ Results: Example of a failed run: `garage set2/20231025T141940.198+0200` (10 failures in 10 runs). This is the failure mode we are looking for and trying to fix for NLnet task 3. 
-- Changes brought by NLnet task 3 code (commit 707442f5de): - no failures with `--scenario r` (0 of 10 runs), `--scenario pr` (0 of 10 runs). +Results with NLnet task3 code (commit 707442f5de, `--patch task3a`): + +- No failures with `--scenario r` (0 of 10 runs), `--scenario pr` (0 of 10 runs), `--scenario cpr` (0 of 10 runs) and `--scenario dpr` (0 of 10 runs). +- Same with `--patch task3c` (commit `0041b013`, the final version). + + +## NLnet task 3 final results + +- With code from task3 (`--patch task3c`): [reg2 and set2](results/Results-2023-12-13-task3c.png), [set1](results/Results-2023-12-14-task3-set1.png). +- Without (`--patch tsfix2`): [reg2 and set2](results/Results-2023-12-13-tsfix2.png), set1 TBD. ## Investigating (and fixing) errors ### Segfaults They are due to the download being interrupted in the middle (^C during first launch on clean VMs), the `garage` binary is truncated. -Add `:force?` to the `cached-wget!` call in `daemon.clj` to re-download the binary. +Add `:force?` to the `cached-wget!` call in `daemon.clj` to re-download the binary, +or restart the VMs to clear temporary files. 
### In `jepsen.garage`: prefix wierdness diff --git a/script/jepsen.garage/Vagrantfile b/script/jepsen.garage/Vagrantfile index 4d02397d..b54c2426 100644 --- a/script/jepsen.garage/Vagrantfile +++ b/script/jepsen.garage/Vagrantfile @@ -29,4 +29,12 @@ Vagrant.configure("2") do |config| config.vm.define "n5" do |config| vm(config, "n5", "192.168.56.25") end config.vm.define "n6" do |config| vm(config, "n6", "192.168.56.26") end config.vm.define "n7" do |config| vm(config, "n7", "192.168.56.27") end + + config.vm.define "n8" do |config| vm(config, "n8", "192.168.56.28") end + config.vm.define "n9" do |config| vm(config, "n9", "192.168.56.29") end + config.vm.define "n10" do |config| vm(config, "n10", "192.168.56.30") end + config.vm.define "n11" do |config| vm(config, "n11", "192.168.56.31") end + config.vm.define "n12" do |config| vm(config, "n12", "192.168.56.32") end + config.vm.define "n13" do |config| vm(config, "n13", "192.168.56.33") end + config.vm.define "n14" do |config| vm(config, "n14", "192.168.56.34") end end diff --git a/script/jepsen.garage/all_tests_1.sh b/script/jepsen.garage/all_tests_1.sh new file mode 100755 index 00000000..b5397d13 --- /dev/null +++ b/script/jepsen.garage/all_tests_1.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -x + +#for ppatch in task3c task3a tsfix2; do +for ppatch in tsfix2; do + #for psc in c cp cdp r pr cpr dpr; do + for psc in cdp r pr cpr dpr; do + #for ptsk in reg2 set1 set2; do + for ptsk in set1; do + for irun in $(seq 10); do + lein run test --nodes-file nodes.vagrant \ + --time-limit 60 --rate 100 --concurrency 100 --ops-per-key 100 \ + --workload $ptsk --patch $ppatch --scenario $psc + done + done + done +done diff --git a/script/jepsen.garage/all_tests_2.sh b/script/jepsen.garage/all_tests_2.sh new file mode 100755 index 00000000..641643ed --- /dev/null +++ b/script/jepsen.garage/all_tests_2.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +set -x + +#for ppatch in task3c tsfix2; do +for ppatch in tsfix2; do + for psc 
in cdp r pr cpr dpr; do + for ptsk in set1; do + for irun in $(seq 10); do + lein run test --nodes-file nodes2.vagrant \ + --time-limit 60 --rate 100 --concurrency 100 --ops-per-key 100 \ + --workload $ptsk --patch $ppatch --scenario $psc + done + done + done +done diff --git a/script/jepsen.garage/nodes2.vagrant b/script/jepsen.garage/nodes2.vagrant new file mode 100644 index 00000000..842bf276 --- /dev/null +++ b/script/jepsen.garage/nodes2.vagrant @@ -0,0 +1,7 @@ +192.168.56.28 +192.168.56.29 +192.168.56.30 +192.168.56.31 +192.168.56.32 +192.168.56.33 +192.168.56.34 diff --git a/script/jepsen.garage/results/Results-2023-11-16.png b/script/jepsen.garage/results/Results-2023-11-16.png new file mode 100644 index 00000000..26dac833 Binary files /dev/null and b/script/jepsen.garage/results/Results-2023-11-16.png differ diff --git a/script/jepsen.garage/results/Results-2023-12-13-task3c.png b/script/jepsen.garage/results/Results-2023-12-13-task3c.png new file mode 100644 index 00000000..216043c3 Binary files /dev/null and b/script/jepsen.garage/results/Results-2023-12-13-task3c.png differ diff --git a/script/jepsen.garage/results/Results-2023-12-13-tsfix2.png b/script/jepsen.garage/results/Results-2023-12-13-tsfix2.png new file mode 100644 index 00000000..147d25e9 Binary files /dev/null and b/script/jepsen.garage/results/Results-2023-12-13-tsfix2.png differ diff --git a/script/jepsen.garage/results/Results-2023-12-14-task3-set1.png b/script/jepsen.garage/results/Results-2023-12-14-task3-set1.png new file mode 100644 index 00000000..dbff3a95 Binary files /dev/null and b/script/jepsen.garage/results/Results-2023-12-14-task3-set1.png differ diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 174e8df0..446b81de 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -36,7 +36,9 @@ {"default" "v0.9.0" "tsfix1" "d146cdd5b66ca1d3ed65ce93ca42c6db22defc09" "tsfix2" 
"c82d91c6bccf307186332b6c5c6fc0b128b1b2b1" - "task3a" "707442f5de416fdbed4681a33b739f0a787b7834"}) + "task3a" "707442f5de416fdbed4681a33b739f0a787b7834" + "task3b" "431b28e0cfdc9cac6c649193cf602108a8b02997" + "task3c" "0041b013a473e3ae72f50209d8f79db75a72848b"}) (def cli-opts "Additional command line options." @@ -69,7 +71,7 @@ (merge tests/noop-test opts {:pure-generators true - :name (str "garage " (name (:workload opts)) " " (name (:scenario opts)) " " (name (:patch opts))) + :name (str "garage-" (name (:patch opts)) " " (name (:workload opts)) " " (name (:scenario opts))) :os debian/os :db db :client (:client workload) @@ -83,7 +85,9 @@ (gen/nemesis (:final-generator scenario)) (gen/log "Waiting for recovery") (gen/sleep 10) - (gen/clients (:final-generator workload))) + (gen/log "Running final generator") + (gen/clients (:final-generator workload)) + (gen/log "Generators all done")) :nemesis (:nemesis scenario) :checker (checker/compose {:perf (checker/perf (:perf scenario)) diff --git a/script/jepsen.garage/src/jepsen/garage/set.clj b/script/jepsen.garage/src/jepsen/garage/set.clj index a73b8efc..2c7a2ccd 100644 --- a/script/jepsen.garage/src/jepsen/garage/set.clj +++ b/script/jepsen.garage/src/jepsen/garage/set.clj @@ -108,11 +108,13 @@ (->> (range) (map (fn [x] {:type :invoke, :f :add, :value x})) (gen/limit (:ops-per-key opts))))) - :final-generator (gen/phases - (independent/sequential-generator - (range 100) - (fn [k] (gen/once op-read))) - (gen/sleep 5))}) + :final-generator (independent/concurrent-generator + 10 + (range 100) + (fn [k] + (gen/phases + (gen/once op-read) + (gen/sleep 5))))}) (defn workload2 "Tests insertions and deletions" -- cgit v1.2.3