From ee4d6a01e16ac53fa3e48340a47455281d5f6bc0 Mon Sep 17 00:00:00 2001 From: sptaule Date: Tue, 25 Jan 2022 15:27:39 +0100 Subject: Organized doc md with metadata & draft for doc template --- content/documentation/_index.md | 5 + content/documentation/connect/_index.md | 6 + content/documentation/connect/apps.md | 4 +- content/documentation/connect/backup.md | 4 +- .../documentation/connect/cli-nextcloud-gui.png | Bin 201685 -> 0 bytes content/documentation/connect/cli.md | 4 +- content/documentation/connect/code.md | 4 +- content/documentation/connect/fs.md | 4 +- content/documentation/connect/index.md | 46 ---- content/documentation/connect/repositories.md | 4 +- content/documentation/connect/websites.md | 4 +- content/documentation/cookbook/_index.md | 6 + .../documentation/cookbook/exposing_websites.md | 4 +- content/documentation/cookbook/from_source.md | 4 +- content/documentation/cookbook/gateways.md | 4 +- content/documentation/cookbook/index.md | 31 --- content/documentation/cookbook/real_world.md | 4 +- content/documentation/cookbook/recovering.md | 4 +- content/documentation/cookbook/reverse_proxy.md | 4 +- content/documentation/cookbook/systemd.md | 4 +- content/documentation/cookbook/website.md | 8 - content/documentation/design/_index.md | 6 + content/documentation/design/benchmarks.md | 4 +- content/documentation/design/goals.md | 4 +- content/documentation/design/index.md | 31 --- content/documentation/design/internals.md | 4 +- content/documentation/design/related_work.md | 4 +- content/documentation/development/_index.md | 6 + content/documentation/development/devenv.md | 4 +- content/documentation/development/index.md | 19 -- .../development/miscellaneous_notes.md | 4 +- .../documentation/development/release_process.md | 4 +- content/documentation/development/scripts.md | 4 +- content/documentation/intro.md | 106 -------- content/documentation/quick-start/_index.md | 6 + content/documentation/quick-start/overview.md | 284 +++++++++++++++++++++ content/documentation/quick_start/index.md | 284 --------------------- content/documentation/reference-manual/_index.md | 6 + content/documentation/reference-manual/cli.md | 9 + .../reference-manual/configuration.md | 242 ++++++++++++++++++ content/documentation/reference-manual/layout.md | 79 ++++++ .../reference-manual/s3_compatibility.md | 65 +++++ content/documentation/reference_manual/cli.md | 9 - .../reference_manual/configuration.md | 242 ------------------ content/documentation/reference_manual/index.md | 10 - content/documentation/reference_manual/layout.md | 79 ------ .../reference_manual/s3_compatibility.md | 65 ----- content/documentation/working-documents/_index.md | 6 + .../working-documents/compatibility_target.md | 110 ++++++++ .../working-documents/design_draft.md | 167 ++++++++++++ .../working-documents/load_balancing.md | 204 +++++++++++++++ .../working-documents/migration_04.md | 110 ++++++++ .../working-documents/migration_06.md | 51 ++++ .../working_documents/compatibility_target.md | 110 -------- .../working_documents/design_draft.md | 167 ------------ content/documentation/working_documents/index.md | 13 - .../working_documents/load_balancing.md | 204 --------------- .../working_documents/migration_04.md | 110 -------- .../working_documents/migration_06.md | 51 ---- 59 files changed, 1412 insertions(+), 1629 deletions(-) create mode 100644 content/documentation/_index.md create mode 100644 content/documentation/connect/_index.md delete mode 100644 content/documentation/connect/cli-nextcloud-gui.png delete 
mode 100644 content/documentation/connect/index.md create mode 100644 content/documentation/cookbook/_index.md delete mode 100644 content/documentation/cookbook/index.md delete mode 100644 content/documentation/cookbook/website.md create mode 100644 content/documentation/design/_index.md delete mode 100644 content/documentation/design/index.md create mode 100644 content/documentation/development/_index.md delete mode 100644 content/documentation/development/index.md delete mode 100644 content/documentation/intro.md create mode 100644 content/documentation/quick-start/_index.md create mode 100644 content/documentation/quick-start/overview.md delete mode 100644 content/documentation/quick_start/index.md create mode 100644 content/documentation/reference-manual/_index.md create mode 100644 content/documentation/reference-manual/cli.md create mode 100644 content/documentation/reference-manual/configuration.md create mode 100644 content/documentation/reference-manual/layout.md create mode 100644 content/documentation/reference-manual/s3_compatibility.md delete mode 100644 content/documentation/reference_manual/cli.md delete mode 100644 content/documentation/reference_manual/configuration.md delete mode 100644 content/documentation/reference_manual/index.md delete mode 100644 content/documentation/reference_manual/layout.md delete mode 100644 content/documentation/reference_manual/s3_compatibility.md create mode 100644 content/documentation/working-documents/_index.md create mode 100644 content/documentation/working-documents/compatibility_target.md create mode 100644 content/documentation/working-documents/design_draft.md create mode 100644 content/documentation/working-documents/load_balancing.md create mode 100644 content/documentation/working-documents/migration_04.md create mode 100644 content/documentation/working-documents/migration_06.md delete mode 100644 content/documentation/working_documents/compatibility_target.md delete mode 100644 content/documentation/working_documents/design_draft.md delete mode 100644 content/documentation/working_documents/index.md delete mode 100644 content/documentation/working_documents/load_balancing.md delete mode 100644 content/documentation/working_documents/migration_04.md delete mode 100644 content/documentation/working_documents/migration_06.md (limited to 'content/documentation') diff --git a/content/documentation/_index.md b/content/documentation/_index.md new file mode 100644 index 0000000..68d1395 --- /dev/null +++ b/content/documentation/_index.md @@ -0,0 +1,5 @@ ++++ +template = "documentation.html" +page_template = "documentation.html" +redirect_to = "documentation/quick-start/" ++++ \ No newline at end of file diff --git a/content/documentation/connect/_index.md b/content/documentation/connect/_index.md new file mode 100644 index 0000000..ff3976a --- /dev/null +++ b/content/documentation/connect/_index.md @@ -0,0 +1,6 @@ ++++ +title = "Integrations" +weight = 3 +sort_by = "weight" +redirect_to = "documentation/connect/apps/" ++++ \ No newline at end of file diff --git a/content/documentation/connect/apps.md b/content/documentation/connect/apps.md index f88e334..37e57f3 100644 --- a/content/documentation/connect/apps.md +++ b/content/documentation/connect/apps.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Apps (Nextcloud, Peertube...)" +weight = 5 +++ # Apps (Nextcloud, Peertube...) 
diff --git a/content/documentation/connect/backup.md b/content/documentation/connect/backup.md index 60a875b..36b894b 100644 --- a/content/documentation/connect/backup.md +++ b/content/documentation/connect/backup.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Backups (restic, duplicity...)" +weight = 25 +++ # Backups (restic, duplicity...) diff --git a/content/documentation/connect/cli-nextcloud-gui.png b/content/documentation/connect/cli-nextcloud-gui.png deleted file mode 100644 index 7a58a3a..0000000 Binary files a/content/documentation/connect/cli-nextcloud-gui.png and /dev/null differ diff --git a/content/documentation/connect/cli.md b/content/documentation/connect/cli.md index d6b5fba..57664ac 100644 --- a/content/documentation/connect/cli.md +++ b/content/documentation/connect/cli.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "CLI tools" +weight = 20 +++ # CLI tools diff --git a/content/documentation/connect/code.md b/content/documentation/connect/code.md index 2224293..5c7f7fc 100644 --- a/content/documentation/connect/code.md +++ b/content/documentation/connect/code.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Your code (PHP, JS, Go...)" +weight = 30 +++ # Your code (PHP, JS, Go...) diff --git a/content/documentation/connect/fs.md b/content/documentation/connect/fs.md index 8258acc..eb1405f 100644 --- a/content/documentation/connect/fs.md +++ b/content/documentation/connect/fs.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "FUSE (s3fs, goofys, s3backer...)" +weight = 25 +++ # FUSE (s3fs, goofys, s3backer...) diff --git a/content/documentation/connect/index.md b/content/documentation/connect/index.md deleted file mode 100644 index a01bc48..0000000 --- a/content/documentation/connect/index.md +++ /dev/null @@ -1,46 +0,0 @@ -+++ -title="Doc Post" -date=2018-08-20 -+++ - -# Integrations - -Garage implements the Amazon S3 protocol, which makes it compatible with many existing software programs. - -In particular, you will find here instructions to connect it with: - - - [web applications](./apps.md) - - [website hosting](./websites.md) - - [software repositories](./repositories.md) - - [CLI tools](./cli.md) - - [your own code](./code.md) - -### Generic instructions - -To configure S3-compatible software to interact with Garage, -you will need the following parameters: - -- An **API endpoint**: this corresponds to the HTTP or HTTPS address - used to contact the Garage server. When runing Garage locally this will usually - be `http://127.0.0.1:3900`. In a real-world setting, you would usually have a reverse-proxy - that adds TLS support and makes your Garage server available under a public hostname - such as `https://garage.example.com`. - -- An **API access key** and its associated **secret key**. These usually look something - like this: `GK3515373e4c851ebaad366558` (access key), - `7d37d093435a41f2aab8f13c19ba067d9776c90215f56614adad6ece597dbb34` (secret key). - These keys are created and managed using the `garage` CLI, as explained in the - [quick start](../quick_start/index.md) guide. - -Most S3 clients can be configured easily with these parameters, -provided that you follow the following guidelines: - -- **Force path style:** Garage does not support DNS-style buckets, which are now by default - on Amazon S3. Instead, Garage uses the legacy path-style bucket addressing. - Remember to configure your client to acknowledge this fact. 
- -- **Configuring the S3 region:** Garage requires your client to talk to the correct "S3 region", - which is set in the configuration file. This is often set just to `garage`. - If this is not configured explicitly, clients usually try to talk to region `us-east-1`. - Garage should normally redirect your client to the correct region, - but in case your client does not support this you might have to configure it manually. diff --git a/content/documentation/connect/repositories.md b/content/documentation/connect/repositories.md index 429a15a..52d27f3 100644 --- a/content/documentation/connect/repositories.md +++ b/content/documentation/connect/repositories.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Repositories (Docker, Nix, Git...)" +weight = 15 +++ # Repositories (Docker, Nix, Git...) diff --git a/content/documentation/connect/websites.md b/content/documentation/connect/websites.md index 70c9de6..1dd9170 100644 --- a/content/documentation/connect/websites.md +++ b/content/documentation/connect/websites.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Websites (Hugo, Jekyll, Publii...)" +weight = 10 +++ # Websites (Hugo, Jekyll, Publii...) diff --git a/content/documentation/cookbook/_index.md b/content/documentation/cookbook/_index.md new file mode 100644 index 0000000..c788f18 --- /dev/null +++ b/content/documentation/cookbook/_index.md @@ -0,0 +1,6 @@ ++++ +title="Cookbook" +weight = 2 +sort_by = "weight" +redirect_to = "documentation/cookbook/real-world/" ++++ \ No newline at end of file diff --git a/content/documentation/cookbook/exposing_websites.md b/content/documentation/cookbook/exposing_websites.md index 1b55a08..e592e6c 100644 --- a/content/documentation/cookbook/exposing_websites.md +++ b/content/documentation/cookbook/exposing_websites.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Exposing buckets as websites" +weight = 25 +++ # Exposing buckets as websites diff --git a/content/documentation/cookbook/from_source.md b/content/documentation/cookbook/from_source.md index 38ede4d..f33b7ab 100644 --- a/content/documentation/cookbook/from_source.md +++ b/content/documentation/cookbook/from_source.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Compiling Garage from source" +weight = 10 +++ # Compiling Garage from source diff --git a/content/documentation/cookbook/gateways.md b/content/documentation/cookbook/gateways.md index f176c5e..8bd3a66 100644 --- a/content/documentation/cookbook/gateways.md +++ b/content/documentation/cookbook/gateways.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Configuring a gateway node" +weight = 20 +++ # Gateways diff --git a/content/documentation/cookbook/index.md b/content/documentation/cookbook/index.md deleted file mode 100644 index 764ee0c..0000000 --- a/content/documentation/cookbook/index.md +++ /dev/null @@ -1,31 +0,0 @@ -+++ -title="Doc Post" -date=2018-08-20 -+++ - -# Cookbook - -A cookbook, when you cook, is a collection of recipes. -Similarly, Garage's cookbook contains a collection of recipes that are known to works well! -This chapter could also be referred as "Tutorials" or "Best practices". - -- **[Multi-node deployment](real_world.md):** This page will walk you through all of the necessary - steps to deploy Garage in a real-world setting. - -- **[Building from source](from_source.md):** This page explains how to build Garage from - source in case a binary is not provided for your architecture, or if you want to - hack with us! 
- -- **[Integration with Systemd](systemd.md):** This page explains how to run Garage - as a Systemd service (instead of as a Docker container). - -- **[Configuring a gateway node](gateways.md):** This page explains how to run a gateway node in a Garage cluster, i.e. a Garage node that doesn't store data but accelerates access to data present on the other nodes. - -- **[Hosting a website](exposing_websites.md):** This page explains how to use Garage - to host a static website. - -- **[Configuring a reverse-proxy](reverse_proxy.md):** This page explains how to configure a reverse-proxy to add TLS support to your S3 api endpoint. - -- **[Recovering from failures](recovering.md):** Garage's first selling point is resilience - to hardware failures. This section explains how to recover from such a failure in the - best possible way. diff --git a/content/documentation/cookbook/real_world.md b/content/documentation/cookbook/real_world.md index 7d6d77e..049a54a 100644 --- a/content/documentation/cookbook/real_world.md +++ b/content/documentation/cookbook/real_world.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Multi-node deployment" +weight = 5 +++ # Deploying Garage on a real-world cluster diff --git a/content/documentation/cookbook/recovering.md b/content/documentation/cookbook/recovering.md index 2b8e2f0..7d75bfe 100644 --- a/content/documentation/cookbook/recovering.md +++ b/content/documentation/cookbook/recovering.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Recovering from failures" +weight = 35 +++ # Recovering from failures diff --git a/content/documentation/cookbook/reverse_proxy.md b/content/documentation/cookbook/reverse_proxy.md index 36d3535..36b9ea3 100644 --- a/content/documentation/cookbook/reverse_proxy.md +++ b/content/documentation/cookbook/reverse_proxy.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Configuring a reverse proxy" +weight = 30 +++ # Configuring a reverse proxy diff --git a/content/documentation/cookbook/systemd.md b/content/documentation/cookbook/systemd.md index cb40b6a..8e812e1 100644 --- a/content/documentation/cookbook/systemd.md +++ b/content/documentation/cookbook/systemd.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Starting Garage with systemd" +weight = 15 +++ # Starting Garage with systemd diff --git a/content/documentation/cookbook/website.md b/content/documentation/cookbook/website.md deleted file mode 100644 index a5e378f..0000000 --- a/content/documentation/cookbook/website.md +++ /dev/null @@ -1,8 +0,0 @@ -+++ -title="Doc Post" -date=2018-08-20 -+++ - -# Hosting a website - -TODO diff --git a/content/documentation/design/_index.md b/content/documentation/design/_index.md new file mode 100644 index 0000000..1b4c7f7 --- /dev/null +++ b/content/documentation/design/_index.md @@ -0,0 +1,6 @@ ++++ +title = "Design" +weight = 5 +sort_by = "weight" +redirect_to = "documentation/design/goals/" ++++ \ No newline at end of file diff --git a/content/documentation/design/benchmarks.md b/content/documentation/design/benchmarks.md index b339df1..7829de4 100644 --- a/content/documentation/design/benchmarks.md +++ b/content/documentation/design/benchmarks.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Benchmarks" +weight = 10 +++ # Benchmarks diff --git a/content/documentation/design/goals.md b/content/documentation/design/goals.md index 539b3b4..aa1e0bc 100644 --- a/content/documentation/design/goals.md +++ b/content/documentation/design/goals.md @@ -1,6 
+1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Goals and use cases" +weight = 5 +++ # Goals and use cases diff --git a/content/documentation/design/index.md b/content/documentation/design/index.md deleted file mode 100644 index f84af11..0000000 --- a/content/documentation/design/index.md +++ /dev/null @@ -1,31 +0,0 @@ -+++ -title="Doc Post" -date=2018-08-20 -+++ - -# Design - -The design section helps you to see Garage from a "big picture" -perspective. It will allow you to understand if Garage is a good fit for -you, how to better use it, how to contribute to it, what can Garage could -and could not do, etc. - -- **[Goals and use cases](goals.md):** This page explains why Garage was concieved and what practical use cases it targets. - -- **[Related work](related_work.md):** This pages presents the theoretical background on which Garage is built, and describes other software storage solutions and why they didn't work for us. - -- **[Internals](internals.md):** This page enters into more details on how Garage manages data internally. - -## Talks - -We love to talk and hear about Garage, that's why we keep a log here: - - - [(fr, 2021-11-13, video) Garage : Mille et une façons de stocker vos données](https://video.tedomum.net/w/moYKcv198dyMrT8hCS5jz9) and [slides (html)](https://rfid.deuxfleurs.fr/presentations/2021-11-13/garage/) - during [RFID#1](https://rfid.deuxfleurs.fr/programme/2021-11-13/) event - - - [(en, 2021-04-28) Distributed object storage is centralised](https://git.deuxfleurs.fr/Deuxfleurs/garage/raw/commit/b1f60579a13d3c5eba7f74b1775c84639ea9b51a/doc/talks/2021-04-28_spirals-team/talk.pdf) - - - [(fr, 2020-12-02) Garage : jouer dans la cour des grands quand on est un hébergeur associatif](https://git.deuxfleurs.fr/Deuxfleurs/garage/raw/commit/b1f60579a13d3c5eba7f74b1775c84639ea9b51a/doc/talks/2020-12-02_wide-team/talk.pdf) - -*Did you write or talk about Garage? 
[Open a pull request](https://git.deuxfleurs.fr/Deuxfleurs/garage/) to add a link here!* - - diff --git a/content/documentation/design/internals.md b/content/documentation/design/internals.md index 2cb5792..ba120b0 100644 --- a/content/documentation/design/internals.md +++ b/content/documentation/design/internals.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Internals" +weight = 20 +++ # Internals diff --git a/content/documentation/design/related_work.md b/content/documentation/design/related_work.md index e11652c..32b5141 100644 --- a/content/documentation/design/related_work.md +++ b/content/documentation/design/related_work.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Related work" +weight = 15 +++ # Related work diff --git a/content/documentation/development/_index.md b/content/documentation/development/_index.md new file mode 100644 index 0000000..56994af --- /dev/null +++ b/content/documentation/development/_index.md @@ -0,0 +1,6 @@ ++++ +title = "Development" +weight = 6 +sort_by = "weight" +redirect_to = "documentation/development/devenv/" ++++ \ No newline at end of file diff --git a/content/documentation/development/devenv.md b/content/documentation/development/devenv.md index 2979141..7ea577c 100644 --- a/content/documentation/development/devenv.md +++ b/content/documentation/development/devenv.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Setup your environment" +weight = 5 +++ # Setup your development environment diff --git a/content/documentation/development/index.md b/content/documentation/development/index.md deleted file mode 100644 index 9ca1f70..0000000 --- a/content/documentation/development/index.md +++ /dev/null @@ -1,19 +0,0 @@ -+++ -title="Doc Post" -date=2018-08-20 -+++ - -# Development - -Now that you are a Garage expert, you want to enhance it, you are in the right place! -We discuss here how to hack on Garage, how we manage its development, etc. - -## Rust API (docs.rs) -If you encounter a specific bug in Garage or plan to patch it, you may jump directly to the source code's documentation! 
- - - [garage\_api](https://docs.rs/garage_api/latest/garage_api/) - contains the S3 standard API endpoint - - [garage\_model](https://docs.rs/garage_model/latest/garage_model/) - contains Garage's model built on the table abstraction - - [garage\_rpc](https://docs.rs/garage_rpc/latest/garage_rpc/) - contains Garage's federation protocol - - [garage\_table](https://docs.rs/garage_table/latest/garage_table/) - contains core Garage's CRDT datatypes - - [garage\_util](https://docs.rs/garage_util/latest/garage_util/) - contains garage helpers - - [garage\_web](https://docs.rs/garage_web/latest/garage_web/) - contains the S3 website endpoint diff --git a/content/documentation/development/miscellaneous_notes.md b/content/documentation/development/miscellaneous_notes.md index aba4802..aafb3cd 100644 --- a/content/documentation/development/miscellaneous_notes.md +++ b/content/documentation/development/miscellaneous_notes.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Miscellaneous notes" +weight = 20 +++ # Miscellaneous Notes diff --git a/content/documentation/development/release_process.md b/content/documentation/development/release_process.md index 50673ae..380f544 100644 --- a/content/documentation/development/release_process.md +++ b/content/documentation/development/release_process.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Release process" +weight = 15 +++ # Release process diff --git a/content/documentation/development/scripts.md b/content/documentation/development/scripts.md index bfcb047..a6db23c 100644 --- a/content/documentation/development/scripts.md +++ b/content/documentation/development/scripts.md @@ -1,6 +1,6 @@ +++ -title="Doc Post" -date=2018-08-20 +title = "Development scripts" +weight = 10 +++ # Development scripts diff --git a/content/documentation/intro.md b/content/documentation/intro.md deleted file mode 100644 index 8c97b8f..0000000 --- a/content/documentation/intro.md +++ /dev/null @@ -1,106 +0,0 @@ -+++ -title="Doc Post" -date=2018-08-20 -+++ - -

-[image: Garage's Logo]
-
-[ Download | Git repository | Matrix channel | Drone CI ]
-
-# Data resiliency for everyone
-
-Garage is an **open-source** distributed **storage service** you can **self-host** to fulfill many needs:
-

-[image: Summary of the possible usages with a related icon: host a website, store media and backup target]
-
-⮞ learn more about use cases ⮜

- -Garage implements the **[Amazon S3 API](https://docs.aws.amazon.com/AmazonS3/latest/API/Welcome.html)** and thus is already **compatible** with many applications: - -

-[image: Garage is already compatible with Nextcloud, Mastodon, Matrix Synapse, Cyberduck, RClone and Peertube]
-
-⮞ learn more about integrations ⮜

- - -Garage provides **data resiliency** by **replicating** data 3x over **distant** servers: - -

-[image: An example deployment on a map with servers in 5 zones: UK, France, Belgium, Germany and Switzerland. Each chunk of data is replicated in 3 of these 5 zones.]
-
-⮞ learn more about our design ⮜

- -Did you notice that *this website* is hosted and served by Garage? - -## Keeping requirements low - -We worked hard to keep requirements as low as possible as we target the largest possible public. - - * **CPU:** any x86\_64 CPU from the last 10 years, ARMv7 or ARMv8. - * **RAM:** 1GB - * **Disk Space:** at least 16GB - * **Network:** 200ms or less, 50 Mbps or more - * **Heterogeneous hardware:** build a cluster with whatever second-hand machines are available - -*For the network, as we do not use consensus algorithms like Paxos or Raft, Garage is not as latency sensitive.* -*Thanks to Rust and its zero-cost abstractions, we keep CPU and memory low.* - -## Built on the shoulder of giants - - - [Dynamo: Amazon’s Highly Available Key-value Store ](https://dl.acm.org/doi/abs/10.1145/1323293.1294281) by DeCandia et al. - - [Conflict-Free Replicated Data Types](https://link.springer.com/chapter/10.1007/978-3-642-24550-3_29) by Shapiro et al. - - [Maglev: A Fast and Reliable Software Network Load Balancer](https://www.usenix.org/conference/nsdi16/technical-sessions/presentation/eisenbud) by Eisenbud et al. - -## Talks - - - [(fr, 2021-11-13, video) Garage : Mille et une façons de stocker vos données](https://video.tedomum.net/w/moYKcv198dyMrT8hCS5jz9) and [slides (html)](https://rfid.deuxfleurs.fr/presentations/2021-11-13/garage/) - during [RFID#1](https://rfid.deuxfleurs.fr/programme/2021-11-13/) event - - - [(en, 2021-04-28, pdf) Distributed object storage is centralised](https://git.deuxfleurs.fr/Deuxfleurs/garage/raw/commit/b1f60579a13d3c5eba7f74b1775c84639ea9b51a/doc/talks/2021-04-28_spirals-team/talk.pdf) - - - [(fr, 2020-12-02, pdf) Garage : jouer dans la cour des grands quand on est un hébergeur associatif](https://git.deuxfleurs.fr/Deuxfleurs/garage/raw/commit/b1f60579a13d3c5eba7f74b1775c84639ea9b51a/doc/talks/2020-12-02_wide-team/talk.pdf) - -## Community - -If you want to discuss with us, you can join our Matrix channel at [#garage:deuxfleurs.fr](https://matrix.to/#/#garage:deuxfleurs.fr). -Our code repository and issue tracker, which is the place where you should report bugs, is managed on [Deuxfleurs' Gitea](https://git.deuxfleurs.fr/Deuxfleurs/garage). - -## License - -Garage's source code, is released under the [AGPL v3 License](https://www.gnu.org/licenses/agpl-3.0.en.html). -Please note that if you patch Garage and then use it to provide any service over a network, you must share your code! - -# Sponsors and funding - -The Deuxfleurs association has received a grant from [NGI POINTER](https://pointer.ngi.eu/), to fund 3 people working on Garage full-time for a year: from October 2021 to September 2022. - -
-[image: NGI Pointer logo]
-[image: EU flag logo]
- -_This project has received funding from the European Union’s Horizon 2020 research and innovation programme within the framework of the NGI-POINTER Project funded under grant agreement N° 871528._ diff --git a/content/documentation/quick-start/_index.md b/content/documentation/quick-start/_index.md new file mode 100644 index 0000000..b8ec473 --- /dev/null +++ b/content/documentation/quick-start/_index.md @@ -0,0 +1,6 @@ ++++ +title = "Quick Start" +weight = 1 +sort_by = "weight" +redirect_to = "documentation/quick-start/overview/" ++++ \ No newline at end of file diff --git a/content/documentation/quick-start/overview.md b/content/documentation/quick-start/overview.md new file mode 100644 index 0000000..f09d748 --- /dev/null +++ b/content/documentation/quick-start/overview.md @@ -0,0 +1,284 @@ ++++ +title = "Overview" +weight = 5 ++++ + +# Quick Start + +Let's start your Garage journey! +In this chapter, we explain how to deploy Garage as a single-node server +and how to interact with it. + +Our goal is to introduce you to Garage's workflows. +Following this guide is recommended before moving on to +[configuring a multi-node cluster](../cookbook/real_world.md). + +Note that this kind of deployment should not be used in production, +as it provides no redundancy for your data! + +## Get a binary + +Download the latest Garage binary from the release pages on our repository: + + + +Place this binary somewhere in your `$PATH` so that you can invoke the `garage` +command directly (for instance you can copy the binary in `/usr/local/bin` +or in `~/.local/bin`). + +If a binary of the last version is not available for your architecture, +or if you want a build customized for your system, +you can [build Garage from source](../cookbook/from_source.md). + + +## Writing a first configuration file + +This first configuration file should allow you to get started easily with the simplest +possible Garage deployment. +**Save it as `/etc/garage.toml`.** +You can also store it somewhere else, but you will have to specify `-c path/to/garage.toml` +at each invocation of the `garage` binary (for example: `garage -c ./garage.toml server`, `garage -c ./garage.toml status`). + +```toml +metadata_dir = "/tmp/meta" +data_dir = "/tmp/data" + +replication_mode = "none" + +rpc_bind_addr = "[::]:3901" +rpc_public_addr = "127.0.0.1:3901" +rpc_secret = "1799bccfd7411eddcf9ebd316bc1f5287ad12a68094e1c6ac6abde7e6feae1ec" + +bootstrap_peers = [] + +[s3_api] +s3_region = "garage" +api_bind_addr = "[::]:3900" +root_domain = ".s3.garage" + +[s3_web] +bind_addr = "[::]:3902" +root_domain = ".web.garage" +index = "index.html" +``` + +The `rpc_secret` value provided above is just an example. It will work, but in +order to secure your cluster you will need to use another one. You can generate +such a value with `openssl rand -hex 32`. + + +As you can see in the `metadata_dir` and `data_dir` parameters, we are saving Garage's data +in `/tmp` which gets erased when your system reboots. This means that data stored on this +Garage server will not be persistent. Change these to locations on your local disk if you want +your data to be persisted properly. 
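If you prefer your test data to survive a reboot, a small shell sketch like the one below (the paths simply mirror the reference configuration later in this manual) prepares persistent directories and a fresh `rpc_secret` before you edit the file:

```bash
# Sketch only: create persistent locations for metadata_dir and data_dir,
# then generate a fresh secret to paste into the rpc_secret field.
sudo mkdir -p /var/lib/garage/meta /var/lib/garage/data
openssl rand -hex 32   # copy the output into /etc/garage.toml as rpc_secret
```

Then point `metadata_dir` and `data_dir` at these new locations in `/etc/garage.toml`.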
+ + +# Launching the Garage server + +Use the following command to launch the Garage server with our configuration file: + +``` +RUST_LOG=garage=info garage server +``` + +You can tune Garage's verbosity as follows (from less verbose to more verbose): + +``` +RUST_LOG=garage=info garage server +RUST_LOG=garage=debug garage server +RUST_LOG=garage=trace garage server +``` + +Log level `info` is recommended for most use cases. +Log level `debug` can help you check why your S3 API calls are not working. + + +## Checking that Garage runs correctly + +The `garage` utility is also used as a CLI tool to configure your Garage deployment. +It uses values from the TOML configuration file to find the Garage daemon running on the +local node, therefore if your configuration file is not at `/etc/garage.toml` you will +again have to specify `-c path/to/garage.toml`. + +If the `garage` CLI is able to correctly detect the parameters of your local Garage node, +the following command should be enough to show the status of your cluster: + +``` +garage status +``` + +This should show something like this: + +``` +==== HEALTHY NODES ==== +ID Hostname Address Tag Zone Capacity +563e1ac825ee3323… linuxbox 127.0.0.1:3901 NO ROLE ASSIGNED +``` + +## Creating a cluster layout + +Creating a cluster layout for a Garage deployment means informing Garage +of the disk space available on each node of the cluster +as well as the zone (e.g. datacenter) each machine is located in. + +For our test deployment, we are using only one node. The way in which we configure +it does not matter, you can simply write: + +```bash +garage layout assign -z dc1 -c 1 +``` + +where `` corresponds to the identifier of the node shown by `garage status` (first column). +You can enter simply a prefix of that identifier. +For instance here you could write just `garage layout assign -z dc1 -c 1 563e`. + +The layout then has to be applied to the cluster, using: + +```bash +garage layout apply +``` + + +## Creating buckets and keys + +In this section, we will suppose that we want to create a bucket named `nextcloud-bucket` +that will be accessed through a key named `nextcloud-app-key`. + +Don't forget that `help` command and `--help` subcommands can help you anywhere, +the CLI tool is self-documented! Two examples: + +``` +garage help +garage bucket allow --help +``` + +#### Create a bucket + +Let's take an example where we want to deploy NextCloud using Garage as the +main data storage. + +First, create a bucket with the following command: + +``` +garage bucket create nextcloud-bucket +``` + +Check that everything went well: + +``` +garage bucket list +garage bucket info nextcloud-bucket +``` + +#### Create an API key + +The `nextcloud-bucket` bucket now exists on the Garage server, +however it cannot be accessed until we add an API key with the proper access rights. + +Note that API keys are independent of buckets: +one key can access multiple buckets, multiple keys can access one bucket. 
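As an illustration of this many-to-many model (using the `garage bucket allow` syntax introduced a few steps below, and a second bucket name that is purely hypothetical), the same key could later be granted access to several buckets:

```bash
# Hypothetical sketch: one key, two buckets ("media-bucket" is not part of this guide)
garage bucket allow --read --write nextcloud-bucket --key nextcloud-app-key
garage bucket allow --read --write media-bucket --key nextcloud-app-key
```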
+ +Create an API key using the following command: + +``` +garage key new --name nextcloud-app-key +``` + +The output should look as follows: + +``` +Key name: nextcloud-app-key +Key ID: GK3515373e4c851ebaad366558 +Secret key: 7d37d093435a41f2aab8f13c19ba067d9776c90215f56614adad6ece597dbb34 +Authorized buckets: +``` + +Check that everything works as intended: + +``` +garage key list +garage key info nextcloud-app-key +``` + +#### Allow a key to access a bucket + +Now that we have a bucket and a key, we need to give permissions to the key on the bucket: + +``` +garage bucket allow \ + --read \ + --write \ + nextcloud-bucket \ + --key nextcloud-app-key +``` + +You can check at any time the allowed keys on your bucket with: + +``` +garage bucket info nextcloud-bucket +``` + + +## Uploading and downlading from Garage + +We recommend the use of MinIO Client to interact with Garage files (`mc`). +Instructions to install it and use it are provided on the +[MinIO website](https://docs.min.io/docs/minio-client-quickstart-guide.html). +Before reading the following, you need a working `mc` command on your path. + +Note that on certain Linux distributions such as Arch Linux, the Minio client binary +is called `mcli` instead of `mc` (to avoid name clashes with the Midnight Commander). + +#### Configure `mc` + +You need your access key and secret key created above. +We will assume you are invoking `mc` on the same machine as the Garage server, +your S3 API endpoint is therefore `http://127.0.0.1:3900`. +For this whole configuration, you must set an alias name: we chose `my-garage`, that you will used for all commands. + +Adapt the following command accordingly and run it: + +```bash +mc alias set \ + my-garage \ + http://127.0.0.1:3900 \ + \ + \ + --api S3v4 +``` + +You must also add an environment variable to your configuration to +inform MinIO of our region (`garage` by default, corresponding to the `s3_region` parameter +in the configuration file). +The best way is to add the following snippet to your `$HOME/.bash_profile` +or `$HOME/.bashrc` file: + +```bash +export MC_REGION=garage +``` + +#### Use `mc` + +You can not list buckets from `mc` currently. + +But the following commands and many more should work: + +```bash +mc cp image.png my-garage/nextcloud-bucket +mc cp my-garage/nextcloud-bucket/image.png . +mc ls my-garage/nextcloud-bucket +mc mirror localdir/ my-garage/another-bucket +``` + + +#### Other tools for interacting with Garage + +The following tools can also be used to send and recieve files from/to Garage: + +- the [AWS CLI](https://aws.amazon.com/cli/) +- [`rclone`](https://rclone.org/) +- [Cyberduck](https://cyberduck.io/) +- [`s3cmd`](https://s3tools.org/s3cmd) + +Refer to the ["Integrations" section](../connect/index.md) to learn how to +configure application and command line utilities to integrate with Garage. diff --git a/content/documentation/quick_start/index.md b/content/documentation/quick_start/index.md deleted file mode 100644 index 54f6f8d..0000000 --- a/content/documentation/quick_start/index.md +++ /dev/null @@ -1,284 +0,0 @@ -+++ -title="Doc Post" -date=2018-08-20 -+++ - -# Quick Start - -Let's start your Garage journey! -In this chapter, we explain how to deploy Garage as a single-node server -and how to interact with it. - -Our goal is to introduce you to Garage's workflows. -Following this guide is recommended before moving on to -[configuring a multi-node cluster](../cookbook/real_world.md). 
- -Note that this kind of deployment should not be used in production, -as it provides no redundancy for your data! - -## Get a binary - -Download the latest Garage binary from the release pages on our repository: - - - -Place this binary somewhere in your `$PATH` so that you can invoke the `garage` -command directly (for instance you can copy the binary in `/usr/local/bin` -or in `~/.local/bin`). - -If a binary of the last version is not available for your architecture, -or if you want a build customized for your system, -you can [build Garage from source](../cookbook/from_source.md). - - -## Writing a first configuration file - -This first configuration file should allow you to get started easily with the simplest -possible Garage deployment. -**Save it as `/etc/garage.toml`.** -You can also store it somewhere else, but you will have to specify `-c path/to/garage.toml` -at each invocation of the `garage` binary (for example: `garage -c ./garage.toml server`, `garage -c ./garage.toml status`). - -```toml -metadata_dir = "/tmp/meta" -data_dir = "/tmp/data" - -replication_mode = "none" - -rpc_bind_addr = "[::]:3901" -rpc_public_addr = "127.0.0.1:3901" -rpc_secret = "1799bccfd7411eddcf9ebd316bc1f5287ad12a68094e1c6ac6abde7e6feae1ec" - -bootstrap_peers = [] - -[s3_api] -s3_region = "garage" -api_bind_addr = "[::]:3900" -root_domain = ".s3.garage" - -[s3_web] -bind_addr = "[::]:3902" -root_domain = ".web.garage" -index = "index.html" -``` - -The `rpc_secret` value provided above is just an example. It will work, but in -order to secure your cluster you will need to use another one. You can generate -such a value with `openssl rand -hex 32`. - - -As you can see in the `metadata_dir` and `data_dir` parameters, we are saving Garage's data -in `/tmp` which gets erased when your system reboots. This means that data stored on this -Garage server will not be persistent. Change these to locations on your local disk if you want -your data to be persisted properly. - - -## Launching the Garage server - -Use the following command to launch the Garage server with our configuration file: - -``` -RUST_LOG=garage=info garage server -``` - -You can tune Garage's verbosity as follows (from less verbose to more verbose): - -``` -RUST_LOG=garage=info garage server -RUST_LOG=garage=debug garage server -RUST_LOG=garage=trace garage server -``` - -Log level `info` is recommended for most use cases. -Log level `debug` can help you check why your S3 API calls are not working. - - -## Checking that Garage runs correctly - -The `garage` utility is also used as a CLI tool to configure your Garage deployment. -It uses values from the TOML configuration file to find the Garage daemon running on the -local node, therefore if your configuration file is not at `/etc/garage.toml` you will -again have to specify `-c path/to/garage.toml`. - -If the `garage` CLI is able to correctly detect the parameters of your local Garage node, -the following command should be enough to show the status of your cluster: - -``` -garage status -``` - -This should show something like this: - -``` -==== HEALTHY NODES ==== -ID Hostname Address Tag Zone Capacity -563e1ac825ee3323… linuxbox 127.0.0.1:3901 NO ROLE ASSIGNED -``` - -## Creating a cluster layout - -Creating a cluster layout for a Garage deployment means informing Garage -of the disk space available on each node of the cluster -as well as the zone (e.g. datacenter) each machine is located in. - -For our test deployment, we are using only one node. 
The way in which we configure -it does not matter, you can simply write: - -```bash -garage layout assign -z dc1 -c 1 -``` - -where `` corresponds to the identifier of the node shown by `garage status` (first column). -You can enter simply a prefix of that identifier. -For instance here you could write just `garage layout assign -z dc1 -c 1 563e`. - -The layout then has to be applied to the cluster, using: - -```bash -garage layout apply -``` - - -## Creating buckets and keys - -In this section, we will suppose that we want to create a bucket named `nextcloud-bucket` -that will be accessed through a key named `nextcloud-app-key`. - -Don't forget that `help` command and `--help` subcommands can help you anywhere, -the CLI tool is self-documented! Two examples: - -``` -garage help -garage bucket allow --help -``` - -#### Create a bucket - -Let's take an example where we want to deploy NextCloud using Garage as the -main data storage. - -First, create a bucket with the following command: - -``` -garage bucket create nextcloud-bucket -``` - -Check that everything went well: - -``` -garage bucket list -garage bucket info nextcloud-bucket -``` - -#### Create an API key - -The `nextcloud-bucket` bucket now exists on the Garage server, -however it cannot be accessed until we add an API key with the proper access rights. - -Note that API keys are independent of buckets: -one key can access multiple buckets, multiple keys can access one bucket. - -Create an API key using the following command: - -``` -garage key new --name nextcloud-app-key -``` - -The output should look as follows: - -``` -Key name: nextcloud-app-key -Key ID: GK3515373e4c851ebaad366558 -Secret key: 7d37d093435a41f2aab8f13c19ba067d9776c90215f56614adad6ece597dbb34 -Authorized buckets: -``` - -Check that everything works as intended: - -``` -garage key list -garage key info nextcloud-app-key -``` - -#### Allow a key to access a bucket - -Now that we have a bucket and a key, we need to give permissions to the key on the bucket: - -``` -garage bucket allow \ - --read \ - --write \ - nextcloud-bucket \ - --key nextcloud-app-key -``` - -You can check at any time the allowed keys on your bucket with: - -``` -garage bucket info nextcloud-bucket -``` - - -## Uploading and downlading from Garage - -We recommend the use of MinIO Client to interact with Garage files (`mc`). -Instructions to install it and use it are provided on the -[MinIO website](https://docs.min.io/docs/minio-client-quickstart-guide.html). -Before reading the following, you need a working `mc` command on your path. - -Note that on certain Linux distributions such as Arch Linux, the Minio client binary -is called `mcli` instead of `mc` (to avoid name clashes with the Midnight Commander). - -#### Configure `mc` - -You need your access key and secret key created above. -We will assume you are invoking `mc` on the same machine as the Garage server, -your S3 API endpoint is therefore `http://127.0.0.1:3900`. -For this whole configuration, you must set an alias name: we chose `my-garage`, that you will used for all commands. - -Adapt the following command accordingly and run it: - -```bash -mc alias set \ - my-garage \ - http://127.0.0.1:3900 \ - \ - \ - --api S3v4 -``` - -You must also add an environment variable to your configuration to -inform MinIO of our region (`garage` by default, corresponding to the `s3_region` parameter -in the configuration file). 
-The best way is to add the following snippet to your `$HOME/.bash_profile` -or `$HOME/.bashrc` file: - -```bash -export MC_REGION=garage -``` - -#### Use `mc` - -You can not list buckets from `mc` currently. - -But the following commands and many more should work: - -```bash -mc cp image.png my-garage/nextcloud-bucket -mc cp my-garage/nextcloud-bucket/image.png . -mc ls my-garage/nextcloud-bucket -mc mirror localdir/ my-garage/another-bucket -``` - - -#### Other tools for interacting with Garage - -The following tools can also be used to send and recieve files from/to Garage: - -- the [AWS CLI](https://aws.amazon.com/cli/) -- [`rclone`](https://rclone.org/) -- [Cyberduck](https://cyberduck.io/) -- [`s3cmd`](https://s3tools.org/s3cmd) - -Refer to the ["Integrations" section](../connect/index.md) to learn how to -configure application and command line utilities to integrate with Garage. diff --git a/content/documentation/reference-manual/_index.md b/content/documentation/reference-manual/_index.md new file mode 100644 index 0000000..ee79581 --- /dev/null +++ b/content/documentation/reference-manual/_index.md @@ -0,0 +1,6 @@ ++++ +title = "Reference Manual" +weight = 4 +sort_by = "weight" +redirect_to = "documentation/reference-manual/configuration/" ++++ \ No newline at end of file diff --git a/content/documentation/reference-manual/cli.md b/content/documentation/reference-manual/cli.md new file mode 100644 index 0000000..2f0525f --- /dev/null +++ b/content/documentation/reference-manual/cli.md @@ -0,0 +1,9 @@ ++++ +title = "Garage CLI" +weight = 15 ++++ + +# Garage CLI + +The Garage CLI is mostly self-documented. Make use of the `help` subcommand +and the `--help` flag to discover all available options. diff --git a/content/documentation/reference-manual/configuration.md b/content/documentation/reference-manual/configuration.md new file mode 100644 index 0000000..c9ebee2 --- /dev/null +++ b/content/documentation/reference-manual/configuration.md @@ -0,0 +1,242 @@ ++++ +title = "Garage configuration file" +weight = 5 ++++ + +# Garage configuration file format reference + +Here is an example `garage.toml` configuration file that illustrates all of the possible options: + +```toml +metadata_dir = "/var/lib/garage/meta" +data_dir = "/var/lib/garage/data" + +block_size = 1048576 + +replication_mode = "3" + +compression_level = 1 + +rpc_secret = "4425f5c26c5e11581d3223904324dcb5b5d5dfb14e5e7f35e38c595424f5f1e6" +rpc_bind_addr = "[::]:3901" +rpc_public_addr = "[fc00:1::1]:3901" + +bootstrap_peers = [ + "563e1ac825ee3323aa441e72c26d1030d6d4414aeb3dd25287c531e7fc2bc95d@[fc00:1::1]:3901", + "86f0f26ae4afbd59aaf9cfb059eefac844951efd5b8caeec0d53f4ed6c85f332[fc00:1::2]:3901", + "681456ab91350f92242e80a531a3ec9392cb7c974f72640112f90a600d7921a4@[fc00:B::1]:3901", + "212fd62eeaca72c122b45a7f4fa0f55e012aa5e24ac384a72a3016413fa724ff@[fc00:F::1]:3901", +] + +consul_host = "consul.service" +consul_service_name = "garage-daemon" + +sled_cache_capacity = 134217728 +sled_flush_every_ms = 2000 + +[s3_api] +api_bind_addr = "[::]:3900" +s3_region = "garage" +root_domain = ".s3.garage" + +[s3_web] +bind_addr = "[::]:3902" +root_domain = ".web.garage" +index = "index.html" +``` + +The following gives details about each available configuration option. + +## Available configuration options + +#### `metadata_dir` + +The directory in which Garage will store its metadata. 
This contains the node identifier, +the network configuration and the peer list, the list of buckets and keys as well +as the index of all objects, object version and object blocks. + +Store this folder on a fast SSD drive if possible to maximize Garage's performance. + +#### `data_dir` + +The directory in which Garage will store the data blocks of objects. +This folder can be placed on an HDD. The space available for `data_dir` +should be counted to determine a node's capacity +when [configuring it](../getting_started/05_cluster.md). + +#### `block_size` + +Garage splits stored objects in consecutive chunks of size `block_size` +(except the last one which might be smaller). The default size is 1MB and +should work in most cases. If you are interested in tuning this, feel free +to do so (and remember to report your findings to us!). If this value is +changed for a running Garage installation, only files newly uploaded will be +affected. Previously uploaded files will remain available. This however +means that chunks from existing files will not be deduplicated with chunks +from newly uploaded files, meaning you might use more storage space that is +optimally possible. + +#### `replication_mode` + +Garage supports the following replication modes: + +- `none` or `1`: data stored on Garage is stored on a single node. There is no redundancy, + and data will be unavailable as soon as one node fails or its network is disconnected. + Do not use this for anything else than test deployments. + +- `2`: data stored on Garage will be stored on two different nodes, if possible in different + zones. Garage tolerates one node failure before losing data. Data should be available + read-only when one node is down, but write operations will fail. + Use this only if you really have to. + +- `3`: data stored on Garage will be stored on three different nodes, if possible each in + a different zones. + Garage tolerates two node failure before losing data. Data should be available + read-only when two nodes are down, and writes should be possible if only a single node + is down. + +Note that in modes `2` and `3`, +if at least the same number of zones are available, an arbitrary number of failures in +any given zone is tolerated as copies of data will be spread over several zones. + +**Make sure `replication_mode` is the same in the configuration files of all nodes. +Never run a Garage cluster where that is not the case.** + +Changing the `replication_mode` of a cluster might work (make sure to shut down all nodes +and changing it everywhere at the time), but is not officially supported. + +### `compression_level` + +Zstd compression level to use for storing blocks. + +Values between `1` (faster compression) and `19` (smaller file) are standard compression +levels for zstd. From `20` to `22`, compression levels are referred as "ultra" and must be +used with extra care as it will use lot of memory. A value of `0` will let zstd choose a +default value (currently `3`). Finally, zstd has also compression designed to be faster +than default compression levels, they range from `-1` (smaller file) to `-99` (faster +compression). + +If you do not specify a `compression_level` entry, garage will set it to `1` for you. With +this parameters, zstd consumes low amount of cpu and should work faster than line speed in +most situations, while saving some space and intra-cluster +bandwidth. + +If you want to totally deactivate zstd in garage, you can pass the special value `'none'`. 
No +zstd related code will be called, your chunks will be stored on disk without any processing. + +Compression is done synchronously, setting a value too high will add latency to write queries. + +This value can be different between nodes, compression is done by the node which receive the +API call. + +#### `rpc_secret` + +Garage uses a secret key that is shared between all nodes of the cluster +in order to identify these nodes and allow them to communicate together. +This key should be specified here in the form of a 32-byte hex-encoded +random string. Such a string can be generated with a command +such as `openssl rand -hex 32`. + +#### `rpc_bind_addr` + +The address and port on which to bind for inter-cluster communcations +(reffered to as RPC for remote procedure calls). +The port specified here should be the same one that other nodes will used to contact +the node, even in the case of a NAT: the NAT should be configured to forward the external +port number to the same internal port nubmer. This means that if you have several nodes running +behind a NAT, they should each use a different RPC port number. + +#### `rpc_public_addr` + +The address and port that other nodes need to use to contact this node for +RPC calls. **This parameter is optional but recommended.** In case you have +a NAT that binds the RPC port to a port that is different on your public IP, +this field might help making it work. + +#### `bootstrap_peers` + +A list of peer identifiers on which to contact other Garage peers of this cluster. +These peer identifiers have the following syntax: + +``` +@: +``` + +In the case where `rpc_public_addr` is correctly specified in the +configuration file, the full identifier of a node including IP and port can +be obtained by running `garage node id` and then included directly in the +`bootstrap_peers` list of other nodes. Otherwise, only the node's public +key will be returned by `garage node id` and you will have to add the IP +yourself. + +#### `consul_host` and `consul_service_name` + +Garage supports discovering other nodes of the cluster using Consul. +This works only when nodes are announced in Consul by an orchestrator such as Nomad, +as Garage is not able to announce itself. + +The `consul_host` parameter should be set to the hostname of the Consul server, +and `consul_service_name` should be set to the service name under which Garage's +RPC ports are announced. + +#### `sled_cache_capacity` + +This parameter can be used to tune the capacity of the cache used by +[sled](https://sled.rs), the database Garage uses internally to store metadata. +Tune this to fit the RAM you wish to make available to your Garage instance. +More cache means faster Garage, but the default value (128MB) should be plenty +for most use cases. + +#### `sled_flush_every_ms` + +This parameters can be used to tune the flushing interval of sled. +Increase this if sled is thrashing your SSD, at the risk of losing more data in case +of a power outage (though this should not matter much as data is replicated on other +nodes). The default value, 2000ms, should be appropriate for most use cases. + + +## The `[s3_api]` section + +#### `api_bind_addr` + +The IP and port on which to bind for accepting S3 API calls. +This endpoint does not suport TLS: a reverse proxy should be used to provide it. + +#### `s3_region` + +Garage will accept S3 API calls that are targetted to the S3 region defined here. 
+API calls targetted to other regions will fail with a AuthorizationHeaderMalformed error +message that redirects the client to the correct region. + +#### `root_domain` + +The optionnal suffix to access bucket using vhost-style in addition to path-style request. +Note path-style requests are always enabled, whether or not vhost-style is configured. +Configuring vhost-style S3 required a wildcard DNS entry, and possibly a wildcard TLS certificate, +but might be required by softwares not supporting path-style requests. + +If `root_domain` is `s3.garage.eu`, a bucket called `my-bucket` can be interacted with +using the hostname `my-bucket.s3.garage.eu`. + +## The `[s3_web]` section + +Garage allows to publish content of buckets as websites. This section configures the +behaviour of this module. + +#### `bind_addr` + +The IP and port on which to bind for accepting HTTP requests to buckets configured +for website access. +This endpoint does not suport TLS: a reverse proxy should be used to provide it. + +#### `root_domain` + +The optionnal suffix appended to bucket names for the corresponding HTTP Host. + +For instance, if `root_domain` is `web.garage.eu`, a bucket called `deuxfleurs.fr` +will be accessible either with hostname `deuxfleurs.fr.web.garage.eu` +or with hostname `deuxfleurs.fr`. + +#### `index` + +The name of the index file to return for requests ending with `/` (usually `index.html`). diff --git a/content/documentation/reference-manual/layout.md b/content/documentation/reference-manual/layout.md new file mode 100644 index 0000000..32706d0 --- /dev/null +++ b/content/documentation/reference-manual/layout.md @@ -0,0 +1,79 @@ ++++ +title = "Cluster layout management" +weight = 10 ++++ + +# Creating and updating a cluster layout + +The cluster layout in Garage is a table that assigns to each node a role in +the cluster. The role of a node in Garage can either be a storage node with +a certain capacity, or a gateway node that does not store data and is only +used as an API entry point for faster cluster access. +An introduction to building cluster layouts can be found in the [production deployment](/cookbook/real_world.md) page. + +## How cluster layouts work in Garage + +In Garage, a cluster layout is composed of the following components: + +- a table of roles assigned to nodes +- a version number + +Garage nodes will always use the cluster layout with the highest version number. + +Garage nodes also maintain and synchronize between them a set of proposed role +changes that haven't yet been applied. These changes will be applied (or +canceled) in the next version of the layout + +The following commands insert modifications to the set of proposed role changes +for the next layout version (but they do not create the new layout immediately): + +```bash +garage layout assign [...] +garage layout remove [...] +``` + +The following command can be used to inspect the layout that is currently set in the cluster +and the changes proposed for the next layout version, if any: + +```bash +garage layout show +``` + +The following commands create a new layout with the specified version number, +that either takes into account the proposed changes or cancels them: + +```bash +garage layout apply --version +garage layout revert --version +``` + +The version number of the new layout to create must be 1 + the version number +of the previous layout that existed in the cluster. The `apply` and `revert` +commands will fail otherwise. 
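As a sketch, assuming a cluster that has no layout yet (so the next version is 1) and reusing node ID prefixes and the `dc1` zone from examples elsewhere in this documentation, a typical sequence looks like this:

```bash
# Sketch only: stage roles for two nodes, review the proposal, then apply it.
# Node IDs are abbreviated prefixes; capacities are arbitrary example values.
garage layout assign -z dc1 -c 10 563e
garage layout assign -z dc1 -c 10 86f0
garage layout show                       # check the staged changes
garage layout apply --version 1          # previous version was 0, so this must be 1
```

If the review reveals a mistake, `garage layout revert --version 1` would instead cancel the staged changes.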
+
+## Warnings about Garage cluster layout management
+
+**Warning: never make several calls to `garage layout apply` or `garage layout
+revert` with the same value of the `--version` flag. Doing so can lead to the
+creation of several different layouts with the same version number, in which
+case your Garage cluster will become inconsistent until fixed.** If a call to
+`garage layout apply` or `garage layout revert` has failed and `garage layout
+show` indicates that a new layout with the given version number has not been
+set in the cluster, then it is fine to call the command again with the same
+version number.
+
+If you are using the `garage` CLI by typing individual commands in your
+shell, you shouldn't have many issues as long as you run commands one after
+the other and take care to check the output of `garage layout show`
+before applying any changes.
+
+If you are using the `garage` CLI to script layout changes, follow these recommendations:
+
+- Make all of your `garage` CLI calls to the same RPC host. Do not use the
+  `garage` CLI to connect to individual nodes to send them each a piece of the
+  layout changes you are making, as the changes propagate asynchronously
+  between nodes and might not all be taken into account at the time when the
+  new layout is applied.
+
+- **Only call `garage layout apply` once**, and call it **strictly after** all
+  of the `layout assign` and `layout remove` commands have returned.
diff --git a/content/documentation/reference-manual/s3_compatibility.md b/content/documentation/reference-manual/s3_compatibility.md
new file mode 100644
index 0000000..7160da9
--- /dev/null
+++ b/content/documentation/reference-manual/s3_compatibility.md
@@ -0,0 +1,65 @@
++++
+title = "S3 Compatibility status"
+weight = 20
++++
+
+# S3 Compatibility status
+
+## Global S3 features
+
+Implemented:
+
+- path-style URLs (`garage.tld/bucket/key`)
+- vhost-style URLs (`bucket.garage.tld/key`)
+- putting and getting objects in buckets
+- multipart uploads
+- listing objects
+- access control on a per-key-per-bucket basis
+
+Not implemented:
+
+- object-level ACL
+- [object versioning](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/166)
+- encryption
+- most `x-amz-` headers
+
+
+## Endpoint implementation
+
+All APIs that are not mentioned are not implemented and will return a 501 Not Implemented.
+
+| Endpoint                     | Status                           |
+|------------------------------|----------------------------------|
+| AbortMultipartUpload         | Implemented                      |
+| CompleteMultipartUpload      | Implemented                      |
+| CopyObject                   | Implemented                      |
+| CreateBucket                 | Implemented                      |
+| CreateMultipartUpload        | Implemented                      |
+| DeleteBucket                 | Implemented                      |
+| DeleteBucketWebsite          | Implemented                      |
+| DeleteObject                 | Implemented                      |
+| DeleteObjects                | Implemented                      |
+| GetBucketLocation            | Implemented                      |
+| GetBucketVersioning          | Stub (see below)                 |
+| GetBucketWebsite             | Implemented                      |
+| GetObject                    | Implemented                      |
+| HeadBucket                   | Implemented                      |
+| HeadObject                   | Implemented                      |
+| ListBuckets                  | Implemented                      |
+| ListObjects                  | Implemented, bugs? (see below)   |
+| ListObjectsV2                | Implemented                      |
+| ListMultipartUploads         | Implemented                      |
+| ListParts                    | Implemented                      |
+| PutObject                    | Implemented                      |
+| PutBucketWebsite             | Partially implemented (see below)|
+| UploadPart                   | Implemented                      |
+| UploadPartCopy               | Implemented                      |
+
+
+- **GetBucketVersioning:** Stub implementation (Garage does not yet support versioning, so this always returns
+"versioning not enabled").
+ +- **ListObjects:** Implemented, but there isn't a very good specification of what `encoding-type=url` covers so there might be some encoding bugs. In our implementation the url-encoded fields are in the same in ListObjects as they are in ListObjectsV2. + +- **PutBucketWebsite:** Implemented, but only stores the index document suffix and the error document path. Redirects are not supported. + diff --git a/content/documentation/reference_manual/cli.md b/content/documentation/reference_manual/cli.md deleted file mode 100644 index 3f7bd7a..0000000 --- a/content/documentation/reference_manual/cli.md +++ /dev/null @@ -1,9 +0,0 @@ -+++ -title="Doc Post" -date=2018-08-20 -+++ - -# Garage CLI - -The Garage CLI is mostly self-documented. Make use of the `help` subcommand -and the `--help` flag to discover all available options. diff --git a/content/documentation/reference_manual/configuration.md b/content/documentation/reference_manual/configuration.md deleted file mode 100644 index 9a2e314..0000000 --- a/content/documentation/reference_manual/configuration.md +++ /dev/null @@ -1,242 +0,0 @@ -+++ -title="Doc Post" -date=2018-08-20 -+++ - -# Garage configuration file format reference - -Here is an example `garage.toml` configuration file that illustrates all of the possible options: - -```toml -metadata_dir = "/var/lib/garage/meta" -data_dir = "/var/lib/garage/data" - -block_size = 1048576 - -replication_mode = "3" - -compression_level = 1 - -rpc_secret = "4425f5c26c5e11581d3223904324dcb5b5d5dfb14e5e7f35e38c595424f5f1e6" -rpc_bind_addr = "[::]:3901" -rpc_public_addr = "[fc00:1::1]:3901" - -bootstrap_peers = [ - "563e1ac825ee3323aa441e72c26d1030d6d4414aeb3dd25287c531e7fc2bc95d@[fc00:1::1]:3901", - "86f0f26ae4afbd59aaf9cfb059eefac844951efd5b8caeec0d53f4ed6c85f332[fc00:1::2]:3901", - "681456ab91350f92242e80a531a3ec9392cb7c974f72640112f90a600d7921a4@[fc00:B::1]:3901", - "212fd62eeaca72c122b45a7f4fa0f55e012aa5e24ac384a72a3016413fa724ff@[fc00:F::1]:3901", -] - -consul_host = "consul.service" -consul_service_name = "garage-daemon" - -sled_cache_capacity = 134217728 -sled_flush_every_ms = 2000 - -[s3_api] -api_bind_addr = "[::]:3900" -s3_region = "garage" -root_domain = ".s3.garage" - -[s3_web] -bind_addr = "[::]:3902" -root_domain = ".web.garage" -index = "index.html" -``` - -The following gives details about each available configuration option. - -## Available configuration options - -#### `metadata_dir` - -The directory in which Garage will store its metadata. This contains the node identifier, -the network configuration and the peer list, the list of buckets and keys as well -as the index of all objects, object version and object blocks. - -Store this folder on a fast SSD drive if possible to maximize Garage's performance. - -#### `data_dir` - -The directory in which Garage will store the data blocks of objects. -This folder can be placed on an HDD. The space available for `data_dir` -should be counted to determine a node's capacity -when [configuring it](../getting_started/05_cluster.md). - -#### `block_size` - -Garage splits stored objects in consecutive chunks of size `block_size` -(except the last one which might be smaller). The default size is 1MB and -should work in most cases. If you are interested in tuning this, feel free -to do so (and remember to report your findings to us!). If this value is -changed for a running Garage installation, only files newly uploaded will be -affected. Previously uploaded files will remain available. 
This however -means that chunks from existing files will not be deduplicated with chunks -from newly uploaded files, meaning you might use more storage space that is -optimally possible. - -#### `replication_mode` - -Garage supports the following replication modes: - -- `none` or `1`: data stored on Garage is stored on a single node. There is no redundancy, - and data will be unavailable as soon as one node fails or its network is disconnected. - Do not use this for anything else than test deployments. - -- `2`: data stored on Garage will be stored on two different nodes, if possible in different - zones. Garage tolerates one node failure before losing data. Data should be available - read-only when one node is down, but write operations will fail. - Use this only if you really have to. - -- `3`: data stored on Garage will be stored on three different nodes, if possible each in - a different zones. - Garage tolerates two node failure before losing data. Data should be available - read-only when two nodes are down, and writes should be possible if only a single node - is down. - -Note that in modes `2` and `3`, -if at least the same number of zones are available, an arbitrary number of failures in -any given zone is tolerated as copies of data will be spread over several zones. - -**Make sure `replication_mode` is the same in the configuration files of all nodes. -Never run a Garage cluster where that is not the case.** - -Changing the `replication_mode` of a cluster might work (make sure to shut down all nodes -and changing it everywhere at the time), but is not officially supported. - -### `compression_level` - -Zstd compression level to use for storing blocks. - -Values between `1` (faster compression) and `19` (smaller file) are standard compression -levels for zstd. From `20` to `22`, compression levels are referred as "ultra" and must be -used with extra care as it will use lot of memory. A value of `0` will let zstd choose a -default value (currently `3`). Finally, zstd has also compression designed to be faster -than default compression levels, they range from `-1` (smaller file) to `-99` (faster -compression). - -If you do not specify a `compression_level` entry, garage will set it to `1` for you. With -this parameters, zstd consumes low amount of cpu and should work faster than line speed in -most situations, while saving some space and intra-cluster -bandwidth. - -If you want to totally deactivate zstd in garage, you can pass the special value `'none'`. No -zstd related code will be called, your chunks will be stored on disk without any processing. - -Compression is done synchronously, setting a value too high will add latency to write queries. - -This value can be different between nodes, compression is done by the node which receive the -API call. - -#### `rpc_secret` - -Garage uses a secret key that is shared between all nodes of the cluster -in order to identify these nodes and allow them to communicate together. -This key should be specified here in the form of a 32-byte hex-encoded -random string. Such a string can be generated with a command -such as `openssl rand -hex 32`. - -#### `rpc_bind_addr` - -The address and port on which to bind for inter-cluster communcations -(reffered to as RPC for remote procedure calls). -The port specified here should be the same one that other nodes will used to contact -the node, even in the case of a NAT: the NAT should be configured to forward the external -port number to the same internal port nubmer. 
This means that if you have several nodes running -behind a NAT, they should each use a different RPC port number. - -#### `rpc_public_addr` - -The address and port that other nodes need to use to contact this node for -RPC calls. **This parameter is optional but recommended.** In case you have -a NAT that binds the RPC port to a port that is different on your public IP, -this field might help making it work. - -#### `bootstrap_peers` - -A list of peer identifiers on which to contact other Garage peers of this cluster. -These peer identifiers have the following syntax: - -``` -@: -``` - -In the case where `rpc_public_addr` is correctly specified in the -configuration file, the full identifier of a node including IP and port can -be obtained by running `garage node id` and then included directly in the -`bootstrap_peers` list of other nodes. Otherwise, only the node's public -key will be returned by `garage node id` and you will have to add the IP -yourself. - -#### `consul_host` and `consul_service_name` - -Garage supports discovering other nodes of the cluster using Consul. -This works only when nodes are announced in Consul by an orchestrator such as Nomad, -as Garage is not able to announce itself. - -The `consul_host` parameter should be set to the hostname of the Consul server, -and `consul_service_name` should be set to the service name under which Garage's -RPC ports are announced. - -#### `sled_cache_capacity` - -This parameter can be used to tune the capacity of the cache used by -[sled](https://sled.rs), the database Garage uses internally to store metadata. -Tune this to fit the RAM you wish to make available to your Garage instance. -More cache means faster Garage, but the default value (128MB) should be plenty -for most use cases. - -#### `sled_flush_every_ms` - -This parameters can be used to tune the flushing interval of sled. -Increase this if sled is thrashing your SSD, at the risk of losing more data in case -of a power outage (though this should not matter much as data is replicated on other -nodes). The default value, 2000ms, should be appropriate for most use cases. - - -## The `[s3_api]` section - -#### `api_bind_addr` - -The IP and port on which to bind for accepting S3 API calls. -This endpoint does not suport TLS: a reverse proxy should be used to provide it. - -#### `s3_region` - -Garage will accept S3 API calls that are targetted to the S3 region defined here. -API calls targetted to other regions will fail with a AuthorizationHeaderMalformed error -message that redirects the client to the correct region. - -#### `root_domain` - -The optionnal suffix to access bucket using vhost-style in addition to path-style request. -Note path-style requests are always enabled, whether or not vhost-style is configured. -Configuring vhost-style S3 required a wildcard DNS entry, and possibly a wildcard TLS certificate, -but might be required by softwares not supporting path-style requests. - -If `root_domain` is `s3.garage.eu`, a bucket called `my-bucket` can be interacted with -using the hostname `my-bucket.s3.garage.eu`. - -## The `[s3_web]` section - -Garage allows to publish content of buckets as websites. This section configures the -behaviour of this module. - -#### `bind_addr` - -The IP and port on which to bind for accepting HTTP requests to buckets configured -for website access. -This endpoint does not suport TLS: a reverse proxy should be used to provide it. - -#### `root_domain` - -The optionnal suffix appended to bucket names for the corresponding HTTP Host. 
- -For instance, if `root_domain` is `web.garage.eu`, a bucket called `deuxfleurs.fr` -will be accessible either with hostname `deuxfleurs.fr.web.garage.eu` -or with hostname `deuxfleurs.fr`. - -#### `index` - -The name of the index file to return for requests ending with `/` (usually `index.html`). diff --git a/content/documentation/reference_manual/index.md b/content/documentation/reference_manual/index.md deleted file mode 100644 index cdff814..0000000 --- a/content/documentation/reference_manual/index.md +++ /dev/null @@ -1,10 +0,0 @@ -+++ -title="Doc Post" -date=2018-08-20 -+++ - -# Reference Manual - -A reference manual contains some extensive descriptions about the features and the behaviour of the software. -Reading of this chapter is recommended once you have a good knowledge/understanding of Garage. -It will be useful if you want to tune it or to use it in some exotic conditions. diff --git a/content/documentation/reference_manual/layout.md b/content/documentation/reference_manual/layout.md deleted file mode 100644 index 3d325c7..0000000 --- a/content/documentation/reference_manual/layout.md +++ /dev/null @@ -1,79 +0,0 @@ -+++ -title="Doc Post" -date=2018-08-20 -+++ - -# Creating and updating a cluster layout - -The cluster layout in Garage is a table that assigns to each node a role in -the cluster. The role of a node in Garage can either be a storage node with -a certain capacity, or a gateway node that does not store data and is only -used as an API entry point for faster cluster access. -An introduction to building cluster layouts can be found in the [production deployment](/cookbook/real_world.md) page. - -## How cluster layouts work in Garage - -In Garage, a cluster layout is composed of the following components: - -- a table of roles assigned to nodes -- a version number - -Garage nodes will always use the cluster layout with the highest version number. - -Garage nodes also maintain and synchronize between them a set of proposed role -changes that haven't yet been applied. These changes will be applied (or -canceled) in the next version of the layout - -The following commands insert modifications to the set of proposed role changes -for the next layout version (but they do not create the new layout immediately): - -```bash -garage layout assign [...] -garage layout remove [...] -``` - -The following command can be used to inspect the layout that is currently set in the cluster -and the changes proposed for the next layout version, if any: - -```bash -garage layout show -``` - -The following commands create a new layout with the specified version number, -that either takes into account the proposed changes or cancels them: - -```bash -garage layout apply --version -garage layout revert --version -``` - -The version number of the new layout to create must be 1 + the version number -of the previous layout that existed in the cluster. The `apply` and `revert` -commands will fail otherwise. - -## Warnings about Garage cluster layout management - -**Warning: never make several calls to `garage layout apply` or `garage layout -revert` with the same value of the `--version` flag. 
Doing so can lead to the -creation of several different layouts with the same version number, in which -case your Garage cluster will become inconsistent until fixed.** If a call to -`garage layout apply` or `garage layout revert` has failed and `garage layout -show` indicates that a new layout with the given version number has not been -set in the cluster, then it is fine to call the command again with the same -version number. - -If you are using the `garage` CLI by typing individual commands in your -shell, you shouldn't have much issues as long as you run commands one after -the other and take care of checking the output of `garage layout show` -before applying any changes. - -If you are using the `garage` CLI to script layout changes, follow the following recommendations: - -- Make all of your `garage` CLI calls to the same RPC host. Do not use the - `garage` CLI to connect to individual nodes to send them each a piece of the - layout changes you are making, as the changes propagate asynchronously - between nodes and might not all be taken into account at the time when the - new layout is applied. - -- **Only call `garage layout apply` once**, and call it **strictly after** all - of the `layout assign` and `layout remove` commands have returned. diff --git a/content/documentation/reference_manual/s3_compatibility.md b/content/documentation/reference_manual/s3_compatibility.md deleted file mode 100644 index 98b6170..0000000 --- a/content/documentation/reference_manual/s3_compatibility.md +++ /dev/null @@ -1,65 +0,0 @@ -+++ -title="Doc Post" -date=2018-08-20 -+++ - -# S3 Compatibility status - -## Global S3 features - -Implemented: - -- path-style URLs (`garage.tld/bucket/key`) -- vhost-style URLs (`bucket.garage.tld/key`) -- putting and getting objects in buckets -- multipart uploads -- listing objects -- access control on a per-key-per-bucket basis - -Not implemented: - -- object-level ACL -- [object versioning](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/166) -- encryption -- most `x-amz-` headers - - -## Endpoint implementation - -All APIs that are not mentionned are not implemented and will return a 501 Not Implemented. - -| Endpoint | Status | -|------------------------------|----------------------------------| -| AbortMultipartUpload | Implemented | -| CompleteMultipartUpload | Implemented | -| CopyObject | Implemented | -| CreateBucket | Implemented | -| CreateMultipartUpload | Implemented | -| DeleteBucket | Implemented | -| DeleteBucketWebsite | Implemented | -| DeleteObject | Implemented | -| DeleteObjects | Implemented | -| GetBucketLocation | Implemented | -| GetBucketVersioning | Stub (see below) | -| GetBucketWebsite | Implemented | -| GetObject | Implemented | -| HeadBucket | Implemented | -| HeadObject | Implemented | -| ListBuckets | Implemented | -| ListObjects | Implemented, bugs? (see below) | -| ListObjectsV2 | Implemented | -| ListMultipartUpload | Implemented | -| ListParts | Implemented | -| PutObject | Implemented | -| PutBucketWebsite | Partially implemented (see below)| -| UploadPart | Implemented | -| UploadPartCopy | Implemented | - - -- **GetBucketVersioning:** Stub implementation (Garage does not yet support versionning so this always returns -"versionning not enabled"). - -- **ListObjects:** Implemented, but there isn't a very good specification of what `encoding-type=url` covers so there might be some encoding bugs. In our implementation the url-encoded fields are in the same in ListObjects as they are in ListObjectsV2. 
- -- **PutBucketWebsite:** Implemented, but only stores the index document suffix and the error document path. Redirects are not supported. - diff --git a/content/documentation/working-documents/_index.md b/content/documentation/working-documents/_index.md new file mode 100644 index 0000000..8b65867 --- /dev/null +++ b/content/documentation/working-documents/_index.md @@ -0,0 +1,6 @@ ++++ +title = "Working Documents" +weight = 7 +sort_by = "weight" +redirect_to = "documentation/working-documents/compatibility-target/" ++++ \ No newline at end of file diff --git a/content/documentation/working-documents/compatibility_target.md b/content/documentation/working-documents/compatibility_target.md new file mode 100644 index 0000000..3f1b357 --- /dev/null +++ b/content/documentation/working-documents/compatibility_target.md @@ -0,0 +1,110 @@ ++++ +title = "S3 compatibility target" +weight = 5 ++++ + +# S3 compatibility target + +If there is a specific S3 functionnality you have a need for, feel free to open +a PR to put the corresponding endpoints higher in the list. Please explain +your motivations for doing so in the PR message. + +| Priority | Endpoints | +| -------------------------- | --------- | +| **S-tier** (high priority) | | +| | HeadBucket | +| | GetBucketLocation | +| | CreateBucket | +| | DeleteBucket | +| | ListBuckets | +| | ListObjects | +| | ListObjectsV2 | +| | HeadObject | +| | GetObject | +| | PutObject | +| | CopyObject | +| | DeleteObject | +| | DeleteObjects | +| | CreateMultipartUpload | +| | CompleteMultipartUpload | +| | AbortMultipartUpload | +| | UploadPart | +| | [*ListMultipartUploads*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/103) | +| | [*ListParts*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/103) | +| **A-tier** (will implement) | | +| | [*GetBucketCors*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/138) | +| | [*PutBucketCors*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/138) | +| | [*DeleteBucketCors*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/138) | +| | UploadPartCopy | +| | GetBucketWebsite | +| | PutBucketWebsite | +| | DeleteBucketWebsite | +| ~~~~~~~~~~~~~~~~~~~~~~~~~~ | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | +| **B-tier** | | +| | GetBucketAcl | +| | PutBucketAcl | +| | GetObjectLockConfiguration | +| | PutObjectLockConfiguration | +| | GetObjectRetention | +| | PutObjectRetention | +| | GetObjectLegalHold | +| | PutObjectLegalHold | +| **C-tier** | | +| | GetBucketVersioning | +| | PutBucketVersioning | +| | ListObjectVersions | +| | GetObjectAcl | +| | PutObjectAcl | +| | GetBucketLifecycleConfiguration | +| | PutBucketLifecycleConfiguration | +| | DeleteBucketLifecycle | +| **garbage-tier** | | +| | DeleteBucketEncryption | +| | DeleteBucketAnalyticsConfiguration | +| | DeleteBucketIntelligentTieringConfiguration | +| | DeleteBucketInventoryConfiguration | +| | DeleteBucketMetricsConfiguration | +| | DeleteBucketOwnershipControls | +| | DeleteBucketPolicy | +| | DeleteBucketReplication | +| | DeleteBucketTagging | +| | DeleteObjectTagging | +| | DeletePublicAccessBlock | +| | GetBucketAccelerateConfiguration | +| | GetBucketAnalyticsConfiguration | +| | GetBucketEncryption | +| | GetBucketIntelligentTieringConfiguration | +| | GetBucketInventoryConfiguration | +| | GetBucketLogging | +| | GetBucketMetricsConfiguration | +| | GetBucketNotificationConfiguration | +| | GetBucketOwnershipControls | +| | GetBucketPolicy | +| | GetBucketPolicyStatus | +| | GetBucketReplication | +| | GetBucketRequestPayment | +| | 
GetBucketTagging | +| | GetObjectTagging | +| | GetObjectTorrent | +| | GetPublicAccessBlock | +| | ListBucketAnalyticsConfigurations | +| | ListBucketIntelligentTieringConfigurations | +| | ListBucketInventoryConfigurations | +| | ListBucketMetricsConfigurations | +| | PutBucketAccelerateConfiguration | +| | PutBucketAnalyticsConfiguration | +| | PutBucketEncryption | +| | PutBucketIntelligentTieringConfiguration | +| | PutBucketInventoryConfiguration | +| | PutBucketLogging | +| | PutBucketMetricsConfiguration | +| | PutBucketNotificationConfiguration | +| | PutBucketOwnershipControls | +| | PutBucketPolicy | +| | PutBucketReplication | +| | PutBucketRequestPayment | +| | PutBucketTagging | +| | PutObjectTagging | +| | PutPublicAccessBlock | +| | RestoreObject | +| | SelectObjectContent | diff --git a/content/documentation/working-documents/design_draft.md b/content/documentation/working-documents/design_draft.md new file mode 100644 index 0000000..90c2d1c --- /dev/null +++ b/content/documentation/working-documents/design_draft.md @@ -0,0 +1,167 @@ ++++ +title = "Design draft" +weight = 25 ++++ + +# Design draft + +**WARNING: this documentation is a design draft which was written before Garage's actual implementation. +The general principle are similar, but details have not been updated.** + + +#### Modules + +- `membership/`: configuration, membership management (gossip of node's presence and status), ring generation --> what about Serf (used by Consul/Nomad) : https://www.serf.io/? Seems a huge library with many features so maybe overkill/hard to integrate +- `metadata/`: metadata management +- `blocks/`: block management, writing, GC and rebalancing +- `internal/`: server to server communication (HTTP server and client that reuses connections, TLS if we want, etc) +- `api/`: S3 API +- `web/`: web management interface + +#### Metadata tables + +**Objects:** + +- *Hash key:* Bucket name (string) +- *Sort key:* Object key (string) +- *Sort key:* Version timestamp (int) +- *Sort key:* Version UUID (string) +- Complete: bool +- Inline: bool, true for objects < threshold (say 1024) +- Object size (int) +- Mime type (string) +- Data for inlined objects (blob) +- Hash of first block otherwise (string) + +*Having only a hash key on the bucket name will lead to storing all file entries of this table for a specific bucket on a single node. At the same time, it is the only way I see to rapidly being able to list all bucket entries...* + +**Blocks:** + +- *Hash key:* Version UUID (string) +- *Sort key:* Offset of block in total file (int) +- Hash of data block (string) + +A version is defined by the existence of at least one entry in the blocks table for a certain version UUID. +We must keep the following invariant: if a version exists in the blocks table, it has to be referenced in the objects table. +We explicitly manage concurrent versions of an object: the version timestamp and version UUID columns are index columns, thus we may have several concurrent versions of an object. +Important: before deleting an older version from the objects table, we must make sure that we did a successfull delete of the blocks of that version from the blocks table. + +Thus, the workflow for reading an object is as follows: + +1. Check permissions (LDAP) +2. Read entry in object table. If data is inline, we have its data, stop here. + -> if several versions, take newest one and launch deletion of old ones in background +3. Read first block from cluster. If size <= 1 block, stop here. +4. 
Simultaneously with previous step, if size > 1 block: query the Blocks table for the IDs of the next blocks
+5. Read subsequent blocks from cluster
+
+Workflow for PUT:
+
+1. Check write permission (LDAP)
+2. Select a new version UUID
+3. Write a preliminary entry for the new version in the objects table with complete = false
+4. Send blocks to cluster and write entries in the blocks table
+5. Update the version with complete = true and all of the accurate information (size, etc)
+6. Return success to the user
+7. Launch a background job to check and delete older versions
+
+Workflow for DELETE:
+
+1. Check write permission (LDAP)
+2. Get current version (or versions) in object table
+3. Do the deletion of those versions NOT IN A BACKGROUND JOB THIS TIME
+4. Return success to the user if we were able to delete blocks from the blocks table and entries from the object table
+
+To delete a version:
+
+1. List the blocks from Cassandra
+2. For each block, delete it from cluster. Don't care if some deletions fail, we can do GC.
+3. Delete all of the blocks from the blocks table
+4. Finally, delete the version from the objects table
+
+Known issue: if someone is reading from a version that we want to delete and the object is big, the read might be interrupted. I think it is OK to leave it like this: we just cut the connection if data disappears during a read.
+
+("Let P be a problem; 'we don't care' is a solution to this problem.")
+
+#### Block storage on disk
+
+**Blocks themselves:**
+
+- file path = /blobs/(first 3 hex digits of hash)/(rest of hash)
+
+**Reverse index for GC & other block-level metadata:**
+
+- file path = /meta/(first 3 hex digits of hash)/(rest of hash)
+- map block hash -> set of version UUIDs where it is referenced
+
+Useful metadata:
+
+- list of versions that reference this block in the Cassandra table, so that we can do GC by checking in Cassandra that the lines still exist
+- list of other nodes that we know have acknowledged a write of this block, useful in the rebalancing algorithm
+
+Write strategy: have a single thread that does all write IO so that it is serialized (or have several threads that manage independent parts of the hash space). When writing a blob, write it to a temporary file, close, then rename so that a concurrent read gets a consistent result (either not found or found with whole content).
+
+Read strategy: the only read operation is get(hash) that returns either the data or not found (can do a corruption check as well and return corrupted state if it is the case). Can be done concurrently with writes.
+
+**Internal API:**
+
+- get(block hash) -> ok+data/not found/corrupted
+- put(block hash & data, version uuid + offset) -> ok/error
+- put with no data (block hash, version uuid + offset) -> ok/not found, please send data/error
+- delete(block hash, version uuid + offset) -> ok/error
+
+GC: when last ref is deleted, delete block.
+Long GC procedure: check in Cassandra that version UUIDs still exist and reference this block.
+
+Rebalancing: takes as argument the list of newly added nodes.
+
+- List all blocks that we have. For each block:
+- If it hits a newly introduced node, send it to them.
+  Use put with no data first to check whether it actually needs to be sent or not.
+  Use a random listing order to avoid race conditions (they do no harm but we might have two nodes sending the same thing at the same time, thus wasting time).
+- If it doesn't hit us anymore, delete it and its reference list.
+
+Only one rebalancing can be running at a time.
It can be restarted at the beginning with new parameters. + +#### Membership management + +Two sets of nodes: + +- set of nodes from which a ping was recently received, with status: number of stored blocks, request counters, error counters, GC%, rebalancing% + (eviction from this set after say 30 seconds without ping) +- set of nodes that are part of the system, explicitly modified by the operator using the web UI (persisted to disk), + is a CRDT using a version number for the value of the whole set + +Thus, three states for nodes: + +- healthy: in both sets +- missing: not pingable but part of desired cluster +- unused/draining: currently present but not part of the desired cluster, empty = if contains nothing, draining = if still contains some blocks + +Membership messages between nodes: + +- ping with current state + hash of current membership info -> reply with same info +- send&get back membership info (the ids of nodes that are in the two sets): used when no local membership change in a long time and membership info hash discrepancy detected with first message (passive membership fixing with full CRDT gossip) +- inform of newly pingable node(s) -> no result, when receive new info repeat to all (reliable broadcast) +- inform of operator membership change -> no result, when receive new info repeat to all (reliable broadcast) + +Ring: generated from the desired set of nodes, however when doing read/writes on the ring, skip nodes that are known to be not pingable. +The tokens are generated in a deterministic fashion from node IDs (hash of node id + token number from 1 to K). +Number K of tokens per node: decided by the operator & stored in the operator's list of nodes CRDT. Default value proposal: with node status information also broadcast disk total size and free space, and propose a default number of tokens equal to 80%Free space / 10Gb. (this is all user interface) + + +#### Constants + +- Block size: around 1MB ? --> Exoscale use 16MB chunks +- Number of tokens in the hash ring: one every 10Gb of allocated storage +- Threshold for storing data directly in Cassandra objects table: 1kb bytes (maybe up to 4kb?) +- Ping timeout (time after which a node is registered as unresponsive/missing): 30 seconds +- Ping interval: 10 seconds +- ?? + +#### Links + +- CDC: +- Erasure coding: +- [Openstack Storage Concepts](https://docs.openstack.org/arch-design/design-storage/design-storage-concepts.html) +- [RADOS](https://ceph.com/wp-content/uploads/2016/08/weil-rados-pdsw07.pdf) diff --git a/content/documentation/working-documents/load_balancing.md b/content/documentation/working-documents/load_balancing.md new file mode 100644 index 0000000..58a1bec --- /dev/null +++ b/content/documentation/working-documents/load_balancing.md @@ -0,0 +1,204 @@ ++++ +title = "Load balancing data" +weight = 10 ++++ + +# Load Balancing Data (planned for version 0.2) + +**This is being yet improved in release 0.5. The working document has not been updated yet, it still only applies to Garage 0.2 through 0.4.** + +I have conducted a quick study of different methods to load-balance data over different Garage nodes using consistent hashing. 
+
+## Requirements
+
+- *good balancing*: two nodes that have the same announced capacity should receive close to the same number of items
+
+- *multi-datacenter*: the replicas of a partition should be distributed over as many datacenters as possible
+
+- *minimal disruption*: when adding or removing a node, as few partitions as possible should have to move around
+
+- *order-agnostic*: the same set of nodes (each associated with a datacenter name
+  and a capacity) should always return the same distribution of partition
+  replicas, independently of the order in which nodes were added/removed (this
+  is to keep the implementation simple)
+
+## Methods
+
+### Naive multi-DC ring walking strategy
+
+This strategy can be used with any ring-like algorithm to make it aware of the *multi-datacenter* requirement:
+
+In this method, the ring is a list of positions, each associated with a single node in the cluster.
+Partitions contain all the keys between two consecutive items of the ring.
+To find the nodes that store replicas of a given partition:
+
+- select the node for the position of the partition's lower bound
+- go clockwise on the ring, skipping nodes that:
+  - we have already selected
+  - are in a datacenter of a node we have selected, except if we already have nodes from all possible datacenters
+
+In this way the selected nodes will always be distributed over
+`min(n_datacenters, n_replicas)` different datacenters, which is the best we
+can do.
+
+This method was implemented in the first version of Garage, with the basic
+ring construction from Dynamo that consists in associating `n_token` random positions to
+each node (I know it's not optimal; the Dynamo paper already studies this).
+
+### Better rings
+
+The ring construction that selects `n_token` random positions for each node gives a ring of positions that
+is not well-balanced: the space between the tokens varies a lot, and some partitions are thus bigger than others.
+This problem was demonstrated in the original Dynamo paper.
+
+To solve this, we want to apply a better second method for partitioning our dataset:
+
+1. fix an initially large number of partitions (say 1024) with evenly-spaced delimiters,
+
+2. attribute each partition randomly to a node, with a probability
+   proportional to its capacity (which `n_tokens` represented in the first
+   method)
+
+For now we continue using the multi-DC ring walking described above.
+
+I have studied two ways to do the attribution of partitions to nodes, in a way that is deterministic:
+
+- Min-hash: for each partition, select the node that minimizes `hash(node, partition_number)`
+- MagLev: see [here](https://blog.acolyer.org/2016/03/21/maglev-a-fast-and-reliable-software-network-load-balancer/)
+
+MagLev provided significantly better balancing, as it guarantees that the exact
+same number of partitions is attributed to all nodes that have the same
+capacity (and that this number is proportional to the node's capacity, except
+for large values); however, in both cases:
+
+- the distribution is still bad, because we use the naive multi-DC ring walking
+  that behaves strangely due to interactions between consecutive positions on
+  the ring
+
+- the disruption in case of adding/removing a node is not as low as it can be,
+  as we show with the following method.
+
+A quick description of MagLev (backend = node, lookup table = ring):
+
+> The basic idea of Maglev hashing is to assign a preference list of all the
+> lookup table positions to each backend.
Then all the backends take turns +> filling their most-preferred table positions that are still empty, until the +> lookup table is completely filled in. Hence, Maglev hashing gives an almost +> equal share of the lookup table to each of the backends. Heterogeneous +> backend weights can be achieved by altering the relative frequency of the +> backends’ turns… + +Here are some stats (run `scripts/simulate_ring.py` to reproduce): + +``` +##### Custom-ring (min-hash) ##### + +#partitions per node (capacity in parenthesis): +- datura (8) : 227 +- digitale (8) : 351 +- drosera (8) : 259 +- geant (16) : 476 +- gipsie (16) : 410 +- io (16) : 495 +- isou (8) : 231 +- mini (4) : 149 +- mixi (4) : 188 +- modi (4) : 127 +- moxi (4) : 159 + +Variance of load distribution for load normalized to intra-class mean +(a class being the set of nodes with the same announced capacity): 2.18% <-- REALLY BAD + +Disruption when removing nodes (partitions moved on 0/1/2/3 nodes): +removing atuin digitale : 63.09% 30.18% 6.64% 0.10% +removing atuin drosera : 72.36% 23.44% 4.10% 0.10% +removing atuin datura : 73.24% 21.48% 5.18% 0.10% +removing jupiter io : 48.34% 38.48% 12.30% 0.88% +removing jupiter isou : 74.12% 19.73% 6.05% 0.10% +removing grog mini : 84.47% 12.40% 2.93% 0.20% +removing grog mixi : 80.76% 16.60% 2.64% 0.00% +removing grog moxi : 83.59% 14.06% 2.34% 0.00% +removing grog modi : 87.01% 11.43% 1.46% 0.10% +removing grisou geant : 48.24% 37.40% 13.67% 0.68% +removing grisou gipsie : 53.03% 33.59% 13.09% 0.29% +on average: 69.84% 23.53% 6.40% 0.23% <-- COULD BE BETTER + +-------- + +##### MagLev ##### + +#partitions per node: +- datura (8) : 273 +- digitale (8) : 256 +- drosera (8) : 267 +- geant (16) : 452 +- gipsie (16) : 427 +- io (16) : 483 +- isou (8) : 272 +- mini (4) : 184 +- mixi (4) : 160 +- modi (4) : 144 +- moxi (4) : 154 + +Variance of load distribution: 0.37% <-- Already much better, but not optimal + +Disruption when removing nodes (partitions moved on 0/1/2/3 nodes): +removing atuin digitale : 62.60% 29.20% 7.91% 0.29% +removing atuin drosera : 65.92% 26.56% 7.23% 0.29% +removing atuin datura : 63.96% 27.83% 7.71% 0.49% +removing jupiter io : 44.63% 40.33% 14.06% 0.98% +removing jupiter isou : 63.38% 27.25% 8.98% 0.39% +removing grog mini : 72.46% 21.00% 6.35% 0.20% +removing grog mixi : 72.95% 22.46% 4.39% 0.20% +removing grog moxi : 74.22% 20.61% 4.98% 0.20% +removing grog modi : 75.98% 18.36% 5.27% 0.39% +removing grisou geant : 46.97% 36.62% 15.04% 1.37% +removing grisou gipsie : 49.22% 36.52% 12.79% 1.46% +on average: 62.94% 27.89% 8.61% 0.57% <-- WORSE THAN PREVIOUSLY +``` + +### The magical solution: multi-DC aware MagLev + +Suppose we want to select three replicas for each partition (this is what we do in our simulation and in most Garage deployments). +We apply MagLev three times consecutively, one for each replica selection. +The first time is pretty much the same as normal MagLev, but for the following times, when a node runs through its preference +list to select a partition to replicate, we skip partitions for which adding this node would not bring datacenter-diversity. +More precisely, we skip a partition in the preference list if: + +- the node already replicates the partition (from one of the previous rounds of MagLev) +- the node is in a datacenter where a node already replicates the partition and there are other datacenters available + +Refer to `method4` in the simulation script for a formal definition. 
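+
+The results below (and the statistics above) can be regenerated from the simulation script
+mentioned earlier; a minimal invocation, assuming a checkout of the Garage repository where
+`scripts/simulate_ring.py` lives:
+
+```bash
+# Runs the ring/partition simulation used for the numbers quoted in this document
+python3 scripts/simulate_ring.py
+```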
+ +``` +##### Multi-DC aware MagLev ##### + +#partitions per node: +- datura (8) : 268 <-- NODES WITH THE SAME CAPACITY +- digitale (8) : 267 HAVE THE SAME NUM OF PARTITIONS +- drosera (8) : 267 (+- 1) +- geant (16) : 470 +- gipsie (16) : 472 +- io (16) : 516 +- isou (8) : 268 +- mini (4) : 136 +- mixi (4) : 136 +- modi (4) : 136 +- moxi (4) : 136 + +Variance of load distribution: 0.06% <-- CAN'T DO BETTER THAN THIS + +Disruption when removing nodes (partitions moved on 0/1/2/3 nodes): +removing atuin digitale : 65.72% 33.01% 1.27% 0.00% +removing atuin drosera : 64.65% 33.89% 1.37% 0.10% +removing atuin datura : 66.11% 32.62% 1.27% 0.00% +removing jupiter io : 42.97% 53.42% 3.61% 0.00% +removing jupiter isou : 66.11% 32.32% 1.56% 0.00% +removing grog mini : 80.47% 18.85% 0.68% 0.00% +removing grog mixi : 80.27% 18.85% 0.88% 0.00% +removing grog moxi : 80.18% 19.04% 0.78% 0.00% +removing grog modi : 79.69% 19.92% 0.39% 0.00% +removing grisou geant : 44.63% 52.15% 3.22% 0.00% +removing grisou gipsie : 43.55% 52.54% 3.91% 0.00% +on average: 64.94% 33.33% 1.72% 0.01% <-- VERY GOOD (VERY LOW VALUES FOR 2 AND 3 NODES) +``` diff --git a/content/documentation/working-documents/migration_04.md b/content/documentation/working-documents/migration_04.md new file mode 100644 index 0000000..55dfdfe --- /dev/null +++ b/content/documentation/working-documents/migration_04.md @@ -0,0 +1,110 @@ ++++ +title = "Migrating from 0.3 to 0.4" +weight = 20 ++++ + +# Migrating from 0.3 to 0.4 + +**Migrating from 0.3 to 0.4 is unsupported. This document is only intended to +document the process internally for the Deuxfleurs cluster where we have to do +it. Do not try it yourself, you will lose your data and we will not help you.** + +**Migrating from 0.2 to 0.4 will break everything for sure. Never try it.** + +The internal data format of Garage hasn't changed much between 0.3 and 0.4. +The Sled database is still the same, and the data directory as well. + +The following has changed, all in the meta directory: + +- `node_id` in 0.3 contains the identifier of the current node. In 0.4, this + file does nothing and should be deleted. It is replaced by `node_key` (the + secret key) and `node_key.pub` (the associated public key). A node's + identifier on the ring is its public key. + +- `peer_info` in 0.3 contains the list of peers saved automatically by Garage. + The format has changed and it is now stored in `peer_list` (`peer_info` + should be deleted). + +When migrating, all node identifiers will change. This also means that the +affectation of data partitions on the ring will change, and lots of data will +have to be rebalanced. + +- If your cluster has only 3 nodes, all nodes store everything, therefore nothing has to be rebalanced. + +- If your cluster has only 4 nodes, for any partition there will always be at + least 2 nodes that stored data before that still store it after. Therefore + the migration should in theory be transparent and Garage should continue to + work during the rebalance. + +- If your cluster has 5 or more nodes, data will disappear during the + migration. Do not migrate (fortunately we don't have this scenario at + Deuxfleurs), or if you do, make Garage unavailable until things stabilize + (disable web and api access). + + +The migration steps are as follows: + +1. Prepare a new configuration file for 0.4. For each node, point to the same + meta and data directories as Garage 0.3. 
Basically, the things that change + are the following: + + - No more `rpc_tls` section + - You have to generate a shared `rpc_secret` and put it in all config files + - `bootstrap_peers` has a different syntax as it has to contain node keys. + Leave it empty and use `garage node-id` and `garage node connect` instead (new features of 0.4) + - put the publicly accessible RPC address of your node in `rpc_public_addr` if possible (its optional but recommended) + - If you are using Consul, change the `consul_service_name` to NOT be the name advertised by Nomad. + Now Garage is responsible for advertising its own service itself. + +2. Disable api and web access for some time (Garage does not support disabling + these endpoints but you can change the port number or stop your reverse + proxy for instance). + +3. Do `garage repair -a --yes tables` and `garage repair -a --yes blocks`, + check the logs and check that all data seems to be synced correctly between + nodes. + +4. Save somewhere the output of `garage status`. We will need this to remember + how to reconfigure nodes in 0.4. + +5. Turn off Garage 0.3 + +6. Backup metadata folders if you can (i.e. if you have space to do it + somewhere). Backuping data folders could also be usefull but that's much + harder to do. If your filesystem supports snapshots, this could be a good + time to use them. + +7. Turn on Garage 0.4 + +8. At this point, running `garage status` should indicate that all nodes of the + previous cluster are "unavailable". The nodes have new identifiers that + should appear in healthy nodes once they can talk to one another (use + `garage node connect` if necessary`). They should have NO ROLE ASSIGNED at + the moment. + +9. Prepare a script with several `garage node configure` commands that replace + each of the v0.3 node ID with the corresponding v0.4 node ID, with the same + zone/tag/capacity. For example if your node `drosera` had identifier `c24e` + before and now has identifier `789a`, and it was configured with capacity + `2` in zone `dc1`, put the following command in your script: + +```bash +garage node configure 789a -z dc1 -c 2 -t drosera --replace c24e +``` + +10. Run your reconfiguration script. Check that the new output of `garage + status` contains the correct node IDs with the correct values for capacity + and zone. Old nodes should no longer be mentioned. + +11. If your cluster has 4 nodes or less, and you are feeling adventurous, you + can reenable Web and API access now. Things will probably work. + +12. Garage might already be resyncing stuff. Issue a `garage repair -a --yes + tables` and `garage repair -a --yes blocks` to force it to do so. + +13. Wait for resyncing activity to stop in the logs. Do steps 12 and 13 two or + three times, until you see that when you issue the repair commands, nothing + gets resynced any longer. + +14. Your upgraded cluster should be in a working state. Re-enable API and Web + access and check that everything went well. diff --git a/content/documentation/working-documents/migration_06.md b/content/documentation/working-documents/migration_06.md new file mode 100644 index 0000000..692e309 --- /dev/null +++ b/content/documentation/working-documents/migration_06.md @@ -0,0 +1,51 @@ ++++ +title = "Migrating from 0.5 to 0.6" +weight = 15 ++++ + +# Migrating from 0.5 to 0.6 + +**This guide explains how to migrate to 0.6 if you have an existing 0.5 cluster. 
diff --git a/content/documentation/working-documents/migration_06.md b/content/documentation/working-documents/migration_06.md
new file mode 100644
index 0000000..692e309
--- /dev/null
+++ b/content/documentation/working-documents/migration_06.md
@@ -0,0 +1,51 @@
++++
+title = "Migrating from 0.5 to 0.6"
+weight = 15
++++
+
+# Migrating from 0.5 to 0.6
+
+**This guide explains how to migrate to 0.6 if you have an existing 0.5 cluster.
+We don't recommend trying to migrate directly from 0.4 or older to 0.6.**
+
+**We make no guarantee that this migration will work perfectly:
+back up all your data before attempting it!**
+
+Garage v0.6 (not yet released) introduces a new data model for buckets
+that allows buckets to have several names (aliases).
+Buckets can also have "private" aliases (called local aliases),
+which are only visible when using a certain access key.
+
+This new data model means that the metadata tables have changed quite a bit in structure,
+and a manual migration step is required.
+
+The migration steps are as follows:
+
+1. Disable api and web access for some time (Garage does not support disabling
+   these endpoints but you can change the port number or stop your reverse
+   proxy for instance).
+
+2. Do `garage repair -a --yes tables` and `garage repair -a --yes blocks`,
+   check the logs and check that all data seems to be synced correctly between
+   nodes.
+
+3. Turn off Garage 0.5
+
+4. **Backup your metadata folders!!**
+
+5. Turn on Garage 0.6
+
+6. At this point, `garage bucket list` should indicate that no buckets are present
+   in the cluster. `garage key list` should show all of the previously existing
+   access keys; however, these keys should not have any permissions to access buckets.
+
+7. Run `garage migrate buckets050`: this will populate the new bucket table with
+   the buckets that existed previously. This will also give API keys the same
+   access to buckets as they had before.
+
+8. Check that all your buckets indeed appear in `garage bucket list`, and that
+   keys have the proper access flags set. If that is not the case, revert
+   everything and file a bug!
+
+9. Your upgraded cluster should be in a working state. Re-enable API and Web
+   access and check that everything went well.
diff --git a/content/documentation/working_documents/compatibility_target.md b/content/documentation/working_documents/compatibility_target.md
deleted file mode 100644
index 51c6e2e..0000000
--- a/content/documentation/working_documents/compatibility_target.md
+++ /dev/null
@@ -1,110 +0,0 @@
-+++
-title="Doc Post"
-date=2018-08-20
-+++
-
-# S3 compatibility target
-
-If there is a specific S3 functionnality you have a need for, feel free to open
-a PR to put the corresponding endpoints higher in the list. Please explain
-your motivations for doing so in the PR message.
- -| Priority | Endpoints | -| -------------------------- | --------- | -| **S-tier** (high priority) | | -| | HeadBucket | -| | GetBucketLocation | -| | CreateBucket | -| | DeleteBucket | -| | ListBuckets | -| | ListObjects | -| | ListObjectsV2 | -| | HeadObject | -| | GetObject | -| | PutObject | -| | CopyObject | -| | DeleteObject | -| | DeleteObjects | -| | CreateMultipartUpload | -| | CompleteMultipartUpload | -| | AbortMultipartUpload | -| | UploadPart | -| | [*ListMultipartUploads*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/103) | -| | [*ListParts*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/103) | -| **A-tier** (will implement) | | -| | [*GetBucketCors*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/138) | -| | [*PutBucketCors*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/138) | -| | [*DeleteBucketCors*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/138) | -| | UploadPartCopy | -| | GetBucketWebsite | -| | PutBucketWebsite | -| | DeleteBucketWebsite | -| ~~~~~~~~~~~~~~~~~~~~~~~~~~ | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | -| **B-tier** | | -| | GetBucketAcl | -| | PutBucketAcl | -| | GetObjectLockConfiguration | -| | PutObjectLockConfiguration | -| | GetObjectRetention | -| | PutObjectRetention | -| | GetObjectLegalHold | -| | PutObjectLegalHold | -| **C-tier** | | -| | GetBucketVersioning | -| | PutBucketVersioning | -| | ListObjectVersions | -| | GetObjectAcl | -| | PutObjectAcl | -| | GetBucketLifecycleConfiguration | -| | PutBucketLifecycleConfiguration | -| | DeleteBucketLifecycle | -| **garbage-tier** | | -| | DeleteBucketEncryption | -| | DeleteBucketAnalyticsConfiguration | -| | DeleteBucketIntelligentTieringConfiguration | -| | DeleteBucketInventoryConfiguration | -| | DeleteBucketMetricsConfiguration | -| | DeleteBucketOwnershipControls | -| | DeleteBucketPolicy | -| | DeleteBucketReplication | -| | DeleteBucketTagging | -| | DeleteObjectTagging | -| | DeletePublicAccessBlock | -| | GetBucketAccelerateConfiguration | -| | GetBucketAnalyticsConfiguration | -| | GetBucketEncryption | -| | GetBucketIntelligentTieringConfiguration | -| | GetBucketInventoryConfiguration | -| | GetBucketLogging | -| | GetBucketMetricsConfiguration | -| | GetBucketNotificationConfiguration | -| | GetBucketOwnershipControls | -| | GetBucketPolicy | -| | GetBucketPolicyStatus | -| | GetBucketReplication | -| | GetBucketRequestPayment | -| | GetBucketTagging | -| | GetObjectTagging | -| | GetObjectTorrent | -| | GetPublicAccessBlock | -| | ListBucketAnalyticsConfigurations | -| | ListBucketIntelligentTieringConfigurations | -| | ListBucketInventoryConfigurations | -| | ListBucketMetricsConfigurations | -| | PutBucketAccelerateConfiguration | -| | PutBucketAnalyticsConfiguration | -| | PutBucketEncryption | -| | PutBucketIntelligentTieringConfiguration | -| | PutBucketInventoryConfiguration | -| | PutBucketLogging | -| | PutBucketMetricsConfiguration | -| | PutBucketNotificationConfiguration | -| | PutBucketOwnershipControls | -| | PutBucketPolicy | -| | PutBucketReplication | -| | PutBucketRequestPayment | -| | PutBucketTagging | -| | PutObjectTagging | -| | PutPublicAccessBlock | -| | RestoreObject | -| | SelectObjectContent | diff --git a/content/documentation/working_documents/design_draft.md b/content/documentation/working_documents/design_draft.md deleted file mode 100644 index 2ac3e0d..0000000 --- a/content/documentation/working_documents/design_draft.md +++ /dev/null @@ -1,167 +0,0 @@ -+++ -title="Doc Post" -date=2018-08-20 -+++ - -# Design draft - 
-**WARNING: this documentation is a design draft which was written before Garage's actual implementation. -The general principle are similar, but details have not been updated.** - - -#### Modules - -- `membership/`: configuration, membership management (gossip of node's presence and status), ring generation --> what about Serf (used by Consul/Nomad) : https://www.serf.io/? Seems a huge library with many features so maybe overkill/hard to integrate -- `metadata/`: metadata management -- `blocks/`: block management, writing, GC and rebalancing -- `internal/`: server to server communication (HTTP server and client that reuses connections, TLS if we want, etc) -- `api/`: S3 API -- `web/`: web management interface - -#### Metadata tables - -**Objects:** - -- *Hash key:* Bucket name (string) -- *Sort key:* Object key (string) -- *Sort key:* Version timestamp (int) -- *Sort key:* Version UUID (string) -- Complete: bool -- Inline: bool, true for objects < threshold (say 1024) -- Object size (int) -- Mime type (string) -- Data for inlined objects (blob) -- Hash of first block otherwise (string) - -*Having only a hash key on the bucket name will lead to storing all file entries of this table for a specific bucket on a single node. At the same time, it is the only way I see to rapidly being able to list all bucket entries...* - -**Blocks:** - -- *Hash key:* Version UUID (string) -- *Sort key:* Offset of block in total file (int) -- Hash of data block (string) - -A version is defined by the existence of at least one entry in the blocks table for a certain version UUID. -We must keep the following invariant: if a version exists in the blocks table, it has to be referenced in the objects table. -We explicitly manage concurrent versions of an object: the version timestamp and version UUID columns are index columns, thus we may have several concurrent versions of an object. -Important: before deleting an older version from the objects table, we must make sure that we did a successfull delete of the blocks of that version from the blocks table. - -Thus, the workflow for reading an object is as follows: - -1. Check permissions (LDAP) -2. Read entry in object table. If data is inline, we have its data, stop here. - -> if several versions, take newest one and launch deletion of old ones in background -3. Read first block from cluster. If size <= 1 block, stop here. -4. Simultaneously with previous step, if size > 1 block: query the Blocks table for the IDs of the next blocks -5. Read subsequent blocks from cluster - -Workflow for PUT: - -1. Check write permission (LDAP) -2. Select a new version UUID -3. Write a preliminary entry for the new version in the objects table with complete = false -4. Send blocks to cluster and write entries in the blocks table -5. Update the version with complete = true and all of the accurate information (size, etc) -6. Return success to the user -7. Launch a background job to check and delete older versions - -Workflow for DELETE: - -1. Check write permission (LDAP) -2. Get current version (or versions) in object table -3. Do the deletion of those versions NOT IN A BACKGROUND JOB THIS TIME -4. Return succes to the user if we were able to delete blocks from the blocks table and entries from the object table - -To delete a version: - -1. List the blocks from Cassandra -2. For each block, delete it from cluster. Don't care if some deletions fail, we can do GC. -3. Delete all of the blocks from the blocks table -4. 
-
-Known issue: if someone is reading from a version that we want to delete and the object is big, the read might be interrupted. I think it is ok to leave it like this, we just cut the connection if data disappears during a read.
-
-("Let P be a problem; 'we don't care' is a solution to this problem")
-
-#### Block storage on disk
-
-**Blocks themselves:**
-
-- file path = /blobs/(first 3 hex digits of hash)/(rest of hash)
-
-**Reverse index for GC & other block-level metadata:**
-
-- file path = /meta/(first 3 hex digits of hash)/(rest of hash)
-- map block hash -> set of version UUIDs where it is referenced
-
-Useful metadata:
-
-- list of versions that reference this block in the Cassandra table, so that we can do GC by checking in Cassandra that the lines still exist
-- list of other nodes that we know have acknowledged a write of this block, useful in the rebalancing algorithm
-
-Write strategy: have a single thread that does all write IO so that it is serialized (or have several threads that manage independent parts of the hash space). When writing a blob, write it to a temporary file, close, then rename so that a concurrent read gets a consistent result (either not found or found with whole content).
-
-Read strategy: the only read operation is get(hash) that returns either the data or not found (can do a corruption check as well and return corrupted state if it is the case). Can be done concurrently with writes.
-
-**Internal API:**
-
-- get(block hash) -> ok+data/not found/corrupted
-- put(block hash & data, version uuid + offset) -> ok/error
-- put with no data (block hash, version uuid + offset) -> ok/not found plz send data/error
-- delete(block hash, version uuid + offset) -> ok/error
-
-GC: when last ref is deleted, delete block.
-Long GC procedure: check in Cassandra that version UUIDs still exist and reference this block.
-
-Rebalancing: takes as argument the list of newly added nodes.
-
-- List all blocks that we have. For each block:
-- If it hits a newly introduced node, send it to them.
-  Use put with no data first to check whether it still has to be sent to them or not.
-  Use a random listing order to avoid race conditions (they do no harm but we might have two nodes sending the same thing at the same time thus wasting time).
-- If it doesn't hit us anymore, delete it and its reference list.
-
-Only one rebalancing can be running at the same time. It can be restarted at the beginning with new parameters.
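-
-To make the on-disk layout and the write-then-rename strategy concrete, here is a
-minimal Python sketch of the `get`/`put` part of this internal API. The choice of
-hash function and the root directory are assumptions made for the example; only the
-`/blobs/(first 3 hex digits)/(rest)` path scheme and the temporary-file-then-rename
-trick come from the text above.
-
-```python
-import hashlib
-import os
-import tempfile
-
-ROOT = "/tmp/garage-blocks"  # hypothetical data root
-
-def block_path(block_hash: str) -> str:
-    # /blobs/(first 3 hex digits of hash)/(rest of hash)
-    return os.path.join(ROOT, "blobs", block_hash[:3], block_hash[3:])
-
-def put(data: bytes) -> str:
-    block_hash = hashlib.blake2b(data).hexdigest()
-    path = block_path(block_hash)
-    os.makedirs(os.path.dirname(path), exist_ok=True)
-    # Write to a temporary file, then rename: a concurrent reader sees either
-    # "not found" or the whole block, never a partial write.
-    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(path))
-    with os.fdopen(fd, "wb") as f:
-        f.write(data)
-    os.rename(tmp, path)
-    return block_hash
-
-def get(block_hash: str):
-    # Returns the block data, None if not found; corruption is detected by
-    # re-hashing the content before returning it.
-    try:
-        with open(block_path(block_hash), "rb") as f:
-            data = f.read()
-    except FileNotFoundError:
-        return None
-    if hashlib.blake2b(data).hexdigest() != block_hash:
-        raise ValueError("corrupted block " + block_hash)
-    return data
-```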
-
-#### Membership management
-
-Two sets of nodes:
-
-- set of nodes from which a ping was recently received, with status: number of stored blocks, request counters, error counters, GC%, rebalancing%
-  (eviction from this set after say 30 seconds without ping)
-- set of nodes that are part of the system, explicitly modified by the operator using the web UI (persisted to disk),
-  is a CRDT using a version number for the value of the whole set
-
-Thus, three states for nodes:
-
-- healthy: in both sets
-- missing: not pingable but part of desired cluster
-- unused/draining: currently present but not part of the desired cluster; empty if it contains nothing, draining if it still contains some blocks
-
-Membership messages between nodes:
-
-- ping with current state + hash of current membership info -> reply with same info
-- send & get back membership info (the IDs of nodes that are in the two sets): used when no local membership change in a long time and membership info hash discrepancy detected with first message (passive membership fixing with full CRDT gossip)
-- inform of newly pingable node(s) -> no result, when receive new info repeat to all (reliable broadcast)
-- inform of operator membership change -> no result, when receive new info repeat to all (reliable broadcast)
-
-Ring: generated from the desired set of nodes, however when doing reads/writes on the ring, skip nodes that are known to be not pingable.
-The tokens are generated in a deterministic fashion from node IDs (hash of node id + token number from 1 to K).
-Number K of tokens per node: decided by the operator & stored in the operator's list of nodes CRDT. Default value proposal: with node status information also broadcast disk total size and free space, and propose a default number of tokens equal to 80% × free space / 10 GB. (this is all user interface)
-
-
-#### Constants
-
-- Block size: around 1 MB? --> Exoscale uses 16 MB chunks
-- Number of tokens in the hash ring: one every 10 GB of allocated storage
-- Threshold for storing data directly in the Cassandra objects table: 1 kB (maybe up to 4 kB?)
-- Ping timeout (time after which a node is registered as unresponsive/missing): 30 seconds
-- Ping interval: 10 seconds
-- ??
-
-#### Links
-
-- CDC:
-- Erasure coding:
-- [Openstack Storage Concepts](https://docs.openstack.org/arch-design/design-storage/design-storage-concepts.html)
-- [RADOS](https://ceph.com/wp-content/uploads/2016/08/weil-rados-pdsw07.pdf)
diff --git a/content/documentation/working_documents/index.md b/content/documentation/working_documents/index.md
deleted file mode 100644
index 8c0f1f0..0000000
--- a/content/documentation/working_documents/index.md
+++ /dev/null
@@ -1,13 +0,0 @@
-+++
-title="Doc Post"
-date=2018-08-20
-+++
-
-# Working Documents
-
-Working documents are documents that reflect the fact that Garage is a piece of software that evolves quickly.
-They are a way to communicate our ideas, our changes, and so on before or while we are implementing them in Garage.
-If you like to live on the edge, they can also serve as documentation of our next features to be released.
-
-Ideally, once the feature/patch has been merged, the working document should serve as a source to
-update the rest of the documentation and then be removed.
diff --git a/content/documentation/working_documents/load_balancing.md b/content/documentation/working_documents/load_balancing.md
deleted file mode 100644
index e6fa3e8..0000000
--- a/content/documentation/working_documents/load_balancing.md
+++ /dev/null
@@ -1,204 +0,0 @@
-+++
-title="Doc Post"
-date=2018-08-20
-+++
-
-# Load Balancing Data (planned for version 0.2)
-
-**This is still being improved in release 0.5. The working document has not been updated yet; it still only applies to Garage 0.2 through 0.4.**
-
-I have conducted a quick study of different methods to load-balance data over different Garage nodes using consistent hashing.
-
-## Requirements
-
-- *good balancing*: two nodes that have the same announced capacity should receive close to the same number of items
-
-- *multi-datacenter*: the replicas of a partition should be distributed over as many datacenters as possible
-
-- *minimal disruption*: when adding or removing a node, as few partitions as possible should have to move around
-
-- *order-agnostic*: the same set of nodes (each associated with a datacenter name
-  and a capacity) should always return the same distribution of partition
-  replicas, independently of the order in which nodes were added/removed (this
-  is to keep the implementation simple)
-
-## Methods
-
-### Naive multi-DC ring walking strategy
-
-This strategy can be used with any ring-like algorithm to make it aware of the *multi-datacenter* requirement:
-
-In this method, the ring is a list of positions, each associated with a single node in the cluster.
-Partitions contain all the keys between two consecutive items of the ring.
-To find the nodes that store replicas of a given partition:
-
-- select the node for the position of the partition's lower bound
-- go clockwise on the ring, skipping nodes that:
-  - we have already selected
-  - are in a datacenter of a node we have selected, except if we already have nodes from all possible datacenters
-
-In this way the selected nodes will always be distributed over
-`min(n_datacenters, n_replicas)` different datacenters, which is the best we
-can do.
-
-This method was implemented in the first version of Garage, with the basic
-ring construction from Dynamo that consists of associating `n_token` random positions to
-each node (I know it's not optimal, the Dynamo paper already studies this).
-
-### Better rings
-
-The ring construction that selects `n_token` random positions for each node gives a ring of positions that
-is not well-balanced: the space between the tokens varies a lot, and some partitions are thus bigger than others.
-This problem was demonstrated in the original Dynamo paper.
-
-To solve this, we want to apply a second, better method for partitioning our dataset:
-
-1. fix an initially large number of partitions (say 1024) with evenly-spaced delimiters,
-
-2. attribute each partition randomly to a node, with a probability
-   proportional to its capacity (which `n_tokens` represented in the first
-   method)
-
-For now we continue using the multi-DC ring walking described above.
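-
-For illustration, here is a small Python sketch of the naive multi-DC ring walking
-described in this section. The ring representation (a sorted list of
-`(position, node, datacenter)` tuples) and the example values are assumptions made
-for this sketch; `scripts/simulate_ring.py` remains the reference implementation.
-
-```python
-def walk_ring(ring, start_index, n_replicas):
-    """ring: list of (position, node, datacenter) tuples, sorted by position."""
-    all_nodes = {node for _, node, _ in ring}
-    all_dcs = {dc for _, _, dc in ring}
-    replicas, used_dcs = [], set()
-    i = start_index
-    while len(replicas) < min(n_replicas, len(all_nodes)):
-        _, node, dc = ring[i % len(ring)]
-        i += 1
-        if node in replicas:
-            continue  # skip nodes we have already selected
-        if dc in used_dcs and len(used_dcs) < len(all_dcs):
-            continue  # skip an already-used datacenter while unused ones remain
-        replicas.append(node)
-        used_dcs.add(dc)
-    return replicas
-
-# Hypothetical 3-node ring over 2 datacenters:
-ring = sorted([(12, "n1", "dc1"), (42, "n2", "dc1"), (70, "n3", "dc2")])
-print(walk_ring(ring, 0, 2))  # -> ['n1', 'n3']: one replica in each datacenter
-```
-
-As stated above, the selected replicas end up spread over `min(n_datacenters, n_replicas)` datacenters.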
-
-I have studied two ways to do the attribution of partitions to nodes, in a way that is deterministic:
-
-- Min-hash: for each partition, select node that minimizes `hash(node, partition_number)`
-- MagLev: see [here](https://blog.acolyer.org/2016/03/21/maglev-a-fast-and-reliable-software-network-load-balancer/)
-
-MagLev provided significantly better balancing, as it guarantees that the exact
-same number of partitions is attributed to all nodes that have the same
-capacity (and that this number is proportional to the node's capacity, except
-for large values). However, in both cases:
-
-- the distribution is still bad, because we use the naive multi-DC ring walking
-  that behaves strangely due to interactions between consecutive positions on
-  the ring
-
-- the disruption in case of adding/removing a node is not as low as it can be,
-  as we show with the following method.
-
-A quick description of MagLev (backend = node, lookup table = ring):
-
-> The basic idea of Maglev hashing is to assign a preference list of all the
-> lookup table positions to each backend. Then all the backends take turns
-> filling their most-preferred table positions that are still empty, until the
-> lookup table is completely filled in. Hence, Maglev hashing gives an almost
-> equal share of the lookup table to each of the backends. Heterogeneous
-> backend weights can be achieved by altering the relative frequency of the
-> backends’ turns…
-
-Here are some stats (run `scripts/simulate_ring.py` to reproduce):
-
-```
-##### Custom-ring (min-hash) #####
-
-#partitions per node (capacity in parenthesis):
-- datura (8) : 227
-- digitale (8) : 351
-- drosera (8) : 259
-- geant (16) : 476
-- gipsie (16) : 410
-- io (16) : 495
-- isou (8) : 231
-- mini (4) : 149
-- mixi (4) : 188
-- modi (4) : 127
-- moxi (4) : 159
-
-Variance of load distribution for load normalized to intra-class mean
-(a class being the set of nodes with the same announced capacity): 2.18% <-- REALLY BAD
-
-Disruption when removing nodes (partitions moved on 0/1/2/3 nodes):
-removing atuin digitale : 63.09% 30.18% 6.64% 0.10%
-removing atuin drosera : 72.36% 23.44% 4.10% 0.10%
-removing atuin datura : 73.24% 21.48% 5.18% 0.10%
-removing jupiter io : 48.34% 38.48% 12.30% 0.88%
-removing jupiter isou : 74.12% 19.73% 6.05% 0.10%
-removing grog mini : 84.47% 12.40% 2.93% 0.20%
-removing grog mixi : 80.76% 16.60% 2.64% 0.00%
-removing grog moxi : 83.59% 14.06% 2.34% 0.00%
-removing grog modi : 87.01% 11.43% 1.46% 0.10%
-removing grisou geant : 48.24% 37.40% 13.67% 0.68%
-removing grisou gipsie : 53.03% 33.59% 13.09% 0.29%
-on average: 69.84% 23.53% 6.40% 0.23% <-- COULD BE BETTER
-
---------
-
-##### MagLev #####
-
-#partitions per node:
-- datura (8) : 273
-- digitale (8) : 256
-- drosera (8) : 267
-- geant (16) : 452
-- gipsie (16) : 427
-- io (16) : 483
-- isou (8) : 272
-- mini (4) : 184
-- mixi (4) : 160
-- modi (4) : 144
-- moxi (4) : 154
-
-Variance of load distribution: 0.37% <-- Already much better, but not optimal
-
-Disruption when removing nodes (partitions moved on 0/1/2/3 nodes):
-removing atuin digitale : 62.60% 29.20% 7.91% 0.29%
-removing atuin drosera : 65.92% 26.56% 7.23% 0.29%
-removing atuin datura : 63.96% 27.83% 7.71% 0.49%
-removing jupiter io : 44.63% 40.33% 14.06% 0.98%
-removing jupiter isou : 63.38% 27.25% 8.98% 0.39%
-removing grog mini : 72.46% 21.00% 6.35% 0.20%
-removing grog mixi : 72.95% 22.46% 4.39% 0.20%
-removing grog moxi : 74.22% 20.61% 4.98% 0.20%
-removing grog modi : 75.98% 18.36% 5.27% 0.39%
-removing 
grisou geant : 46.97% 36.62% 15.04% 1.37% -removing grisou gipsie : 49.22% 36.52% 12.79% 1.46% -on average: 62.94% 27.89% 8.61% 0.57% <-- WORSE THAN PREVIOUSLY -``` - -### The magical solution: multi-DC aware MagLev - -Suppose we want to select three replicas for each partition (this is what we do in our simulation and in most Garage deployments). -We apply MagLev three times consecutively, one for each replica selection. -The first time is pretty much the same as normal MagLev, but for the following times, when a node runs through its preference -list to select a partition to replicate, we skip partitions for which adding this node would not bring datacenter-diversity. -More precisely, we skip a partition in the preference list if: - -- the node already replicates the partition (from one of the previous rounds of MagLev) -- the node is in a datacenter where a node already replicates the partition and there are other datacenters available - -Refer to `method4` in the simulation script for a formal definition. - -``` -##### Multi-DC aware MagLev ##### - -#partitions per node: -- datura (8) : 268 <-- NODES WITH THE SAME CAPACITY -- digitale (8) : 267 HAVE THE SAME NUM OF PARTITIONS -- drosera (8) : 267 (+- 1) -- geant (16) : 470 -- gipsie (16) : 472 -- io (16) : 516 -- isou (8) : 268 -- mini (4) : 136 -- mixi (4) : 136 -- modi (4) : 136 -- moxi (4) : 136 - -Variance of load distribution: 0.06% <-- CAN'T DO BETTER THAN THIS - -Disruption when removing nodes (partitions moved on 0/1/2/3 nodes): -removing atuin digitale : 65.72% 33.01% 1.27% 0.00% -removing atuin drosera : 64.65% 33.89% 1.37% 0.10% -removing atuin datura : 66.11% 32.62% 1.27% 0.00% -removing jupiter io : 42.97% 53.42% 3.61% 0.00% -removing jupiter isou : 66.11% 32.32% 1.56% 0.00% -removing grog mini : 80.47% 18.85% 0.68% 0.00% -removing grog mixi : 80.27% 18.85% 0.88% 0.00% -removing grog moxi : 80.18% 19.04% 0.78% 0.00% -removing grog modi : 79.69% 19.92% 0.39% 0.00% -removing grisou geant : 44.63% 52.15% 3.22% 0.00% -removing grisou gipsie : 43.55% 52.54% 3.91% 0.00% -on average: 64.94% 33.33% 1.72% 0.01% <-- VERY GOOD (VERY LOW VALUES FOR 2 AND 3 NODES) -``` diff --git a/content/documentation/working_documents/migration_04.md b/content/documentation/working_documents/migration_04.md deleted file mode 100644 index 92ec71e..0000000 --- a/content/documentation/working_documents/migration_04.md +++ /dev/null @@ -1,110 +0,0 @@ -+++ -title="Doc Post" -date=2018-08-20 -+++ - -# Migrating from 0.3 to 0.4 - -**Migrating from 0.3 to 0.4 is unsupported. This document is only intended to -document the process internally for the Deuxfleurs cluster where we have to do -it. Do not try it yourself, you will lose your data and we will not help you.** - -**Migrating from 0.2 to 0.4 will break everything for sure. Never try it.** - -The internal data format of Garage hasn't changed much between 0.3 and 0.4. -The Sled database is still the same, and the data directory as well. - -The following has changed, all in the meta directory: - -- `node_id` in 0.3 contains the identifier of the current node. In 0.4, this - file does nothing and should be deleted. It is replaced by `node_key` (the - secret key) and `node_key.pub` (the associated public key). A node's - identifier on the ring is its public key. - -- `peer_info` in 0.3 contains the list of peers saved automatically by Garage. - The format has changed and it is now stored in `peer_list` (`peer_info` - should be deleted). - -When migrating, all node identifiers will change. 
This also means that the
-assignment of data partitions on the ring will change, and lots of data will
-have to be rebalanced.
-
-- If your cluster has only 3 nodes, all nodes store everything, therefore nothing has to be rebalanced.
-
-- If your cluster has only 4 nodes, for any partition there will always be at
-  least 2 nodes that stored data before that still store it after. Therefore
-  the migration should in theory be transparent and Garage should continue to
-  work during the rebalance.
-
-- If your cluster has 5 or more nodes, data will disappear during the
-  migration. Do not migrate (fortunately we don't have this scenario at
-  Deuxfleurs), or if you do, make Garage unavailable until things stabilize
-  (disable web and API access).
-
-
-The migration steps are as follows:
-
-1. Prepare a new configuration file for 0.4. For each node, point to the same
-   meta and data directories as Garage 0.3. Basically, the things that change
-   are the following:
-
-   - No more `rpc_tls` section
-   - You have to generate a shared `rpc_secret` and put it in all config files
-   - `bootstrap_peers` has a different syntax as it has to contain node keys.
-     Leave it empty and use `garage node-id` and `garage node connect` instead (new features of 0.4)
-   - Put the publicly accessible RPC address of your node in `rpc_public_addr` if possible (it's optional but recommended)
-   - If you are using Consul, change the `consul_service_name` to NOT be the name advertised by Nomad.
-     Now Garage is responsible for advertising its own service itself.
-
-2. Disable API and web access for some time (Garage does not support disabling
-   these endpoints but you can change the port number or stop your reverse
-   proxy for instance).
-
-3. Do `garage repair -a --yes tables` and `garage repair -a --yes blocks`,
-   check the logs and check that all data seems to be synced correctly between
-   nodes.
-
-4. Save somewhere the output of `garage status`. We will need this to remember
-   how to reconfigure nodes in 0.4.
-
-5. Turn off Garage 0.3
-
-6. Back up metadata folders if you can (i.e. if you have space to do it
-   somewhere). Backing up data folders could also be useful but that's much
-   harder to do. If your filesystem supports snapshots, this could be a good
-   time to use them.
-
-7. Turn on Garage 0.4
-
-8. At this point, running `garage status` should indicate that all nodes of the
-   previous cluster are "unavailable". The nodes have new identifiers that
-   should appear in healthy nodes once they can talk to one another (use
-   `garage node connect` if necessary). They should have NO ROLE ASSIGNED at
-   the moment.
-
-9. Prepare a script with several `garage node configure` commands that replace
-   each of the v0.3 node IDs with the corresponding v0.4 node ID, with the same
-   zone/tag/capacity. For example if your node `drosera` had identifier `c24e`
-   before and now has identifier `789a`, and it was configured with capacity
-   `2` in zone `dc1`, put the following command in your script:
-
-```bash
-garage node configure 789a -z dc1 -c 2 -t drosera --replace c24e
-```
-
-10. Run your reconfiguration script. Check that the new output of `garage
-    status` contains the correct node IDs with the correct values for capacity
-    and zone. Old nodes should no longer be mentioned.
-
-11. If your cluster has 4 nodes or fewer, and you are feeling adventurous, you
-    can re-enable Web and API access now. Things will probably work.
-
-12. Garage might already be resyncing stuff. Issue a `garage repair -a --yes
-    tables` and `garage repair -a --yes blocks` to force it to do so.
-
-13. Wait for resyncing activity to stop in the logs. Do steps 12 and 13 two or
-    three times, until you see that when you issue the repair commands, nothing
-    gets resynced any longer.
-
-14. Your upgraded cluster should be in a working state. Re-enable API and Web
-    access and check that everything went well.
diff --git a/content/documentation/working_documents/migration_06.md b/content/documentation/working_documents/migration_06.md
deleted file mode 100644
index dd180d5..0000000
--- a/content/documentation/working_documents/migration_06.md
+++ /dev/null
@@ -1,51 +0,0 @@
-+++
-title="Doc Post"
-date=2018-08-20
-+++
-
-# Migrating from 0.5 to 0.6
-
-**This guide explains how to migrate to 0.6 if you have an existing 0.5 cluster.
-We don't recommend trying to migrate directly from 0.4 or older to 0.6.**
-
-**We make no guarantee that this migration will work perfectly:
-back up all your data before attempting it!**
-
-Garage v0.6 (not yet released) introduces a new data model for buckets
-that allows buckets to have multiple names (aliases).
-Buckets can also have "private" aliases (called local aliases),
-which are only visible when using a certain access key.
-
-This new data model means that the metadata tables have changed quite a bit in structure,
-and a manual migration step is required.
-
-The migration steps are as follows:
-
-1. Disable API and web access for some time (Garage does not support disabling
-   these endpoints but you can change the port number or stop your reverse
-   proxy for instance).
-
-2. Do `garage repair -a --yes tables` and `garage repair -a --yes blocks`,
-   check the logs and check that all data seems to be synced correctly between
-   nodes.
-
-3. Turn off Garage 0.5
-
-4. **Backup your metadata folders!!**
-
-5. Turn on Garage 0.6
-
-6. At this point, `garage bucket list` should indicate that no buckets are present
-   in the cluster. `garage key list` should show all of the previously existing
-   access keys; however, these keys should not have any permissions to access buckets.
-
-7. Run `garage migrate buckets050`: this will populate the new bucket table with
-   the buckets that existed previously. This will also restore bucket access for
-   API keys as it was before.
-
-8. Check that all your buckets indeed appear in `garage bucket list`, and that
-   keys have the proper access flags set. If that is not the case, revert
-   everything and file a bug!
-
-9. Your upgraded cluster should be in a working state. Re-enable API and Web
-   access and check that everything went well.
-- 
cgit v1.2.3