From c56de7fdd1b6f1606e0f1f9a9e3038fa9dd479f8 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 30 Jun 2020 14:59:55 +0200 Subject: Write some README --- README.md | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 83 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b4e0924a..615573d3 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,94 @@ # Garage -THIS IS ALL WORK IN PROGRESS. NOTHING TO SEE YET BUT THANKS FOR YOUR INTEREST. +Garage is a lightweight S3-compatible distributed object store, with the following goals: -Garage implements an S3-compatible object store with high resiliency to network failures, machine failure, and sysadmin failure. +- As self-contained as possible +- Easy to set up +- Highly resilient to network failures, network latency, disk failures, sysadmin failures +- Relatively simple +- Made for multi-datacenter deployments -## To log: +Non-goals include: + +- Extremely high performance +- Complete implementation of the S3 API +- Erasure coding (our replication model is simply to copy the data as is on several nodes) + +Our main use case is to provide a distributed storage layer for small-scale self-hosted services such as [Deuxfleurs](https://deuxfleurs.fr). + +## Setting up Garage + +Use the `genkeys.sh` script to generate TLS keys for encrypting communications between Garage nodes. +The script takes no arguments and will generate keys in `pki/`. +This script creates a certificate authority `garage-ca` which signs certificates for individual Garage nodes. +Garage nodes from the same cluster authenticate themselves by verifying that they have certificates signed by the same certificate authority. + +Garage requires two locations to store its data: a metadata directory, and a data directory. +The metadata directory is used to store metadata such as object lists, and should ideally be located on an SSD drive. 
+The data directory is used to store the chunks of data of the objects stored in Garage. +In a typical deployment the data directory is stored on a standard HDD. + +Garage does not handle TLS for its S3 API endpoint. This should be handled by adding a reverse proxy. + +Create a configuration file with the following structure: ``` -RUST_LOG=garage=debug cargo run --release -- server -c config_file.toml +block_size = 1048576 # objects are split in blocks of maximum this number of bytes + +metadata_dir = "/path/to/ssd/metadata/directory" +data_dir = "/path/to/hdd/data/directory" + +rpc_bind_addr = "[::]:3901" # the port other Garage nodes will use to talk to this node + +bootstrap_peers = [ + # Ideally this list should contain the IP addresses of all other Garage nodes of the cluster. + # Use Ansible or any kind of configuration templating to generate this automatically. + "10.0.0.1:3901", + "10.0.0.2:3901", + "10.0.0.3:3901", +] + +[rpc_tls] +# NOT RECOMMENDED: you can skip this section if you don't want to encrypt intra-cluster traffic +# Thanks to genkeys.sh, generating the keys and certificates is easy, so there is NO REASON NOT TO DO IT. +ca_cert = "/path/to/garage/pki/garage-ca.crt" +node_cert = "/path/to/garage/pki/garage.crt" +node_key = "/path/to/garage/pki/garage.key" + +[s3_api] +api_bind_addr = "[::1]:3900" # the S3 API port, HTTP without TLS. Add a reverse proxy for the TLS part. +region = "garage" # set this to anything. S3 API calls will fail if they are not made against the region set here. ``` -## What to repair +Build Garage using `cargo build --release`. +Then, run it using either `./target/release/garage server -c path/to/config_file.toml` or `cargo run --release -- server -c path/to/config_file.toml`. + +Set the `RUST_LOG` environment variable to `garage=debug` to dump some debug information. +Set it to `garage=trace` to dump even more debug information. +Set it to `garage=warn` to show nothing except warnings and errors. 
+ +## Setting up cluster nodes + +Once all your `garage` nodes are running, you will need to: + +1. check that they are correctly talking to one another; +2. configure them with their physical location (in the case of a multi-dc deployment) and a number of "ring tokens" proportional to the storage space available on each node; +3. create some S3 API keys and buckets; +4. ???; +5. profit! + +To run these administrative tasks, you will need to use the `garage` command line tool and have it connect to any of the cluster's nodes on the RPC port. +The `garage` CLI also needs TLS keys and certificates of its own to authenticate and be authenticated in the cluster. +A typical invocation will be as follows: + +``` +./target/release/garage --ca-cert=pki/garage-ca.crt --client-cert=pki/garage-client.crt --client-key=pki/garage-client.key <...> +``` + + +## Notes to self + +### What to repair - `tables`: to do a full sync of metadata, should not be necessary because it is done every hour by the system - `versions` and `block_refs`: very time consuming, usefull if deletions have not been propagated, improves garbage collection -- cgit v1.2.3