author | Max Audron <audron@cocaine.farm> | 2022-03-06 14:50:00 +0100
committer | Max Audron <audron@cocaine.farm> | 2022-03-12 13:05:52 +0100
commit | 9d44127245990cc55dbdff5a4bd0a1524348f110 (patch)
tree | 82110b06619b6cd7031398d8c1c6d42f7308b152 /src/rpc/system.rs
parent | c00b2c9948bc686a5f33805a5cc4295c933a723a (diff)
download | garage-9d44127245990cc55dbdff5a4bd0a1524348f110.tar.gz garage-9d44127245990cc55dbdff5a4bd0a1524348f110.zip
add support for kubernetes service discovery
This commit adds support for discovering garage instances running in
Kubernetes.

Once enabled by setting `kubernetes_namespace` and
`kubernetes_service_name`, garage creates `garagenodes.deuxfleurs.fr`
custom resources in the namespace configured by `kubernetes_namespace`,
using each node's public key as the resource name and its IP and port
information as the spec.
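
As a rough illustration of what such a resource definition could look like, here
is a minimal sketch using the kube crate's `CustomResource` derive. The group and
kind are inferred from the CRD name `garagenodes.deuxfleurs.fr`; the API version
and the spec field names (`hostname`, `address`, `port`) are assumptions, not the
actual definition introduced by this patch.

```rust
// Hypothetical sketch only: the real definition lives in the new
// `crate::kubernetes` module, which is not part of this diff.
use kube::CustomResource;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::net::IpAddr;

// `kind = "GarageNode"` makes kube generate a `GarageNode` resource type whose
// plural ("garagenodes") and group together form `garagenodes.deuxfleurs.fr`.
#[derive(CustomResource, Serialize, Deserialize, Clone, Debug, JsonSchema)]
#[kube(group = "deuxfleurs.fr", version = "v1", kind = "GarageNode", namespaced)]
pub struct GarageNodeSpec {
	/// Hostname advertised by the node (assumed field name)
	pub hostname: String,
	/// IP address the node can be reached at for RPC (assumed field name)
	pub address: IpAddr,
	/// RPC port (assumed field name)
	pub port: u16,
}
```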
When discovering nodes, the resources are filtered by the
`garage.deuxfleurs.fr/service` label, which is set from the optional
`kubernetes_service_name` setting and applied to the resources garage
creates. This makes it possible to run several separate garage
deployments in a single namespace.
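
For illustration, filtering on that label with the kube crate could look roughly
like the sketch below. It reuses the hypothetical `GarageNode` type from the
previous sketch and is not the code added by this commit.

```rust
use kube::api::{Api, ListParams};
use kube::Client;

// Hypothetical helper: list only the GarageNode resources (type generated by
// the CustomResource derive sketched above) that belong to one particular
// garage deployment, identified by its service-name label.
async fn list_nodes_for_service(
	namespace: &str,
	service_name: &str,
) -> Result<Vec<GarageNode>, kube::Error> {
	let client = Client::try_default().await?;
	let nodes: Api<GarageNode> = Api::namespaced(client, namespace);
	// Several garage deployments can share a namespace; the label selector
	// keeps only the resources of this one.
	let selector = format!("garage.deuxfleurs.fr/service={}", service_name);
	let list = nodes.list(&ListParams::default().labels(&selector)).await?;
	Ok(list.items)
}
```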
The `kubernetes_skip_crd` option disables the creation of the CRD by
garage itself; in that case the user must deploy it manually.
Diffstat (limited to 'src/rpc/system.rs')
-rw-r--r-- | src/rpc/system.rs | 79
1 file changed, 78 insertions(+), 1 deletion(-)
diff --git a/src/rpc/system.rs b/src/rpc/system.rs
index 6bca6e3e..c8fc0ad5 100644
--- a/src/rpc/system.rs
+++ b/src/rpc/system.rs
@@ -1,7 +1,7 @@
 //! Module containing structs related to membership management
 use std::collections::HashMap;
 use std::io::{Read, Write};
-use std::net::SocketAddr;
+use std::net::{IpAddr, SocketAddr};
 use std::path::Path;
 use std::sync::{Arc, RwLock};
 use std::time::{Duration, Instant};
@@ -29,6 +29,7 @@
 use garage_util::persister::Persister;
 use garage_util::time::*;
 use crate::consul::*;
+use crate::kubernetes::*;
 use crate::layout::*;
 use crate::ring::*;
 use crate::rpc_helper::*;
@@ -88,6 +89,11 @@ pub struct System {
 	bootstrap_peers: Vec<(NodeID, SocketAddr)>,
 	consul_host: Option<String>,
 	consul_service_name: Option<String>,
+
+	kubernetes_service_name: Option<String>,
+	kubernetes_namespace: Option<String>,
+	kubernetes_skip_crd: bool,
+
 	replication_factor: usize,
 
 	/// The ring
@@ -247,6 +253,10 @@ impl System {
 			bootstrap_peers: config.bootstrap_peers.clone(),
 			consul_host: config.consul_host.clone(),
 			consul_service_name: config.consul_service_name.clone(),
+			kubernetes_service_name: config.kubernetes_service_name.clone(),
+			kubernetes_namespace: config.kubernetes_namespace.clone(),
+			kubernetes_skip_crd: config.kubernetes_skip_crd,
+
 			ring,
 			update_ring: Mutex::new(update_ring),
 			background,
@@ -295,6 +305,44 @@ impl System {
 			.err_context("Error while publishing Consul service")
 	}
 
+	fn get_default_ip() -> IpAddr {
+		pnet::datalink::interfaces()
+			.iter()
+			.find(|e| e.is_up() && !e.is_loopback() && !e.ips.is_empty())
+			.unwrap()
+			.ips
+			.first()
+			.unwrap()
+			.ip()
+	}
+
+	async fn advertise_to_kubernetes(self: Arc<Self>) -> Result<(), Error> {
+		let (kubernetes_service_name, kubernetes_namespace) =
+			match (&self.kubernetes_service_name, &self.kubernetes_namespace) {
+				(Some(ch), Some(csn)) => (ch, csn),
+				_ => return Ok(()),
+			};
+
+		let rpc_public_addr =
+			match self.rpc_public_addr {
+				Some(addr) => addr,
+				None => {
+					warn!("No rpc_public_addr configured, using first address on first network interface");
+					SocketAddr::new(Self::get_default_ip(), self.rpc_listen_addr.port())
+				}
+			};
+
+		publish_kubernetes_node(
+			kubernetes_service_name,
+			kubernetes_namespace,
+			self.netapp.id,
+			&self.local_status.load_full().hostname,
+			rpc_public_addr,
+		)
+		.await
+		.err_context("Error while publishing node to kubernetes")
+	}
+
 	/// Save network configuration to disc
 	async fn save_cluster_layout(self: Arc<Self>) -> Result<(), Error> {
 		let ring: Arc<Ring> = self.ring.borrow().clone();
@@ -470,6 +518,11 @@
 			_ => None,
 		};
 
+		let kubernetes_config = match (&self.kubernetes_service_name, &self.kubernetes_namespace) {
+			(Some(ch), Some(csn)) => Some((ch.clone(), csn.clone())),
+			_ => None,
+		};
+
 		while !*stop_signal.borrow() {
 			let not_configured = !self.ring.borrow().layout.check();
 			let no_peers = self.fullmesh.get_peer_list().len() < self.replication_factor;
@@ -503,6 +556,28 @@
 				}
 			}
 
+			// Fetch peer list from Kubernetes
+			if let Some((kubernetes_service_name, kubernetes_namespace)) = &kubernetes_config {
+				if !self.kubernetes_skip_crd {
+					match create_kubernetes_crd().await {
+						Ok(()) => (),
+						Err(e) => {
+							error!("Failed to create kubernetes custom resource: {}", e)
+						}
+					};
+				}
+
+				match get_kubernetes_nodes(kubernetes_service_name, kubernetes_namespace).await
+				{
+					Ok(node_list) => {
+						ping_list.extend(node_list);
+					}
+					Err(e) => {
+						warn!("Could not retrieve node list from Kubernetes: {}", e);
+					}
+				}
+			}
+
 			for (node_id, node_addr) in ping_list {
 				tokio::spawn(
 					self.netapp
@@ -518,6 +593,8 @@
 			}
 
 			self.background.spawn(self.clone().advertise_to_consul());
+			self.background
+				.spawn(self.clone().advertise_to_kubernetes());
 
 			let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL);
 			select! {
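
The new `crate::kubernetes` module itself is outside the scope of this diff.
Judging only from the call sites above, its interface is roughly the following
sketch; the exact signatures, and the `NodeID`/`Error` types (replaced here by
placeholders), are assumptions rather than the module's actual code.

```rust
use std::net::SocketAddr;

// Placeholder aliases standing in for the real types used by system.rs
// (NodeID comes from netapp, Error from garage_util); assumptions only.
type NodeID = [u8; 32];
type Error = Box<dyn std::error::Error + Send + Sync>;

/// Create the `garagenodes.deuxfleurs.fr` CRD; the caller skips this call
/// entirely when `kubernetes_skip_crd` is set.
pub async fn create_kubernetes_crd() -> Result<(), Error> {
	todo!()
}

/// List the GarageNode resources carrying the configured service-name label
/// and return them as (node id, RPC address) pairs for the discovery ping list.
pub async fn get_kubernetes_nodes(
	kubernetes_service_name: &str,
	kubernetes_namespace: &str,
) -> Result<Vec<(NodeID, SocketAddr)>, Error> {
	todo!()
}

/// Create or update this node's GarageNode resource, named after its public
/// key and carrying its hostname and RPC address in the spec.
pub async fn publish_kubernetes_node(
	kubernetes_service_name: &str,
	kubernetes_namespace: &str,
	node_id: NodeID,
	hostname: &str,
	rpc_public_addr: SocketAddr,
) -> Result<(), Error> {
	todo!()
}
```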