From aaff9b7d4ab0d2eeae88e7bb4a4f6512deaebb34 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 2 Feb 2023 16:30:00 +0100 Subject: Update df_consul dependency and avoid advertising failed backends (fix #2) --- Cargo.lock | 6 ++- Cargo.toml | 3 +- src/dns_config.rs | 138 +++++++----------------------------------------------- src/main.rs | 4 +- 4 files changed, 26 insertions(+), 125 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 744d731..541ab6d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -114,15 +114,17 @@ dependencies = [ [[package]] name = "df-consul" -version = "0.2.0" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a351d00f138e768845cdefb9ae27b79aeed97c698745c73bb2805cad1167aa81" +checksum = "0e38cfbab431b53dfd2d09f2a9902510c636d3d7397645bac5cf1959cfde2999" dependencies = [ "anyhow", "bytes", + "futures", "log", "reqwest", "serde", + "tokio", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index ecab429..6af9ec2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,8 +12,9 @@ anyhow = "1.0.66" futures = "0.3" tracing = { version = "0.1.30" } tracing-subscriber = { version = "0.3", features = ["env-filter"] } -df-consul = "0.2.0" structopt = "0.3" tokio = { version = "1.22", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } reqwest = { version = "0.11", default-features = false, features = ["json", "rustls-tls-webpki-roots" ] } serde = { version = "1.0.107", features = ["derive"] } + +df-consul = "0.3.3" diff --git a/src/dns_config.rs b/src/dns_config.rs index 0697270..f4a95be 100644 --- a/src/dns_config.rs +++ b/src/dns_config.rs @@ -1,14 +1,9 @@ use std::collections::{HashMap, HashSet}; use std::fmt; use std::sync::Arc; -use std::{cmp, time::Duration}; +use std::time::Duration; -use anyhow::Result; - -use futures::future::BoxFuture; -use futures::stream::{FuturesUnordered, StreamExt}; - -use tokio::{select, sync::watch, time::sleep}; +use tokio::{select, sync::watch}; use tracing::*; use df_consul::*; @@ -59,7 +54,7 @@ impl DnsConfig { } } -fn parse_d53_tag(tag: &str, node: &ConsulNode) -> Option<(DnsEntryKey, DnsEntryValue)> { +fn parse_d53_tag(tag: &str, node: &catalog::Node) -> Option<(DnsEntryKey, DnsEntryValue)> { let splits = tag.split(' ').collect::>(); if splits.len() != 2 { return None; @@ -102,123 +97,36 @@ fn parse_d53_tag(tag: &str, node: &ConsulNode) -> Option<(DnsEntryKey, DnsEntryV )) } -fn parse_consul_catalog(catalog: &ConsulNodeCatalog, dns_config: &mut DnsConfig) { - trace!("Parsing node catalog: {:#?}", catalog); - - for (_, svc) in catalog.services.iter() { - for tag in svc.tags.iter() { - if let Some((k, v)) = parse_d53_tag(tag, &catalog.node) { - dns_config.add(k, v); - } - } - } -} - -#[derive(Default)] -struct NodeWatchState { - last_idx: Option, - last_catalog: Option, - retries: u32, -} - pub fn spawn_dns_config_task( - consul: Consul, + consul: &Consul, mut must_exit: watch::Receiver, ) -> watch::Receiver> { let (tx, rx) = watch::channel(Arc::new(DnsConfig::new())); - let consul = Arc::new(consul); + let mut catalog_rx = consul.watch_all_service_health(Duration::from_secs(60)); tokio::spawn(async move { - let mut nodes = HashMap::new(); - let mut watches = FuturesUnordered::)>>::new(); - while !*must_exit.borrow() { - let list_nodes = select! { - ln = consul.list_nodes() => ln, + select! { + _ = catalog_rx.changed() => (), _ = must_exit.changed() => continue, }; - match list_nodes { - Ok(consul_nodes) => { - debug!("Watched consul nodes: {:?}", consul_nodes); - for consul_node in consul_nodes { - let node = &consul_node.node; - if !nodes.contains_key(node) { - nodes.insert(node.clone(), NodeWatchState::default()); - - let node = node.to_string(); - let consul = consul.clone(); + let services = catalog_rx.borrow_and_update(); - watches.push(Box::pin(async move { - let res = consul.watch_node(&node, None).await; - (node, res) - })); + let mut dns_config = DnsConfig::new(); + for (_svc, nodes) in services.iter() { + for node in nodes.iter() { + // Do not take into account backends if any have status critical + if node.checks.iter().any(|x| x.status == "critical") { + continue; + } + for tag in node.service.tags.iter() { + if let Some((k, v)) = parse_d53_tag(tag, &node.node) { + dns_config.add(k, v); } } } - Err(e) => { - error!("Could not get Consul node list: {}", e); - } - } - - let next_watch = select! { - nw = watches.next() => nw, - _ = must_exit.changed() => continue, - }; - - let (node, res): (String, Result<_>) = match next_watch { - Some(v) => v, - None => { - warn!("No nodes currently watched in dns_config.rs"); - sleep(Duration::from_secs(10)).await; - continue; - } - }; - - match res { - Ok((catalog, new_idx)) => { - let mut watch_state = nodes.get_mut(&node).unwrap(); - watch_state.last_idx = Some(new_idx); - watch_state.last_catalog = catalog; - watch_state.retries = 0; - - let idx = watch_state.last_idx; - let consul = consul.clone(); - watches.push(Box::pin(async move { - let res = consul.watch_node(&node, idx).await; - (node, res) - })); - } - Err(e) => { - let mut watch_state = nodes.get_mut(&node).unwrap(); - watch_state.retries += 1; - watch_state.last_idx = None; - - let will_retry_in = - retry_to_time(watch_state.retries, Duration::from_secs(600)); - error!( - "Failed to query consul for node {}. Will retry in {}s. {}", - node, - will_retry_in.as_secs(), - e - ); - - let consul = consul.clone(); - watches.push(Box::pin(async move { - sleep(will_retry_in).await; - let res = consul.watch_node(&node, None).await; - (node, res) - })); - continue; - } - } - - let mut dns_config = DnsConfig::new(); - for (_, watch_state) in nodes.iter() { - if let Some(catalog) = &watch_state.last_catalog { - parse_consul_catalog(catalog, &mut dns_config); - } } tx.send(Arc::new(dns_config)).expect("Internal error"); @@ -228,16 +136,6 @@ pub fn spawn_dns_config_task( rx } -fn retry_to_time(retries: u32, max_time: Duration) -> Duration { - // 1.2^x seems to be a good value to exponentially increase time at a good pace - // eg. 1.2^32 = 341 seconds ~= 5 minutes - ie. after 32 retries we wait 5 - // minutes - Duration::from_secs(cmp::min( - max_time.as_secs(), - 1.2f64.powf(retries as f64) as u64, - )) -} - // ---- Display impls ---- impl std::fmt::Display for DnsRecordType { diff --git a/src/main.rs b/src/main.rs index 24193f7..1a41c18 100644 --- a/src/main.rs +++ b/src/main.rs @@ -76,7 +76,7 @@ async fn main() { let (exit_signal, _) = watch_ctrl_c(); - let consul_config = df_consul::ConsulConfig { + let consul_config = df_consul::Config { addr: opt.consul_addr.clone(), ca_cert: opt.consul_ca_cert.clone(), tls_skip_verify: opt.consul_tls_skip_verify, @@ -111,7 +111,7 @@ async fn main() { .map(ToString::to_string) .collect::>(); - let rx_dns_config = dns_config::spawn_dns_config_task(consul.clone(), exit_signal.clone()); + let rx_dns_config = dns_config::spawn_dns_config_task(&consul, exit_signal.clone()); let updater_task = tokio::spawn(dns_updater::dns_updater_task( rx_dns_config.clone(), -- cgit v1.2.3