feat: service manager for coordination, configurable smart graceful shutdown timeout

This commit is contained in:
2025-08-26 18:45:22 -05:00
parent cff672b30a
commit 87100a57d5
8 changed files with 365 additions and 124 deletions

View File

@@ -1,19 +1,17 @@
use serenity::all::{ClientBuilder, GatewayIntents};
use std::time::Duration;
use tokio::{signal, task::JoinSet};
use tokio::signal;
use tracing::{error, info, warn};
use tracing_subscriber::{EnvFilter, FmtSubscriber};
use crate::bot::{Data, age};
use crate::config::Config;
use crate::services::manager::ServiceManager;
use crate::services::{ServiceResult, bot::BotService, dummy::DummyService, run_service};
use crate::shutdown::ShutdownCoordinator;
use figment::{Figment, providers::Env};
mod bot;
mod config;
mod services;
mod shutdown;
#[tokio::main]
async fn main() {
@@ -22,7 +20,18 @@ async fn main() {
// Configure logging
let filter =
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("warn,banner=debug"));
let subscriber = FmtSubscriber::builder().with_env_filter(filter).finish();
let subscriber = {
#[cfg(debug_assertions)]
{
FmtSubscriber::builder()
}
#[cfg(not(debug_assertions))]
{
FmtSubscriber::builder().json()
}
}
.with_env_filter(filter)
.finish();
tracing::subscriber::set_global_default(subscriber).expect("setting default subscriber failed");
let config: Config = Figment::new()
@@ -51,118 +60,87 @@ async fn main() {
.await
.expect("Failed to build client");
let shutdown_coordinator = ShutdownCoordinator::new();
// Extract shutdown timeout before moving config
let shutdown_timeout = config.shutdown_timeout;
// Create services
// Create service manager
let mut service_manager = ServiceManager::new();
// Create and add services
let bot_service = Box::new(BotService::new(client));
let dummy_service = Box::new(DummyService::new("background"));
// Start services using the unified runner
let bot_handle = {
let shutdown_rx = shutdown_coordinator.subscribe();
tokio::spawn(run_service(bot_service, shutdown_rx))
let bot_handle = tokio::spawn(run_service(bot_service, service_manager.subscribe()));
let dummy_handle = tokio::spawn(run_service(dummy_service, service_manager.subscribe()));
service_manager.add_service("bot".to_string(), bot_handle);
service_manager.add_service("background".to_string(), dummy_handle);
// Set up CTRL+C signal handling
let ctrl_c = async {
signal::ctrl_c()
.await
.expect("Failed to install CTRL+C signal handler");
info!("Received CTRL+C, gracefully shutting down...");
};
let dummy_handle = {
let shutdown_rx = shutdown_coordinator.subscribe();
tokio::spawn(run_service(dummy_service, shutdown_rx))
};
// Set up signal handling
let signal_handle = {
let shutdown_tx = shutdown_coordinator.shutdown_tx();
tokio::spawn(async move {
signal::ctrl_c()
.await
.expect("Failed to install CTRL+C signal handler");
info!("Received CTRL+C, initiating shutdown...");
let _ = shutdown_tx.send(());
ServiceResult::GracefulShutdown
})
};
// Put all services in a JoinSet for unified handling
let mut services = JoinSet::new();
services.spawn(bot_handle);
services.spawn(dummy_handle);
services.spawn(signal_handle);
// Wait for any service to complete or signal
// Main application loop - wait for services or CTRL+C
let mut exit_code = 0;
let first_completion = services.join_next().await;
match first_completion {
Some(Ok(Ok(service_result))) => {
// A service completed successfully
match &service_result {
let join = |strings: Vec<String>| {
strings
.iter()
.map(|s| format!("\"{}\"", s))
.collect::<Vec<_>>()
.join(", ")
};
tokio::select! {
(service_name, result) = service_manager.run() => {
// A service completed unexpectedly
match result {
ServiceResult::GracefulShutdown => {
// This means CTRL+C was pressed
info!(service = service_name, "Service completed gracefully");
}
ServiceResult::NormalCompletion => {
warn!("A service completed unexpectedly");
warn!(service = service_name, "Service completed unexpectedly");
exit_code = 1;
}
ServiceResult::Error(e) => {
error!("Service failure: {e}");
error!(service = service_name, "Service failed: {e}");
exit_code = 1;
}
}
}
Some(Ok(Err(e))) => {
error!("Service task panicked: {e}");
exit_code = 1;
}
Some(Err(e)) => {
error!("JoinSet error: {e}");
exit_code = 1;
}
None => {
warn!("No services running");
exit_code = 1;
}
};
// Signal all services to shut down
shutdown_coordinator.shutdown();
// Wait for graceful shutdown with timeout
let remaining_count = services.len();
if remaining_count > 0 {
info!("Waiting for {remaining_count} remaining services to shutdown (5s timeout)...");
let shutdown_result = tokio::time::timeout(Duration::from_secs(5), async {
while let Some(result) = services.join_next().await {
match result {
Ok(Ok(ServiceResult::GracefulShutdown)) => {
// Service shutdown logged by the service itself
}
Ok(Ok(ServiceResult::NormalCompletion)) => {
warn!("Service completed normally during shutdown");
}
Ok(Ok(ServiceResult::Error(e))) => {
error!("Service error during shutdown: {e}");
}
Ok(Err(e)) => {
error!("Service panic during shutdown: {e}");
}
Err(e) => {
error!("Service join error: {e}");
}
// Shutdown remaining services
match service_manager.shutdown(shutdown_timeout).await {
Ok(()) => {
info!("Graceful shutdown complete");
}
Err(pending_services) => {
warn!(
"Graceful shutdown elapsed - the following service(s) did not complete: {}",
join(pending_services)
);
exit_code = if exit_code == 0 { 2 } else { exit_code };
}
}
})
.await;
match shutdown_result {
Ok(()) => {
info!("All services shutdown completed");
}
Err(_) => {
warn!("Shutdown timeout - some services may not have completed");
exit_code = if exit_code == 0 { 2 } else { exit_code };
}
_ = ctrl_c => {
// User requested shutdown
match service_manager.shutdown(shutdown_timeout).await {
Ok(()) => {
info!("Graceful shutdown complete");
}
Err(pending_services) => {
warn!(
"Graceful shutdown elapsed - the following service(s) did not complete: {}",
join(pending_services)
);
exit_code = 2;
}
}
}
} else {
info!("No remaining services to shutdown");
}
info!("Application shutdown complete (exit code: {})", exit_code);