diff --git a/pacman-server/railway.json b/pacman-server/railway.json
index 08cd570..9f844bc 100644
--- a/pacman-server/railway.json
+++ b/pacman-server/railway.json
@@ -1,6 +1,9 @@
 {
   "$schema": "https://railway.com/railway.schema.json",
   "deploy": {
-    "drainingSeconds": 10
+    "drainingSeconds": 10,
+    "healthcheckPath": "/health",
+    "healthcheckTimeout": 90,
+    "restartPolicyMaxRetries": 3
   }
 }
diff --git a/pacman-server/src/app.rs b/pacman-server/src/app.rs
index 85c454f..04d0ed6 100644
--- a/pacman-server/src/app.rs
+++ b/pacman-server/src/app.rs
@@ -1,10 +1,45 @@
 use dashmap::DashMap;
 use jsonwebtoken::{DecodingKey, EncodingKey};
 use std::sync::Arc;
+use tokio::sync::RwLock;
 
 use crate::data::pool::PgPool;
 use crate::{auth::AuthRegistry, config::Config};
 
+/// Readiness flags reported by the `/health` endpoint.
+///
+/// Both flags start out `false`; the service counts as healthy only once
+/// migrations have completed and the most recent database probe succeeded.
+#[derive(Debug, Clone, Default)]
+pub struct Health {
+    /// Set once the startup migrations have run successfully.
+    migrations: bool,
+    /// Result of the most recent database connectivity probe.
+    database: bool,
+}
+
+impl Health {
+    /// Creates a `Health` with every flag cleared (not yet healthy).
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Returns `true` only when both the migrations and database flags are set.
+    pub fn ok(&self) -> bool {
+        self.migrations && self.database
+    }
+
+    /// Records whether the database migrations have completed.
+    pub fn set_migrations(&mut self, done: bool) {
+        self.migrations = done;
+    }
+
+    /// Records the outcome of the latest database connectivity probe.
+    pub fn set_database(&mut self, ok: bool) {
+        self.database = ok;
+    }
+}
+
 #[derive(Clone)]
 pub struct AppState {
     pub config: Arc<Config>,
@@ -13,6 +48,8 @@ pub struct AppState {
     pub jwt_encoding_key: Arc<EncodingKey>,
     pub jwt_decoding_key: Arc<DecodingKey>,
     pub db: Arc<PgPool>,
+    /// Shared readiness state read by the `/health` route.
+    pub health: Arc<RwLock<Health>>,
 }
 
 impl AppState {
@@ -26,6 +63,7 @@ impl AppState {
             jwt_encoding_key: Arc::new(EncodingKey::from_secret(jwt_secret.as_bytes())),
             jwt_decoding_key: Arc::new(DecodingKey::from_secret(jwt_secret.as_bytes())),
             db: Arc::new(db),
+            health: Arc::new(RwLock::new(Health::new())),
         }
     }
 }
diff --git a/pacman-server/src/main.rs b/pacman-server/src/main.rs
index bccc3d8..444846b 100644
--- a/pacman-server/src/main.rs
+++ b/pacman-server/src/main.rs
@@ -12,8 +12,8 @@ mod config;
 mod data;
 mod errors;
 mod session;
-use std::sync::Arc;
 use std::time::Instant;
+use std::{sync::Arc, time::Duration};
 #[cfg(unix)]
 use tokio::signal::unix::{signal, SignalKind};
 use tokio::sync::{watch, Notify};
@@ -44,14 +44,19 @@ async fn main() {
         panic!("failed to run database migrations: {}", e);
     }
 
+    let app_state = AppState::new(config, auth, db);
+    // Reaching this point means the migrations above succeeded (failure panics).
+    app_state.health.write().await.set_migrations(true);
+
     let app = Router::new()
         .route("/", get(|| async { "Hello, World! Visit /auth/github to start OAuth flow." }))
+        .route("/health", get(routes::health_handler))
         .route("/auth/providers", get(routes::list_providers_handler))
         .route("/auth/{provider}", get(routes::oauth_authorize_handler))
         .route("/auth/{provider}/callback", get(routes::oauth_callback_handler))
         .route("/logout", get(routes::logout_handler))
         .route("/profile", get(routes::profile_handler))
-        .with_state(AppState::new(config, auth, db))
+        .with_state(app_state.clone())
         .layer(CookieLayer::default());
 
     info!(%addr, "Starting HTTP server bind");
@@ -62,6 +67,51 @@ async fn main() {
     let notify = Arc::new(Notify::new());
     let (tx_signal, rx_signal) = watch::channel::<Option<Instant>>(None);
 
+    // Spawn the background health checker. It probes the database on a timer and
+    // stops as soon as shutdown is signalled through `notify`.
+    {
+        let health_state = app_state.health.clone();
+        let db_pool = app_state.db.clone();
+        let notify_for_health = notify.clone();
+        tokio::spawn(async move {
+            trace!("Health checker task started");
+            // Upper bound for one probe: a hung connection must be reported as
+            // unhealthy rather than leaving a stale `true` in place forever.
+            let probe_timeout = Duration::from_secs(5);
+            // Create the shutdown future once and keep polling the same instance,
+            // so a notification that arrives between loop iterations is not lost.
+            let shutdown = notify_for_health.notified();
+            tokio::pin!(shutdown);
+            let mut backoff: u32 = 1;
+            let mut next_sleep = Duration::from_secs(0);
+            loop {
+                // Racing the whole wait-and-probe step against shutdown lets the
+                // task exit promptly even while a query is in flight.
+                let ok = tokio::select! {
+                    _ = &mut shutdown => {
+                        trace!("Health checker received shutdown notification; exiting");
+                        break;
+                    }
+                    ok = async {
+                        tokio::time::sleep(next_sleep).await;
+                        let probe = sqlx::query("SELECT 1").execute(&*db_pool);
+                        matches!(tokio::time::timeout(probe_timeout, probe).await, Ok(Ok(_)))
+                    } => ok,
+                };
+                health_state.write().await.set_database(ok);
+                if ok {
+                    trace!(database_ok = true, "Health check succeeded; scheduling next run in 90s");
+                    backoff = 1;
+                    next_sleep = Duration::from_secs(90);
+                } else {
+                    backoff = backoff.saturating_mul(2).min(60);
+                    trace!(database_ok = false, backoff, "Health check failed; backing off");
+                    next_sleep = Duration::from_secs(u64::from(backoff));
+                }
+            }
+        });
+    }
+
     {
         let notify = notify.clone();
         let tx = tx_signal.clone();
diff --git a/pacman-server/src/routes.rs b/pacman-server/src/routes.rs
index 70294fc..0e17e39 100644
--- a/pacman-server/src/routes.rs
+++ b/pacman-server/src/routes.rs
@@ -300,3 +300,14 @@ pub async fn list_providers_handler(State(app_state): State<AppState>) -> axum::
         .collect();
     axum::Json(providers).into_response()
 }
+
+/// Reports service readiness as JSON (`{"ok": <bool>}`).
+///
+/// Responds with `200 OK` when the shared health state is fully healthy and
+/// `503 Service Unavailable` otherwise.
+pub async fn health_handler(State(app_state): State<AppState>) -> axum::response::Response {
+    let ok = app_state.health.read().await.ok();
+    let status = if ok { StatusCode::OK } else { StatusCode::SERVICE_UNAVAILABLE };
+    let body = serde_json::json!({ "ok": ok });
+    (status, axum::Json(body)).into_response()
+}