From ec6bb4ebfb94e5305c4c703760ac7c39a2997196 Mon Sep 17 00:00:00 2001
From: Xevion
Date: Tue, 13 Jan 2026 15:08:38 -0600
Subject: [PATCH] refactor: replace shell entrypoint with TypeScript, add DB retry logic

- Migrate Docker entrypoint from inline shell script to TypeScript for better maintainability
- Add exponential backoff DB connection retry (prod only, dev fails fast)
- Increase healthcheck start-period to account for DB warmup
---
Review notes (not part of the commit message):
- web/entrypoint.ts: SIGTERM/SIGINT now stop both child processes (the shell
  entrypoint killed both PIDs), stale sockets are removed before startup, and
  the socket wait stops early when Bun has already exited.
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. Context-line whitespace and the `index` blob id for
  web/entrypoint.ts have not been regenerated from the repository.
- The `create_pool` context line shows a bare `Result` return type; it appears
  truncated in the source copy and is left as found.

 Dockerfile        | 48 ++---------------------
 src/db/mod.rs     | 46 +++++++++++++++++++---
 web/entrypoint.ts | 99 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 144 insertions(+), 49 deletions(-)
 create mode 100644 web/entrypoint.ts

diff --git a/Dockerfile b/Dockerfile
index 528103a..fb3a694 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -89,48 +89,8 @@ COPY web/package.json web/bun.lock ./web/
 RUN cd web && bun install --frozen-lockfile --production && \
     ln -s /app/web/node_modules /app/web/build/node_modules
 
-# Create inline entrypoint script
-RUN cat > /entrypoint.sh << 'EOF'
-#!/bin/sh
-set -e
-
-cleanup() {
-    kill "$BUN_PID" "$RUST_PID" 2>/dev/null || true
-    rm -f /tmp/api.sock /tmp/bun.sock
-    exit 0
-}
-trap cleanup SIGTERM SIGINT
-
-# Start Bun SSR (propagate LOG_JSON and set UPSTREAM_URL)
-cd /app/web/build
-SOCKET_PATH=/tmp/bun.sock LOG_JSON="${LOG_JSON}" UPSTREAM_URL=/tmp/api.sock bun --preload /app/web/console-logger.js index.js &
-BUN_PID=$!
-
-# Wait for Bun socket
-timeout=50
-while [ ! -S /tmp/bun.sock ] && [ $timeout -gt 0 ]; do
-    sleep 0.1
-    timeout=$((timeout - 1))
-done
-
-if [ ! -S /tmp/bun.sock ]; then
-    echo "ERROR: Bun failed to create socket within 5s"
-    exit 1
-fi
-
-# Start Rust server
-# Note: [::] binds to both IPv4 and IPv6 on Linux
-/app/api \
-    --listen "[::]:${PORT:-8080}" \
-    --listen /tmp/api.sock \
-    --downstream /tmp/bun.sock &
-RUST_PID=$!
-
-# Wait for either process to exit
-wait -n "$BUN_PID" "$RUST_PID" 2>/dev/null || wait "$BUN_PID" "$RUST_PID"
-cleanup
-EOF
-RUN chmod +x /entrypoint.sh
+# Copy entrypoint script
+COPY web/entrypoint.ts ./web/
 
 # Environment configuration
 # RUST_LOG - optional, overrides LOG_LEVEL with full tracing filter syntax
@@ -142,7 +102,7 @@ ENV PORT=8080 \
 
 EXPOSE 8080
 
-HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
     CMD wget -q --spider http://localhost:${PORT}/api/health || exit 1
 
-ENTRYPOINT ["/entrypoint.sh"]
+ENTRYPOINT ["bun", "run", "/app/web/entrypoint.ts"]
diff --git a/src/db/mod.rs b/src/db/mod.rs
index 1519407..e3b14c0 100644
--- a/src/db/mod.rs
+++ b/src/db/mod.rs
@@ -8,14 +8,50 @@ pub use settings::*;
 pub use tags::*;
 
 use sqlx::{PgPool, postgres::PgPoolOptions, query};
+use std::time::Duration;
+use tokio::time::sleep;
 
-/// Database connection pool creation
+/// Database connection pool creation with retry logic
+///
+/// Production: Exponential backoff (1s -> 2s -> 4s... -> 30s cap), max 10 attempts
+/// Development: Fail fast (1 attempt)
 pub async fn create_pool(database_url: &str) -> Result {
-    PgPoolOptions::new()
+    let max_attempts: u32 = if cfg!(debug_assertions) { 1 } else { 10 };
+    let initial_delay = Duration::from_secs(1);
+    let max_delay = Duration::from_secs(30);
+
+    let pool_options = PgPoolOptions::new()
         .max_connections(20)
-        .acquire_timeout(std::time::Duration::from_secs(3))
-        .connect(database_url)
-        .await
+        .acquire_timeout(Duration::from_secs(3));
+
+    let mut last_error = None;
+    let mut delay = initial_delay;
+
+    for attempt in 1..=max_attempts {
+        match pool_options.clone().connect(database_url).await {
+            Ok(pool) => {
+                if attempt > 1 {
+                    tracing::info!(attempt, "Database connection established after retry");
+                }
+                return Ok(pool);
+            }
+            Err(e) => {
+                last_error = Some(e);
+                if attempt < max_attempts {
+                    tracing::warn!(
+                        attempt,
+                        max_attempts,
+                        delay_secs = delay.as_secs(),
+                        "Database connection failed, retrying..."
+                    );
+                    sleep(delay).await;
+                    delay = (delay * 2).min(max_delay);
+                }
+            }
+        }
+    }
+
+    Err(last_error.unwrap())
 }
 
 /// Health check query
diff --git a/web/entrypoint.ts b/web/entrypoint.ts
new file mode 100644
index 0000000..04d879c
--- /dev/null
+++ b/web/entrypoint.ts
@@ -0,0 +1,99 @@
+import { spawn, type Subprocess } from "bun";
+import { unlinkSync, existsSync } from "fs";
+
+const BUN_SOCKET = "/tmp/bun.sock";
+const API_SOCKET = "/tmp/api.sock";
+const PORT = process.env.PORT || "8080";
+const LOG_JSON = process.env.LOG_JSON || "true";
+
+// Children spawned so far; a signal can arrive before both exist
+const children: Subprocess[] = [];
+
+/** Remove both Unix sockets; a socket that is already gone is not an error. */
+function cleanup() {
+  for (const socket of [BUN_SOCKET, API_SOCKET]) {
+    try {
+      unlinkSync(socket);
+    } catch {}
+  }
+}
+
+/** Stop every spawned child, remove the sockets, then exit with `code`. */
+function shutdown(code: number): never {
+  for (const child of children) {
+    child.kill();
+  }
+  cleanup();
+  process.exit(code);
+}
+
+// On signals, stop the children too (the shell entrypoint killed both PIDs)
+process.on("SIGTERM", () => shutdown(0));
+process.on("SIGINT", () => shutdown(0));
+
+// A socket left behind by a previous run would pass the readiness check below
+cleanup();
+
+// Start Bun SSR
+console.log("Starting Bun SSR...");
+const bunProc = spawn({
+  cmd: ["bun", "--preload", "/app/web/console-logger.js", "index.js"],
+  cwd: "/app/web/build",
+  env: {
+    ...process.env,
+    SOCKET_PATH: BUN_SOCKET,
+    LOG_JSON,
+    UPSTREAM_URL: API_SOCKET,
+  },
+  stdout: "inherit",
+  stderr: "inherit",
+});
+children.push(bunProc);
+
+// Wait for Bun socket (5s timeout)
+const startTime = Date.now();
+while (!existsSync(BUN_SOCKET)) {
+  // Stop waiting as soon as Bun has already exited
+  if (bunProc.exitCode !== null) {
+    console.error(`ERROR: Bun exited with code ${bunProc.exitCode} before creating socket`);
+    shutdown(1);
+  }
+  if (Date.now() - startTime > 5000) {
+    console.error("ERROR: Bun failed to create socket within 5s");
+    shutdown(1);
+  }
+  await Bun.sleep(100);
+}
+
+// Start Rust server
+console.log("Starting Rust API...");
+const rustProc = spawn({
+  cmd: [
+    "/app/api",
+    "--listen",
+    `[::]:${PORT}`,
+    "--listen",
+    API_SOCKET,
+    "--downstream",
+    BUN_SOCKET,
+  ],
+  stdout: "inherit",
+  stderr: "inherit",
+});
+children.push(rustProc);
+
+// Monitor both processes - exit if either dies
+async function monitor(name: string, proc: Subprocess) {
+  const exitCode = await proc.exited;
+  console.error(`${name} exited with code ${exitCode}`);
+  return { name, exitCode };
+}
+
+const result = await Promise.race([
+  monitor("Bun", bunProc),
+  monitor("Rust", rustProc),
+]);
+
+// Stop the surviving process; any child exit counts as a failure, even code 0
+console.error(`${result.name} died, shutting down...`);
+shutdown(result.exitCode || 1);