From 5e0029c98b85a456390c5e29f6e95547ff711614 Mon Sep 17 00:00:00 2001 From: Xevion Date: Thu, 15 Jan 2026 15:50:55 -0600 Subject: [PATCH] feat: add multi-encoding compression with lazy caching and pre-compressed assets - Build-time: pre-compress static assets (.gz, .br, .zst) via compress-assets.ts - Runtime: serve pre-compressed embedded assets with Accept-Encoding negotiation - ISR cache: lazy per-encoding compression (compress on first request, cache result) - tower-http: enable runtime compression for API/SSR responses (respects Content-Encoding) --- Cargo.lock | 103 ++++++++++ Cargo.toml | 7 +- Dockerfile | 6 + Justfile | 6 + src/assets.rs | 78 +++++++- src/cache.rs | 51 ++++- src/cli/serve.rs | 24 +++ src/encoding.rs | 342 +++++++++++++++++++++++++++++++++ src/main.rs | 1 + src/proxy.rs | 41 +++- web/scripts/compress-assets.ts | 151 +++++++++++++++ web/src/lib/api.ts | 8 - 12 files changed, 799 insertions(+), 19 deletions(-) create mode 100644 src/encoding.rs create mode 100644 web/scripts/compress-assets.ts diff --git a/Cargo.lock b/Cargo.lock index 955b31a..ae4cff7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -35,6 +35,21 @@ dependencies = [ "equator", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "allocator-api2" version = "0.2.21" @@ -107,9 +122,11 @@ dependencies = [ "axum", "axum-extra", "blurhash", + "brotli", "clap", "dashmap", "dotenvy", + "flate2", "futures", "image", "include_dir", @@ -133,6 +150,7 @@ dependencies = [ "ulid", "urlencoding", "uuid", + "zstd", ] [[package]] @@ -179,6 +197,18 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "async-compression" +version = "0.4.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d10e4f991a553474232bc0a31799f6d24b034a84c0971d80d2e2f78b2e576e40" +dependencies = [ + "compression-codecs", + "compression-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "async-lock" version = "3.4.2" @@ -835,6 +865,27 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e79769241dcd44edf79a732545e8b5cec84c247ac060f5252cd51885d093a8fc" +[[package]] +name = "brotli" +version = "8.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "built" version = "0.8.0" @@ -982,6 +1033,26 @@ dependencies = [ "memchr", ] +[[package]] +name = "compression-codecs" +version = "0.4.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a" +dependencies = [ + "brotli", + "compression-core", + "flate2", + "memchr", + "zstd", + "zstd-safe", +] + +[[package]] +name = "compression-core" +version = 
"0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -4006,14 +4077,18 @@ version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ + "async-compression", "bitflags", "bytes", + "futures-core", "futures-util", "http 1.4.0", "http-body 1.0.1", "http-body-util", "iri-string", "pin-project-lite", + "tokio", + "tokio-util", "tower", "tower-layer", "tower-service", @@ -4856,6 +4931,34 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30e0d8dffbae3d840f64bda38e28391faef673a7b5a6017840f2a106c8145868" +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "zune-core" version = "0.4.12" diff --git a/Cargo.toml b/Cargo.toml index 43ffa82..624581b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,12 @@ time = { version = "0.3.44", features = ["formatting", "macros", "serde"] } tokio = { version = "1.49.0", features = ["full"] } tokio-util = { version = "0.7.18", features = ["io"] } tower = "0.5" -tower-http = { version = "0.6.8", features = ["trace", "cors", "limit"] } +tower-http = { version = "0.6.8", features = ["trace", "cors", "limit", "compression-full"] } + +# Compression for ISR cache +zstd = "0.13" +brotli = "8" +flate2 = "1.0" tracing = "0.1.44" tracing-subscriber = { version = "0.3.22", features = ["env-filter", "json"] } ulid = { version = "1", features = ["serde"] } diff --git a/Dockerfile b/Dockerfile index e834da8..58d86fb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,6 +34,9 @@ RUN cargo build --release FROM oven/bun:1 AS frontend WORKDIR /build +# Install system zstd for pre-compression +RUN apt-get update && apt-get install -y zstd && rm -rf /var/lib/apt/lists/* + # Install dependencies (cached until package.json/bun.lock change) COPY web/package.json web/bun.lock ./ RUN bun install --frozen-lockfile @@ -43,6 +46,9 @@ COPY web/ ./ ARG VITE_OG_R2_BASE_URL RUN bun run build +# Pre-compress static assets (gzip, brotli, zstd) +RUN bun run scripts/compress-assets.ts + # ========== Stage 5: Final Rust Build (with embedded assets) ========== FROM chef AS final-builder diff --git a/Justfile b/Justfile index 1b66685..d931136 100644 --- a/Justfile +++ b/Justfile @@ -111,6 +111,12 @@ build *flags: : ["bunx", "--bun", "vite", "build"]; run(buildCmd, "web"); + // Pre-compress in release mode only + if (!debug) { + console.log(`\x1b[1;36m→ Pre-compressing assets...\x1b[0m`); + run(["bun", "run", "scripts/compress-assets.ts"], "web"); + } + console.log(`\x1b[1;36m→ Building Rust (${profile})...\x1b[0m`); const cargoArgs = ["cargo", "build"]; if (!debug) cargoArgs.push("--release"); diff --git 
a/src/assets.rs b/src/assets.rs index f1b975c..16fdd2b 100644 --- a/src/assets.rs +++ b/src/assets.rs @@ -1,9 +1,11 @@ use axum::{ - http::{StatusCode, Uri, header}, + http::{HeaderMap, StatusCode, Uri, header}, response::{IntoResponse, Response}, }; use include_dir::{Dir, include_dir}; +use crate::encoding; + static CLIENT_ASSETS: Dir<'static> = include_dir!("$CARGO_MANIFEST_DIR/web/build/client"); static ERROR_PAGES: Dir<'static> = include_dir!("$CARGO_MANIFEST_DIR/web/build/prerendered/errors"); static PRERENDERED_PAGES: Dir<'static> = include_dir!("$CARGO_MANIFEST_DIR/web/build/prerendered"); @@ -48,6 +50,80 @@ pub fn try_serve_embedded_asset(path: &str) -> Option<Response> { }) } +/// Serve an embedded asset with content encoding negotiation +/// +/// Attempts to serve pre-compressed variants (.br, .gz, .zst) based on +/// the Accept-Encoding header. Falls back to uncompressed if no suitable +/// compressed variant is found. +/// +/// Pre-compressed assets are generated at build time by scripts/compress-assets.ts +/// and embedded alongside the original files. +/// +/// # Arguments +/// * `path` - Request path (e.g., "/_app/immutable/chunks/foo.js") +/// * `headers` - Request headers (for Accept-Encoding negotiation) +/// +/// # Returns +/// * `Some(Response)` - Response with appropriate Content-Encoding header +/// * `None` - If neither compressed nor uncompressed asset exists +pub fn try_serve_embedded_asset_with_encoding(path: &str, headers: &HeaderMap) -> Option<Response> { + let asset_path = path.strip_prefix('/').unwrap_or(path); + + // Parse accepted encodings in priority order + let accepted_encodings = encoding::parse_accepted_encodings(headers); + + // Try each encoding in order of client preference + for encoding in accepted_encodings { + // Skip identity - we'll use it as final fallback + if encoding == encoding::ContentEncoding::Identity { + continue; + } + + // Build path to pre-compressed variant + let compressed_path = format!("{}{}", asset_path, encoding.extension()); + + // Check if pre-compressed variant exists + if let Some(file) = CLIENT_ASSETS.get_file(&compressed_path) { + // Get MIME type from ORIGINAL path (not .br/.gz/.zst extension) + let mime_type = mime_guess::from_path(asset_path) + .first_or_octet_stream() + .as_ref() + .to_string(); + + let mut response_headers = axum::http::HeaderMap::new(); + response_headers.insert( + header::CONTENT_TYPE, + mime_type.parse().unwrap_or_else(|_| { + header::HeaderValue::from_static("application/octet-stream") + }), + ); + + // Set Content-Encoding header + if let Some(encoding_value) = encoding.header_value() { + response_headers.insert(header::CONTENT_ENCODING, encoding_value); + } + + // Set cache headers (same as uncompressed) + if path.contains("/immutable/") { + response_headers.insert( + header::CACHE_CONTROL, + header::HeaderValue::from_static("public, max-age=31536000, immutable"), + ); + } else { + response_headers.insert( + header::CACHE_CONTROL, + header::HeaderValue::from_static("public, max-age=3600"), + ); + } + + return Some((StatusCode::OK, response_headers, file.contents()).into_response()); + } + } + + // No compressed variant found, fall back to uncompressed + try_serve_embedded_asset(path) +} + fn serve_asset_by_path(path: &str) -> Response { if let Some(response) = try_serve_embedded_asset(path) { response diff --git a/src/cache.rs b/src/cache.rs index f012ee4..ccb27cd 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -4,22 +4,31 @@ //! - TTL-based expiration //! - Stale-while-revalidate pattern //!
- Singleflight (via moka's built-in coalescing) +//! - Multi-encoding compressed storage (lazy) //! - On-demand invalidation use axum::http::{HeaderMap, StatusCode}; use dashmap::DashSet; use moka::future::Cache; use std::{ + collections::HashMap, sync::Arc, time::{Duration, Instant}, }; -/// Cached response data +use crate::encoding::{ + COMPRESSION_MIN_SIZE, ContentEncoding, compress_brotli, compress_gzip, compress_zstd, +}; + +/// Cached response data with lazy compressed variants #[derive(Clone)] pub struct CachedResponse { pub status: StatusCode, pub headers: HeaderMap, + /// Original uncompressed body pub body: axum::body::Bytes, + /// Compressed variants (lazily populated on first request per encoding) + compressed: Arc<parking_lot::RwLock<HashMap<ContentEncoding, axum::body::Bytes>>>, pub cached_at: Instant, } @@ -29,10 +38,50 @@ impl CachedResponse { status, headers, body, + compressed: Arc::new(parking_lot::RwLock::new(HashMap::new())), cached_at: Instant::now(), } } + /// Get body for a specific encoding, compressing on-demand if needed + /// + /// Returns (body_bytes, actual_encoding). The actual encoding may differ from + /// the requested one if the body is too small or compression doesn't help. + pub fn get_body(&self, encoding: ContentEncoding) -> (axum::body::Bytes, ContentEncoding) { + // Identity encoding or small body - return uncompressed + if encoding == ContentEncoding::Identity || self.body.len() < COMPRESSION_MIN_SIZE { + return (self.body.clone(), ContentEncoding::Identity); + } + + // Check if we already have this encoding cached + { + let cache = self.compressed.read(); + if let Some(compressed) = cache.get(&encoding) { + return (compressed.clone(), encoding); + } + } + + // Compress on-demand + let compressed_bytes = match encoding { + ContentEncoding::Zstd => compress_zstd(&self.body), + ContentEncoding::Brotli => compress_brotli(&self.body), + ContentEncoding::Gzip => compress_gzip(&self.body), + ContentEncoding::Identity => unreachable!(), + }; + + // Only cache if compression actually helped + if let Some(compressed) = compressed_bytes + && compressed.len() < self.body.len() + { + let bytes = axum::body::Bytes::from(compressed); + self.compressed.write().insert(encoding, bytes.clone()); + return (bytes, encoding); + } + + // Compression didn't help or failed, return uncompressed + (self.body.clone(), ContentEncoding::Identity) + } + /// Check if this response is still fresh (within fresh_duration) pub fn is_fresh(&self, fresh_duration: Duration) -> bool { self.cached_at.elapsed() < fresh_duration diff --git a/src/cli/serve.rs b/src/cli/serve.rs index b89d567..e1cc100 100644 --- a/src/cli/serve.rs +++ b/src/cli/serve.rs @@ -3,10 +3,13 @@ use std::collections::HashSet; use std::net::SocketAddr; use std::sync::Arc; use std::time::Duration; +use tower_http::compression::predicate::{NotForContentType, Predicate, SizeAbove}; +use tower_http::compression::{CompressionLayer, CompressionLevel}; use tower_http::cors::CorsLayer; use crate::cache::{IsrCache, IsrCacheConfig}; use crate::config::ListenAddr; +use crate::encoding::COMPRESSION_MIN_SIZE; use crate::github; use crate::icon_cache::IconCache; use crate::middleware::RequestIdLayer; @@ -174,7 +177,28 @@ pub async fn run( router: axum::Router<Arc<AppState>>, trust_request_id: Option, ) -> axum::Router<Arc<AppState>> { + // Build compression predicate: skip small responses and already-compressed types + // NOTE: MIN_SIZE must match COMPRESSION_MIN_SIZE in encoding.rs and compress-assets.ts + let compression_predicate = SizeAbove::new(COMPRESSION_MIN_SIZE as u16) + .and(NotForContentType::IMAGES) +
.and(NotForContentType::new("video/")) + .and(NotForContentType::new("audio/")) + .and(NotForContentType::new("font/woff")) + .and(NotForContentType::new("application/octet-stream")); + + // Compression layer with all algorithms at fastest levels + // This handles runtime compression for all responses (API, SSR pages, etc.) + // ISR cached responses set Content-Encoding headers, which tower-http + // automatically detects and skips re-compression (no double compression) + let compression_layer = CompressionLayer::new() + .zstd(true) + .br(true) + .gzip(true) + .quality(CompressionLevel::Fastest) + .compress_when(compression_predicate); + router + .layer(compression_layer) .layer(RequestIdLayer::new(trust_request_id)) .layer(CorsLayer::permissive()) // 50 MiB limit for media uploads diff --git a/src/encoding.rs b/src/encoding.rs new file mode 100644 index 0000000..932079a --- /dev/null +++ b/src/encoding.rs @@ -0,0 +1,342 @@ +//! Content encoding negotiation and compression utilities +//! +//! Handles Accept-Encoding header parsing with quality values +//! and provides compression helpers for ISR cache. + +use axum::http::{HeaderMap, HeaderValue, header}; +use std::io::Write; + +/// Minimum size threshold for compression (bytes) +/// +/// NOTE: This value must match MIN_SIZE in web/scripts/compress-assets.ts +/// to ensure runtime and build-time compression use the same threshold. +pub const COMPRESSION_MIN_SIZE: usize = 512; + +/// Supported encodings in priority order (best to worst) +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum ContentEncoding { + Zstd, + Brotli, + Gzip, + Identity, +} + +impl ContentEncoding { + /// File extension suffix for this encoding + #[inline] + pub fn extension(&self) -> &'static str { + match self { + Self::Zstd => ".zst", + Self::Brotli => ".br", + Self::Gzip => ".gz", + Self::Identity => "", + } + } + + /// Content-Encoding header value + #[inline] + pub fn header_value(&self) -> Option { + match self { + Self::Zstd => Some(HeaderValue::from_static("zstd")), + Self::Brotli => Some(HeaderValue::from_static("br")), + Self::Gzip => Some(HeaderValue::from_static("gzip")), + Self::Identity => None, + } + } + + /// Default priority (higher = better) + #[inline] + fn default_priority(&self) -> u8 { + match self { + Self::Zstd => 4, + Self::Brotli => 3, + Self::Gzip => 2, + Self::Identity => 1, + } + } +} + +/// Parse Accept-Encoding header and return all supported encodings +/// +/// Returns encodings in priority order (best first) with quality > 0. +/// Supports quality values and wildcard (*). 
+#[inline] +pub fn parse_accepted_encodings(headers: &HeaderMap) -> Vec<ContentEncoding> { + let Some(accept) = headers + .get(header::ACCEPT_ENCODING) + .and_then(|v| v.to_str().ok()) + else { + return vec![ContentEncoding::Identity]; + }; + + let mut encodings: Vec<(ContentEncoding, f32)> = Vec::new(); + + for part in accept.split(',') { + let part = part.trim(); + if part.is_empty() { + continue; + } + + // Parse quality value, handling additional params (e.g., "br;q=0.8;level=5") + let (encoding_str, quality) = if let Some((enc, params)) = part.split_once(';') { + let q = params + .split(';') + .find_map(|p| p.trim().strip_prefix("q=")) + .and_then(|q| q.parse::<f32>().ok()) + .unwrap_or(1.0); + (enc.trim(), q) + } else { + (part, 1.0) + }; + + // Skip disabled encodings + if quality == 0.0 { + continue; + } + + let encoding = match encoding_str.to_lowercase().as_str() { + "zstd" => ContentEncoding::Zstd, + "br" | "brotli" => ContentEncoding::Brotli, + "gzip" | "x-gzip" => ContentEncoding::Gzip, + "*" => ContentEncoding::Gzip, // Wildcard defaults to gzip + "identity" => ContentEncoding::Identity, + _ => continue, + }; + + encodings.push((encoding, quality)); + } + + // Sort by quality (desc), then by default priority (desc) + encodings.sort_by(|a, b| { + b.1.partial_cmp(&a.1) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| b.0.default_priority().cmp(&a.0.default_priority())) + }); + + if encodings.is_empty() { + vec![ContentEncoding::Identity] + } else { + encodings.into_iter().map(|(e, _)| e).collect() + } +} + +/// Parse Accept-Encoding header and return best supported encoding +/// +/// Supports quality values: `Accept-Encoding: gzip;q=0.8, br;q=1.0, zstd` +/// Priority when equal quality: zstd > brotli > gzip > identity +#[inline] +pub fn negotiate_encoding(headers: &HeaderMap) -> ContentEncoding { + parse_accepted_encodings(headers) + .into_iter() + .next() + .unwrap_or(ContentEncoding::Identity) +} + +/// Check if content type should be compressed +#[inline] +#[allow(dead_code)] +pub fn is_compressible_content_type(content_type: &str) -> bool { + let ct = content_type.to_lowercase(); + + // Text types + if ct.starts_with("text/") { + return true; + } + + // JSON, XML, SVG + if ct.contains("json") || ct.contains("xml") || ct.contains("svg") { + return true; + } + + // JavaScript + if ct.contains("javascript") || ct.contains("ecmascript") { + return true; + } + + // Font formats (woff/woff2 are already compressed) + if ct.contains("font") && !ct.contains("woff") { + return true; + } + + false +} + +/// Compress data with zstd at fast level (level 3) +pub fn compress_zstd(data: &[u8]) -> Option<Vec<u8>> { + match zstd::encode_all(std::io::Cursor::new(data), 3) { + Ok(compressed) => Some(compressed), + Err(e) => { + tracing::warn!(error = %e, size = data.len(), "zstd compression failed"); + None + } + } +} + +/// Compress data with brotli at fast level (level 4) +pub fn compress_brotli(data: &[u8]) -> Option<Vec<u8>> { + let mut output = Vec::new(); + let mut writer = brotli::CompressorWriter::new(&mut output, 4096, 4, 22); + if let Err(e) = writer.write_all(data) { + tracing::warn!(error = %e, size = data.len(), "brotli compression failed"); + return None; + } + drop(writer); + Some(output) +} + +/// Compress data with gzip at fast level (level 1) +pub fn compress_gzip(data: &[u8]) -> Option<Vec<u8>> { + use flate2::Compression; + use flate2::write::GzEncoder; + + let mut encoder = GzEncoder::new(Vec::new(), Compression::fast()); + if let Err(e) = encoder.write_all(data) { + tracing::warn!(error = %e, size =
data.len(), "gzip write failed"); + return None; + } + match encoder.finish() { + Ok(compressed) => Some(compressed), + Err(e) => { + tracing::warn!(error = %e, size = data.len(), "gzip finish failed"); + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_accepted_encodings() { + let mut headers = HeaderMap::new(); + headers.insert(header::ACCEPT_ENCODING, "gzip, br, zstd".parse().unwrap()); + let encodings = parse_accepted_encodings(&headers); + assert_eq!(encodings[0], ContentEncoding::Zstd); + assert_eq!(encodings[1], ContentEncoding::Brotli); + assert_eq!(encodings[2], ContentEncoding::Gzip); + } + + #[test] + fn test_parse_accepted_encodings_with_quality() { + let mut headers = HeaderMap::new(); + headers.insert( + header::ACCEPT_ENCODING, + "gzip;q=1.0, br;q=0.5, zstd;q=0.8".parse().unwrap(), + ); + let encodings = parse_accepted_encodings(&headers); + assert_eq!(encodings[0], ContentEncoding::Gzip); + assert_eq!(encodings[1], ContentEncoding::Zstd); + assert_eq!(encodings[2], ContentEncoding::Brotli); + } + + #[test] + fn test_negotiate_simple() { + let mut headers = HeaderMap::new(); + headers.insert(header::ACCEPT_ENCODING, "gzip, br".parse().unwrap()); + assert_eq!(negotiate_encoding(&headers), ContentEncoding::Brotli); + } + + #[test] + fn test_negotiate_with_quality() { + let mut headers = HeaderMap::new(); + headers.insert( + header::ACCEPT_ENCODING, + "gzip;q=1.0, br;q=0.5".parse().unwrap(), + ); + assert_eq!(negotiate_encoding(&headers), ContentEncoding::Gzip); + } + + #[test] + fn test_negotiate_zstd_priority() { + let mut headers = HeaderMap::new(); + headers.insert(header::ACCEPT_ENCODING, "gzip, br, zstd".parse().unwrap()); + assert_eq!(negotiate_encoding(&headers), ContentEncoding::Zstd); + } + + #[test] + fn test_negotiate_no_header() { + let headers = HeaderMap::new(); + assert_eq!(negotiate_encoding(&headers), ContentEncoding::Identity); + } + + #[test] + fn test_negotiate_disabled_encoding() { + let mut headers = HeaderMap::new(); + headers.insert( + header::ACCEPT_ENCODING, + "zstd;q=0, br, gzip".parse().unwrap(), + ); + // zstd is disabled (q=0), so should pick brotli + assert_eq!(negotiate_encoding(&headers), ContentEncoding::Brotli); + } + + #[test] + fn test_negotiate_real_browser() { + // Chrome's actual header + let mut headers = HeaderMap::new(); + headers.insert( + header::ACCEPT_ENCODING, + "gzip, deflate, br, zstd".parse().unwrap(), + ); + assert_eq!(negotiate_encoding(&headers), ContentEncoding::Zstd); + } + + #[test] + fn test_compressible_content_types() { + assert!(is_compressible_content_type("text/html")); + assert!(is_compressible_content_type("text/css")); + assert!(is_compressible_content_type("application/json")); + assert!(is_compressible_content_type("application/javascript")); + assert!(is_compressible_content_type("image/svg+xml")); + assert!(is_compressible_content_type("text/xml")); + + // Not compressible + assert!(!is_compressible_content_type("image/png")); + assert!(!is_compressible_content_type("image/jpeg")); + assert!(!is_compressible_content_type("video/mp4")); + assert!(!is_compressible_content_type("font/woff2")); + assert!(!is_compressible_content_type("application/octet-stream")); + } + + #[test] + fn test_compression_functions() { + let data = b"Hello, World! 
This is some test data that should be compressed."; + + // All compression functions should work + let zstd = compress_zstd(data).unwrap(); + let brotli = compress_brotli(data).unwrap(); + let gzip = compress_gzip(data).unwrap(); + + // Compressed should generally be smaller (for reasonable input) + // Note: very small inputs might not compress well + assert!(!zstd.is_empty()); + assert!(!brotli.is_empty()); + assert!(!gzip.is_empty()); + } + + #[test] + fn test_extension() { + assert_eq!(ContentEncoding::Zstd.extension(), ".zst"); + assert_eq!(ContentEncoding::Brotli.extension(), ".br"); + assert_eq!(ContentEncoding::Gzip.extension(), ".gz"); + assert_eq!(ContentEncoding::Identity.extension(), ""); + } + + #[test] + fn test_header_value() { + assert_eq!( + ContentEncoding::Zstd.header_value().unwrap(), + HeaderValue::from_static("zstd") + ); + assert_eq!( + ContentEncoding::Brotli.header_value().unwrap(), + HeaderValue::from_static("br") + ); + assert_eq!( + ContentEncoding::Gzip.header_value().unwrap(), + HeaderValue::from_static("gzip") + ); + assert!(ContentEncoding::Identity.header_value().is_none()); + } +} diff --git a/src/main.rs b/src/main.rs index 42ac859..31c42e6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,6 +7,7 @@ mod cache; mod cli; mod config; mod db; +mod encoding; mod formatter; mod github; mod handlers; diff --git a/src/proxy.rs b/src/proxy.rs index fa70f60..806065f 100644 --- a/src/proxy.rs +++ b/src/proxy.rs @@ -1,6 +1,6 @@ use axum::{ extract::{ConnectInfo, Request, State}, - http::{HeaderMap, StatusCode}, + http::{HeaderMap, HeaderValue, StatusCode, header}, response::{IntoResponse, Response}, }; use std::{net::SocketAddr, sync::Arc, time::Duration}; @@ -9,6 +9,7 @@ use crate::{ assets, cache::{self, CachedResponse}, db, + encoding::negotiate_encoding, state::{AppState, ProxyError}, tarpit::{self, TarpitState}, utils, @@ -20,6 +21,7 @@ pub async fn isr_handler(State(state): State<Arc<AppState>>, req: Request) -> Re let uri = req.uri(); let path = uri.path(); let query = uri.query(); + let request_headers = req.headers().clone(); // Redirect trailing slashes to non-trailing (except root) if path.len() > 1 && path.ends_with('/') { @@ -85,7 +87,7 @@ // Check if this is a static asset that exists in embedded CLIENT_ASSETS if utils::is_static_asset(path) - && let Some(response) = assets::try_serve_embedded_asset(path) + && let Some(response) = assets::try_serve_embedded_asset_with_encoding(path, req.headers()) { return response; } @@ -152,7 +154,7 @@ let age_ms = cached.age().as_millis() as u64; tracing::debug!(cache = "hit", age_ms, "ISR cache hit (fresh)"); - return serve_cached_response(&cached, is_head); + return serve_cached_response(&cached, &request_headers, is_head); } else if cached.is_stale_but_usable(fresh_duration, stale_duration) { // Stale cache hit - serve immediately and refresh in background let age_ms = cached.age().as_millis() as u64; @@ -167,7 +169,7 @@ }); } - return serve_cached_response(&cached, is_head); + return serve_cached_response(&cached, &request_headers, is_head); } // Cache entry is too old - fall through to fetch } @@ -232,12 +234,35 @@ } } -/// Serve a cached response -fn serve_cached_response(cached: &CachedResponse, is_head: bool) -> Response { +/// Serve a cached response with
content encoding negotiation +fn serve_cached_response( + cached: &CachedResponse, + request_headers: &HeaderMap, + is_head: bool, +) -> Response { + // Negotiate encoding based on Accept-Encoding + let desired_encoding = negotiate_encoding(request_headers); + let (body, actual_encoding) = cached.get_body(desired_encoding); + + let mut headers = cached.headers.clone(); + + // Add Content-Encoding header if compressed + if let Some(encoding_value) = actual_encoding.header_value() { + headers.insert(header::CONTENT_ENCODING, encoding_value); + } + + // Add Vary header for caching + headers.insert(header::VARY, HeaderValue::from_static("Accept-Encoding")); + + // Update Content-Length for compressed body + if let Ok(len) = HeaderValue::from_str(&body.len().to_string()) { + headers.insert(header::CONTENT_LENGTH, len); + } + if is_head { - (cached.status, cached.headers.clone()).into_response() + (cached.status, headers).into_response() } else { - (cached.status, cached.headers.clone(), cached.body.clone()).into_response() + (cached.status, headers, body).into_response() } } diff --git a/web/scripts/compress-assets.ts b/web/scripts/compress-assets.ts new file mode 100644 index 0000000..4629772 --- /dev/null +++ b/web/scripts/compress-assets.ts @@ -0,0 +1,151 @@ +#!/usr/bin/env bun +/** + * Pre-compress static assets with maximum compression levels + * Run after `bun run build` + * + * Generates .gz, .br, .zst variants for compressible files + */ +import { readdir, stat, readFile, writeFile } from "fs/promises"; +import { join, extname } from "path"; +import { gzipSync, brotliCompressSync, constants } from "zlib"; +import { $ } from "bun"; + +// NOTE: Must match COMPRESSION_MIN_SIZE in src/encoding.rs +const MIN_SIZE = 512; +const COMPRESSIBLE_EXTENSIONS = new Set([ + ".js", + ".css", + ".html", + ".json", + ".svg", + ".txt", + ".xml", + ".map", +]); + +// Check if zstd is available +let hasZstd = false; +try { + await $`which zstd`.quiet(); + hasZstd = true; +} catch { + console.warn("Warning: zstd not found, skipping .zst generation"); +} + +async function* walkDir(dir: string): AsyncGenerator<string> { + try { + const entries = await readdir(dir, { withFileTypes: true }); + for (const entry of entries) { + const path = join(dir, entry.name); + if (entry.isDirectory()) { + yield* walkDir(path); + } else if (entry.isFile()) { + yield path; + } + } + } catch { + // Directory doesn't exist, skip + } +} + +// Returns true if compressed variants were (re)generated for this file +async function compressFile(path: string): Promise<boolean> { + const ext = extname(path); + + // Skip if not compressible or already compressed + if (!COMPRESSIBLE_EXTENSIONS.has(ext)) return false; + if (path.endsWith(".br") || path.endsWith(".gz") || path.endsWith(".zst")) + return false; + + const stats = await stat(path); + if (stats.size < MIN_SIZE) return false; + + // Skip if all compressed variants already exist + const variantsExist = await Promise.all([ + stat(`${path}.br`).then( + () => true, + () => false, + ), + stat(`${path}.gz`).then( + () => true, + () => false, + ), + hasZstd + ?
stat(`${path}.zst`).then( + () => true, + () => false, + ) + : Promise.resolve(false), + ]); + + // Require the .zst variant only when zstd is available; otherwise .br/.gz alone are complete + const [brExists, gzExists, zstExists] = variantsExist; + if (brExists && gzExists && (!hasZstd || zstExists)) { + return false; // All producible variants already exist + } + + const content = await readFile(path); + const originalSize = content.length; + + // Brotli (maximum quality = 11) + const brContent = brotliCompressSync(content, { + params: { + [constants.BROTLI_PARAM_QUALITY]: 11, + }, + }); + await writeFile(`${path}.br`, brContent); + + // Gzip (level 9) + const gzContent = gzipSync(content, { level: 9 }); + await writeFile(`${path}.gz`, gzContent); + + // Zstd (level 19 - maximum) + if (hasZstd) { + try { + await $`zstd -19 -q -f -o ${path}.zst ${path}`.quiet(); + } catch (e) { + console.warn(`Warning: Failed to compress ${path} with zstd: ${e}`); + } + } + + const brRatio = ((brContent.length / originalSize) * 100).toFixed(1); + const gzRatio = ((gzContent.length / originalSize) * 100).toFixed(1); + console.log( + `Compressed: ${path} (br: ${brRatio}%, gz: ${gzRatio}%, ${originalSize} bytes)`, + ); + return true; +} + +async function main() { + console.log("Pre-compressing static assets..."); + + const dirs = ["build/client", "build/prerendered"]; + let scannedFiles = 0; + let compressedFiles = 0; + + for (const dir of dirs) { + for await (const file of walkDir(dir)) { + scannedFiles++; + + // compressFile() re-checks eligibility and reports whether it wrote variants + if (await compressFile(file)) { + compressedFiles++; + } + } + } + + console.log( + `Done! Scanned ${scannedFiles} files, compressed ${compressedFiles} files.`, + ); +} + +main().catch((e) => { + console.error("Compression failed:", e); + process.exit(1); +}); diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts index ee558ae..cfd79aa 100644 --- a/web/src/lib/api.ts +++ b/web/src/lib/api.ts @@ -13,10 +13,6 @@ import type { } from "./admin-types"; import { ApiError } from "./errors"; -// ============================================================================ -// CLIENT-SIDE API FUNCTIONS -// ============================================================================ - // Client-side fetch wrapper for browser requests async function clientApiFetch<T>(path: string, init?: RequestInit): Promise<T> { const response = await fetch(path, { @@ -31,10 +27,6 @@ async function clientApiFetch<T>(path: string, init?: RequestInit): Promise<T> { return response.json(); } -// ============================================================================ -// ADMIN API FUNCTIONS -// ============================================================================ - // Admin Projects API export async function getAdminProjects(): Promise { return clientApiFetch("/api/projects");
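
A quick usage sketch of the lazy per-encoding cache path, assuming the `CachedResponse::new(status, headers, body)` constructor implied by the cache.rs hunk above, written as it would appear in a crate-internal unit test; the 4 KiB body of repeated bytes is arbitrary test data:

    use axum::http::{HeaderMap, StatusCode};
    use crate::{cache::CachedResponse, encoding::ContentEncoding};

    let cached = CachedResponse::new(
        StatusCode::OK,
        HeaderMap::new(),
        axum::body::Bytes::from(vec![b'a'; 4096]), // well above COMPRESSION_MIN_SIZE
    );

    // First request for an encoding compresses and stores the result...
    let (body, enc) = cached.get_body(ContentEncoding::Zstd);
    assert_eq!(enc, ContentEncoding::Zstd);
    assert!(body.len() < 4096); // highly repetitive input compresses well

    // ...subsequent requests for the same encoding reuse the cached bytes.
    let (again, _) = cached.get_body(ContentEncoding::Zstd);
    assert_eq!(body, again);

    // Identity (or any body under COMPRESSION_MIN_SIZE) returns the original.
    let (plain, enc) = cached.get_body(ContentEncoding::Identity);
    assert_eq!(enc, ContentEncoding::Identity);
    assert_eq!(plain.len(), 4096);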