From e3b638a7d831ec1524b41f75c6f3b821711c99c9 Mon Sep 17 00:00:00 2001
From: Xevion
Date: Sat, 13 Sep 2025 13:24:54 -0500
Subject: [PATCH] feat: add ETag & Cache-Control headers, cached hexadecimal
 hashes via rapidhash

---
Review notes (this section is ignored by `git am`; the hunks below differ from
the original commit only in the three added lines described here):
- AssetMetadata::etag_matches: parse the ETag with u64::from_str_radix(etag, 16).
  str::parse::<u64>() reads decimal, so the 16-digit hexadecimal ETags produced
  by AssetHash::to_hex practically never matched and 304 was never returned.
- handle_spa_fallback_with_headers: an If-None-Match value that is not visible
  ASCII is now treated as a non-match instead of panicking in to_str().unwrap().
- set_caching_headers: the extension is taken after the last '.' (rsplit_once),
  so names such as "app.min.js" are classified as "js" rather than "min.js".

 Cargo.lock        | 10 +++++
 Cargo.toml        |  1 +
 src/web/assets.rs | 85 +++++++++++++++++++++++++++++++++++-------
 src/web/routes.rs | 95 ++++++++++++++++++++++++++++++++++++++++-------
 4 files changed, 164 insertions(+), 27 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index ad84516..8634083 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -240,6 +240,7 @@ dependencies = [
  "once_cell",
  "poise",
  "rand 0.9.2",
+ "rapidhash",
  "regex",
  "reqwest 0.12.23",
  "reqwest-middleware",
@@ -2218,6 +2219,15 @@ dependencies = [
  "getrandom 0.3.3",
 ]
 
+[[package]]
+name = "rapidhash"
+version = "4.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "164772177ee16e3b074e6019c63cd92cb3cecf38e8c40d097675958b86dd8084"
+dependencies = [
+ "rustversion",
+]
+
 [[package]]
 name = "raw-cpuid"
 version = "11.6.0"
diff --git a/Cargo.toml b/Cargo.toml
index 5caa628..13accd4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -48,6 +48,7 @@ tower-http = { version = "0.6.0", features = ["fs", "cors", "trace"] }
 rust-embed = { version = "8.0", features = ["debug-embed", "include-exclude"] }
 mime_guess = "2.0"
 clap = { version = "4.5", features = ["derive"] }
+rapidhash = "4.1.0"
 
 [dev-dependencies]
 
diff --git a/src/web/assets.rs b/src/web/assets.rs
index bf31555..375f568 100644
--- a/src/web/assets.rs
+++ b/src/web/assets.rs
@@ -5,7 +5,9 @@
 
 use dashmap::DashMap;
 use once_cell::sync::Lazy;
+use rapidhash::v3::rapidhash_v3;
 use rust_embed::RustEmbed;
+use std::fmt;
 
 /// Embedded web assets from the dist directory
 #[derive(RustEmbed)]
@@ -14,24 +16,81 @@ use rust_embed::RustEmbed;
 #[exclude = "*.map"]
 pub struct WebAssets;
 
-/// Global cache for MIME types to avoid repeated mime_guess lookups
-static MIME_CACHE: Lazy<DashMap<String, Option<String>>> = Lazy::new(DashMap::new);
+/// RapidHash hash type for asset content (u64 native output size)
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct AssetHash(u64);
 
-/// Get cached MIME type for a file path, caching on-demand
-/// Returns None if the MIME type is text/plain or if no MIME type could be determined
-pub fn get_mime_type_cached(path: &str) -> Option<String> {
-    // Check cache first
-    if let Some(cached) = MIME_CACHE.get(path) {
-        return cached.value().as_ref().cloned();
+impl AssetHash {
+    /// Create a new AssetHash from u64 value
+    pub fn new(hash: u64) -> Self {
+        Self(hash)
     }
 
-    // Perform MIME guess and cache the result
-    let result = mime_guess::from_path(path)
+    /// Get the hash as a hex string
+    pub fn to_hex(&self) -> String {
+        format!("{:016x}", self.0)
+    }
+
+    /// Get the hash as a quoted hex string
+    pub fn quoted(&self) -> String {
+        format!("\"{}\"", self.to_hex())
+    }
+}
+
+impl fmt::Display for AssetHash {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.to_hex())
+    }
+}
+
+/// Metadata for an asset including MIME type and RapidHash hash
+#[derive(Debug, Clone)]
+pub struct AssetMetadata {
+    pub mime_type: Option<String>,
+    pub hash: AssetHash,
+}
+
+impl AssetMetadata {
+    /// Check if the etag matches the asset hash
+    pub fn etag_matches(&self, etag: &str) -> bool {
+        // Remove quotes if present (ETags are typically quoted)
+        let etag = etag.trim_matches('"');
+
+        // ETags generated from u64 hex should be 16 characters
+        etag.len() == 16
+            // Parse the hexadecimal, compare if it matches
+            && u64::from_str_radix(etag, 16)
+                .map(|parsed| parsed == self.hash.0)
+                .unwrap_or(false)
+    }
+}
+
+/// Global cache for asset metadata to avoid repeated calculations
+static ASSET_CACHE: Lazy<DashMap<String, AssetMetadata>> = Lazy::new(DashMap::new);
+
+/// Get cached asset metadata for a file path, caching on-demand
+/// Returns AssetMetadata containing MIME type and RapidHash hash
+pub fn get_asset_metadata_cached(path: &str, content: &[u8]) -> AssetMetadata {
+    // Check cache first
+    if let Some(cached) = ASSET_CACHE.get(path) {
+        return cached.value().clone();
+    }
+
+    // Calculate MIME type
+    let mime_type = mime_guess::from_path(path)
         .first()
         .map(|mime| mime.to_string());
 
-    // Cache the result
-    MIME_CACHE.insert(path.to_string(), result.clone());
+    // Calculate RapidHash hash (using u64 native output size)
+    let hash_value = rapidhash_v3(content);
+    let hash = AssetHash::new(hash_value);
 
-    result
+    let metadata = AssetMetadata { mime_type, hash };
+
+    // Only cache if we haven't exceeded the limit
+    if ASSET_CACHE.len() < 1000 {
+        ASSET_CACHE.insert(path.to_string(), metadata.clone());
+    }
+
+    metadata
 }
diff --git a/src/web/routes.rs b/src/web/routes.rs
index 32a09a3..2ef0620 100644
--- a/src/web/routes.rs
+++ b/src/web/routes.rs
@@ -2,8 +2,8 @@
 
 use axum::{
     Router,
-    extract::State,
-    http::{StatusCode, Uri},
+    extract::{Request, State},
+    http::{HeaderMap, HeaderValue, StatusCode, Uri},
     response::{Html, IntoResponse, Json, Response},
     routing::get,
 };
@@ -16,10 +16,46 @@ use tower_http::{
 };
 use tracing::info;
 
-use crate::web::assets::{WebAssets, get_mime_type_cached};
+use crate::web::assets::{WebAssets, get_asset_metadata_cached};
 
 use crate::banner::BannerApi;
 
+/// Set appropriate caching headers based on asset type
+fn set_caching_headers(response: &mut Response, path: &str, etag: &str) {
+    let headers = response.headers_mut();
+
+    // Set ETag
+    if let Ok(etag_value) = HeaderValue::from_str(etag) {
+        headers.insert(header::ETAG, etag_value);
+    }
+
+    // Set Cache-Control based on asset type
+    let cache_control = if path.starts_with("assets/") {
+        // Static assets with hashed filenames - long-term cache
+        "public, max-age=31536000, immutable"
+    } else if path == "index.html" {
+        // HTML files - short-term cache
+        "public, max-age=300"
+    } else {
+        match path.rsplit_once('.').map(|(_, extension)| extension) {
+            Some(ext) => match ext {
+                // CSS/JS files - medium-term cache
+                "css" | "js" => "public, max-age=86400",
+                // Images - long-term cache
+                "png" | "jpg" | "jpeg" | "gif" | "svg" | "ico" => "public, max-age=2592000",
+                // Default for other files
+                _ => "public, max-age=3600",
+            },
+            // Default for files without an extension
+            None => "public, max-age=3600",
+        }
+    };
+
+    if let Ok(cache_control_value) = HeaderValue::from_str(cache_control) {
+        headers.insert(header::CACHE_CONTROL, cache_control_value);
+    }
+}
+
 /// Shared application state for web server
 #[derive(Clone)]
 pub struct BannerState {
@@ -51,7 +87,7 @@ pub fn create_router(state: BannerState) -> Router {
         Router::new()
             .route("/", get(root))
             .nest("/api", api_router)
-            .fallback(handle_spa_fallback)
+            .fallback(fallback)
             .layer(TraceLayer::new_for_http())
     }
 }
@@ -73,28 +109,50 @@ async fn root() -> Response {
             .into_response()
     } else {
         // Production mode: serve the SPA index.html
-        handle_spa_fallback(Uri::from_static("/")).await
+        handle_spa_fallback_with_headers(Uri::from_static("/"), HeaderMap::new()).await
     }
 }
 
+/// Handler that extracts request information for caching
+async fn fallback(request: Request) -> Response {
+    let uri = request.uri().clone();
+    let headers = request.headers().clone();
+    handle_spa_fallback_with_headers(uri, headers).await
+}
+
 /// Handles SPA routing by serving index.html for non-API, non-asset requests
-async fn handle_spa_fallback(uri: Uri) -> Response {
+/// This version includes HTTP caching headers and ETag support
+async fn handle_spa_fallback_with_headers(uri: Uri, request_headers: HeaderMap) -> Response {
     let path = uri.path().trim_start_matches('/');
 
     if let Some(content) = WebAssets::get(path) {
-        let data = content.data.to_vec();
+        // Get asset metadata (MIME type and hash) with caching
+        let metadata = get_asset_metadata_cached(path, &content.data);
+
+        // Check if client has a matching ETag (conditional request)
+        if let Some(etag) = request_headers.get(header::IF_NONE_MATCH)
+            && etag.to_str().is_ok_and(|value| metadata.etag_matches(value))
+        {
+            return StatusCode::NOT_MODIFIED.into_response();
+        }
 
         // Use cached MIME type, only set Content-Type if we have a valid MIME type
-        let mime_type = get_mime_type_cached(path);
-        return (
+        let mut response = (
             [(
                 header::CONTENT_TYPE,
                 // For unknown types, set to application/octet-stream
-                mime_type.unwrap_or("application/octet-stream".to_string()),
+                metadata
+                    .mime_type
+                    .unwrap_or("application/octet-stream".to_string()),
             )],
-            data,
+            content.data,
         )
             .into_response();
+
+        // Set caching headers
+        set_caching_headers(&mut response, path, &metadata.hash.quoted());
+
+        return response;
     } else {
         // Any assets that are not found should be treated as a 404, not falling back to the SPA index.html
         if path.starts_with("assets/") {
@@ -105,9 +163,18 @@ async fn handle_spa_fallback(uri: Uri) -> Response {
     // Fall back to the SPA index.html
     match WebAssets::get("index.html") {
         Some(content) => {
-            let data = content.data.to_vec();
-            let html_content = String::from_utf8_lossy(&data).to_string();
-            Html(html_content).into_response()
+            let metadata = get_asset_metadata_cached("index.html", &content.data);
+
+            // Check if client has a matching ETag for index.html
+            if let Some(etag) = request_headers.get(header::IF_NONE_MATCH)
+                && etag.to_str().is_ok_and(|value| metadata.etag_matches(value))
+            {
+                return StatusCode::NOT_MODIFIED.into_response();
+            }
+
+            let mut response = Html(content.data).into_response();
+            set_caching_headers(&mut response, "index.html", &metadata.hash.quoted());
+            response
         }
         None => (
             StatusCode::INTERNAL_SERVER_ERROR,