feat: add ETag & Cache-Control headers, cached hexadecimal hashes via rapidhash

This commit is contained in:
2025-09-13 13:24:54 -05:00
parent 404a52e64c
commit e3b638a7d8
4 changed files with 164 additions and 27 deletions

10
Cargo.lock generated
View File

@@ -240,6 +240,7 @@ dependencies = [
"once_cell", "once_cell",
"poise", "poise",
"rand 0.9.2", "rand 0.9.2",
"rapidhash",
"regex", "regex",
"reqwest 0.12.23", "reqwest 0.12.23",
"reqwest-middleware", "reqwest-middleware",
@@ -2218,6 +2219,15 @@ dependencies = [
"getrandom 0.3.3", "getrandom 0.3.3",
] ]
[[package]]
name = "rapidhash"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "164772177ee16e3b074e6019c63cd92cb3cecf38e8c40d097675958b86dd8084"
dependencies = [
"rustversion",
]
[[package]] [[package]]
name = "raw-cpuid" name = "raw-cpuid"
version = "11.6.0" version = "11.6.0"

View File

@@ -48,6 +48,7 @@ tower-http = { version = "0.6.0", features = ["fs", "cors", "trace"] }
rust-embed = { version = "8.0", features = ["debug-embed", "include-exclude"] } rust-embed = { version = "8.0", features = ["debug-embed", "include-exclude"] }
mime_guess = "2.0" mime_guess = "2.0"
clap = { version = "4.5", features = ["derive"] } clap = { version = "4.5", features = ["derive"] }
rapidhash = "4.1.0"
[dev-dependencies] [dev-dependencies]

View File

@@ -5,7 +5,9 @@
use dashmap::DashMap; use dashmap::DashMap;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use rapidhash::v3::rapidhash_v3;
use rust_embed::RustEmbed; use rust_embed::RustEmbed;
use std::fmt;
/// Embedded web assets from the dist directory /// Embedded web assets from the dist directory
#[derive(RustEmbed)] #[derive(RustEmbed)]
@@ -14,24 +16,81 @@ use rust_embed::RustEmbed;
#[exclude = "*.map"] #[exclude = "*.map"]
pub struct WebAssets; pub struct WebAssets;
/// RapidHash hash type for asset content (u64 native output size).
///
/// The wrapped value is private; use [`AssetHash::to_hex`] or
/// [`AssetHash::quoted`] to obtain a textual form.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct AssetHash(u64);

impl AssetHash {
    /// Create a new `AssetHash` from a raw `u64` hash value.
    pub fn new(hash: u64) -> Self {
        Self(hash)
    }

    /// Get the hash as a hex string.
    ///
    /// The output is always 16 lowercase hex digits, zero-padded on the left.
    pub fn to_hex(&self) -> String {
        format!("{:016x}", self.0)
    }

    /// Get the hash as a quoted hex string (the hex digits wrapped in `"`).
    pub fn quoted(&self) -> String {
        // Format in one pass rather than building the hex string first and
        // then copying it into a second allocation.
        format!("\"{:016x}\"", self.0)
    }
}
/// Displays the hash using the same text that `to_hex` produces.
impl fmt::Display for AssetHash {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Width/fill flags on the outer formatter are intentionally not applied.
        let hex = self.to_hex();
        f.write_str(&hex)
    }
}
/// Metadata for an asset including MIME type and RapidHash hash
#[derive(Debug, Clone)]
pub struct AssetMetadata {
    /// MIME type guessed from the asset path, or `None` when no guess exists.
    pub mime_type: Option<String>,
    /// Hash of the asset content; its quoted hex form is what is sent as the ETag.
    pub hash: AssetHash,
}

impl AssetMetadata {
    /// Check whether an `If-None-Match` header value matches the asset hash.
    ///
    /// `etag` may be a single entity tag or a comma-separated list of them.
    /// Each candidate may carry a weak-validator prefix (`W/`) and surrounding
    /// double quotes; both are ignored for the comparison.
    ///
    /// Returns `true` when at least one candidate is exactly 16 hex digits
    /// whose value equals the stored hash, and `false` otherwise (including
    /// for empty or malformed input).
    pub fn etag_matches(&self, etag: &str) -> bool {
        etag.split(',').any(|candidate| {
            let candidate = candidate.trim();
            let candidate = candidate.strip_prefix("W/").unwrap_or(candidate);
            // Remove quotes if present (ETags are typically quoted)
            let candidate = candidate.trim_matches('"');

            // ETags generated from u64 hex are exactly 16 hex digits. The
            // digit check also rejects a leading `+`, which `from_str_radix`
            // would otherwise accept.
            candidate.len() == 16
                && candidate.bytes().all(|b| b.is_ascii_hexdigit())
                // Parse as base 16: the tag is hexadecimal, not decimal.
                && u64::from_str_radix(candidate, 16) == Ok(self.hash.0)
        })
    }
}
/// Global cache for asset metadata to avoid repeated calculations
static ASSET_CACHE: Lazy<DashMap<String, AssetMetadata>> = Lazy::new(DashMap::new);
/// Get cached asset metadata for a file path, caching on-demand
/// Returns AssetMetadata containing MIME type and RapidHash hash
pub fn get_asset_metadata_cached(path: &str, content: &[u8]) -> AssetMetadata {
// Check cache first
if let Some(cached) = ASSET_CACHE.get(path) {
return cached.value().clone();
}
// Calculate MIME type
let mime_type = mime_guess::from_path(path)
.first() .first()
.map(|mime| mime.to_string()); .map(|mime| mime.to_string());
// Cache the result // Calculate RapidHash hash (using u64 native output size)
MIME_CACHE.insert(path.to_string(), result.clone()); let hash_value = rapidhash_v3(content);
let hash = AssetHash::new(hash_value);
result let metadata = AssetMetadata { mime_type, hash };
// Only cache if we haven't exceeded the limit
if ASSET_CACHE.len() < 1000 {
ASSET_CACHE.insert(path.to_string(), metadata.clone());
}
metadata
} }

View File

@@ -2,8 +2,8 @@
use axum::{ use axum::{
Router, Router,
extract::State, extract::{Request, State},
http::{StatusCode, Uri}, http::{HeaderMap, HeaderValue, StatusCode, Uri},
response::{Html, IntoResponse, Json, Response}, response::{Html, IntoResponse, Json, Response},
routing::get, routing::get,
}; };
@@ -16,10 +16,46 @@ use tower_http::{
}; };
use tracing::info; use tracing::info;
use crate::web::assets::{WebAssets, get_mime_type_cached}; use crate::web::assets::{WebAssets, get_asset_metadata_cached};
use crate::banner::BannerApi; use crate::banner::BannerApi;
/// Set appropriate caching headers based on asset type.
///
/// Writes an `ETag` header from `etag` (expected to already be in its quoted
/// wire form) and a `Cache-Control` header chosen from `path`. Existing values
/// for either header on `response` are replaced.
fn set_caching_headers(response: &mut Response, path: &str, etag: &str) {
    let headers = response.headers_mut();

    // Set ETag. A value that is not a legal header value is skipped so the
    // response is still served, just without a validator.
    if let Ok(etag_value) = HeaderValue::from_str(etag) {
        headers.insert(header::ETAG, etag_value);
    }

    // Every policy is a compile-time constant of visible ASCII, so the
    // infallible constructor applies and no error branch is needed.
    headers.insert(
        header::CACHE_CONTROL,
        HeaderValue::from_static(cache_control_for(path)),
    );
}

/// Choose the `Cache-Control` policy for an asset path (no leading slash).
fn cache_control_for(path: &str) -> &'static str {
    if path.starts_with("assets/") {
        // Static assets with hashed filenames - long-term cache
        return "public, max-age=31536000, immutable";
    }
    if path == "index.html" {
        // HTML files - short-term cache
        return "public, max-age=300";
    }

    // Use the extension of the final path component. Splitting on the first
    // '.' would turn "app.min.js" into "min.js" and "dir.v2/file" into
    // "v2/file", sending both to the default policy.
    let extension = std::path::Path::new(path)
        .extension()
        .and_then(|ext| ext.to_str());

    match extension {
        // CSS/JS files - medium-term cache
        Some("css" | "js") => "public, max-age=86400",
        // Images - long-term cache
        Some("png" | "jpg" | "jpeg" | "gif" | "svg" | "ico") => "public, max-age=2592000",
        // Default for other files and for files without an extension
        _ => "public, max-age=3600",
    }
}
/// Shared application state for web server /// Shared application state for web server
#[derive(Clone)] #[derive(Clone)]
pub struct BannerState { pub struct BannerState {
@@ -51,7 +87,7 @@ pub fn create_router(state: BannerState) -> Router {
Router::new() Router::new()
.route("/", get(root)) .route("/", get(root))
.nest("/api", api_router) .nest("/api", api_router)
.fallback(handle_spa_fallback) .fallback(fallback)
.layer(TraceLayer::new_for_http()) .layer(TraceLayer::new_for_http())
} }
} }
@@ -73,28 +109,50 @@ async fn root() -> Response {
.into_response() .into_response()
} else { } else {
// Production mode: serve the SPA index.html // Production mode: serve the SPA index.html
handle_spa_fallback(Uri::from_static("/")).await handle_spa_fallback_with_headers(Uri::from_static("/"), HeaderMap::new()).await
} }
} }
/// Handler that extracts request information for caching.
///
/// Takes the URI and request headers out of the incoming request and
/// delegates to `handle_spa_fallback_with_headers`, which needs the headers
/// to evaluate conditional requests.
async fn fallback(request: Request) -> Response {
    // The request is owned here, so move the URI and headers out instead of
    // cloning them; the body is not needed and is dropped immediately.
    let (parts, _) = request.into_parts();
    handle_spa_fallback_with_headers(parts.uri, parts.headers).await
}
/// Handles SPA routing by serving index.html for non-API, non-asset requests /// Handles SPA routing by serving index.html for non-API, non-asset requests
async fn handle_spa_fallback(uri: Uri) -> Response { /// This version includes HTTP caching headers and ETag support
async fn handle_spa_fallback_with_headers(uri: Uri, request_headers: HeaderMap) -> Response {
let path = uri.path().trim_start_matches('/'); let path = uri.path().trim_start_matches('/');
if let Some(content) = WebAssets::get(path) { if let Some(content) = WebAssets::get(path) {
let data = content.data.to_vec(); // Get asset metadata (MIME type and hash) with caching
let metadata = get_asset_metadata_cached(path, &content.data);
// Check if client has a matching ETag (conditional request)
if let Some(etag) = request_headers.get(header::IF_NONE_MATCH)
&& metadata.etag_matches(etag.to_str().unwrap())
{
return StatusCode::NOT_MODIFIED.into_response();
}
// Use cached MIME type, only set Content-Type if we have a valid MIME type // Use cached MIME type, only set Content-Type if we have a valid MIME type
let mime_type = get_mime_type_cached(path); let mut response = (
return (
[( [(
header::CONTENT_TYPE, header::CONTENT_TYPE,
// For unknown types, set to application/octet-stream // For unknown types, set to application/octet-stream
mime_type.unwrap_or("application/octet-stream".to_string()), metadata
.mime_type
.unwrap_or("application/octet-stream".to_string()),
)], )],
data, content.data,
) )
.into_response(); .into_response();
// Set caching headers
set_caching_headers(&mut response, path, &metadata.hash.quoted());
return response;
} else { } else {
// Any assets that are not found should be treated as a 404, not falling back to the SPA index.html // Any assets that are not found should be treated as a 404, not falling back to the SPA index.html
if path.starts_with("assets/") { if path.starts_with("assets/") {
@@ -105,9 +163,18 @@ async fn handle_spa_fallback(uri: Uri) -> Response {
// Fall back to the SPA index.html // Fall back to the SPA index.html
match WebAssets::get("index.html") { match WebAssets::get("index.html") {
Some(content) => { Some(content) => {
let data = content.data.to_vec(); let metadata = get_asset_metadata_cached("index.html", &content.data);
let html_content = String::from_utf8_lossy(&data).to_string();
Html(html_content).into_response() // Check if client has a matching ETag for index.html
if let Some(etag) = request_headers.get(header::IF_NONE_MATCH)
&& metadata.etag_matches(etag.to_str().unwrap())
{
return StatusCode::NOT_MODIFIED.into_response();
}
let mut response = Html(content.data).into_response();
set_caching_headers(&mut response, "index.html", &metadata.hash.quoted());
response
} }
None => ( None => (
StatusCode::INTERNAL_SERVER_ERROR, StatusCode::INTERNAL_SERVER_ERROR,