feat: improve build script, error handling, logging

This commit is contained in:
2025-07-10 10:06:44 -05:00
parent 56777038a0
commit 9d248a7c23
2 changed files with 245 additions and 73 deletions

221
build.rs
View File

@@ -1,71 +1,182 @@
use lazy_static::lazy_static;
use regex::Regex;
use std::env;
use std::fmt;
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::path::Path;
use regex::Regex;
use lazy_static::lazy_static;
lazy_static! {
static ref FULL_PATTERN: Regex = Regex::new(r"([A-Z]+)\s\t.+\s\tUTC([+±]\d{2}(?::\d{2})?)").unwrap();
static ref OFFSET_PATTERN: Regex = Regex::new(r"([+±])(\d{2}(?::\d{2})?)").unwrap();
/// Error types for build script failures
///
/// Every fallible step of the build script reports one of these variants;
/// the `Display` impl prefixes the message with the failure category.
#[derive(Debug)]
enum BuildError {
// Failure from the filesystem or a reader/writer (opening, reading, creating, writing).
Io(std::io::Error),
// A regular expression did not match its input; the string carries the message.
Regex(String),
// A matched field could not be converted or was out of range; the string carries the message.
Parse(String),
// An environment variable lookup failed.
Env(env::VarError),
}
const HOUR: u32 = 3600;
impl fmt::Display for BuildError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
BuildError::Io(e) => write!(f, "IO error: {}", e),
BuildError::Regex(msg) => write!(f, "Regex error: {}", msg),
BuildError::Parse(msg) => write!(f, "Parse error: {}", msg),
BuildError::Env(e) => write!(f, "Environment error: {}", e),
}
}
}
fn parse_offset(raw_offset: &str) -> i32 {
let capture = OFFSET_PATTERN.captures(raw_offset).expect("RegEx failed to match offset");
println!("{}: {}", raw_offset, capture.get(1).expect("First group capture failed").as_str());
impl From<std::io::Error> for BuildError {
fn from(error: std::io::Error) -> Self {
BuildError::Io(error)
}
}
let is_west = capture.get(1).unwrap().as_str() == "";
let time = capture.get(2).expect("Second group capture failed").as_str();
let (hours, minutes) = if time.contains(':') {
let mut split = time.split(':');
let hours = split.next().unwrap().parse::<u32>().unwrap();
let minutes = split.next().unwrap().parse::<u32>().unwrap();
impl From<env::VarError> for BuildError {
fn from(error: env::VarError) -> Self {
BuildError::Env(error)
}
}
(hours, minutes)
} else {
// Minutes not specified, assume 0
(time.parse::<u32>().unwrap(), 0)
};
lazy_static! {
/// Regex to match timezone lines: "ABBR \t Description \t UTC±HH:MM"
///
/// Group 1 is the upper-case abbreviation, group 2 the signed offset.
/// The sign may be `+`, an ASCII hyphen-minus, the Unicode minus sign
/// (U+2212), or `±`. The minus sign is written as the escape `\u{2212}`
/// so it cannot be lost when the file is re-encoded.
static ref TIMEZONE_PATTERN: Regex =
    Regex::new(r"([A-Z]+)\s\t.+\s\tUTC([+\-\u{2212}±]\d{2}(?::\d{2})?)").unwrap();
let value = (hours * HOUR) + (minutes * 60);
return if is_west { value as i32 * -1 } else { value as i32 };
/// Regex to parse UTC offset format: "±HH:MM" or "±HH"
///
/// Group 1 is the sign, group 2 the two-digit hours, group 3 the optional
/// two-digit minutes. The sign may be `+`, an ASCII hyphen-minus, the
/// Unicode minus sign (U+2212), or `±`. The minus sign is written as the
/// escape `\u{2212}` so it cannot be lost when the file is re-encoded.
static ref OFFSET_PATTERN: Regex =
    Regex::new(r"([+\-\u{2212}±])(\d{2})(?::(\d{2}))?").unwrap();
}
/// Number of seconds in one hour; multiplies the hours field of an offset.
const SECONDS_PER_HOUR: i32 = 3600;
/// Number of seconds in one minute; multiplies the minutes field of an offset.
const SECONDS_PER_MINUTE: i32 = 60;
/// Parse a UTC offset string (e.g., "+05:30", "-08", "±00") into seconds from UTC.
///
/// The string is matched against `OFFSET_PATTERN`; the captured sign is then
/// interpreted as follows:
///
/// * `+` – east of UTC, positive result
/// * `-` (ASCII hyphen-minus) or U+2212 (Unicode minus sign) – west of UTC,
///   negative result
/// * `±` – treated as UTC itself, result `0` (hours/minutes are not inspected)
///
/// Minutes default to `0` when the `:MM` part is absent.
///
/// # Errors
/// * `BuildError::Regex` if `raw_offset` does not match `OFFSET_PATTERN`.
/// * `BuildError::Parse` if hours or minutes are not valid integers, if
///   hours exceed 23 or minutes exceed 59, or if the captured sign is not
///   one of the characters listed above.
fn parse_utc_offset(raw_offset: &str) -> Result<i32, BuildError> {
    let captures = OFFSET_PATTERN.captures(raw_offset).ok_or_else(|| {
        BuildError::Regex(format!("Failed to match offset pattern: {}", raw_offset))
    })?;

    let sign = captures
        .get(1)
        .expect("sign group is mandatory whenever OFFSET_PATTERN matches")
        .as_str();

    // Handle ± (variable offset) as UTC
    if sign == "±" {
        return Ok(0);
    }

    let hours_str = captures
        .get(2)
        .expect("hours group is mandatory whenever OFFSET_PATTERN matches")
        .as_str();
    // The minutes group is optional; a bare "+HH" means zero minutes.
    let minutes_str = captures.get(3).map(|m| m.as_str()).unwrap_or("0");

    let hours = hours_str
        .parse::<i32>()
        .map_err(|e| BuildError::Parse(format!("Invalid hours '{}': {}", hours_str, e)))?;
    let minutes = minutes_str
        .parse::<i32>()
        .map_err(|e| BuildError::Parse(format!("Invalid minutes '{}': {}", minutes_str, e)))?;

    // Validate ranges
    if hours > 23 {
        return Err(BuildError::Parse(format!("Hours out of range: {}", hours)));
    }
    if minutes > 59 {
        return Err(BuildError::Parse(format!(
            "Minutes out of range: {}",
            minutes
        )));
    }

    let total_seconds = (hours * SECONDS_PER_HOUR) + (minutes * SECONDS_PER_MINUTE);

    // Apply sign: a minus is west/negative, + is east/positive. The Unicode
    // minus is spelled as an escape so the comparison survives re-encoding.
    match sign {
        "+" => Ok(total_seconds),
        "-" | "\u{2212}" => Ok(-total_seconds),
        // Report a pattern/parser mismatch as a build error instead of panicking.
        other => Err(BuildError::Parse(format!(
            "Unsupported offset sign '{}' in '{}'",
            other, raw_offset
        ))),
    }
}
/// Extract `(abbreviation, offset_in_seconds)` from one line of the timezone table.
///
/// Returns `Ok(None)` for lines that carry no definition: lines that are
/// empty after trimming, and lines whose first non-blank character is `#`.
///
/// # Errors
/// * `BuildError::Regex` if a non-skipped line does not match `TIMEZONE_PATTERN`.
/// * Whatever `parse_utc_offset` returns for the captured offset text.
fn parse_timezone_line(line: &str) -> Result<Option<(String, i32)>, BuildError> {
    // Blank lines and comments are skipped rather than treated as errors.
    let trimmed = line.trim();
    if trimmed.is_empty() || trimmed.starts_with('#') {
        return Ok(None);
    }

    let fields = match TIMEZONE_PATTERN.captures(line) {
        Some(fields) => fields,
        None => {
            return Err(BuildError::Regex(format!(
                "Failed to match timezone pattern: {}",
                line
            )))
        }
    };

    // Group 1 is the abbreviation, group 2 the signed offset text.
    let name = String::from(fields.get(1).unwrap().as_str());
    let seconds = parse_utc_offset(fields.get(2).unwrap().as_str())?;

    Ok(Some((name, seconds)))
}
/// Generate the PHF map code for timezone abbreviations to UTC offsets.
///
/// Reads `./src/abbr_tz` line by line, feeds every line through
/// `parse_timezone_line`, and writes `timezone_map.rs` into `OUT_DIR`. The
/// generated file declares `TIMEZONE_OFFSETS: phf::Map<&'static str, i32>`
/// mapping each abbreviation to its offset in seconds.
///
/// One `cargo:warning` line is printed per processed entry, plus a summary
/// line at the end.
///
/// # Errors
/// * `BuildError::Env` if `OUT_DIR` is not set.
/// * `BuildError::Io` if the input cannot be opened or read, or the output
///   cannot be created, written, or flushed.
/// * Any error returned by `parse_timezone_line` for a malformed line.
fn generate_timezone_map() -> Result<(), BuildError> {
    let out_dir = env::var("OUT_DIR")?;
    let output_path = Path::new(&out_dir).join("timezone_map.rs");

    let tz_path = Path::new("./src/abbr_tz");
    let reader = BufReader::new(File::open(tz_path)?);
    let mut out_file = BufWriter::new(File::create(&output_path)?);

    let mut builder = phf_codegen::Map::<String>::new();
    let mut processed_count = 0;
    let mut skipped_count = 0;

    for line_result in reader.lines() {
        let line = line_result?;

        match parse_timezone_line(&line)? {
            Some((abbreviation, offset)) => {
                // Log before handing the key to the builder so no clone is needed.
                println!(
                    "cargo:warning=Processed timezone: {} -> {} seconds",
                    abbreviation, offset
                );
                builder.entry(abbreviation, &offset.to_string());
                processed_count += 1;
            }
            None => skipped_count += 1,
        }
    }

    // Generate the PHF map
    writeln!(
        &mut out_file,
        "/// Auto-generated timezone abbreviation to UTC offset (in seconds) mapping"
    )?;
    writeln!(
        &mut out_file,
        "/// Generated from {} timezone definitions ({} processed, {} skipped)",
        processed_count + skipped_count,
        processed_count,
        skipped_count
    )?;
    writeln!(
        &mut out_file,
        "pub static TIMEZONE_OFFSETS: phf::Map<&'static str, i32> = {};",
        builder.build()
    )?;

    // Dropping a BufWriter discards flush errors; flush explicitly so a
    // failed write fails the build instead of leaving a truncated file.
    out_file.flush()?;

    println!(
        "cargo:warning=Generated timezone map with {} entries",
        processed_count
    );

    Ok(())
}
fn main() {
let path = Path::new(&env::var("OUT_DIR").unwrap()).join("codegen.rs");
let raw_tz = BufReader::new(File::open("./src/abbr_tz").unwrap());
let mut file = BufWriter::new(File::create(&path).unwrap());
let mut builder: phf_codegen::Map<String> = phf_codegen::Map::new();
for line in raw_tz.lines() {
let line = line.unwrap();
if line.starts_with('#') {
continue;
}
let capture = FULL_PATTERN.captures(&line).expect("RegEx failed to match line");
let abbreviation = capture.get(1).unwrap().as_str();
let raw_offset = capture.get(2).unwrap().as_str();
let offset = if !raw_offset.starts_with('±') {
parse_offset(raw_offset)
} else {
0
};
builder.entry(String::from(abbreviation), &format!("\"{}\"", offset).to_string());
if let Err(e) = generate_timezone_map() {
panic!("Build script failed: {}", e);
}
write!(
&mut file,
"static TIMEZONES: phf::Map<&'static str, &'static str> = {}",
builder.build()
)
.unwrap();
write!(&mut file, ";\n").unwrap();
}
// Tell Cargo to re-run this build script if the timezone file changes
println!("cargo:rerun-if-changed=src/abbr_tz");
}

View File

@@ -1,31 +1,92 @@
use chrono::FixedOffset;
// Generated by build.rs, phf_codegen
include!(concat!(env!("OUT_DIR"), "/codegen.rs"));
// Generated by build.rs - timezone abbreviation to UTC offset mapping
include!(concat!(env!("OUT_DIR"), "/timezone_map.rs"));
/*
Parse an abbreviation of a timezone into a UTC offset.
Note: This is not standardized at all and is simply built on a reference of Time Zone abbreviations
from Wikipedia (as of 2023-7-20).
*/
/// Parse a timezone abbreviation into a UTC offset.
///
/// This uses a pre-generated map of timezone abbreviations to their UTC offsets
/// in seconds. The mapping is based on the Wikipedia reference of timezone
/// abbreviations (as of 2023-07-20).
///
/// Note: Timezone abbreviations are not standardized and can be ambiguous.
/// This implementation uses preferred interpretations for conflicting abbreviations.
///
/// # Arguments
/// * `abbreviation` - The timezone abbreviation (e.g., "CST", "EST", "PST")
///
/// # Returns
/// * `Ok(FixedOffset)` - The UTC offset for the timezone
/// * `Err(String)` - Error message if abbreviation is not found or invalid
///
/// # Examples
/// ```
/// use chrono::FixedOffset;
///
/// let cst = parse_abbreviation("CST").unwrap();
/// assert_eq!(cst, FixedOffset::west_opt(6 * 3600).unwrap());
/// ```
pub fn parse_abbreviation(abbreviation: &str) -> Result<FixedOffset, String> {
let offset_integer_string = TIMEZONES.get(abbreviation);
if offset_integer_string.is_none() {
return Err("Failed to find abbreviation".to_string());
}
let offset_seconds = TIMEZONE_OFFSETS
.get(abbreviation)
.ok_or_else(|| format!("Unknown timezone abbreviation: {}", abbreviation))?;
let offset = FixedOffset::east_opt(offset_integer_string.unwrap().parse().expect("Failed to parse stored offset"));
return offset.ok_or("Failed to parse offset".to_string());
// Convert seconds to FixedOffset
// Positive offsets are east of UTC, negative are west
let offset = if *offset_seconds >= 0 {
FixedOffset::east_opt(*offset_seconds)
} else {
FixedOffset::west_opt(-*offset_seconds)
};
offset.ok_or_else(|| {
format!(
"Invalid offset for timezone {}: {} seconds",
abbreviation, offset_seconds
)
})
}
#[cfg(test)]
mod tests {
use chrono::FixedOffset;
use crate::abbr::parse_abbreviation;
use chrono::FixedOffset;
#[test]
fn parse_offset() {
assert_eq!(parse_abbreviation("CST").unwrap(), FixedOffset::west_opt(6 * 3600).unwrap());
fn test_parse_cst() {
// CST (Central Standard Time) is UTC-6
let cst = parse_abbreviation("CST").unwrap();
assert_eq!(cst, FixedOffset::west_opt(6 * 3600).unwrap());
}
}
#[test]
fn test_parse_est() {
    // Eastern Standard Time sits five hours west of UTC.
    let five_hours_west = FixedOffset::west_opt(5 * 3600).unwrap();
    assert_eq!(parse_abbreviation("EST").unwrap(), five_hours_west);
}
#[test]
fn test_parse_utc() {
    // UTC itself must map to a zero offset.
    let zero = FixedOffset::east_opt(0).unwrap();
    assert_eq!(parse_abbreviation("UTC").unwrap(), zero);
}
#[test]
fn test_parse_unknown() {
    // An abbreviation missing from the table must be rejected with a
    // message that names the problem.
    let outcome = parse_abbreviation("INVALID");
    assert!(outcome.is_err());
    let message = outcome.unwrap_err();
    assert!(message.contains("Unknown timezone abbreviation"));
}
#[test]
fn test_parse_positive_offset() {
    // Japan Standard Time sits nine hours east of UTC.
    let nine_hours_east = FixedOffset::east_opt(9 * 3600).unwrap();
    assert_eq!(parse_abbreviation("JST").unwrap(), nine_hours_east);
}
}