diff --git a/build.rs b/build.rs
index 2b4098a..26474fb 100644
--- a/build.rs
+++ b/build.rs
@@ -1,71 +1,185 @@
+use lazy_static::lazy_static;
+use regex::Regex;
 use std::env;
+use std::fmt;
 use std::fs::File;
 use std::io::{BufRead, BufReader, BufWriter, Write};
 use std::path::Path;
-use regex::Regex;
-use lazy_static::lazy_static;
 
-lazy_static! {
-    static ref FULL_PATTERN: Regex = Regex::new(r"([A-Z]+)\s\t.+\s\tUTC([−+±]\d{2}(?::\d{2})?)").unwrap();
-    static ref OFFSET_PATTERN: Regex = Regex::new(r"([−+±])(\d{2}(?::\d{2})?)").unwrap();
+/// Error types for build script failures
+#[derive(Debug)]
+enum BuildError {
+    Io(std::io::Error),
+    Regex(String),
+    Parse(String),
+    Env(env::VarError),
 }
 
-const HOUR: u32 = 3600;
+impl fmt::Display for BuildError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            BuildError::Io(e) => write!(f, "IO error: {}", e),
+            BuildError::Regex(msg) => write!(f, "Regex error: {}", msg),
+            BuildError::Parse(msg) => write!(f, "Parse error: {}", msg),
+            BuildError::Env(e) => write!(f, "Environment error: {}", e),
+        }
+    }
+}
 
-fn parse_offset(raw_offset: &str) -> i32 {
-    let capture = OFFSET_PATTERN.captures(raw_offset).expect("RegEx failed to match offset");
-    println!("{}: {}", raw_offset, capture.get(1).expect("First group capture failed").as_str());
+impl From<std::io::Error> for BuildError {
+    fn from(error: std::io::Error) -> Self {
+        BuildError::Io(error)
+    }
+}
 
-    let is_west = capture.get(1).unwrap().as_str() == "−";
-    let time = capture.get(2).expect("Second group capture failed").as_str();
-    let (hours, minutes) = if time.contains(':') {
-        let mut split = time.split(':');
-        let hours = split.next().unwrap().parse::<u32>().unwrap();
-        let minutes = split.next().unwrap().parse::<u32>().unwrap();
+impl From<env::VarError> for BuildError {
+    fn from(error: env::VarError) -> Self {
+        BuildError::Env(error)
+    }
+}
 
-        (hours, minutes)
-    } else {
-        // Minutes not specified, assume 0
-        (time.parse::<u32>().unwrap(), 0)
-    };
+lazy_static! {
+    /// Regex to match timezone lines: "ABBR \t Description \t UTC±HH:MM"
+    static ref TIMEZONE_PATTERN: Regex =
+        Regex::new(r"([A-Z]+)\s\t.+\s\tUTC([−+±]\d{2}(?::\d{2})?)").unwrap();
 
-    let value = (hours * HOUR) + (minutes * 60);
-    return if is_west { value as i32 * -1 } else { value as i32 };
+    /// Regex to parse UTC offset format: "±HH:MM" or "±HH"
+    static ref OFFSET_PATTERN: Regex =
+        Regex::new(r"([−+±])(\d{2})(?::(\d{2}))?").unwrap();
+}
+
+const SECONDS_PER_HOUR: i32 = 3600;
+const SECONDS_PER_MINUTE: i32 = 60;
+
+/// Parse a UTC offset string (e.g., "+05:30", "−08", "±00") into seconds from UTC
+fn parse_utc_offset(raw_offset: &str) -> Result<i32, BuildError> {
+    let captures = OFFSET_PATTERN.captures(raw_offset).ok_or_else(|| {
+        BuildError::Regex(format!("Failed to match offset pattern: {}", raw_offset))
+    })?;
+
+    // Handle ± (variable offset) as UTC
+    let sign = captures.get(1).unwrap().as_str();
+    if sign == "±" {
+        return Ok(0);
+    }
+
+    let hours_str = captures.get(2).unwrap().as_str();
+    let minutes_str = captures.get(3).map(|m| m.as_str()).unwrap_or("0");
+
+    let hours: i32 = hours_str
+        .parse()
+        .map_err(|e| BuildError::Parse(format!("Invalid hours '{}': {}", hours_str, e)))?;
+
+    let minutes: i32 = minutes_str
+        .parse()
+        .map_err(|e| BuildError::Parse(format!("Invalid minutes '{}': {}", minutes_str, e)))?;
+
+    // Validate ranges
+    if hours > 23 {
+        return Err(BuildError::Parse(format!("Hours out of range: {}", hours)));
+    }
+    if minutes > 59 {
+        return Err(BuildError::Parse(format!(
+            "Minutes out of range: {}",
+            minutes
+        )));
+    }
+
+    let total_seconds = (hours * SECONDS_PER_HOUR) + (minutes * SECONDS_PER_MINUTE);
+
+    // Apply sign (− is west/negative, + is east/positive)
+    Ok(match sign {
+        "−" => -total_seconds,
+        "+" => total_seconds,
+        _ => unreachable!("Regex should only match +, −, or ±"),
+    })
+}
+
+/// Parse a single timezone line and extract abbreviation and offset
+fn parse_timezone_line(line: &str) -> Result<Option<(String, i32)>, BuildError> {
+    // Skip comment lines
+    if line.trim().starts_with('#') || line.trim().is_empty() {
+        return Ok(None);
+    }
+
+    let captures = TIMEZONE_PATTERN
+        .captures(line)
+        .ok_or_else(|| BuildError::Regex(format!("Failed to match timezone pattern: {}", line)))?;
+
+    let abbreviation = captures.get(1).unwrap().as_str().to_string();
+    let raw_offset = captures.get(2).unwrap().as_str();
+
+    let offset = parse_utc_offset(raw_offset)?;
+
+    Ok(Some((abbreviation, offset)))
+}
+
+/// Generate the PHF map code for timezone abbreviations to UTC offsets
+fn generate_timezone_map() -> Result<(), BuildError> {
+    let out_dir = env::var("OUT_DIR")?;
+    let output_path = Path::new(&out_dir).join("timezone_map.rs");
+
+    let tz_path = Path::new("./src/abbr_tz");
+    let tz_file = File::open(tz_path)?;
+    let reader = BufReader::new(tz_file);
+
+    let mut out_file = BufWriter::new(File::create(&output_path)?);
+    let mut builder = phf_codegen::Map::<String>::new();
+
+    let mut processed_count = 0;
+    let mut skipped_count = 0;
+
+    for line_result in reader.lines() {
+        let line = line_result?;
+
+        match parse_timezone_line(&line)? {
+            Some((abbreviation, offset)) => {
+                builder.entry(abbreviation.clone(), &offset.to_string());
+                processed_count += 1;
+                println!(
+                    "cargo:warning=Processed timezone: {} -> {} seconds",
+                    abbreviation, offset
+                );
+            }
+            None => {
+                skipped_count += 1;
+            }
+        }
+    }
+
+    // Generate the PHF map
+    writeln!(
+        &mut out_file,
+        "/// Auto-generated timezone abbreviation to UTC offset (in seconds) mapping"
+    )?;
+    writeln!(
+        &mut out_file,
+        "/// Generated from {} timezone definitions ({} processed, {} skipped)",
+        processed_count + skipped_count,
+        processed_count,
+        skipped_count
+    )?;
+    writeln!(
+        &mut out_file,
+        "pub static TIMEZONE_OFFSETS: phf::Map<&'static str, i32> = {};",
+        builder.build()
+    )?;
+
+    // Flush explicitly: BufWriter's Drop impl silently discards write errors
+    out_file.flush()?;
+
+    println!(
+        "cargo:warning=Generated timezone map with {} entries",
+        processed_count
+    );
+    Ok(())
 }
 
 fn main() {
-    let path = Path::new(&env::var("OUT_DIR").unwrap()).join("codegen.rs");
-    let raw_tz = BufReader::new(File::open("./src/abbr_tz").unwrap());
-
-    let mut file = BufWriter::new(File::create(&path).unwrap());
-
-    let mut builder: phf_codegen::Map<String> = phf_codegen::Map::new();
-
-    for line in raw_tz.lines() {
-        let line = line.unwrap();
-        if line.starts_with('#') {
-            continue;
-        }
-
-        let capture = FULL_PATTERN.captures(&line).expect("RegEx failed to match line");
-
-        let abbreviation = capture.get(1).unwrap().as_str();
-        let raw_offset = capture.get(2).unwrap().as_str();
-
-        let offset = if !raw_offset.starts_with('±') {
-            parse_offset(raw_offset)
-        } else {
-            0
-        };
-
-        builder.entry(String::from(abbreviation), &format!("\"{}\"", offset).to_string());
+    if let Err(e) = generate_timezone_map() {
+        panic!("Build script failed: {}", e);
     }
 
-    write!(
-        &mut file,
-        "static TIMEZONES: phf::Map<&'static str, &'static str> = {}",
-        builder.build()
-    )
-    .unwrap();
-    write!(&mut file, ";\n").unwrap();
-}
\ No newline at end of file
+    // Tell Cargo to re-run this build script if the timezone file changes
+    println!("cargo:rerun-if-changed=src/abbr_tz");
+}
diff --git a/src/abbr.rs b/src/abbr.rs
index df4b597..152c9c9 100644
--- a/src/abbr.rs
+++ b/src/abbr.rs
@@ -1,31 +1,92 @@
 use chrono::FixedOffset;
 
-// Generated by build.rs, phf_codegen
-include!(concat!(env!("OUT_DIR"), "/codegen.rs"));
+// Generated by build.rs - timezone abbreviation to UTC offset mapping
+include!(concat!(env!("OUT_DIR"), "/timezone_map.rs"));
 
-/*
-    Parse an abbreviation of a timezone into a UTC offset.
-    Note: This is not standardized at all and is simply built on a reference of Time Zone abbreviations
-    from Wikipedia (as of 2023-7-20).
- */
+/// Parse a timezone abbreviation into a UTC offset.
+///
+/// This uses a pre-generated map of timezone abbreviations to their UTC offsets
+/// in seconds. The mapping is based on the Wikipedia reference of timezone
+/// abbreviations (as of 2023-07-20).
+///
+/// Note: Timezone abbreviations are not standardized and can be ambiguous.
+/// This implementation uses preferred interpretations for conflicting abbreviations.
+///
+/// # Arguments
+/// * `abbreviation` - The timezone abbreviation (e.g., "CST", "EST", "PST")
+///
+/// # Returns
+/// * `Ok(FixedOffset)` - The UTC offset for the timezone
+/// * `Err(String)` - Error message if abbreviation is not found or invalid
+///
+/// # Examples
+/// ```ignore
+/// use chrono::FixedOffset;
+///
+/// let cst = parse_abbreviation("CST").unwrap();
+/// assert_eq!(cst, FixedOffset::west_opt(6 * 3600).unwrap());
+/// ```
 pub fn parse_abbreviation(abbreviation: &str) -> Result<FixedOffset, String> {
-    let offset_integer_string = TIMEZONES.get(abbreviation);
-    if offset_integer_string.is_none() {
-        return Err("Failed to find abbreviation".to_string());
-    }
+    let offset_seconds = TIMEZONE_OFFSETS
+        .get(abbreviation)
+        .ok_or_else(|| format!("Unknown timezone abbreviation: {}", abbreviation))?;
 
-    let offset = FixedOffset::east_opt(offset_integer_string.unwrap().parse().expect("Failed to parse stored offset"));
-    return offset.ok_or("Failed to parse offset".to_string());
+    // Convert seconds to FixedOffset
+    // Positive offsets are east of UTC, negative are west
+    let offset = if *offset_seconds >= 0 {
+        FixedOffset::east_opt(*offset_seconds)
+    } else {
+        FixedOffset::west_opt(-*offset_seconds)
+    };
+
+    offset.ok_or_else(|| {
+        format!(
+            "Invalid offset for timezone {}: {} seconds",
+            abbreviation, offset_seconds
+        )
+    })
 }
 
-
 #[cfg(test)]
 mod tests {
-    use chrono::FixedOffset;
     use crate::abbr::parse_abbreviation;
+    use chrono::FixedOffset;
 
     #[test]
-    fn parse_offset() {
-        assert_eq!(parse_abbreviation("CST").unwrap(), FixedOffset::west_opt(6 * 3600).unwrap());
+    fn test_parse_cst() {
+        // CST (Central Standard Time) is UTC-6
+        let cst = parse_abbreviation("CST").unwrap();
+        assert_eq!(cst, FixedOffset::west_opt(6 * 3600).unwrap());
     }
-}
\ No newline at end of file
+
+    #[test]
+    fn test_parse_est() {
+        // EST (Eastern Standard Time) is UTC-5
+        let est = parse_abbreviation("EST").unwrap();
+        assert_eq!(est, FixedOffset::west_opt(5 * 3600).unwrap());
+    }
+
+    #[test]
+    fn test_parse_utc() {
+        // UTC should be zero offset
+        let utc = parse_abbreviation("UTC").unwrap();
+        assert_eq!(utc, FixedOffset::east_opt(0).unwrap());
+    }
+
+    #[test]
+    fn test_parse_unknown() {
+        // Unknown abbreviation should return error
+        let result = parse_abbreviation("INVALID");
+        assert!(result.is_err());
+        assert!(result
+            .unwrap_err()
+            .contains("Unknown timezone abbreviation"));
+    }
+
+    #[test]
+    fn test_parse_positive_offset() {
+        // JST (Japan Standard Time) is UTC+9
+        let jst = parse_abbreviation("JST").unwrap();
+        assert_eq!(jst, FixedOffset::east_opt(9 * 3600).unwrap());
+    }
+}