Commit current datetime parsing work

This commit is contained in:
2023-07-21 23:35:41 -05:00
parent ffde9e68e0
commit 8dc13f29ad
4 changed files with 284 additions and 1 deletions

View File

@@ -24,6 +24,7 @@ chrono-tz = "0.8.3"
phf = { version = "0.11.2", features = ["macros"] }
phf_codegen = "0.11.1"
chrono = "0.4.26"
regex = "1.8.4"
[build-dependencies]
chrono = "0.4.26"

View File

@@ -18,4 +18,49 @@ My first Rust project, intended to offer a simple way to display the current tim
- `/svg/2023-06-14-3PM-CST`
- `2023-06-14-3PM-CST.svg`
- `/jpeg/2023.06.14.33` (14th of June, 2023, 2:33 PM UTC)
- `/jpeg/2023.06.14.33T-5` (14th of June, 2023, 2:33 PM UTC-5)
- `/jpeg/2023.06.14.33T-5` (14th of June, 2023, 2:33 PM UTC-5)
## Structure
1. Routing
- Handle different input formats at the route layer
2. Parsing
- Module for parsing input
3. Cache Layer
- Given all route options, provide a globally available cache for the next layer
4. SVG Template Rendering
- Template rendering based on parsed input
5. (Optional) Rasterization
- If rasterization is requested, render SVG to PNG
6. (Catch-all) Error Handling
- All errors/panics will be caught in separate middleware
## Input Parsing
- Separators can be any of the following: `.`, `,`, `-`, `:` and ` `.
- They must be consistent through the date section and the time section (they can be different between the two).
- Date order can be modified with `?date=[YMD|DMY|MDY]`. By default, it is `YMD`.
- Time is not required; every time component that is omitted defaults to 0. When a time is given, HOUR is the minimum that must be specified.
- Millisecond precision is allowed, but will be ignored in most outputs. Periods or commas are allowed as separators.
- Timezones can be qualified in a number of ways, but will default to UTC if not specified.
- Fully qualified TZ identifiers like "America/Chicago" are specified using the `tz` query parameter.
- Abbreviated TZ identifiers like "CST" are specified inside the time string, after the time, separated by a dash.
- Abbreviated identifiers are often ambiguous and should be avoided if possible. They are available for ease of use,
but where one abbreviation maps to several time zones, the preferred time zone has been chosen in code.
- The full table is available in [`abbr_tz`](./src/abbr_tz). Comments are marked with `#`. The preferred interpretation
of each ambiguous abbreviation was chosen arbitrarily by me. The table is sourced
from [Wikipedia](https://en.wikipedia.org/wiki/List_of_time_zone_abbreviations).
```shell
2023-06-14-3 # 3AM UTC
2023-06-14-3-45 # 3:45AM UTC
2023-06-14-3PM-CST
2023.06.14.15-CST # 3PM CST
2023.06.14.15-45-CST # 3:45PM CST
2023.06.14.15-45-30-CST # 3:45:30PM CST
2023.06.14.15-45-30.123-CST # 3:45:30.123PM CST
2023.06.14.15-45-30,123-CST # 3:45:30.123PM CST
```
## Output Formats

228
src/parse.rs Normal file
View File

@@ -0,0 +1,228 @@
use std::str::FromStr;
use regex::{Regex, Captures, Match};
use lazy_static::lazy_static;
use crate::separator::Separable;
lazy_static! {
    /// Pattern for an absolute timestamp such as `2023.06.14.15-45-30,123-CST`.
    ///
    /// Layout, left to right:
    /// - Groups 1, 2 and 3 (required): the three numeric date segments, each
    ///   followed by exactly one separator character.
    /// - `time` (always participates, may be empty): one or more numeric
    ///   segments joined by single separators, optionally suffixed with `AM`
    ///   or `PM`.
    /// - `tz` (optional): one separator followed by a 2-5 character
    ///   abbreviation. The leading separator is part of the capture.
    ///
    /// The separator class is written `[.,: -]` with the dash last. Written as
    /// `[.,-: ]`, the `,-:` part is parsed as a character *range* that also
    /// covers `/` and every ASCII digit, which lets the date groups split in
    /// the middle of a number.
    ///
    /// Named groups use the `(?P<name>...)` spelling, which every `regex` 1.x
    /// release accepts.
    static ref ABSOLUTE_TIME: Regex = Regex::new(
        r"^(\d+)[.,: -](\d+)[.,: -](\d+)[.,: -](?P<time>(?:\d+(?:[.,: -]\d+)*(?:PM|AM)?)?)(?P<tz>[.,: -]\w{2,5})?$",
    )
    .expect("ABSOLUTE_TIME is a hard-coded, valid pattern");
}
/// Order in which the three numeric date segments of an input are interpreted.
///
/// [`DateSegmentOrder::YearMonthDay`] is the default.
#[derive(Debug, Copy, Clone, Default)]
pub enum DateSegmentOrder {
    /// Year first, then month, then day (`"YMD"`).
    #[default]
    YearMonthDay,
    /// Month first, then day, then year (`"MDY"`).
    MonthDayYear,
    /// Day first, then month, then year (`"DMY"`).
    DayMonthYear,
}

impl FromStr for DateSegmentOrder {
    type Err = ();

    /// Converts one of the three-letter codes `"YMD"`, `"MDY"` or `"DMY"` into
    /// the matching variant.
    ///
    /// The comparison is case-sensitive; any other input yields `Err(())`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Lookup table of accepted codes and the variant each one selects.
        const CODES: [(&str, DateSegmentOrder); 3] = [
            ("YMD", DateSegmentOrder::YearMonthDay),
            ("MDY", DateSegmentOrder::MonthDayYear),
            ("DMY", DateSegmentOrder::DayMonthYear),
        ];

        CODES
            .iter()
            .find(|(code, _)| *code == s)
            .map(|&(_, order)| order)
            .ok_or(())
    }
}
/// Options controlling how an input string is turned into an `ExtractedTime`.
///
/// The derived `Default` selects the default `DateSegmentOrder` and
/// `strict = false`.
#[derive(Debug, Copy, Clone, Default)]
pub struct ExtractionOptions {
// Decides which of the three numeric date segments is the year, month and day.
date_segment_order: DateSegmentOrder,
// NOTE(review): not read anywhere in the visible code; intended semantics
// are not established here — confirm before relying on it.
strict: bool,
}
/// Date, time and timezone components pulled out of an input string.
///
/// The fields hold the parsed numbers as-is; this type itself performs no
/// range validation.
#[derive(Debug, PartialEq)]
pub struct ExtractedTime {
// Calendar date components.
year: u32,
month: u32,
day: u32,
// Time-of-day components.
hour: u32,
minute: u32,
second: u32,
// Timezone label as text, e.g. "UTC".
timezone: String,
}
/// Extension trait for values that can be interpreted as an absolute
/// (fully specified) date/time string.
trait ExtractAbsolute {
/// Extracts the date/time components from `self` according to `options`.
///
/// Returns the extracted components on success, or a human-readable error
/// message on failure.
fn extract_absolute(&self, options: ExtractionOptions) -> Result<ExtractedTime, String>;
}
impl ExtractAbsolute for String {
    /// Extracts year, month, day, hour, minute, second and timezone from an
    /// absolute timestamp string matched by `ABSOLUTE_TIME`.
    ///
    /// - The three leading numeric segments are assigned to year/month/day
    ///   according to `options.date_segment_order`.
    /// - Time components that are absent default to `0`.
    /// - A trailing `AM`/`PM` converts a 12-hour clock value to 24-hour form
    ///   (`12AM` becomes `0`, `12PM` stays `12`).
    /// - A fourth time segment (milliseconds) is accepted and ignored.
    /// - The timezone abbreviation is returned without its leading separator
    ///   and defaults to `"UTC"` when none is present.
    ///
    /// # Errors
    ///
    /// Returns `Err` with a descriptive message when the input does not match
    /// the expected layout, a numeric segment is empty or does not fit in a
    /// `u32`, an `AM`/`PM` hour is outside `1..=12`, or the time section has
    /// more than four segments. Invalid input never panics.
    fn extract_absolute(&self, options: ExtractionOptions) -> Result<ExtractedTime, String> {
        // Parses one of the required numeric date groups.
        fn as_u32(capture: Option<Match>) -> Result<u32, String> {
            let text = capture.map_or("", |m| m.as_str());
            text.parse::<u32>()
                .map_err(|_| format!("Invalid date segment: {:?}", text))
        }

        // Parses an optional time segment; a missing segment counts as 0.
        fn time_segment(segment: Option<&str>, label: &str) -> Result<u32, String> {
            match segment {
                Some(text) => text
                    .parse::<u32>()
                    .map_err(|_| format!("Invalid {} segment: {:?}", label, text)),
                None => Ok(0),
            }
        }

        let capture: Captures = ABSOLUTE_TIME
            .captures(self)
            .ok_or_else(|| format!("Invalid absolute time format: {}", self))?;

        let (first, second, third) = (
            as_u32(capture.get(1))?,
            as_u32(capture.get(2))?,
            as_u32(capture.get(3))?,
        );
        let (year, month, day) = match options.date_segment_order {
            DateSegmentOrder::YearMonthDay => (first, second, third),
            DateSegmentOrder::MonthDayYear => (third, first, second),
            DateSegmentOrder::DayMonthYear => (third, second, first),
        };

        let time_text = capture.name("time").map_or("", |m| m.as_str());
        let (hour, minute, second) = if time_text.is_empty() {
            (0, 0, 0)
        } else {
            // Peel off the meridiem marker so only separators and digits remain.
            let (clock, is_pm) = match (
                time_text.strip_suffix("AM"),
                time_text.strip_suffix("PM"),
            ) {
                (Some(rest), _) => (rest, Some(false)),
                (_, Some(rest)) => (rest, Some(true)),
                _ => (time_text, None),
            };

            // Split using the separator characters defined by the Separable trait.
            let mut segments = clock.split(|c: char| c.is_separator());
            let raw_hour = time_segment(segments.next(), "hour")?;
            let minute = time_segment(segments.next(), "minute")?;
            let second = time_segment(segments.next(), "second")?;
            segments.next(); // Milliseconds are accepted but not reported.

            // Anything after the milliseconds segment is malformed input.
            if segments.next().is_some() {
                return Err("Invalid time format: too many segments".to_string());
            }

            let hour = match is_pm {
                None => raw_hour,
                Some(_) if raw_hour == 0 || raw_hour > 12 => {
                    return Err(format!("Invalid 12-hour clock value: {}", raw_hour));
                }
                Some(true) => raw_hour % 12 + 12,
                Some(false) => raw_hour % 12,
            };
            (hour, minute, second)
        };

        // The `tz` capture includes its leading separator; strip it.
        let timezone = capture
            .name("tz")
            .map(|m| m.as_str().trim_start_matches(|c: char| c.is_separator()))
            .filter(|abbreviation| !abbreviation.is_empty())
            .unwrap_or("UTC")
            .to_string();

        Ok(ExtractedTime {
            year,
            month,
            day,
            hour,
            minute,
            second,
            timezone,
        })
    }
}
#[cfg(test)]
mod tests {
    use crate::parse::{ExtractAbsolute, ExtractedTime};

    /// Runs the extractor on `input` using the default options.
    fn extract(input: &str) -> Result<ExtractedTime, String> {
        input.to_string().extract_absolute(Default::default())
    }

    /// Builds the expected value for a time of day on 2023-06-14, the date
    /// shared by every fixture in this module.
    fn on_june_14(hour: u32, minute: u32, second: u32, timezone: &str) -> ExtractedTime {
        ExtractedTime {
            year: 2023,
            month: 6,
            day: 14,
            hour,
            minute,
            second,
            timezone: timezone.to_string(),
        }
    }

    #[test]
    fn extract_hour() {
        // 3:00 AM UTC
        assert_eq!(
            extract("2023-06-14-3").unwrap(),
            on_june_14(3, 0, 0, "UTC")
        );

        // 3:00 PM CST, 24-hour form
        assert_eq!(
            extract("2023.06.14.15-CST"),
            Ok(on_june_14(15, 0, 0, "CST"))
        );

        // 3:00 PM CST, 12-hour form
        assert_eq!(
            extract("2023-06-14-3PM-CST"),
            Ok(on_june_14(12 + 3, 0, 0, "CST"))
        );
    }

    #[test]
    fn extract_minute() {
        // 3:45 AM UTC
        assert_eq!(
            extract("2023-06-14-3-45"),
            Ok(on_june_14(3, 45, 0, "UTC"))
        );

        // 3:45 PM CST
        assert_eq!(
            extract("2023.06.14.15-45-CST"),
            Ok(on_june_14(15, 45, 0, "CST"))
        );
    }

    #[test]
    fn extract_seconds() {
        // 3:45:30 PM CST
        assert_eq!(
            extract("2023.06.14.15-45-30-CST"),
            Ok(on_june_14(15, 45, 30, "CST"))
        );
    }

    #[test]
    fn handle_milliseconds() {
        // Comma before the milliseconds
        assert_eq!(
            extract("2023.06.14.15-45-30,123"),
            Ok(on_june_14(12 + 3, 45, 30, "UTC"))
        );

        // Period before the milliseconds
        assert_eq!(
            extract("2023.06.14.15-45-30.456"),
            Ok(on_june_14(12 + 3, 45, 30, "UTC"))
        );

        // Comma before the milliseconds, followed by a timezone
        assert_eq!(
            extract("2023.06.14.15-45-30,123-CST"),
            Ok(on_june_14(15, 45, 30, "CST"))
        );
    }
}

9
src/separator.rs Normal file
View File

@@ -0,0 +1,9 @@
/// Implemented by values that can act as a delimiter between the segments of
/// a date or time string.
pub trait Separable {
    /// Returns `true` when the value is one of the recognised separators.
    fn is_separator(&self) -> bool;
}

impl Separable for char {
    /// A character is a separator when it is a space, dash, comma, colon or
    /// period.
    fn is_separator(&self) -> bool {
        matches!(*self, ' ' | '-' | ',' | ':' | '.')
    }
}