From 6df4303bd6650c583a9b455cca2dfa08af6aea04 Mon Sep 17 00:00:00 2001 From: Xevion Date: Wed, 28 Jan 2026 21:06:29 -0600 Subject: [PATCH] feat: implement comprehensive course data model with reference cache and search --- .../20260128000000_expand_course_schema.sql | 83 +++++ src/app.rs | 8 +- src/banner/api.rs | 23 ++ src/banner/models/meetings.rs | 2 +- src/banner/session.rs | 6 +- src/bot/commands/ics.rs | 4 +- src/data/batch.rs | 307 +++++++++++++++--- src/data/courses.rs | 128 ++++++++ src/data/mod.rs | 2 + src/data/models.rs | 63 ++++ src/data/reference.rs | 53 +++ src/scraper/mod.rs | 21 +- src/scraper/scheduler.rs | 130 +++++++- src/services/web.rs | 9 +- src/state.rs | 66 ++++ src/web/routes.rs | 292 ++++++++++++++++- 16 files changed, 1121 insertions(+), 76 deletions(-) create mode 100644 migrations/20260128000000_expand_course_schema.sql create mode 100644 src/data/courses.rs create mode 100644 src/data/reference.rs diff --git a/migrations/20260128000000_expand_course_schema.sql b/migrations/20260128000000_expand_course_schema.sql new file mode 100644 index 0000000..811fa15 --- /dev/null +++ b/migrations/20260128000000_expand_course_schema.sql @@ -0,0 +1,83 @@ +-- ============================================================ +-- Expand courses table with rich Banner API fields +-- ============================================================ + +-- Section identifiers +ALTER TABLE courses ADD COLUMN sequence_number VARCHAR; +ALTER TABLE courses ADD COLUMN part_of_term VARCHAR; + +-- Schedule & delivery (store codes, descriptions come from reference_data) +ALTER TABLE courses ADD COLUMN instructional_method VARCHAR; +ALTER TABLE courses ADD COLUMN campus VARCHAR; + +-- Credit hours +ALTER TABLE courses ADD COLUMN credit_hours INTEGER; +ALTER TABLE courses ADD COLUMN credit_hour_low INTEGER; +ALTER TABLE courses ADD COLUMN credit_hour_high INTEGER; + +-- Cross-listing +ALTER TABLE courses ADD COLUMN cross_list VARCHAR; +ALTER TABLE courses ADD COLUMN cross_list_capacity INTEGER; +ALTER TABLE courses ADD COLUMN cross_list_count INTEGER; + +-- Section linking +ALTER TABLE courses ADD COLUMN link_identifier VARCHAR; +ALTER TABLE courses ADD COLUMN is_section_linked BOOLEAN; + +-- JSONB columns for 1-to-many data +ALTER TABLE courses ADD COLUMN meeting_times JSONB NOT NULL DEFAULT '[]'::jsonb; +ALTER TABLE courses ADD COLUMN attributes JSONB NOT NULL DEFAULT '[]'::jsonb; + +-- ============================================================ +-- Full-text search support +-- ============================================================ + +-- Generated tsvector for word-based search on title +ALTER TABLE courses ADD COLUMN title_search tsvector + GENERATED ALWAYS AS (to_tsvector('simple', coalesce(title, ''))) STORED; + +CREATE INDEX idx_courses_title_search ON courses USING GIN (title_search); + +-- Trigram index for substring/ILIKE search on title +CREATE EXTENSION IF NOT EXISTS pg_trgm; +CREATE INDEX idx_courses_title_trgm ON courses USING GIN (title gin_trgm_ops); + +-- ============================================================ +-- New filter indexes +-- ============================================================ + +CREATE INDEX idx_courses_instructional_method ON courses(instructional_method); +CREATE INDEX idx_courses_campus ON courses(campus); + +-- Composite for "open CS courses in Fall 2024" pattern +CREATE INDEX idx_courses_term_subject_avail ON courses(term_code, subject, max_enrollment, enrollment); + +-- ============================================================ +-- 
Instructors table (normalized, deduplicated)
+-- ============================================================
+
+CREATE TABLE instructors (
+    banner_id VARCHAR PRIMARY KEY,
+    display_name VARCHAR NOT NULL,
+    email VARCHAR
+);
+
+CREATE TABLE course_instructors (
+    course_id INTEGER NOT NULL REFERENCES courses(id) ON DELETE CASCADE,
+    instructor_id VARCHAR NOT NULL REFERENCES instructors(banner_id) ON DELETE CASCADE,
+    is_primary BOOLEAN NOT NULL DEFAULT false,
+    PRIMARY KEY (course_id, instructor_id)
+);
+
+CREATE INDEX idx_course_instructors_instructor ON course_instructors(instructor_id);
+
+-- ============================================================
+-- Reference data table (all code→description lookups)
+-- ============================================================
+
+CREATE TABLE reference_data (
+    category VARCHAR NOT NULL,
+    code VARCHAR NOT NULL,
+    description VARCHAR NOT NULL,
+    PRIMARY KEY (category, code)
+);
diff --git a/src/app.rs b/src/app.rs
index 69f93b2..6284e25 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -6,13 +6,13 @@
 use crate::services::bot::BotService;
 use crate::services::manager::ServiceManager;
 use crate::services::web::WebService;
 use crate::state::AppState;
+use anyhow::Context;
 use figment::value::UncasedStr;
 use figment::{Figment, providers::Env};
 use sqlx::postgres::PgPoolOptions;
 use std::process::ExitCode;
 use std::sync::Arc;
 use std::time::Duration;
-use anyhow::Context;
 use tracing::{error, info};
 
 /// Main application struct containing all necessary components
@@ -79,6 +79,11 @@ impl App {
         let banner_api_arc = Arc::new(banner_api);
         let app_state = AppState::new(banner_api_arc.clone(), db_pool.clone());
 
+        // Load reference data cache from DB (may be empty on first run)
+        if let Err(e) = app_state.load_reference_cache().await {
+            info!(error = ?e, "Could not load reference cache on startup (may be empty)");
+        }
+
         Ok(App {
             config,
             db_pool,
@@ -101,6 +106,7 @@ impl App {
         let scraper_service = Box::new(ScraperService::new(
             self.db_pool.clone(),
             self.banner_api.clone(),
+            self.app_state.reference_cache.clone(),
             self.app_state.service_statuses.clone(),
         ));
         self.service_manager
diff --git a/src/banner/api.rs b/src/banner/api.rs
index 173f19a..8e84909 100644
--- a/src/banner/api.rs
+++ b/src/banner/api.rs
@@ -228,6 +228,29 @@ impl BannerApi {
             .await
     }
 
+    /// Retrieves campus codes and descriptions.
+    pub async fn get_campuses(&self, term: &str) -> Result<Vec<Pair>> {
+        self.get_list_endpoint("get_campus", "", term, 1, 500).await
+    }
+
+    /// Retrieves instructional method codes and descriptions.
+    pub async fn get_instructional_methods(&self, term: &str) -> Result<Vec<Pair>> {
+        self.get_list_endpoint("get_instructionalMethod", "", term, 1, 500)
+            .await
+    }
+
+    /// Retrieves part-of-term codes and descriptions.
+    pub async fn get_parts_of_term(&self, term: &str) -> Result<Vec<Pair>> {
+        self.get_list_endpoint("get_partOfTerm", "", term, 1, 500)
+            .await
+    }
+
+    /// Retrieves section attribute codes and descriptions.
+    pub async fn get_attributes(&self, term: &str) -> Result<Vec<Pair>> {
+        self.get_list_endpoint("get_attribute", "", term, 1, 500)
+            .await
+    }
+
     /// Retrieves meeting time information for a course.
    pub async fn get_course_meeting_time(
         &self,
diff --git a/src/banner/models/meetings.rs b/src/banner/models/meetings.rs
index 7e65130..0b70a9e 100644
--- a/src/banner/models/meetings.rs
+++ b/src/banner/models/meetings.rs
@@ -1,4 +1,4 @@
-use bitflags::{bitflags, Flags};
+use bitflags::{Flags, bitflags};
 use chrono::{DateTime, NaiveDate, NaiveTime, Timelike, Utc, Weekday};
 use extension_traits::extension;
 use serde::{Deserialize, Deserializer, Serialize};
diff --git a/src/banner/session.rs b/src/banner/session.rs
index 47fb774..7e32420 100644
--- a/src/banner/session.rs
+++ b/src/banner/session.rs
@@ -452,7 +452,11 @@ impl SessionPool {
         self.select_term(&term.to_string(), &unique_session_id, &cookie_header)
             .await?;
 
-        Ok(BannerSession::new(&unique_session_id, jsessionid, ssb_cookie))
+        Ok(BannerSession::new(
+            &unique_session_id,
+            jsessionid,
+            ssb_cookie,
+        ))
     }
 
     /// Retrieves a list of terms from the Banner API.
diff --git a/src/bot/commands/ics.rs b/src/bot/commands/ics.rs
index ddf2b4f..6ac7bac 100644
--- a/src/bot/commands/ics.rs
+++ b/src/bot/commands/ics.rs
@@ -18,7 +18,9 @@ fn nth_weekday_of_month(year: i32, month: u32, weekday: Weekday, n: u32) -> Option<NaiveDate> {
 
 /// Compute a consecutive range of dates starting from `start` for `count` days.
 fn date_range(start: NaiveDate, count: i64) -> Vec<NaiveDate> {
-    (0..count).filter_map(|i| start.checked_add_signed(Duration::days(i))).collect()
+    (0..count)
+        .filter_map(|i| start.checked_add_signed(Duration::days(i)))
+        .collect()
 }
 
 /// Compute university holidays for a given year.
diff --git a/src/data/batch.rs b/src/data/batch.rs
index c79ef97..e9c55f6 100644
--- a/src/data/batch.rs
+++ b/src/data/batch.rs
@@ -1,43 +1,73 @@
 //! Batch database operations for improved performance.
 
 use crate::banner::Course;
+use crate::data::models::DbMeetingTime;
 use crate::error::Result;
 use sqlx::PgPool;
+use std::collections::HashSet;
 use std::time::Instant;
 use tracing::info;
 
+/// Convert a Banner API course's meeting times to the DB JSONB shape.
+fn to_db_meeting_times(course: &Course) -> serde_json::Value {
+    let meetings: Vec<DbMeetingTime> = course
+        .meetings_faculty
+        .iter()
+        .map(|mf| {
+            let mt = &mf.meeting_time;
+            DbMeetingTime {
+                begin_time: mt.begin_time.clone(),
+                end_time: mt.end_time.clone(),
+                start_date: mt.start_date.clone(),
+                end_date: mt.end_date.clone(),
+                monday: mt.monday,
+                tuesday: mt.tuesday,
+                wednesday: mt.wednesday,
+                thursday: mt.thursday,
+                friday: mt.friday,
+                saturday: mt.saturday,
+                sunday: mt.sunday,
+                building: mt.building.clone(),
+                building_description: mt.building_description.clone(),
+                room: mt.room.clone(),
+                campus: mt.campus.clone(),
+                meeting_type: mt.meeting_type.clone(),
+                meeting_schedule_type: mt.meeting_schedule_type.clone(),
+            }
+        })
+        .collect();
+    serde_json::to_value(meetings).unwrap_or_default()
+}
+
+/// Convert a Banner API course's section attributes to a JSONB array of code strings.
+fn to_db_attributes(course: &Course) -> serde_json::Value {
+    let codes: Vec<&str> = course
+        .section_attributes
+        .iter()
+        .map(|a| a.code.as_str())
+        .collect();
+    serde_json::to_value(codes).unwrap_or_default()
+}
+
+/// Extract the campus code from the first meeting time (Banner doesn't put it on the course directly).
+fn extract_campus_code(course: &Course) -> Option<String> {
+    course
+        .meetings_faculty
+        .first()
+        .and_then(|mf| mf.meeting_time.campus.clone())
+}
+
 /// Batch upsert courses in a single database query.
 ///
-/// This function performs a bulk INSERT...ON CONFLICT DO UPDATE for all courses
-/// in a single round-trip to the database, significantly reducing overhead compared
-/// to individual inserts.
+/// Performs a bulk INSERT...ON CONFLICT DO UPDATE for all courses, including
+/// new fields (meeting times, attributes, instructor data). Returns the
+/// database IDs for all upserted courses (in input order) so instructors
+/// can be linked.
 ///
 /// # Performance
-/// - Reduces N database round-trips to 1
+/// - Reduces N database round-trips to a constant few (courses, instructors, junction)
 /// - Typical usage: 50-200 courses per batch
-/// - PostgreSQL parameter limit: 65,535 (we use ~10 per course)
-///
-/// # Arguments
-/// * `courses` - Slice of Course structs from the Banner API
-/// * `db_pool` - PostgreSQL connection pool
-///
-/// # Returns
-/// * `Ok(())` on success
-/// * `Err(_)` if the database operation fails
-///
-/// # Example
-/// ```no_run
-/// use banner::data::batch::batch_upsert_courses;
-/// use banner::banner::Course;
-/// use sqlx::PgPool;
-///
-/// async fn example(courses: &[Course], pool: &PgPool) -> anyhow::Result<()> {
-///     batch_upsert_courses(courses, pool).await?;
-///     Ok(())
-/// }
-/// ```
 pub async fn batch_upsert_courses(courses: &[Course], db_pool: &PgPool) -> Result<()> {
-    // Early return for empty batches
     if courses.is_empty() {
         info!("No courses to upsert, skipping batch operation");
         return Ok(());
@@ -46,42 +76,108 @@ pub async fn batch_upsert_courses(courses: &[Course], db_pool: &PgPool) -> Result<()> {
     let start = Instant::now();
     let course_count = courses.len();
 
-    // Extract course fields into vectors for UNNEST
+    // Step 1: Upsert courses with all fields, returning IDs
+    let course_ids = upsert_courses(courses, db_pool).await?;
+
+    // Step 2: Upsert instructors (deduplicated across batch)
+    upsert_instructors(courses, db_pool).await?;
+
+    // Step 3: Link courses to instructors via junction table
+    upsert_course_instructors(courses, &course_ids, db_pool).await?;
+
+    let duration = start.elapsed();
+    info!(
+        courses_count = course_count,
+        duration_ms = duration.as_millis(),
+        "Batch upserted courses with instructors"
+    );
+
+    Ok(())
+}
+
+/// Upsert all courses and return their database IDs in input order.
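+///
+/// Note: pairing the `RETURNING id` rows with the input slice by position
+/// matches how PostgreSQL evaluates this `INSERT ... SELECT FROM UNNEST` in
+/// practice, but `RETURNING` order is not formally guaranteed by the SQL
+/// standard; if this ever needs to be ironclad, `UNNEST(...) WITH ORDINALITY`
+/// plus an `ORDER BY` on the ordinality column would pin the order explicitly.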
+async fn upsert_courses(courses: &[Course], db_pool: &PgPool) -> Result<Vec<i32>> {
     let crns: Vec<&str> = courses
         .iter()
         .map(|c| c.course_reference_number.as_str())
         .collect();
     let subjects: Vec<&str> = courses.iter().map(|c| c.subject.as_str()).collect();
     let course_numbers: Vec<&str> = courses.iter().map(|c| c.course_number.as_str()).collect();
     let titles: Vec<&str> = courses.iter().map(|c| c.course_title.as_str()).collect();
     let term_codes: Vec<&str> = courses.iter().map(|c| c.term.as_str()).collect();
     let enrollments: Vec<i32> = courses.iter().map(|c| c.enrollment).collect();
     let max_enrollments: Vec<i32> = courses.iter().map(|c| c.maximum_enrollment).collect();
     let wait_counts: Vec<i32> = courses.iter().map(|c| c.wait_count).collect();
     let wait_capacities: Vec<i32> = courses.iter().map(|c| c.wait_capacity).collect();
 
-    // Perform batch upsert using UNNEST for efficient bulk insertion
-    let result = sqlx::query(
+    // New scalar fields
+    let sequence_numbers: Vec<Option<&str>> = courses
+        .iter()
+        .map(|c| Some(c.sequence_number.as_str()))
+        .collect();
+    let parts_of_term: Vec<Option<&str>> = courses
+        .iter()
+        .map(|c| Some(c.part_of_term.as_str()))
+        .collect();
+    let instructional_methods: Vec<Option<&str>> = courses
+        .iter()
+        .map(|c| Some(c.instructional_method.as_str()))
+        .collect();
+    let campuses: Vec<Option<String>> = courses.iter().map(extract_campus_code).collect();
+    let credit_hours: Vec<Option<i32>> = courses.iter().map(|c| c.credit_hours).collect();
+    let credit_hour_lows: Vec<Option<i32>> = courses.iter().map(|c| c.credit_hour_low).collect();
+    let credit_hour_highs: Vec<Option<i32>> = courses.iter().map(|c| c.credit_hour_high).collect();
+    let cross_lists: Vec<Option<&str>> = courses.iter().map(|c| c.cross_list.as_deref()).collect();
+    let cross_list_capacities: Vec<Option<i32>> =
+        courses.iter().map(|c| c.cross_list_capacity).collect();
+    let cross_list_counts: Vec<Option<i32>> = courses.iter().map(|c| c.cross_list_count).collect();
+    let link_identifiers: Vec<Option<&str>> = courses
+        .iter()
+        .map(|c| c.link_identifier.as_deref())
+        .collect();
+    let is_section_linkeds: Vec<Option<bool>> =
+        courses.iter().map(|c| Some(c.is_section_linked)).collect();
+
+    // JSONB fields
+    let meeting_times_json: Vec<serde_json::Value> =
+        courses.iter().map(to_db_meeting_times).collect();
+    let attributes_json: Vec<serde_json::Value> = courses.iter().map(to_db_attributes).collect();
+
+    let rows = sqlx::query_scalar::<_, i32>(
         r#"
         INSERT INTO courses (
             crn, subject, course_number, title, term_code,
-            enrollment, max_enrollment, wait_count, wait_capacity, last_scraped_at
+            enrollment, max_enrollment, wait_count, wait_capacity, last_scraped_at,
+            sequence_number, part_of_term, instructional_method, campus,
+            credit_hours, credit_hour_low, credit_hour_high,
+            cross_list, cross_list_capacity, cross_list_count,
+            link_identifier, is_section_linked,
+            meeting_times, attributes
         )
-        SELECT * FROM UNNEST(
+        SELECT
+            v.crn, v.subject, v.course_number, v.title, v.term_code,
+            v.enrollment, v.max_enrollment, v.wait_count, v.wait_capacity, NOW(),
+            v.sequence_number, v.part_of_term, v.instructional_method, v.campus,
+            v.credit_hours, v.credit_hour_low, v.credit_hour_high,
+            v.cross_list, v.cross_list_capacity, v.cross_list_count,
+            v.link_identifier, v.is_section_linked,
+            v.meeting_times, v.attributes
+        FROM UNNEST(
             $1::text[], $2::text[], $3::text[], $4::text[], $5::text[],
             $6::int4[], $7::int4[], $8::int4[], $9::int4[],
-            array_fill(NOW()::timestamptz, ARRAY[$10])
-        ) AS t(
+            $10::text[], $11::text[], $12::text[], $13::text[],
+            $14::int4[], $15::int4[], $16::int4[],
+            $17::text[], $18::int4[], $19::int4[],
+            $20::text[], $21::bool[],
+            $22::jsonb[], $23::jsonb[]
+        ) AS v(
             crn,
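+            -- alias order here must match the $1..$23 arrays bound to UNNEST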
            subject, course_number, title, term_code,
-            enrollment, max_enrollment, wait_count, wait_capacity, last_scraped_at
+            enrollment, max_enrollment, wait_count, wait_capacity,
+            sequence_number, part_of_term, instructional_method, campus,
+            credit_hours, credit_hour_low, credit_hour_high,
+            cross_list, cross_list_capacity, cross_list_count,
+            link_identifier, is_section_linked,
+            meeting_times, attributes
         )
         ON CONFLICT (crn, term_code)
         DO UPDATE SET
@@ -92,7 +188,22 @@ pub async fn batch_upsert_courses(courses: &[Course], db_pool: &PgPool) -> Result<()> {
             max_enrollment = EXCLUDED.max_enrollment,
             wait_count = EXCLUDED.wait_count,
             wait_capacity = EXCLUDED.wait_capacity,
-            last_scraped_at = EXCLUDED.last_scraped_at
+            last_scraped_at = EXCLUDED.last_scraped_at,
+            sequence_number = EXCLUDED.sequence_number,
+            part_of_term = EXCLUDED.part_of_term,
+            instructional_method = EXCLUDED.instructional_method,
+            campus = EXCLUDED.campus,
+            credit_hours = EXCLUDED.credit_hours,
+            credit_hour_low = EXCLUDED.credit_hour_low,
+            credit_hour_high = EXCLUDED.credit_hour_high,
+            cross_list = EXCLUDED.cross_list,
+            cross_list_capacity = EXCLUDED.cross_list_capacity,
+            cross_list_count = EXCLUDED.cross_list_count,
+            link_identifier = EXCLUDED.link_identifier,
+            is_section_linked = EXCLUDED.is_section_linked,
+            meeting_times = EXCLUDED.meeting_times,
+            attributes = EXCLUDED.attributes
+        RETURNING id
         "#,
     )
     .bind(&crns)
@@ -104,19 +215,111 @@ pub async fn batch_upsert_courses(courses: &[Course], db_pool: &PgPool) -> Result<()> {
     .bind(&max_enrollments)
     .bind(&wait_counts)
     .bind(&wait_capacities)
-    .bind(course_count as i32)
-    .execute(db_pool)
+    .bind(&sequence_numbers)
+    .bind(&parts_of_term)
+    .bind(&instructional_methods)
+    .bind(&campuses)
+    .bind(&credit_hours)
+    .bind(&credit_hour_lows)
+    .bind(&credit_hour_highs)
+    .bind(&cross_lists)
+    .bind(&cross_list_capacities)
+    .bind(&cross_list_counts)
+    .bind(&link_identifiers)
+    .bind(&is_section_linkeds)
+    .bind(&meeting_times_json)
+    .bind(&attributes_json)
+    .fetch_all(db_pool)
     .await
     .map_err(|e| anyhow::anyhow!("Failed to batch upsert courses: {}", e))?;
 
-    let duration = start.elapsed();
+    Ok(rows)
+}
 
-    info!(
-        courses_count = course_count,
-        rows_affected = result.rows_affected(),
-        duration_ms = duration.as_millis(),
-        "Batch upserted courses"
-    );
+/// Deduplicate and upsert all instructors from the batch.
+async fn upsert_instructors(courses: &[Course], db_pool: &PgPool) -> Result<()> {
+    let mut seen = HashSet::new();
+    let mut banner_ids = Vec::new();
+    let mut display_names = Vec::new();
+    let mut emails: Vec<Option<&str>> = Vec::new();
+
+    for course in courses {
+        for faculty in &course.faculty {
+            if seen.insert(faculty.banner_id.as_str()) {
+                banner_ids.push(faculty.banner_id.as_str());
+                display_names.push(faculty.display_name.as_str());
+                emails.push(faculty.email_address.as_deref());
+            }
+        }
+    }
+
+    if banner_ids.is_empty() {
+        return Ok(());
+    }
+
+    sqlx::query(
+        r#"
+        INSERT INTO instructors (banner_id, display_name, email)
+        SELECT * FROM UNNEST($1::text[], $2::text[], $3::text[])
+        ON CONFLICT (banner_id)
+        DO UPDATE SET
+            display_name = EXCLUDED.display_name,
+            email = COALESCE(EXCLUDED.email, instructors.email)
+        "#,
+    )
+    .bind(&banner_ids)
+    .bind(&display_names)
+    .bind(&emails)
+    .execute(db_pool)
+    .await
+    .map_err(|e| anyhow::anyhow!("Failed to batch upsert instructors: {}", e))?;
+
+    Ok(())
+}
+
+/// Link courses to their instructors via the junction table.
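+///
+/// Links are replaced wholesale (DELETE then re-INSERT) so that instructors
+/// removed in Banner also disappear here; the `ON CONFLICT` clause is a safety
+/// net against a concurrent writer re-creating a link between the two statements.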
+async fn upsert_course_instructors(
+    courses: &[Course],
+    course_ids: &[i32],
+    db_pool: &PgPool,
+) -> Result<()> {
+    let mut cids = Vec::new();
+    let mut iids = Vec::new();
+    let mut primaries = Vec::new();
+
+    for (course, &course_id) in courses.iter().zip(course_ids) {
+        for faculty in &course.faculty {
+            cids.push(course_id);
+            iids.push(faculty.banner_id.as_str());
+            primaries.push(faculty.primary_indicator);
+        }
+    }
+
+    if cids.is_empty() {
+        return Ok(());
+    }
+
+    // Delete existing links for these courses then re-insert.
+    // This handles instructor changes cleanly.
+    sqlx::query("DELETE FROM course_instructors WHERE course_id = ANY($1)")
+        .bind(&cids)
+        .execute(db_pool)
+        .await?;
+
+    sqlx::query(
+        r#"
+        INSERT INTO course_instructors (course_id, instructor_id, is_primary)
+        SELECT * FROM UNNEST($1::int4[], $2::text[], $3::bool[])
+        ON CONFLICT (course_id, instructor_id)
+        DO UPDATE SET is_primary = EXCLUDED.is_primary
+        "#,
+    )
+    .bind(&cids)
+    .bind(&iids)
+    .bind(&primaries)
+    .execute(db_pool)
+    .await
+    .map_err(|e| anyhow::anyhow!("Failed to batch upsert course_instructors: {}", e))?;
 
     Ok(())
 }
diff --git a/src/data/courses.rs b/src/data/courses.rs
new file mode 100644
index 0000000..7b7b38e
--- /dev/null
+++ b/src/data/courses.rs
@@ -0,0 +1,128 @@
+//! Database query functions for courses, used by the web API.
+
+use crate::data::models::Course;
+use crate::error::Result;
+use sqlx::PgPool;
+
+/// Search courses by term with optional filters.
+///
+/// Returns `(courses, total_count)` for pagination. Combines tsvector FTS for
+/// word matches with trigram-backed ILIKE for substring matches (ORed together).
+#[allow(clippy::too_many_arguments)]
+pub async fn search_courses(
+    db_pool: &PgPool,
+    term_code: &str,
+    subject: Option<&str>,
+    title_query: Option<&str>,
+    course_number_low: Option<i32>,
+    course_number_high: Option<i32>,
+    open_only: bool,
+    instructional_method: Option<&str>,
+    campus: Option<&str>,
+    limit: i32,
+    offset: i32,
+) -> Result<(Vec<Course>, i64)> {
+    // Build WHERE clauses dynamically via parameter binding with NULL sentinels:
+    // each optional filter uses ($N IS NULL OR column = $N) so NULL means "no filter".
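+    // Example: subject = Some("CS") with every other filter left as None
+    // collapses the WHERE clause to an effective `term_code = $1 AND subject = 'CS'`.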
+    let courses = sqlx::query_as::<_, Course>(
+        r#"
+        SELECT *
+        FROM courses
+        WHERE term_code = $1
+          AND ($2::text IS NULL OR subject = $2)
+          AND ($3::text IS NULL OR title_search @@ plainto_tsquery('simple', $3) OR title ILIKE '%' || $3 || '%')
+          AND ($4::int IS NULL OR course_number::int >= $4)
+          AND ($5::int IS NULL OR course_number::int <= $5)
+          AND ($6::bool = false OR max_enrollment > enrollment)
+          AND ($7::text IS NULL OR instructional_method = $7)
+          AND ($8::text IS NULL OR campus = $8)
+        ORDER BY subject, course_number, sequence_number
+        LIMIT $9 OFFSET $10
+        "#,
+    )
+    .bind(term_code)
+    .bind(subject)
+    .bind(title_query)
+    .bind(course_number_low)
+    .bind(course_number_high)
+    .bind(open_only)
+    .bind(instructional_method)
+    .bind(campus)
+    .bind(limit)
+    .bind(offset)
+    .fetch_all(db_pool)
+    .await?;
+
+    let total: (i64,) = sqlx::query_as(
+        r#"
+        SELECT COUNT(*)
+        FROM courses
+        WHERE term_code = $1
+          AND ($2::text IS NULL OR subject = $2)
+          AND ($3::text IS NULL OR title_search @@ plainto_tsquery('simple', $3) OR title ILIKE '%' || $3 || '%')
+          AND ($4::int IS NULL OR course_number::int >= $4)
+          AND ($5::int IS NULL OR course_number::int <= $5)
+          AND ($6::bool = false OR max_enrollment > enrollment)
+          AND ($7::text IS NULL OR instructional_method = $7)
+          AND ($8::text IS NULL OR campus = $8)
+        "#,
+    )
+    .bind(term_code)
+    .bind(subject)
+    .bind(title_query)
+    .bind(course_number_low)
+    .bind(course_number_high)
+    .bind(open_only)
+    .bind(instructional_method)
+    .bind(campus)
+    .fetch_one(db_pool)
+    .await?;
+
+    Ok((courses, total.0))
+}
+
+/// Get a single course by CRN and term.
+pub async fn get_course_by_crn(
+    db_pool: &PgPool,
+    crn: &str,
+    term_code: &str,
+) -> Result<Option<Course>> {
+    let course =
+        sqlx::query_as::<_, Course>("SELECT * FROM courses WHERE crn = $1 AND term_code = $2")
+            .bind(crn)
+            .bind(term_code)
+            .fetch_optional(db_pool)
+            .await?;
+    Ok(course)
+}
+
+/// Get instructors for a course by course ID.
+///
+/// Returns `(banner_id, display_name, email, is_primary)` tuples.
+pub async fn get_course_instructors(
+    db_pool: &PgPool,
+    course_id: i32,
+) -> Result<Vec<(String, String, Option<String>, bool)>> {
+    let rows: Vec<(String, String, Option<String>, bool)> = sqlx::query_as(
+        r#"
+        SELECT i.banner_id, i.display_name, i.email, ci.is_primary
+        FROM course_instructors ci
+        JOIN instructors i ON i.banner_id = ci.instructor_id
+        WHERE ci.course_id = $1
+        ORDER BY ci.is_primary DESC, i.display_name
+        "#,
+    )
+    .bind(course_id)
+    .fetch_all(db_pool)
+    .await?;
+    Ok(rows)
+}
+
+/// Get all distinct term codes that have courses in the DB.
+pub async fn get_available_terms(db_pool: &PgPool) -> Result<Vec<String>> {
+    let rows: Vec<(String,)> =
+        sqlx::query_as("SELECT DISTINCT term_code FROM courses ORDER BY term_code DESC")
+            .fetch_all(db_pool)
+            .await?;
+    Ok(rows.into_iter().map(|(tc,)| tc).collect())
+}
diff --git a/src/data/mod.rs b/src/data/mod.rs
index 091b5e7..9f2afda 100644
--- a/src/data/mod.rs
+++ b/src/data/mod.rs
@@ -1,5 +1,7 @@
 //! Database models and schema.
 
 pub mod batch;
+pub mod courses;
 pub mod models;
+pub mod reference;
 pub mod scrape_jobs;
diff --git a/src/data/models.rs b/src/data/models.rs
index 7843334..e4b096d 100644
--- a/src/data/models.rs
+++ b/src/data/models.rs
@@ -1,8 +1,31 @@
 //! `sqlx` models for the database schema.
 
 use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
 use serde_json::Value;
 
+/// Represents a meeting time stored as JSONB in the courses table.
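+///
+/// A serialized element looks roughly like this (a sketch, not exhaustive;
+/// values are illustrative only):
+/// `{"begin_time":"0900","end_time":"0950","monday":true,"building":"MH","room":"2.02"}`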
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DbMeetingTime {
+    pub begin_time: Option<String>,
+    pub end_time: Option<String>,
+    pub start_date: String,
+    pub end_date: String,
+    pub monday: bool,
+    pub tuesday: bool,
+    pub wednesday: bool,
+    pub thursday: bool,
+    pub friday: bool,
+    pub saturday: bool,
+    pub sunday: bool,
+    pub building: Option<String>,
+    pub building_description: Option<String>,
+    pub room: Option<String>,
+    pub campus: Option<String>,
+    pub meeting_type: String,
+    pub meeting_schedule_type: String,
+}
+
 #[allow(dead_code)]
 #[derive(sqlx::FromRow, Debug, Clone)]
 pub struct Course {
@@ -17,6 +40,46 @@ pub struct Course {
     pub wait_count: i32,
     pub wait_capacity: i32,
     pub last_scraped_at: DateTime<Utc>,
+    // New scalar fields
+    pub sequence_number: Option<String>,
+    pub part_of_term: Option<String>,
+    pub instructional_method: Option<String>,
+    pub campus: Option<String>,
+    pub credit_hours: Option<i32>,
+    pub credit_hour_low: Option<i32>,
+    pub credit_hour_high: Option<i32>,
+    pub cross_list: Option<String>,
+    pub cross_list_capacity: Option<i32>,
+    pub cross_list_count: Option<i32>,
+    pub link_identifier: Option<String>,
+    pub is_section_linked: Option<bool>,
+    // JSONB fields
+    pub meeting_times: Value,
+    pub attributes: Value,
+}
+
+#[allow(dead_code)]
+#[derive(sqlx::FromRow, Debug, Clone)]
+pub struct Instructor {
+    pub banner_id: String,
+    pub display_name: String,
+    pub email: Option<String>,
+}
+
+#[allow(dead_code)]
+#[derive(sqlx::FromRow, Debug, Clone)]
+pub struct CourseInstructor {
+    pub course_id: i32,
+    pub instructor_id: String,
+    pub is_primary: bool,
+}
+
+#[allow(dead_code)]
+#[derive(sqlx::FromRow, Debug, Clone)]
+pub struct ReferenceData {
+    pub category: String,
+    pub code: String,
+    pub description: String,
 }
 
 #[allow(dead_code)]
diff --git a/src/data/reference.rs b/src/data/reference.rs
new file mode 100644
index 0000000..607fd99
--- /dev/null
+++ b/src/data/reference.rs
@@ -0,0 +1,53 @@
+//! Database operations for the `reference_data` table (code→description lookups).
+
+use crate::data::models::ReferenceData;
+use crate::error::Result;
+use sqlx::PgPool;
+
+/// Batch upsert reference data entries.
+pub async fn batch_upsert(entries: &[ReferenceData], db_pool: &PgPool) -> Result<()> {
+    if entries.is_empty() {
+        return Ok(());
+    }
+
+    let categories: Vec<&str> = entries.iter().map(|e| e.category.as_str()).collect();
+    let codes: Vec<&str> = entries.iter().map(|e| e.code.as_str()).collect();
+    let descriptions: Vec<&str> = entries.iter().map(|e| e.description.as_str()).collect();
+
+    sqlx::query(
+        r#"
+        INSERT INTO reference_data (category, code, description)
+        SELECT * FROM UNNEST($1::text[], $2::text[], $3::text[])
+        ON CONFLICT (category, code)
+        DO UPDATE SET description = EXCLUDED.description
+        "#,
+    )
+    .bind(&categories)
+    .bind(&codes)
+    .bind(&descriptions)
+    .execute(db_pool)
+    .await?;
+
+    Ok(())
+}
+
+/// Get all reference data entries for a category.
+pub async fn get_by_category(category: &str, db_pool: &PgPool) -> Result<Vec<ReferenceData>> {
+    let rows = sqlx::query_as::<_, ReferenceData>(
+        "SELECT category, code, description FROM reference_data WHERE category = $1 ORDER BY description",
+    )
+    .bind(category)
+    .fetch_all(db_pool)
+    .await?;
+    Ok(rows)
+}
+
+/// Get all reference data entries (for cache initialization).
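+///
+/// Typically paired with `ReferenceCache::from_entries`, as done by
+/// `AppState::load_reference_cache` at startup and by the scheduler after
+/// each reference scrape.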
+pub async fn get_all(db_pool: &PgPool) -> Result<Vec<ReferenceData>> {
+    let rows = sqlx::query_as::<_, ReferenceData>(
+        "SELECT category, code, description FROM reference_data ORDER BY category, description",
+    )
+    .fetch_all(db_pool)
+    .await?;
+    Ok(rows)
+}
diff --git a/src/scraper/mod.rs b/src/scraper/mod.rs
index 5926e27..51c3fb0 100644
--- a/src/scraper/mod.rs
+++ b/src/scraper/mod.rs
@@ -4,10 +4,11 @@ pub mod worker;
 
 use crate::banner::BannerApi;
 use crate::services::Service;
+use crate::state::ReferenceCache;
 use crate::status::{ServiceStatus, ServiceStatusRegistry};
 use sqlx::PgPool;
 use std::sync::Arc;
-use tokio::sync::broadcast;
+use tokio::sync::{RwLock, broadcast};
 use tokio::task::JoinHandle;
 use tracing::{info, warn};
 
@@ -21,6 +22,7 @@ use self::worker::Worker;
 pub struct ScraperService {
     db_pool: PgPool,
     banner_api: Arc<BannerApi>,
+    reference_cache: Arc<RwLock<ReferenceCache>>,
     service_statuses: ServiceStatusRegistry,
     scheduler_handle: Option<JoinHandle<()>>,
     worker_handles: Vec<JoinHandle<()>>,
@@ -29,10 +31,16 @@ pub struct ScraperService {
 
 impl ScraperService {
     /// Creates a new `ScraperService`.
-    pub fn new(db_pool: PgPool, banner_api: Arc<BannerApi>, service_statuses: ServiceStatusRegistry) -> Self {
+    pub fn new(
+        db_pool: PgPool,
+        banner_api: Arc<BannerApi>,
+        reference_cache: Arc<RwLock<ReferenceCache>>,
+        service_statuses: ServiceStatusRegistry,
+    ) -> Self {
         Self {
             db_pool,
             banner_api,
+            reference_cache,
             service_statuses,
             scheduler_handle: None,
             worker_handles: Vec::new(),
@@ -48,7 +56,11 @@ impl ScraperService {
         let (shutdown_tx, _) = broadcast::channel(1);
         self.shutdown_tx = Some(shutdown_tx.clone());
 
-        let scheduler = Scheduler::new(self.db_pool.clone(), self.banner_api.clone());
+        let scheduler = Scheduler::new(
+            self.db_pool.clone(),
+            self.banner_api.clone(),
+            self.reference_cache.clone(),
+        );
         let shutdown_rx = shutdown_tx.subscribe();
         let scheduler_handle = tokio::spawn(async move {
             scheduler.run(shutdown_rx).await;
@@ -86,7 +98,8 @@ impl Service for ScraperService {
     }
 
     async fn shutdown(&mut self) -> Result<(), anyhow::Error> {
-        self.service_statuses.set("scraper", ServiceStatus::Disabled);
+        self.service_statuses
+            .set("scraper", ServiceStatus::Disabled);
         info!("Shutting down scraper service");
 
         // Send shutdown signal to all tasks
diff --git a/src/scraper/scheduler.rs b/src/scraper/scheduler.rs
index 3ea30dc..1649570 100644
--- a/src/scraper/scheduler.rs
+++ b/src/scraper/scheduler.rs
@@ -1,28 +1,38 @@
 use crate::banner::{BannerApi, Term};
-use crate::data::models::{ScrapePriority, TargetType};
+use crate::data::models::{ReferenceData, ScrapePriority, TargetType};
 use crate::data::scrape_jobs;
 use crate::error::Result;
 use crate::scraper::jobs::subject::SubjectJob;
+use crate::state::ReferenceCache;
 use serde_json::json;
 use sqlx::PgPool;
 use std::sync::Arc;
-use std::time::Duration;
-use tokio::sync::broadcast;
+use std::time::{Duration, Instant};
+use tokio::sync::{RwLock, broadcast};
 use tokio::time;
 use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, warn};
 
+/// How often reference data is re-scraped (6 hours).
+const REFERENCE_DATA_INTERVAL: Duration = Duration::from_secs(6 * 60 * 60);
+
 /// Periodically analyzes data and enqueues prioritized scrape jobs.
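+/// Also owns the periodic (6-hour) reference-data refresh, folded into the
+/// same 60-second scheduling loop so scraping decisions stay in one place.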
 pub struct Scheduler {
     db_pool: PgPool,
     banner_api: Arc<BannerApi>,
+    reference_cache: Arc<RwLock<ReferenceCache>>,
 }
 
 impl Scheduler {
-    pub fn new(db_pool: PgPool, banner_api: Arc<BannerApi>) -> Self {
+    pub fn new(
+        db_pool: PgPool,
+        banner_api: Arc<BannerApi>,
+        reference_cache: Arc<RwLock<ReferenceCache>>,
+    ) -> Self {
         Self {
             db_pool,
             banner_api,
+            reference_cache,
         }
     }
 
@@ -41,26 +51,39 @@ impl Scheduler {
         let work_interval = Duration::from_secs(60);
         let mut next_run = time::Instant::now();
         let mut current_work: Option<(tokio::task::JoinHandle<()>, CancellationToken)> = None;
+        // `None` means "never scraped", so the first cycle scrapes immediately.
+        // (Computing `Instant::now() - REFERENCE_DATA_INTERVAL` instead can
+        // panic: `Instant` subtraction underflows when the process is younger
+        // than 6 hours relative to the platform's monotonic-clock epoch.)
+        let mut last_ref_scrape: Option<Instant> = None;
 
         loop {
             tokio::select! {
                 _ = time::sleep_until(next_run) => {
                     let cancel_token = CancellationToken::new();
+                    let should_scrape_ref = last_ref_scrape
+                        .map_or(true, |at| at.elapsed() >= REFERENCE_DATA_INTERVAL);
+
                     // Spawn work in separate task to allow graceful cancellation during shutdown.
-                    // Without this, shutdown would have to wait for the full scheduling cycle.
                     let work_handle = tokio::spawn({
                         let db_pool = self.db_pool.clone();
                         let banner_api = self.banner_api.clone();
                         let cancel_token = cancel_token.clone();
+                        let reference_cache = self.reference_cache.clone();
                         async move {
                             tokio::select! {
-                                result = Self::schedule_jobs_impl(&db_pool, &banner_api) => {
-                                    if let Err(e) = result {
+                                _ = async {
+                                    if should_scrape_ref
+                                        && let Err(e) = Self::scrape_reference_data(&db_pool, &banner_api, &reference_cache).await
+                                    {
+                                        error!(error = ?e, "Failed to scrape reference data");
+                                    }
+                                    if let Err(e) = Self::schedule_jobs_impl(&db_pool, &banner_api).await {
                                         error!(error = ?e, "Failed to schedule jobs");
                                     }
-                                }
+                                } => {}
                                 _ = cancel_token.cancelled() => {
                                     debug!("Scheduling work cancelled gracefully");
                                 }
@@ -68,6 +87,10 @@ impl Scheduler {
                         }
                     });
 
+                    if should_scrape_ref {
+                        last_ref_scrape = Some(Instant::now());
+                    }
+
                     current_work = Some((work_handle, cancel_token));
                     next_run = time::Instant::now() + work_interval;
                 }
@@ -170,4 +193,95 @@ impl Scheduler {
         debug!("Job scheduling complete");
         Ok(())
     }
+
+    /// Scrape all reference data categories from Banner and upsert to DB, then refresh cache.
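+    ///
+    /// Covers the `subject`, `campus`, `instructional_method`, `part_of_term`,
+    /// and `attribute` categories; a failed endpoint is logged and skipped so
+    /// one bad category never blocks the rest.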
+    #[tracing::instrument(skip_all)]
+    async fn scrape_reference_data(
+        db_pool: &PgPool,
+        banner_api: &BannerApi,
+        reference_cache: &Arc<RwLock<ReferenceCache>>,
+    ) -> Result<()> {
+        let term = Term::get_current().inner().to_string();
+        info!(term = %term, "Scraping reference data");
+
+        let mut all_entries = Vec::new();
+
+        // Subjects
+        match banner_api.get_subjects("", &term, 1, 500).await {
+            Ok(pairs) => {
+                debug!(count = pairs.len(), "Fetched subjects");
+                all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
+                    category: "subject".to_string(),
+                    code: p.code,
+                    description: p.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch subjects"),
+        }
+
+        // Campuses
+        match banner_api.get_campuses(&term).await {
+            Ok(pairs) => {
+                debug!(count = pairs.len(), "Fetched campuses");
+                all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
+                    category: "campus".to_string(),
+                    code: p.code,
+                    description: p.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch campuses"),
+        }
+
+        // Instructional methods
+        match banner_api.get_instructional_methods(&term).await {
+            Ok(pairs) => {
+                debug!(count = pairs.len(), "Fetched instructional methods");
+                all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
+                    category: "instructional_method".to_string(),
+                    code: p.code,
+                    description: p.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch instructional methods"),
+        }
+
+        // Parts of term
+        match banner_api.get_parts_of_term(&term).await {
+            Ok(pairs) => {
+                debug!(count = pairs.len(), "Fetched parts of term");
+                all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
+                    category: "part_of_term".to_string(),
+                    code: p.code,
+                    description: p.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch parts of term"),
+        }
+
+        // Attributes
+        match banner_api.get_attributes(&term).await {
+            Ok(pairs) => {
+                debug!(count = pairs.len(), "Fetched attributes");
+                all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
+                    category: "attribute".to_string(),
+                    code: p.code,
+                    description: p.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch attributes"),
+        }
+
+        // Batch upsert all entries
+        let total = all_entries.len();
+        crate::data::reference::batch_upsert(&all_entries, db_pool).await?;
+        info!(total_entries = total, "Reference data upserted to DB");
+
+        // Refresh in-memory cache
+        let all = crate::data::reference::get_all(db_pool).await?;
+        let count = all.len();
+        *reference_cache.write().await = ReferenceCache::from_entries(all);
+        info!(entries = count, "Reference cache refreshed");
+
+        Ok(())
+    }
 }
diff --git a/src/services/web.rs b/src/services/web.rs
index e53cc8c..4ba10f1 100644
--- a/src/services/web.rs
+++ b/src/services/web.rs
@@ -23,10 +23,7 @@ impl WebService {
         }
     }
 
     /// Periodically pings the database and updates the "database" service status.
-    async fn db_health_check_loop(
-        state: AppState,
-        mut shutdown_rx: broadcast::Receiver<()>,
-    ) {
+    async fn db_health_check_loop(state: AppState, mut shutdown_rx: broadcast::Receiver<()>) {
         use std::time::Duration;
 
         let mut interval = tokio::time::interval(Duration::from_secs(30));
@@ -66,7 +63,9 @@ impl Service for WebService {
         let addr = SocketAddr::from(([0, 0, 0, 0], self.port));
         let listener = TcpListener::bind(addr).await?;
 
-        self.app_state.service_statuses.set("web", ServiceStatus::Active);
+        self.app_state
+            .service_statuses
+            .set("web", ServiceStatus::Active);
         info!(
             service = "web",
             address = %addr,
diff --git a/src/state.rs b/src/state.rs
index a5be5e1..d715250 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -2,16 +2,71 @@
 
 use crate::banner::BannerApi;
 use crate::banner::Course;
+use crate::data::models::ReferenceData;
 use crate::status::ServiceStatusRegistry;
 use anyhow::Result;
 use sqlx::PgPool;
+use std::collections::HashMap;
 use std::sync::Arc;
+use tokio::sync::RwLock;
+
+/// In-memory cache for reference data (code→description lookups).
+///
+/// Loaded from the `reference_data` table on startup and refreshed periodically.
+pub struct ReferenceCache {
+    /// `(category, code)` → `description`
+    data: HashMap<(String, String), String>,
+}
+
+impl Default for ReferenceCache {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl ReferenceCache {
+    /// Create an empty cache.
+    pub fn new() -> Self {
+        Self {
+            data: HashMap::new(),
+        }
+    }
+
+    /// Build cache from a list of reference data entries.
+    pub fn from_entries(entries: Vec<ReferenceData>) -> Self {
+        let data = entries
+            .into_iter()
+            .map(|e| ((e.category, e.code), e.description))
+            .collect();
+        Self { data }
+    }
+
+    /// Look up a description by category and code.
+    pub fn lookup(&self, category: &str, code: &str) -> Option<&str> {
+        self.data
+            .get(&(category.to_string(), code.to_string()))
+            .map(|s| s.as_str())
+    }
+
+    /// Get all `(code, description)` pairs for a category, sorted by description.
+    pub fn entries_for_category(&self, category: &str) -> Vec<(&str, &str)> {
+        let mut entries: Vec<(&str, &str)> = self
+            .data
+            .iter()
+            .filter(|((cat, _), _)| cat == category)
+            .map(|((_, code), desc)| (code.as_str(), desc.as_str()))
+            .collect();
+        entries.sort_by(|a, b| a.1.cmp(b.1));
+        entries
+    }
+}
 
 #[derive(Clone)]
 pub struct AppState {
     pub banner_api: Arc<BannerApi>,
     pub db_pool: PgPool,
     pub service_statuses: ServiceStatusRegistry,
+    pub reference_cache: Arc<RwLock<ReferenceCache>>,
 }
 
 impl AppState {
@@ -20,9 +75,20 @@ impl AppState {
             banner_api,
             db_pool,
             service_statuses: ServiceStatusRegistry::new(),
+            reference_cache: Arc::new(RwLock::new(ReferenceCache::new())),
         }
     }
 
+    /// Initialize the reference cache from the database.
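+    ///
+    /// Safe to call against an empty `reference_data` table: the cache simply
+    /// stays empty until the scheduler's first reference scrape repopulates it.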
+    pub async fn load_reference_cache(&self) -> Result<()> {
+        let entries = crate::data::reference::get_all(&self.db_pool).await?;
+        let count = entries.len();
+        let cache = ReferenceCache::from_entries(entries);
+        *self.reference_cache.write().await = cache;
+        tracing::info!(entries = count, "Reference cache loaded");
+        Ok(())
+    }
+
     /// Get a course by CRN directly from Banner API
     pub async fn get_course_or_fetch(&self, term: &str, crn: &str) -> Result<Course> {
         self.banner_api
diff --git a/src/web/routes.rs b/src/web/routes.rs
index b58c067..d524258 100644
--- a/src/web/routes.rs
+++ b/src/web/routes.rs
@@ -3,7 +3,8 @@
 use axum::{
     Router,
     body::Body,
-    extract::{Request, State},
+    extract::{Path, Query, Request, State},
+    http::StatusCode as AxumStatusCode,
     response::{Json, Response},
     routing::get,
 };
@@ -14,7 +15,7 @@ use axum::{
 };
 #[cfg(feature = "embed-assets")]
 use http::header;
-use serde::Serialize;
+use serde::{Deserialize, Serialize};
 use serde_json::{Value, json};
 use std::{collections::BTreeMap, time::Duration};
 
@@ -71,6 +72,11 @@ pub fn create_router(app_state: AppState) -> Router {
         .route("/health", get(health))
         .route("/status", get(status))
         .route("/metrics", get(metrics))
+        .route("/courses/search", get(search_courses))
+        .route("/courses/{term}/{crn}", get(get_course))
+        .route("/terms", get(get_terms))
+        .route("/subjects", get(get_subjects))
+        .route("/reference/{category}", get(get_reference))
         .with_state(app_state);
 
     let mut router = Router::new().nest("/api", api_router);
@@ -249,7 +255,10 @@ async fn status(State(state): State<AppState>) -> Json<Value> {
         );
     }
 
-    let overall_status = if services.values().any(|s| matches!(s.status, ServiceStatus::Error)) {
+    let overall_status = if services
+        .values()
+        .any(|s| matches!(s.status, ServiceStatus::Error))
+    {
         ServiceStatus::Error
     } else if !services.is_empty()
         && services
@@ -281,3 +290,280 @@ async fn metrics() -> Json<Value> {
         "timestamp": chrono::Utc::now().to_rfc3339()
     }))
 }
+
+// ============================================================
+// Course search & detail API
+// ============================================================
+
+#[derive(Deserialize)]
+struct SearchParams {
+    term: String,
+    subject: Option<String>,
+    q: Option<String>,
+    course_number_low: Option<i32>,
+    course_number_high: Option<i32>,
+    #[serde(default)]
+    open_only: bool,
+    instructional_method: Option<String>,
+    campus: Option<String>,
+    #[serde(default = "default_limit")]
+    limit: i32,
+    #[serde(default)]
+    offset: i32,
+}
+
+fn default_limit() -> i32 {
+    25
+}
+
+#[derive(Serialize)]
+#[serde(rename_all = "camelCase")]
+struct CourseResponse {
+    crn: String,
+    subject: String,
+    course_number: String,
+    title: String,
+    term_code: String,
+    sequence_number: Option<String>,
+    instructional_method: Option<String>,
+    campus: Option<String>,
+    enrollment: i32,
+    max_enrollment: i32,
+    wait_count: i32,
+    wait_capacity: i32,
+    credit_hours: Option<i32>,
+    credit_hour_low: Option<i32>,
+    credit_hour_high: Option<i32>,
+    cross_list: Option<String>,
+    cross_list_capacity: Option<i32>,
+    cross_list_count: Option<i32>,
+    link_identifier: Option<String>,
+    is_section_linked: Option<bool>,
+    part_of_term: Option<String>,
+    meeting_times: Value,
+    attributes: Value,
+    instructors: Vec<InstructorResponse>,
+}
+
+#[derive(Serialize)]
+#[serde(rename_all = "camelCase")]
+struct InstructorResponse {
+    banner_id: String,
+    display_name: String,
+    email: Option<String>,
+    is_primary: bool,
+}
+
+#[derive(Serialize)]
+#[serde(rename_all = "camelCase")]
+struct SearchResponse {
+    courses: Vec<CourseResponse>,
+    total_count: i64,
+    offset: i32,
+    limit: i32,
+}
+
+#[derive(Serialize)]
+#[serde(rename_all = "camelCase")]
+struct CodeDescription {
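+    // Shared response shape for the /terms, /subjects, and /reference endpoints.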
    code: String,
+    description: String,
+}
+
+/// Build a `CourseResponse` from a DB course, fetching its instructors.
+async fn build_course_response(
+    course: &crate::data::models::Course,
+    db_pool: &sqlx::PgPool,
+) -> CourseResponse {
+    let instructors = crate::data::courses::get_course_instructors(db_pool, course.id)
+        .await
+        .unwrap_or_default()
+        .into_iter()
+        .map(
+            |(banner_id, display_name, email, is_primary)| InstructorResponse {
+                banner_id,
+                display_name,
+                email,
+                is_primary,
+            },
+        )
+        .collect();
+
+    CourseResponse {
+        crn: course.crn.clone(),
+        subject: course.subject.clone(),
+        course_number: course.course_number.clone(),
+        title: course.title.clone(),
+        term_code: course.term_code.clone(),
+        sequence_number: course.sequence_number.clone(),
+        instructional_method: course.instructional_method.clone(),
+        campus: course.campus.clone(),
+        enrollment: course.enrollment,
+        max_enrollment: course.max_enrollment,
+        wait_count: course.wait_count,
+        wait_capacity: course.wait_capacity,
+        credit_hours: course.credit_hours,
+        credit_hour_low: course.credit_hour_low,
+        credit_hour_high: course.credit_hour_high,
+        cross_list: course.cross_list.clone(),
+        cross_list_capacity: course.cross_list_capacity,
+        cross_list_count: course.cross_list_count,
+        link_identifier: course.link_identifier.clone(),
+        is_section_linked: course.is_section_linked,
+        part_of_term: course.part_of_term.clone(),
+        meeting_times: course.meeting_times.clone(),
+        attributes: course.attributes.clone(),
+        instructors,
+    }
+}
+
+/// `GET /api/courses/search`
+async fn search_courses(
+    State(state): State<AppState>,
+    Query(params): Query<SearchParams>,
+) -> Result<Json<SearchResponse>, (AxumStatusCode, String)> {
+    let limit = params.limit.clamp(1, 100);
+    let offset = params.offset.max(0);
+
+    let (courses, total_count) = crate::data::courses::search_courses(
+        &state.db_pool,
+        &params.term,
+        params.subject.as_deref(),
+        params.q.as_deref(),
+        params.course_number_low,
+        params.course_number_high,
+        params.open_only,
+        params.instructional_method.as_deref(),
+        params.campus.as_deref(),
+        limit,
+        offset,
+    )
+    .await
+    .map_err(|e| {
+        tracing::error!(error = %e, "Course search failed");
+        (
+            AxumStatusCode::INTERNAL_SERVER_ERROR,
+            "Search failed".to_string(),
+        )
+    })?;
+
+    let mut course_responses = Vec::with_capacity(courses.len());
+    for course in &courses {
+        course_responses.push(build_course_response(course, &state.db_pool).await);
+    }
+
+    Ok(Json(SearchResponse {
+        courses: course_responses,
+        total_count,
+        offset,
+        limit,
+    }))
+}
+
+/// `GET /api/courses/:term/:crn`
+async fn get_course(
+    State(state): State<AppState>,
+    Path((term, crn)): Path<(String, String)>,
+) -> Result<Json<CourseResponse>, (AxumStatusCode, String)> {
+    let course = crate::data::courses::get_course_by_crn(&state.db_pool, &crn, &term)
+        .await
+        .map_err(|e| {
+            tracing::error!(error = %e, "Course lookup failed");
+            (
+                AxumStatusCode::INTERNAL_SERVER_ERROR,
+                "Lookup failed".to_string(),
+            )
+        })?
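+        // A missing row is a client-facing 404, distinct from the 500 above.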
        .ok_or_else(|| (AxumStatusCode::NOT_FOUND, "Course not found".to_string()))?;
+
+    Ok(Json(build_course_response(&course, &state.db_pool).await))
+}
+
+/// `GET /api/terms`
+async fn get_terms(
+    State(state): State<AppState>,
+) -> Result<Json<Vec<CodeDescription>>, (AxumStatusCode, String)> {
+    let cache = state.reference_cache.read().await;
+    let term_codes = crate::data::courses::get_available_terms(&state.db_pool)
+        .await
+        .map_err(|e| {
+            tracing::error!(error = %e, "Failed to get terms");
+            (
+                AxumStatusCode::INTERNAL_SERVER_ERROR,
+                "Failed to get terms".to_string(),
+            )
+        })?;
+
+    let terms: Vec<CodeDescription> = term_codes
+        .into_iter()
+        .map(|code| {
+            let description = cache
+                .lookup("term", &code)
+                .unwrap_or("Unknown Term")
+                .to_string();
+            CodeDescription { code, description }
+        })
+        .collect();
+
+    Ok(Json(terms))
+}
+
+/// `GET /api/subjects` (served from the reference cache; no term parameter)
+async fn get_subjects(
+    State(state): State<AppState>,
+) -> Result<Json<Vec<CodeDescription>>, (AxumStatusCode, String)> {
+    let cache = state.reference_cache.read().await;
+    let entries = cache.entries_for_category("subject");
+
+    let subjects: Vec<CodeDescription> = entries
+        .into_iter()
+        .map(|(code, description)| CodeDescription {
+            code: code.to_string(),
+            description: description.to_string(),
+        })
+        .collect();
+
+    Ok(Json(subjects))
+}
+
+/// `GET /api/reference/:category`
+async fn get_reference(
+    State(state): State<AppState>,
+    Path(category): Path<String>,
+) -> Result<Json<Vec<CodeDescription>>, (AxumStatusCode, String)> {
+    let cache = state.reference_cache.read().await;
+    let entries = cache.entries_for_category(&category);
+
+    if entries.is_empty() {
+        // Fall back to DB query in case cache doesn't have this category
+        drop(cache);
+        let rows = crate::data::reference::get_by_category(&category, &state.db_pool)
+            .await
+            .map_err(|e| {
+                tracing::error!(error = %e, category = %category, "Reference lookup failed");
+                (
+                    AxumStatusCode::INTERNAL_SERVER_ERROR,
+                    "Lookup failed".to_string(),
+                )
+            })?;
+
+        return Ok(Json(
+            rows.into_iter()
+                .map(|r| CodeDescription {
+                    code: r.code,
+                    description: r.description,
+                })
+                .collect(),
+        ));
+    }
+
+    Ok(Json(
+        entries
+            .into_iter()
+            .map(|(code, desc)| CodeDescription {
+                code: code.to_string(),
+                description: desc.to_string(),
+            })
+            .collect(),
+    ))
+}
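
--
Reviewer note: a quick sketch of the new API surface once deployed. Values
such as term 202510 and CRN 12345 are placeholders, not taken from this change:

    GET /api/courses/search?term=202510&subject=CS&open_only=true&limit=25
    GET /api/courses/202510/12345
    GET /api/terms
    GET /api/subjects
    GET /api/reference/campus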