From 6df4303bd6650c583a9b455cca2dfa08af6aea04 Mon Sep 17 00:00:00 2001 From: Xevion Date: Wed, 28 Jan 2026 21:06:29 -0600 Subject: [PATCH] feat: implement comprehensive course data model with reference cache and search --- .../20260128000000_expand_course_schema.sql | 83 +++++ src/app.rs | 8 +- src/banner/api.rs | 23 ++ src/banner/models/meetings.rs | 2 +- src/banner/session.rs | 6 +- src/bot/commands/ics.rs | 4 +- src/data/batch.rs | 307 +++++++++++++++--- src/data/courses.rs | 128 ++++++++ src/data/mod.rs | 2 + src/data/models.rs | 63 ++++ src/data/reference.rs | 53 +++ src/scraper/mod.rs | 21 +- src/scraper/scheduler.rs | 130 +++++++- src/services/web.rs | 9 +- src/state.rs | 66 ++++ src/web/routes.rs | 292 ++++++++++++++++- 16 files changed, 1121 insertions(+), 76 deletions(-) create mode 100644 migrations/20260128000000_expand_course_schema.sql create mode 100644 src/data/courses.rs create mode 100644 src/data/reference.rs diff --git a/migrations/20260128000000_expand_course_schema.sql b/migrations/20260128000000_expand_course_schema.sql new file mode 100644 index 0000000..811fa15 --- /dev/null +++ b/migrations/20260128000000_expand_course_schema.sql @@ -0,0 +1,83 @@ +-- ============================================================ +-- Expand courses table with rich Banner API fields +-- ============================================================ + +-- Section identifiers +ALTER TABLE courses ADD COLUMN sequence_number VARCHAR; +ALTER TABLE courses ADD COLUMN part_of_term VARCHAR; + +-- Schedule & delivery (store codes, descriptions come from reference_data) +ALTER TABLE courses ADD COLUMN instructional_method VARCHAR; +ALTER TABLE courses ADD COLUMN campus VARCHAR; + +-- Credit hours +ALTER TABLE courses ADD COLUMN credit_hours INTEGER; +ALTER TABLE courses ADD COLUMN credit_hour_low INTEGER; +ALTER TABLE courses ADD COLUMN credit_hour_high INTEGER; + +-- Cross-listing +ALTER TABLE courses ADD COLUMN cross_list VARCHAR; +ALTER TABLE courses ADD COLUMN cross_list_capacity INTEGER; +ALTER TABLE courses ADD COLUMN cross_list_count INTEGER; + +-- Section linking +ALTER TABLE courses ADD COLUMN link_identifier VARCHAR; +ALTER TABLE courses ADD COLUMN is_section_linked BOOLEAN; + +-- JSONB columns for 1-to-many data +ALTER TABLE courses ADD COLUMN meeting_times JSONB NOT NULL DEFAULT '[]'::jsonb; +ALTER TABLE courses ADD COLUMN attributes JSONB NOT NULL DEFAULT '[]'::jsonb; + +-- ============================================================ +-- Full-text search support +-- ============================================================ + +-- Generated tsvector for word-based search on title +ALTER TABLE courses ADD COLUMN title_search tsvector + GENERATED ALWAYS AS (to_tsvector('simple', coalesce(title, ''))) STORED; + +CREATE INDEX idx_courses_title_search ON courses USING GIN (title_search); + +-- Trigram index for substring/ILIKE search on title +CREATE EXTENSION IF NOT EXISTS pg_trgm; +CREATE INDEX idx_courses_title_trgm ON courses USING GIN (title gin_trgm_ops); + +-- ============================================================ +-- New filter indexes +-- ============================================================ + +CREATE INDEX idx_courses_instructional_method ON courses(instructional_method); +CREATE INDEX idx_courses_campus ON courses(campus); + +-- Composite for "open CS courses in Fall 2024" pattern +CREATE INDEX idx_courses_term_subject_avail ON courses(term_code, subject, max_enrollment, enrollment); + +-- ============================================================ +-- 
Instructors table (normalized, deduplicated)
+-- ============================================================
+
+CREATE TABLE instructors (
+    banner_id VARCHAR PRIMARY KEY,
+    display_name VARCHAR NOT NULL,
+    email VARCHAR
+);
+
+CREATE TABLE course_instructors (
+    course_id INTEGER NOT NULL REFERENCES courses(id) ON DELETE CASCADE,
+    instructor_id VARCHAR NOT NULL REFERENCES instructors(banner_id) ON DELETE CASCADE,
+    is_primary BOOLEAN NOT NULL DEFAULT false,
+    PRIMARY KEY (course_id, instructor_id)
+);
+
+CREATE INDEX idx_course_instructors_instructor ON course_instructors(instructor_id);
+
+-- ============================================================
+-- Reference data table (all code→description lookups)
+-- ============================================================
+
+CREATE TABLE reference_data (
+    category VARCHAR NOT NULL,
+    code VARCHAR NOT NULL,
+    description VARCHAR NOT NULL,
+    PRIMARY KEY (category, code)
+);
diff --git a/src/app.rs b/src/app.rs
index 69f93b2..6284e25 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -6,13 +6,13 @@
 use crate::services::bot::BotService;
 use crate::services::manager::ServiceManager;
 use crate::services::web::WebService;
 use crate::state::AppState;
+use anyhow::Context;
 use figment::value::UncasedStr;
 use figment::{Figment, providers::Env};
 use sqlx::postgres::PgPoolOptions;
 use std::process::ExitCode;
 use std::sync::Arc;
 use std::time::Duration;
-use anyhow::Context;
 use tracing::{error, info};
 
 /// Main application struct containing all necessary components
@@ -79,6 +79,11 @@ impl App {
         let banner_api_arc = Arc::new(banner_api);
         let app_state = AppState::new(banner_api_arc.clone(), db_pool.clone());
 
+        // Load reference data cache from DB (may be empty on first run)
+        if let Err(e) = app_state.load_reference_cache().await {
+            info!(error = ?e, "Could not load reference cache on startup (may be empty)");
+        }
+
         Ok(App {
             config,
             db_pool,
@@ -101,6 +106,7 @@ impl App {
         let scraper_service = Box::new(ScraperService::new(
             self.db_pool.clone(),
             self.banner_api.clone(),
+            self.app_state.reference_cache.clone(),
             self.app_state.service_statuses.clone(),
         ));
         self.service_manager
diff --git a/src/banner/api.rs b/src/banner/api.rs
index 173f19a..8e84909 100644
--- a/src/banner/api.rs
+++ b/src/banner/api.rs
@@ -228,6 +228,29 @@ impl BannerApi {
             .await
     }
 
+    /// Retrieves campus codes and descriptions.
+    pub async fn get_campuses(&self, term: &str) -> Result<Vec<Pair>> {
+        self.get_list_endpoint("get_campus", "", term, 1, 500).await
+    }
+
+    /// Retrieves instructional method codes and descriptions.
+    pub async fn get_instructional_methods(&self, term: &str) -> Result<Vec<Pair>> {
+        self.get_list_endpoint("get_instructionalMethod", "", term, 1, 500)
+            .await
+    }
+
+    /// Retrieves part-of-term codes and descriptions.
+    pub async fn get_parts_of_term(&self, term: &str) -> Result<Vec<Pair>> {
+        self.get_list_endpoint("get_partOfTerm", "", term, 1, 500)
+            .await
+    }
+
+    /// Retrieves section attribute codes and descriptions.
+    pub async fn get_attributes(&self, term: &str) -> Result<Vec<Pair>> {
+        self.get_list_endpoint("get_attribute", "", term, 1, 500)
+            .await
+    }
+
     /// Retrieves meeting time information for a course.
    pub async fn get_course_meeting_time(
         &self,
diff --git a/src/banner/models/meetings.rs b/src/banner/models/meetings.rs
index 7e65130..0b70a9e 100644
--- a/src/banner/models/meetings.rs
+++ b/src/banner/models/meetings.rs
@@ -1,4 +1,4 @@
-use bitflags::{bitflags, Flags};
+use bitflags::{Flags, bitflags};
 use chrono::{DateTime, NaiveDate, NaiveTime, Timelike, Utc, Weekday};
 use extension_traits::extension;
 use serde::{Deserialize, Deserializer, Serialize};
diff --git a/src/banner/session.rs b/src/banner/session.rs
index 47fb774..7e32420 100644
--- a/src/banner/session.rs
+++ b/src/banner/session.rs
@@ -452,7 +452,11 @@ impl SessionPool {
         self.select_term(&term.to_string(), &unique_session_id, &cookie_header)
             .await?;
 
-        Ok(BannerSession::new(&unique_session_id, jsessionid, ssb_cookie))
+        Ok(BannerSession::new(
+            &unique_session_id,
+            jsessionid,
+            ssb_cookie,
+        ))
     }
 
     /// Retrieves a list of terms from the Banner API.
diff --git a/src/bot/commands/ics.rs b/src/bot/commands/ics.rs
index ddf2b4f..6ac7bac 100644
--- a/src/bot/commands/ics.rs
+++ b/src/bot/commands/ics.rs
@@ -18,7 +18,9 @@ fn nth_weekday_of_month(year: i32, month: u32, weekday: Weekday, n: u32) -> Option<NaiveDate> {
 
 /// Compute a consecutive range of dates starting from `start` for `count` days.
 fn date_range(start: NaiveDate, count: i64) -> Vec<NaiveDate> {
-    (0..count).filter_map(|i| start.checked_add_signed(Duration::days(i))).collect()
+    (0..count)
+        .filter_map(|i| start.checked_add_signed(Duration::days(i)))
+        .collect()
 }
 
 /// Compute university holidays for a given year.
diff --git a/src/data/batch.rs b/src/data/batch.rs
index c79ef97..e9c55f6 100644
--- a/src/data/batch.rs
+++ b/src/data/batch.rs
@@ -1,43 +1,73 @@
 //! Batch database operations for improved performance.
 
 use crate::banner::Course;
+use crate::data::models::DbMeetingTime;
 use crate::error::Result;
 use sqlx::PgPool;
+use std::collections::HashSet;
 use std::time::Instant;
 use tracing::info;
 
+/// Convert a Banner API course's meeting times to the DB JSONB shape.
+fn to_db_meeting_times(course: &Course) -> serde_json::Value {
+    let meetings: Vec<DbMeetingTime> = course
+        .meetings_faculty
+        .iter()
+        .map(|mf| {
+            let mt = &mf.meeting_time;
+            DbMeetingTime {
+                begin_time: mt.begin_time.clone(),
+                end_time: mt.end_time.clone(),
+                start_date: mt.start_date.clone(),
+                end_date: mt.end_date.clone(),
+                monday: mt.monday,
+                tuesday: mt.tuesday,
+                wednesday: mt.wednesday,
+                thursday: mt.thursday,
+                friday: mt.friday,
+                saturday: mt.saturday,
+                sunday: mt.sunday,
+                building: mt.building.clone(),
+                building_description: mt.building_description.clone(),
+                room: mt.room.clone(),
+                campus: mt.campus.clone(),
+                meeting_type: mt.meeting_type.clone(),
+                meeting_schedule_type: mt.meeting_schedule_type.clone(),
+            }
+        })
+        .collect();
+    serde_json::to_value(meetings).unwrap_or_default()
+}
+
+/// Convert a Banner API course's section attributes to a JSONB array of code strings.
+fn to_db_attributes(course: &Course) -> serde_json::Value {
+    let codes: Vec<&str> = course
+        .section_attributes
+        .iter()
+        .map(|a| a.code.as_str())
+        .collect();
+    serde_json::to_value(codes).unwrap_or_default()
+}
+
+/// Extract the campus code from the first meeting time (Banner doesn't put it on the course directly).
+fn extract_campus_code(course: &Course) -> Option<String> {
+    course
+        .meetings_faculty
+        .first()
+        .and_then(|mf| mf.meeting_time.campus.clone())
+}
+
 /// Batch upsert courses in a single database query.
 ///
-/// This function performs a bulk INSERT...ON CONFLICT DO UPDATE for all courses
-/// in a single round-trip to the database, significantly reducing overhead compared
-/// to individual inserts.
+/// Performs a bulk INSERT...ON CONFLICT DO UPDATE for all courses, including
+/// new fields (meeting times, attributes, instructor data). Returns the
+/// database IDs for all upserted courses (in input order) so instructors
+/// can be linked.
 ///
 /// # Performance
-/// - Reduces N database round-trips to 1
+/// - Reduces N database round-trips to a constant few (courses, instructors, junction)
 /// - Typical usage: 50-200 courses per batch
-/// - PostgreSQL parameter limit: 65,535 (we use ~10 per course)
-///
-/// # Arguments
-/// * `courses` - Slice of Course structs from the Banner API
-/// * `db_pool` - PostgreSQL connection pool
-///
-/// # Returns
-/// * `Ok(())` on success
-/// * `Err(_)` if the database operation fails
-///
-/// # Example
-/// ```no_run
-/// use banner::data::batch::batch_upsert_courses;
-/// use banner::banner::Course;
-/// use sqlx::PgPool;
-///
-/// async fn example(courses: &[Course], pool: &PgPool) -> anyhow::Result<()> {
-///     batch_upsert_courses(courses, pool).await?;
-///     Ok(())
-/// }
-/// ```
 pub async fn batch_upsert_courses(courses: &[Course], db_pool: &PgPool) -> Result<()> {
-    // Early return for empty batches
     if courses.is_empty() {
         info!("No courses to upsert, skipping batch operation");
         return Ok(());
@@ -46,42 +76,108 @@ pub async fn batch_upsert_courses(courses: &[Course], db_pool: &PgPool) -> Result<()> {
     let start = Instant::now();
     let course_count = courses.len();
 
-    // Extract course fields into vectors for UNNEST
+    // Step 1: Upsert courses with all fields, returning IDs
+    let course_ids = upsert_courses(courses, db_pool).await?;
+
+    // Step 2: Upsert instructors (deduplicated across batch)
+    upsert_instructors(courses, db_pool).await?;
+
+    // Step 3: Link courses to instructors via junction table
+    upsert_course_instructors(courses, &course_ids, db_pool).await?;
+
+    let duration = start.elapsed();
+    info!(
+        courses_count = course_count,
+        duration_ms = duration.as_millis(),
+        "Batch upserted courses with instructors"
+    );
+
+    Ok(())
+}
+
+/// Upsert all courses and return their database IDs in input order.
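+///
+/// Note: pairing the `RETURNING id` rows with the input slice by position
+/// matches how PostgreSQL evaluates this `INSERT ... SELECT FROM UNNEST` in
+/// practice, but `RETURNING` order is not formally guaranteed by the SQL
+/// standard; if this ever needs to be ironclad, `UNNEST(...) WITH ORDINALITY`
+/// plus an `ORDER BY` on the ordinality column would pin the order explicitly.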
+async fn upsert_courses(courses: &[Course], db_pool: &PgPool) -> Result<Vec<i32>> {
     let crns: Vec<&str> = courses
         .iter()
         .map(|c| c.course_reference_number.as_str())
         .collect();
     let subjects: Vec<&str> = courses.iter().map(|c| c.subject.as_str()).collect();
     let course_numbers: Vec<&str> = courses.iter().map(|c| c.course_number.as_str()).collect();
     let titles: Vec<&str> = courses.iter().map(|c| c.course_title.as_str()).collect();
     let term_codes: Vec<&str> = courses.iter().map(|c| c.term.as_str()).collect();
     let enrollments: Vec<i32> = courses.iter().map(|c| c.enrollment).collect();
     let max_enrollments: Vec<i32> = courses.iter().map(|c| c.maximum_enrollment).collect();
     let wait_counts: Vec<i32> = courses.iter().map(|c| c.wait_count).collect();
     let wait_capacities: Vec<i32> = courses.iter().map(|c| c.wait_capacity).collect();
 
-    // Perform batch upsert using UNNEST for efficient bulk insertion
-    let result = sqlx::query(
+    // New scalar fields
+    let sequence_numbers: Vec<Option<&str>> = courses
+        .iter()
+        .map(|c| Some(c.sequence_number.as_str()))
+        .collect();
+    let parts_of_term: Vec<Option<&str>> = courses
+        .iter()
+        .map(|c| Some(c.part_of_term.as_str()))
+        .collect();
+    let instructional_methods: Vec<Option<&str>> = courses
+        .iter()
+        .map(|c| Some(c.instructional_method.as_str()))
+        .collect();
+    let campuses: Vec<Option<String>> = courses.iter().map(extract_campus_code).collect();
+    let credit_hours: Vec<Option<i32>> = courses.iter().map(|c| c.credit_hours).collect();
+    let credit_hour_lows: Vec<Option<i32>> = courses.iter().map(|c| c.credit_hour_low).collect();
+    let credit_hour_highs: Vec<Option<i32>> = courses.iter().map(|c| c.credit_hour_high).collect();
+    let cross_lists: Vec<Option<&str>> = courses.iter().map(|c| c.cross_list.as_deref()).collect();
+    let cross_list_capacities: Vec<Option<i32>> =
+        courses.iter().map(|c| c.cross_list_capacity).collect();
+    let cross_list_counts: Vec<Option<i32>> = courses.iter().map(|c| c.cross_list_count).collect();
+    let link_identifiers: Vec<Option<&str>> = courses
+        .iter()
+        .map(|c| c.link_identifier.as_deref())
+        .collect();
+    let is_section_linkeds: Vec<Option<bool>> =
+        courses.iter().map(|c| Some(c.is_section_linked)).collect();
+
+    // JSONB fields
+    let meeting_times_json: Vec<serde_json::Value> =
+        courses.iter().map(to_db_meeting_times).collect();
+    let attributes_json: Vec<serde_json::Value> = courses.iter().map(to_db_attributes).collect();
+
+    let rows = sqlx::query_scalar::<_, i32>(
         r#"
         INSERT INTO courses (
             crn, subject, course_number, title, term_code,
-            enrollment, max_enrollment, wait_count, wait_capacity, last_scraped_at
+            enrollment, max_enrollment, wait_count, wait_capacity, last_scraped_at,
+            sequence_number, part_of_term, instructional_method, campus,
+            credit_hours, credit_hour_low, credit_hour_high,
+            cross_list, cross_list_capacity, cross_list_count,
+            link_identifier, is_section_linked,
+            meeting_times, attributes
         )
-        SELECT * FROM UNNEST(
+        SELECT
+            v.crn, v.subject, v.course_number, v.title, v.term_code,
+            v.enrollment, v.max_enrollment, v.wait_count, v.wait_capacity, NOW(),
+            v.sequence_number, v.part_of_term, v.instructional_method, v.campus,
+            v.credit_hours, v.credit_hour_low, v.credit_hour_high,
+            v.cross_list, v.cross_list_capacity, v.cross_list_count,
+            v.link_identifier, v.is_section_linked,
+            v.meeting_times, v.attributes
+        FROM UNNEST(
             $1::text[], $2::text[], $3::text[], $4::text[], $5::text[],
             $6::int4[], $7::int4[], $8::int4[], $9::int4[],
-            array_fill(NOW()::timestamptz, ARRAY[$10])
-        ) AS t(
+            $10::text[], $11::text[], $12::text[], $13::text[],
+            $14::int4[], $15::int4[], $16::int4[],
+            $17::text[], $18::int4[], $19::int4[],
+            $20::text[], $21::bool[],
+            $22::jsonb[], $23::jsonb[]
+        ) AS v(
             crn,
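+            -- alias order here must match the $1..$23 arrays bound to UNNEST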
            subject, course_number, title, term_code,
-            enrollment, max_enrollment, wait_count, wait_capacity, last_scraped_at
+            enrollment, max_enrollment, wait_count, wait_capacity,
+            sequence_number, part_of_term, instructional_method, campus,
+            credit_hours, credit_hour_low, credit_hour_high,
+            cross_list, cross_list_capacity, cross_list_count,
+            link_identifier, is_section_linked,
+            meeting_times, attributes
         )
         ON CONFLICT (crn, term_code)
         DO UPDATE SET
@@ -92,7 +188,22 @@ pub async fn batch_upsert_courses(courses: &[Course], db_pool: &PgPool) -> Result<()> {
             max_enrollment = EXCLUDED.max_enrollment,
             wait_count = EXCLUDED.wait_count,
             wait_capacity = EXCLUDED.wait_capacity,
-            last_scraped_at = EXCLUDED.last_scraped_at
+            last_scraped_at = EXCLUDED.last_scraped_at,
+            sequence_number = EXCLUDED.sequence_number,
+            part_of_term = EXCLUDED.part_of_term,
+            instructional_method = EXCLUDED.instructional_method,
+            campus = EXCLUDED.campus,
+            credit_hours = EXCLUDED.credit_hours,
+            credit_hour_low = EXCLUDED.credit_hour_low,
+            credit_hour_high = EXCLUDED.credit_hour_high,
+            cross_list = EXCLUDED.cross_list,
+            cross_list_capacity = EXCLUDED.cross_list_capacity,
+            cross_list_count = EXCLUDED.cross_list_count,
+            link_identifier = EXCLUDED.link_identifier,
+            is_section_linked = EXCLUDED.is_section_linked,
+            meeting_times = EXCLUDED.meeting_times,
+            attributes = EXCLUDED.attributes
+        RETURNING id
         "#,
     )
     .bind(&crns)
@@ -104,19 +215,111 @@ pub async fn batch_upsert_courses(courses: &[Course], db_pool: &PgPool) -> Result<()> {
     .bind(&max_enrollments)
     .bind(&wait_counts)
     .bind(&wait_capacities)
-    .bind(course_count as i32)
-    .execute(db_pool)
+    .bind(&sequence_numbers)
+    .bind(&parts_of_term)
+    .bind(&instructional_methods)
+    .bind(&campuses)
+    .bind(&credit_hours)
+    .bind(&credit_hour_lows)
+    .bind(&credit_hour_highs)
+    .bind(&cross_lists)
+    .bind(&cross_list_capacities)
+    .bind(&cross_list_counts)
+    .bind(&link_identifiers)
+    .bind(&is_section_linkeds)
+    .bind(&meeting_times_json)
+    .bind(&attributes_json)
+    .fetch_all(db_pool)
     .await
     .map_err(|e| anyhow::anyhow!("Failed to batch upsert courses: {}", e))?;
 
-    let duration = start.elapsed();
+    Ok(rows)
+}
 
-    info!(
-        courses_count = course_count,
-        rows_affected = result.rows_affected(),
-        duration_ms = duration.as_millis(),
-        "Batch upserted courses"
-    );
+/// Deduplicate and upsert all instructors from the batch.
+async fn upsert_instructors(courses: &[Course], db_pool: &PgPool) -> Result<()> {
+    let mut seen = HashSet::new();
+    let mut banner_ids = Vec::new();
+    let mut display_names = Vec::new();
+    let mut emails: Vec<Option<&str>> = Vec::new();
+
+    for course in courses {
+        for faculty in &course.faculty {
+            if seen.insert(faculty.banner_id.as_str()) {
+                banner_ids.push(faculty.banner_id.as_str());
+                display_names.push(faculty.display_name.as_str());
+                emails.push(faculty.email_address.as_deref());
+            }
+        }
+    }
+
+    if banner_ids.is_empty() {
+        return Ok(());
+    }
+
+    sqlx::query(
+        r#"
+        INSERT INTO instructors (banner_id, display_name, email)
+        SELECT * FROM UNNEST($1::text[], $2::text[], $3::text[])
+        ON CONFLICT (banner_id)
+        DO UPDATE SET
+            display_name = EXCLUDED.display_name,
+            email = COALESCE(EXCLUDED.email, instructors.email)
+        "#,
+    )
+    .bind(&banner_ids)
+    .bind(&display_names)
+    .bind(&emails)
+    .execute(db_pool)
+    .await
+    .map_err(|e| anyhow::anyhow!("Failed to batch upsert instructors: {}", e))?;
+
+    Ok(())
+}
+
+/// Link courses to their instructors via the junction table.
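+///
+/// Links are replaced wholesale (DELETE then re-INSERT) so that instructors
+/// removed in Banner also disappear here; the `ON CONFLICT` clause is a safety
+/// net against a concurrent writer re-creating a link between the two statements.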
+async fn upsert_course_instructors(
+    courses: &[Course],
+    course_ids: &[i32],
+    db_pool: &PgPool,
+) -> Result<()> {
+    let mut cids = Vec::new();
+    let mut iids = Vec::new();
+    let mut primaries = Vec::new();
+
+    for (course, &course_id) in courses.iter().zip(course_ids) {
+        for faculty in &course.faculty {
+            cids.push(course_id);
+            iids.push(faculty.banner_id.as_str());
+            primaries.push(faculty.primary_indicator);
+        }
+    }
+
+    if cids.is_empty() {
+        return Ok(());
+    }
+
+    // Delete existing links for these courses then re-insert.
+    // This handles instructor changes cleanly.
+    sqlx::query("DELETE FROM course_instructors WHERE course_id = ANY($1)")
+        .bind(&cids)
+        .execute(db_pool)
+        .await?;
+
+    sqlx::query(
+        r#"
+        INSERT INTO course_instructors (course_id, instructor_id, is_primary)
+        SELECT * FROM UNNEST($1::int4[], $2::text[], $3::bool[])
+        ON CONFLICT (course_id, instructor_id)
+        DO UPDATE SET is_primary = EXCLUDED.is_primary
+        "#,
+    )
+    .bind(&cids)
+    .bind(&iids)
+    .bind(&primaries)
+    .execute(db_pool)
+    .await
+    .map_err(|e| anyhow::anyhow!("Failed to batch upsert course_instructors: {}", e))?;
 
     Ok(())
 }
diff --git a/src/data/courses.rs b/src/data/courses.rs
new file mode 100644
index 0000000..7b7b38e
--- /dev/null
+++ b/src/data/courses.rs
@@ -0,0 +1,128 @@
+//! Database query functions for courses, used by the web API.
+
+use crate::data::models::Course;
+use crate::error::Result;
+use sqlx::PgPool;
+
+/// Search courses by term with optional filters.
+///
+/// Returns `(courses, total_count)` for pagination. Combines tsvector FTS for
+/// word matches with trigram-backed ILIKE for substring matches (ORed together).
+#[allow(clippy::too_many_arguments)]
+pub async fn search_courses(
+    db_pool: &PgPool,
+    term_code: &str,
+    subject: Option<&str>,
+    title_query: Option<&str>,
+    course_number_low: Option<i32>,
+    course_number_high: Option<i32>,
+    open_only: bool,
+    instructional_method: Option<&str>,
+    campus: Option<&str>,
+    limit: i32,
+    offset: i32,
+) -> Result<(Vec<Course>, i64)> {
+    // Build WHERE clauses dynamically via parameter binding with NULL sentinels:
+    // each optional filter uses ($N IS NULL OR column = $N) so NULL means "no filter".
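+    // Example: subject = Some("CS") with every other filter left as None
+    // collapses the WHERE clause to an effective `term_code = $1 AND subject = 'CS'`.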
+    let courses = sqlx::query_as::<_, Course>(
+        r#"
+        SELECT *
+        FROM courses
+        WHERE term_code = $1
+          AND ($2::text IS NULL OR subject = $2)
+          AND ($3::text IS NULL OR title_search @@ plainto_tsquery('simple', $3) OR title ILIKE '%' || $3 || '%')
+          AND ($4::int IS NULL OR course_number::int >= $4)
+          AND ($5::int IS NULL OR course_number::int <= $5)
+          AND ($6::bool = false OR max_enrollment > enrollment)
+          AND ($7::text IS NULL OR instructional_method = $7)
+          AND ($8::text IS NULL OR campus = $8)
+        ORDER BY subject, course_number, sequence_number
+        LIMIT $9 OFFSET $10
+        "#,
+    )
+    .bind(term_code)
+    .bind(subject)
+    .bind(title_query)
+    .bind(course_number_low)
+    .bind(course_number_high)
+    .bind(open_only)
+    .bind(instructional_method)
+    .bind(campus)
+    .bind(limit)
+    .bind(offset)
+    .fetch_all(db_pool)
+    .await?;
+
+    let total: (i64,) = sqlx::query_as(
+        r#"
+        SELECT COUNT(*)
+        FROM courses
+        WHERE term_code = $1
+          AND ($2::text IS NULL OR subject = $2)
+          AND ($3::text IS NULL OR title_search @@ plainto_tsquery('simple', $3) OR title ILIKE '%' || $3 || '%')
+          AND ($4::int IS NULL OR course_number::int >= $4)
+          AND ($5::int IS NULL OR course_number::int <= $5)
+          AND ($6::bool = false OR max_enrollment > enrollment)
+          AND ($7::text IS NULL OR instructional_method = $7)
+          AND ($8::text IS NULL OR campus = $8)
+        "#,
+    )
+    .bind(term_code)
+    .bind(subject)
+    .bind(title_query)
+    .bind(course_number_low)
+    .bind(course_number_high)
+    .bind(open_only)
+    .bind(instructional_method)
+    .bind(campus)
+    .fetch_one(db_pool)
+    .await?;
+
+    Ok((courses, total.0))
+}
+
+/// Get a single course by CRN and term.
+pub async fn get_course_by_crn(
+    db_pool: &PgPool,
+    crn: &str,
+    term_code: &str,
+) -> Result<Option<Course>> {
+    let course =
+        sqlx::query_as::<_, Course>("SELECT * FROM courses WHERE crn = $1 AND term_code = $2")
+            .bind(crn)
+            .bind(term_code)
+            .fetch_optional(db_pool)
+            .await?;
+    Ok(course)
+}
+
+/// Get instructors for a course by course ID.
+///
+/// Returns `(banner_id, display_name, email, is_primary)` tuples.
+pub async fn get_course_instructors(
+    db_pool: &PgPool,
+    course_id: i32,
+) -> Result<Vec<(String, String, Option<String>, bool)>> {
+    let rows: Vec<(String, String, Option<String>, bool)> = sqlx::query_as(
+        r#"
+        SELECT i.banner_id, i.display_name, i.email, ci.is_primary
+        FROM course_instructors ci
+        JOIN instructors i ON i.banner_id = ci.instructor_id
+        WHERE ci.course_id = $1
+        ORDER BY ci.is_primary DESC, i.display_name
+        "#,
+    )
+    .bind(course_id)
+    .fetch_all(db_pool)
+    .await?;
+    Ok(rows)
+}
+
+/// Get all distinct term codes that have courses in the DB.
+pub async fn get_available_terms(db_pool: &PgPool) -> Result<Vec<String>> {
+    let rows: Vec<(String,)> =
+        sqlx::query_as("SELECT DISTINCT term_code FROM courses ORDER BY term_code DESC")
+            .fetch_all(db_pool)
+            .await?;
+    Ok(rows.into_iter().map(|(tc,)| tc).collect())
+}
diff --git a/src/data/mod.rs b/src/data/mod.rs
index 091b5e7..9f2afda 100644
--- a/src/data/mod.rs
+++ b/src/data/mod.rs
@@ -1,5 +1,7 @@
 //! Database models and schema.
 
 pub mod batch;
+pub mod courses;
 pub mod models;
+pub mod reference;
 pub mod scrape_jobs;
diff --git a/src/data/models.rs b/src/data/models.rs
index 7843334..e4b096d 100644
--- a/src/data/models.rs
+++ b/src/data/models.rs
@@ -1,8 +1,31 @@
 //! `sqlx` models for the database schema.
 
 use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
 use serde_json::Value;
 
+/// Represents a meeting time stored as JSONB in the courses table.
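+///
+/// A serialized element looks roughly like this (a sketch, not exhaustive;
+/// values are illustrative only):
+/// `{"begin_time":"0900","end_time":"0950","monday":true,"building":"MH","room":"2.02"}`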
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DbMeetingTime {
+    pub begin_time: Option<String>,
+    pub end_time: Option<String>,
+    pub start_date: String,
+    pub end_date: String,
+    pub monday: bool,
+    pub tuesday: bool,
+    pub wednesday: bool,
+    pub thursday: bool,
+    pub friday: bool,
+    pub saturday: bool,
+    pub sunday: bool,
+    pub building: Option<String>,
+    pub building_description: Option<String>,
+    pub room: Option<String>,
+    pub campus: Option<String>,
+    pub meeting_type: String,
+    pub meeting_schedule_type: String,
+}
+
 #[allow(dead_code)]
 #[derive(sqlx::FromRow, Debug, Clone)]
 pub struct Course {
@@ -17,6 +40,46 @@ pub struct Course {
     pub wait_count: i32,
     pub wait_capacity: i32,
     pub last_scraped_at: DateTime<Utc>,
+    // New scalar fields
+    pub sequence_number: Option<String>,
+    pub part_of_term: Option<String>,
+    pub instructional_method: Option<String>,
+    pub campus: Option<String>,
+    pub credit_hours: Option<i32>,
+    pub credit_hour_low: Option<i32>,
+    pub credit_hour_high: Option<i32>,
+    pub cross_list: Option<String>,
+    pub cross_list_capacity: Option<i32>,
+    pub cross_list_count: Option<i32>,
+    pub link_identifier: Option<String>,
+    pub is_section_linked: Option<bool>,
+    // JSONB fields
+    pub meeting_times: Value,
+    pub attributes: Value,
+}
+
+#[allow(dead_code)]
+#[derive(sqlx::FromRow, Debug, Clone)]
+pub struct Instructor {
+    pub banner_id: String,
+    pub display_name: String,
+    pub email: Option<String>,
+}
+
+#[allow(dead_code)]
+#[derive(sqlx::FromRow, Debug, Clone)]
+pub struct CourseInstructor {
+    pub course_id: i32,
+    pub instructor_id: String,
+    pub is_primary: bool,
+}
+
+#[allow(dead_code)]
+#[derive(sqlx::FromRow, Debug, Clone)]
+pub struct ReferenceData {
+    pub category: String,
+    pub code: String,
+    pub description: String,
 }
 
 #[allow(dead_code)]
diff --git a/src/data/reference.rs b/src/data/reference.rs
new file mode 100644
index 0000000..607fd99
--- /dev/null
+++ b/src/data/reference.rs
@@ -0,0 +1,53 @@
+//! Database operations for the `reference_data` table (code→description lookups).
+
+use crate::data::models::ReferenceData;
+use crate::error::Result;
+use sqlx::PgPool;
+
+/// Batch upsert reference data entries.
+pub async fn batch_upsert(entries: &[ReferenceData], db_pool: &PgPool) -> Result<()> {
+    if entries.is_empty() {
+        return Ok(());
+    }
+
+    let categories: Vec<&str> = entries.iter().map(|e| e.category.as_str()).collect();
+    let codes: Vec<&str> = entries.iter().map(|e| e.code.as_str()).collect();
+    let descriptions: Vec<&str> = entries.iter().map(|e| e.description.as_str()).collect();
+
+    sqlx::query(
+        r#"
+        INSERT INTO reference_data (category, code, description)
+        SELECT * FROM UNNEST($1::text[], $2::text[], $3::text[])
+        ON CONFLICT (category, code)
+        DO UPDATE SET description = EXCLUDED.description
+        "#,
+    )
+    .bind(&categories)
+    .bind(&codes)
+    .bind(&descriptions)
+    .execute(db_pool)
+    .await?;
+
+    Ok(())
+}
+
+/// Get all reference data entries for a category.
+pub async fn get_by_category(category: &str, db_pool: &PgPool) -> Result<Vec<ReferenceData>> {
+    let rows = sqlx::query_as::<_, ReferenceData>(
+        "SELECT category, code, description FROM reference_data WHERE category = $1 ORDER BY description",
+    )
+    .bind(category)
+    .fetch_all(db_pool)
+    .await?;
+    Ok(rows)
+}
+
+/// Get all reference data entries (for cache initialization).
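+///
+/// Typically paired with `ReferenceCache::from_entries`, as done by
+/// `AppState::load_reference_cache` at startup and by the scheduler after
+/// each reference scrape.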
+pub async fn get_all(db_pool: &PgPool) -> Result<Vec<ReferenceData>> {
+    let rows = sqlx::query_as::<_, ReferenceData>(
+        "SELECT category, code, description FROM reference_data ORDER BY category, description",
+    )
+    .fetch_all(db_pool)
+    .await?;
+    Ok(rows)
+}
diff --git a/src/scraper/mod.rs b/src/scraper/mod.rs
index 5926e27..51c3fb0 100644
--- a/src/scraper/mod.rs
+++ b/src/scraper/mod.rs
@@ -4,10 +4,11 @@ pub mod worker;
 
 use crate::banner::BannerApi;
 use crate::services::Service;
+use crate::state::ReferenceCache;
 use crate::status::{ServiceStatus, ServiceStatusRegistry};
 use sqlx::PgPool;
 use std::sync::Arc;
-use tokio::sync::broadcast;
+use tokio::sync::{RwLock, broadcast};
 use tokio::task::JoinHandle;
 use tracing::{info, warn};
 
@@ -21,6 +22,7 @@ use self::worker::Worker;
 pub struct ScraperService {
     db_pool: PgPool,
     banner_api: Arc<BannerApi>,
+    reference_cache: Arc<RwLock<ReferenceCache>>,
     service_statuses: ServiceStatusRegistry,
     scheduler_handle: Option<JoinHandle<()>>,
     worker_handles: Vec<JoinHandle<()>>,
@@ -29,10 +31,16 @@ pub struct ScraperService {
 
 impl ScraperService {
     /// Creates a new `ScraperService`.
-    pub fn new(db_pool: PgPool, banner_api: Arc<BannerApi>, service_statuses: ServiceStatusRegistry) -> Self {
+    pub fn new(
+        db_pool: PgPool,
+        banner_api: Arc<BannerApi>,
+        reference_cache: Arc<RwLock<ReferenceCache>>,
+        service_statuses: ServiceStatusRegistry,
+    ) -> Self {
         Self {
             db_pool,
             banner_api,
+            reference_cache,
             service_statuses,
             scheduler_handle: None,
             worker_handles: Vec::new(),
@@ -48,7 +56,11 @@ impl ScraperService {
         let (shutdown_tx, _) = broadcast::channel(1);
         self.shutdown_tx = Some(shutdown_tx.clone());
 
-        let scheduler = Scheduler::new(self.db_pool.clone(), self.banner_api.clone());
+        let scheduler = Scheduler::new(
+            self.db_pool.clone(),
+            self.banner_api.clone(),
+            self.reference_cache.clone(),
+        );
         let shutdown_rx = shutdown_tx.subscribe();
         let scheduler_handle = tokio::spawn(async move {
             scheduler.run(shutdown_rx).await;
@@ -86,7 +98,8 @@ impl Service for ScraperService {
     }
 
     async fn shutdown(&mut self) -> Result<(), anyhow::Error> {
-        self.service_statuses.set("scraper", ServiceStatus::Disabled);
+        self.service_statuses
+            .set("scraper", ServiceStatus::Disabled);
         info!("Shutting down scraper service");
 
         // Send shutdown signal to all tasks
diff --git a/src/scraper/scheduler.rs b/src/scraper/scheduler.rs
index 3ea30dc..1649570 100644
--- a/src/scraper/scheduler.rs
+++ b/src/scraper/scheduler.rs
@@ -1,28 +1,38 @@
 use crate::banner::{BannerApi, Term};
-use crate::data::models::{ScrapePriority, TargetType};
+use crate::data::models::{ReferenceData, ScrapePriority, TargetType};
 use crate::data::scrape_jobs;
 use crate::error::Result;
 use crate::scraper::jobs::subject::SubjectJob;
+use crate::state::ReferenceCache;
 use serde_json::json;
 use sqlx::PgPool;
 use std::sync::Arc;
-use std::time::Duration;
-use tokio::sync::broadcast;
+use std::time::{Duration, Instant};
+use tokio::sync::{RwLock, broadcast};
 use tokio::time;
 use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, warn};
 
+/// How often reference data is re-scraped (6 hours).
+const REFERENCE_DATA_INTERVAL: Duration = Duration::from_secs(6 * 60 * 60);
+
 /// Periodically analyzes data and enqueues prioritized scrape jobs.
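+/// Also owns the periodic (6-hour) reference-data refresh, folded into the
+/// same 60-second scheduling loop so scraping decisions stay in one place.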
 pub struct Scheduler {
     db_pool: PgPool,
     banner_api: Arc<BannerApi>,
+    reference_cache: Arc<RwLock<ReferenceCache>>,
 }
 
 impl Scheduler {
-    pub fn new(db_pool: PgPool, banner_api: Arc<BannerApi>) -> Self {
+    pub fn new(
+        db_pool: PgPool,
+        banner_api: Arc<BannerApi>,
+        reference_cache: Arc<RwLock<ReferenceCache>>,
+    ) -> Self {
         Self {
             db_pool,
             banner_api,
+            reference_cache,
         }
     }
 
@@ -41,26 +51,39 @@ impl Scheduler {
         let work_interval = Duration::from_secs(60);
         let mut next_run = time::Instant::now();
         let mut current_work: Option<(tokio::task::JoinHandle<()>, CancellationToken)> = None;
+        // `None` means "never scraped", so the first cycle scrapes immediately.
+        // (Computing `Instant::now() - REFERENCE_DATA_INTERVAL` instead can
+        // panic: `Instant` subtraction underflows when the process is younger
+        // than 6 hours relative to the platform's monotonic-clock epoch.)
+        let mut last_ref_scrape: Option<Instant> = None;
 
         loop {
             tokio::select! {
                 _ = time::sleep_until(next_run) => {
                     let cancel_token = CancellationToken::new();
+                    let should_scrape_ref = last_ref_scrape
+                        .map_or(true, |at| at.elapsed() >= REFERENCE_DATA_INTERVAL);
+
                     // Spawn work in separate task to allow graceful cancellation during shutdown.
-                    // Without this, shutdown would have to wait for the full scheduling cycle.
                     let work_handle = tokio::spawn({
                         let db_pool = self.db_pool.clone();
                         let banner_api = self.banner_api.clone();
                         let cancel_token = cancel_token.clone();
+                        let reference_cache = self.reference_cache.clone();
                         async move {
                             tokio::select! {
-                                result = Self::schedule_jobs_impl(&db_pool, &banner_api) => {
-                                    if let Err(e) = result {
+                                _ = async {
+                                    if should_scrape_ref
+                                        && let Err(e) = Self::scrape_reference_data(&db_pool, &banner_api, &reference_cache).await
+                                    {
+                                        error!(error = ?e, "Failed to scrape reference data");
+                                    }
+                                    if let Err(e) = Self::schedule_jobs_impl(&db_pool, &banner_api).await {
                                         error!(error = ?e, "Failed to schedule jobs");
                                     }
-                                }
+                                } => {}
                                 _ = cancel_token.cancelled() => {
                                     debug!("Scheduling work cancelled gracefully");
                                 }
@@ -68,6 +87,10 @@ impl Scheduler {
                         }
                     });
 
+                    if should_scrape_ref {
+                        last_ref_scrape = Some(Instant::now());
+                    }
+
                     current_work = Some((work_handle, cancel_token));
                     next_run = time::Instant::now() + work_interval;
                 }
@@ -170,4 +193,95 @@ impl Scheduler {
         debug!("Job scheduling complete");
         Ok(())
     }
+
+    /// Scrape all reference data categories from Banner and upsert to DB, then refresh cache.
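+    ///
+    /// Covers the `subject`, `campus`, `instructional_method`, `part_of_term`,
+    /// and `attribute` categories; a failed endpoint is logged and skipped so
+    /// one bad category never blocks the rest.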
+    #[tracing::instrument(skip_all)]
+    async fn scrape_reference_data(
+        db_pool: &PgPool,
+        banner_api: &BannerApi,
+        reference_cache: &Arc<RwLock<ReferenceCache>>,
+    ) -> Result<()> {
+        let term = Term::get_current().inner().to_string();
+        info!(term = %term, "Scraping reference data");
+
+        let mut all_entries = Vec::new();
+
+        // Subjects
+        match banner_api.get_subjects("", &term, 1, 500).await {
+            Ok(pairs) => {
+                debug!(count = pairs.len(), "Fetched subjects");
+                all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
+                    category: "subject".to_string(),
+                    code: p.code,
+                    description: p.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch subjects"),
+        }
+
+        // Campuses
+        match banner_api.get_campuses(&term).await {
+            Ok(pairs) => {
+                debug!(count = pairs.len(), "Fetched campuses");
+                all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
+                    category: "campus".to_string(),
+                    code: p.code,
+                    description: p.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch campuses"),
+        }
+
+        // Instructional methods
+        match banner_api.get_instructional_methods(&term).await {
+            Ok(pairs) => {
+                debug!(count = pairs.len(), "Fetched instructional methods");
+                all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
+                    category: "instructional_method".to_string(),
+                    code: p.code,
+                    description: p.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch instructional methods"),
+        }
+
+        // Parts of term
+        match banner_api.get_parts_of_term(&term).await {
+            Ok(pairs) => {
+                debug!(count = pairs.len(), "Fetched parts of term");
+                all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
+                    category: "part_of_term".to_string(),
+                    code: p.code,
+                    description: p.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch parts of term"),
+        }
+
+        // Attributes
+        match banner_api.get_attributes(&term).await {
+            Ok(pairs) => {
+                debug!(count = pairs.len(), "Fetched attributes");
+                all_entries.extend(pairs.into_iter().map(|p| ReferenceData {
+                    category: "attribute".to_string(),
+                    code: p.code,
+                    description: p.description,
+                }));
+            }
+            Err(e) => warn!(error = ?e, "Failed to fetch attributes"),
+        }
+
+        // Batch upsert all entries
+        let total = all_entries.len();
+        crate::data::reference::batch_upsert(&all_entries, db_pool).await?;
+        info!(total_entries = total, "Reference data upserted to DB");
+
+        // Refresh in-memory cache
+        let all = crate::data::reference::get_all(db_pool).await?;
+        let count = all.len();
+        *reference_cache.write().await = ReferenceCache::from_entries(all);
+        info!(entries = count, "Reference cache refreshed");
+
+        Ok(())
+    }
 }
diff --git a/src/services/web.rs b/src/services/web.rs
index e53cc8c..4ba10f1 100644
--- a/src/services/web.rs
+++ b/src/services/web.rs
@@ -23,10 +23,7 @@ impl WebService {
         }
     }
 
     /// Periodically pings the database and updates the "database" service status.
-    async fn db_health_check_loop(
-        state: AppState,
-        mut shutdown_rx: broadcast::Receiver<()>,
-    ) {
+    async fn db_health_check_loop(state: AppState, mut shutdown_rx: broadcast::Receiver<()>) {
         use std::time::Duration;
 
         let mut interval = tokio::time::interval(Duration::from_secs(30));
@@ -66,7 +63,9 @@ impl Service for WebService {
         let addr = SocketAddr::from(([0, 0, 0, 0], self.port));
         let listener = TcpListener::bind(addr).await?;
 
-        self.app_state.service_statuses.set("web", ServiceStatus::Active);
+        self.app_state
+            .service_statuses
+            .set("web", ServiceStatus::Active);
         info!(
             service = "web",
             address = %addr,
diff --git a/src/state.rs b/src/state.rs
index a5be5e1..d715250 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -2,16 +2,71 @@
 
 use crate::banner::BannerApi;
 use crate::banner::Course;
+use crate::data::models::ReferenceData;
 use crate::status::ServiceStatusRegistry;
 use anyhow::Result;
 use sqlx::PgPool;
+use std::collections::HashMap;
 use std::sync::Arc;
+use tokio::sync::RwLock;
+
+/// In-memory cache for reference data (code→description lookups).
+///
+/// Loaded from the `reference_data` table on startup and refreshed periodically.
+pub struct ReferenceCache {
+    /// `(category, code)` → `description`
+    data: HashMap<(String, String), String>,
+}
+
+impl Default for ReferenceCache {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl ReferenceCache {
+    /// Create an empty cache.
+    pub fn new() -> Self {
+        Self {
+            data: HashMap::new(),
+        }
+    }
+
+    /// Build cache from a list of reference data entries.
+    pub fn from_entries(entries: Vec<ReferenceData>) -> Self {
+        let data = entries
+            .into_iter()
+            .map(|e| ((e.category, e.code), e.description))
+            .collect();
+        Self { data }
+    }
+
+    /// Look up a description by category and code.
+    pub fn lookup(&self, category: &str, code: &str) -> Option<&str> {
+        self.data
+            .get(&(category.to_string(), code.to_string()))
+            .map(|s| s.as_str())
+    }
+
+    /// Get all `(code, description)` pairs for a category, sorted by description.
+    pub fn entries_for_category(&self, category: &str) -> Vec<(&str, &str)> {
+        let mut entries: Vec<(&str, &str)> = self
+            .data
+            .iter()
+            .filter(|((cat, _), _)| cat == category)
+            .map(|((_, code), desc)| (code.as_str(), desc.as_str()))
+            .collect();
+        entries.sort_by(|a, b| a.1.cmp(b.1));
+        entries
+    }
+}
 
 #[derive(Clone)]
 pub struct AppState {
     pub banner_api: Arc<BannerApi>,
     pub db_pool: PgPool,
     pub service_statuses: ServiceStatusRegistry,
+    pub reference_cache: Arc<RwLock<ReferenceCache>>,
 }
 
 impl AppState {
@@ -20,9 +75,20 @@ impl AppState {
             banner_api,
             db_pool,
             service_statuses: ServiceStatusRegistry::new(),
+            reference_cache: Arc::new(RwLock::new(ReferenceCache::new())),
         }
     }
 
+    /// Initialize the reference cache from the database.
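+    ///
+    /// Safe to call against an empty `reference_data` table: the cache simply
+    /// stays empty until the scheduler's first reference scrape repopulates it.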
+    pub async fn load_reference_cache(&self) -> Result<()> {
+        let entries = crate::data::reference::get_all(&self.db_pool).await?;
+        let count = entries.len();
+        let cache = ReferenceCache::from_entries(entries);
+        *self.reference_cache.write().await = cache;
+        tracing::info!(entries = count, "Reference cache loaded");
+        Ok(())
+    }
+
     /// Get a course by CRN directly from Banner API
     pub async fn get_course_or_fetch(&self, term: &str, crn: &str) -> Result<Course> {
         self.banner_api
diff --git a/src/web/routes.rs b/src/web/routes.rs
index b58c067..d524258 100644
--- a/src/web/routes.rs
+++ b/src/web/routes.rs
@@ -3,7 +3,8 @@
 use axum::{
     Router,
     body::Body,
-    extract::{Request, State},
+    extract::{Path, Query, Request, State},
+    http::StatusCode as AxumStatusCode,
     response::{Json, Response},
     routing::get,
 };
@@ -14,7 +15,7 @@ use axum::{
 };
 #[cfg(feature = "embed-assets")]
 use http::header;
-use serde::Serialize;
+use serde::{Deserialize, Serialize};
 use serde_json::{Value, json};
 use std::{collections::BTreeMap, time::Duration};
 
@@ -71,6 +72,11 @@ pub fn create_router(app_state: AppState) -> Router {
         .route("/health", get(health))
         .route("/status", get(status))
         .route("/metrics", get(metrics))
+        .route("/courses/search", get(search_courses))
+        .route("/courses/{term}/{crn}", get(get_course))
+        .route("/terms", get(get_terms))
+        .route("/subjects", get(get_subjects))
+        .route("/reference/{category}", get(get_reference))
         .with_state(app_state);
 
     let mut router = Router::new().nest("/api", api_router);
@@ -249,7 +255,10 @@ async fn status(State(state): State<AppState>) -> Json<Value> {
         );
     }
 
-    let overall_status = if services.values().any(|s| matches!(s.status, ServiceStatus::Error)) {
+    let overall_status = if services
+        .values()
+        .any(|s| matches!(s.status, ServiceStatus::Error))
+    {
         ServiceStatus::Error
     } else if !services.is_empty()
         && services
@@ -281,3 +290,280 @@ async fn metrics() -> Json<Value> {
         "timestamp": chrono::Utc::now().to_rfc3339()
     }))
 }
+
+// ============================================================
+// Course search & detail API
+// ============================================================
+
+#[derive(Deserialize)]
+struct SearchParams {
+    term: String,
+    subject: Option<String>,
+    q: Option<String>,
+    course_number_low: Option<i32>,
+    course_number_high: Option<i32>,
+    #[serde(default)]
+    open_only: bool,
+    instructional_method: Option<String>,
+    campus: Option<String>,
+    #[serde(default = "default_limit")]
+    limit: i32,
+    #[serde(default)]
+    offset: i32,
+}
+
+fn default_limit() -> i32 {
+    25
+}
+
+#[derive(Serialize)]
+#[serde(rename_all = "camelCase")]
+struct CourseResponse {
+    crn: String,
+    subject: String,
+    course_number: String,
+    title: String,
+    term_code: String,
+    sequence_number: Option<String>,
+    instructional_method: Option<String>,
+    campus: Option<String>,
+    enrollment: i32,
+    max_enrollment: i32,
+    wait_count: i32,
+    wait_capacity: i32,
+    credit_hours: Option<i32>,
+    credit_hour_low: Option<i32>,
+    credit_hour_high: Option<i32>,
+    cross_list: Option<String>,
+    cross_list_capacity: Option<i32>,
+    cross_list_count: Option<i32>,
+    link_identifier: Option<String>,
+    is_section_linked: Option<bool>,
+    part_of_term: Option<String>,
+    meeting_times: Value,
+    attributes: Value,
+    instructors: Vec<InstructorResponse>,
+}
+
+#[derive(Serialize)]
+#[serde(rename_all = "camelCase")]
+struct InstructorResponse {
+    banner_id: String,
+    display_name: String,
+    email: Option<String>,
+    is_primary: bool,
+}
+
+#[derive(Serialize)]
+#[serde(rename_all = "camelCase")]
+struct SearchResponse {
+    courses: Vec<CourseResponse>,
+    total_count: i64,
+    offset: i32,
+    limit: i32,
+}
+
+#[derive(Serialize)]
+#[serde(rename_all = "camelCase")]
+struct CodeDescription {
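+    // Shared response shape for the /terms, /subjects, and /reference endpoints.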
    code: String,
+    description: String,
+}
+
+/// Build a `CourseResponse` from a DB course, fetching its instructors.
+async fn build_course_response(
+    course: &crate::data::models::Course,
+    db_pool: &sqlx::PgPool,
+) -> CourseResponse {
+    let instructors = crate::data::courses::get_course_instructors(db_pool, course.id)
+        .await
+        .unwrap_or_default()
+        .into_iter()
+        .map(
+            |(banner_id, display_name, email, is_primary)| InstructorResponse {
+                banner_id,
+                display_name,
+                email,
+                is_primary,
+            },
+        )
+        .collect();
+
+    CourseResponse {
+        crn: course.crn.clone(),
+        subject: course.subject.clone(),
+        course_number: course.course_number.clone(),
+        title: course.title.clone(),
+        term_code: course.term_code.clone(),
+        sequence_number: course.sequence_number.clone(),
+        instructional_method: course.instructional_method.clone(),
+        campus: course.campus.clone(),
+        enrollment: course.enrollment,
+        max_enrollment: course.max_enrollment,
+        wait_count: course.wait_count,
+        wait_capacity: course.wait_capacity,
+        credit_hours: course.credit_hours,
+        credit_hour_low: course.credit_hour_low,
+        credit_hour_high: course.credit_hour_high,
+        cross_list: course.cross_list.clone(),
+        cross_list_capacity: course.cross_list_capacity,
+        cross_list_count: course.cross_list_count,
+        link_identifier: course.link_identifier.clone(),
+        is_section_linked: course.is_section_linked,
+        part_of_term: course.part_of_term.clone(),
+        meeting_times: course.meeting_times.clone(),
+        attributes: course.attributes.clone(),
+        instructors,
+    }
+}
+
+/// `GET /api/courses/search`
+async fn search_courses(
+    State(state): State<AppState>,
+    Query(params): Query<SearchParams>,
+) -> Result<Json<SearchResponse>, (AxumStatusCode, String)> {
+    let limit = params.limit.clamp(1, 100);
+    let offset = params.offset.max(0);
+
+    let (courses, total_count) = crate::data::courses::search_courses(
+        &state.db_pool,
+        &params.term,
+        params.subject.as_deref(),
+        params.q.as_deref(),
+        params.course_number_low,
+        params.course_number_high,
+        params.open_only,
+        params.instructional_method.as_deref(),
+        params.campus.as_deref(),
+        limit,
+        offset,
+    )
+    .await
+    .map_err(|e| {
+        tracing::error!(error = %e, "Course search failed");
+        (
+            AxumStatusCode::INTERNAL_SERVER_ERROR,
+            "Search failed".to_string(),
+        )
+    })?;
+
+    let mut course_responses = Vec::with_capacity(courses.len());
+    for course in &courses {
+        course_responses.push(build_course_response(course, &state.db_pool).await);
+    }
+
+    Ok(Json(SearchResponse {
+        courses: course_responses,
+        total_count,
+        offset,
+        limit,
+    }))
+}
+
+/// `GET /api/courses/:term/:crn`
+async fn get_course(
+    State(state): State<AppState>,
+    Path((term, crn)): Path<(String, String)>,
+) -> Result<Json<CourseResponse>, (AxumStatusCode, String)> {
+    let course = crate::data::courses::get_course_by_crn(&state.db_pool, &crn, &term)
+        .await
+        .map_err(|e| {
+            tracing::error!(error = %e, "Course lookup failed");
+            (
+                AxumStatusCode::INTERNAL_SERVER_ERROR,
+                "Lookup failed".to_string(),
+            )
+        })?
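+        // A missing row is a client-facing 404, distinct from the 500 above.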
        .ok_or_else(|| (AxumStatusCode::NOT_FOUND, "Course not found".to_string()))?;
+
+    Ok(Json(build_course_response(&course, &state.db_pool).await))
+}
+
+/// `GET /api/terms`
+async fn get_terms(
+    State(state): State<AppState>,
+) -> Result<Json<Vec<CodeDescription>>, (AxumStatusCode, String)> {
+    let cache = state.reference_cache.read().await;
+    let term_codes = crate::data::courses::get_available_terms(&state.db_pool)
+        .await
+        .map_err(|e| {
+            tracing::error!(error = %e, "Failed to get terms");
+            (
+                AxumStatusCode::INTERNAL_SERVER_ERROR,
+                "Failed to get terms".to_string(),
+            )
+        })?;
+
+    let terms: Vec<CodeDescription> = term_codes
+        .into_iter()
+        .map(|code| {
+            let description = cache
+                .lookup("term", &code)
+                .unwrap_or("Unknown Term")
+                .to_string();
+            CodeDescription { code, description }
+        })
+        .collect();
+
+    Ok(Json(terms))
+}
+
+/// `GET /api/subjects` (served from the reference cache; no term parameter)
+async fn get_subjects(
+    State(state): State<AppState>,
+) -> Result<Json<Vec<CodeDescription>>, (AxumStatusCode, String)> {
+    let cache = state.reference_cache.read().await;
+    let entries = cache.entries_for_category("subject");
+
+    let subjects: Vec<CodeDescription> = entries
+        .into_iter()
+        .map(|(code, description)| CodeDescription {
+            code: code.to_string(),
+            description: description.to_string(),
+        })
+        .collect();
+
+    Ok(Json(subjects))
+}
+
+/// `GET /api/reference/:category`
+async fn get_reference(
+    State(state): State<AppState>,
+    Path(category): Path<String>,
+) -> Result<Json<Vec<CodeDescription>>, (AxumStatusCode, String)> {
+    let cache = state.reference_cache.read().await;
+    let entries = cache.entries_for_category(&category);
+
+    if entries.is_empty() {
+        // Fall back to DB query in case cache doesn't have this category
+        drop(cache);
+        let rows = crate::data::reference::get_by_category(&category, &state.db_pool)
+            .await
+            .map_err(|e| {
+                tracing::error!(error = %e, category = %category, "Reference lookup failed");
+                (
+                    AxumStatusCode::INTERNAL_SERVER_ERROR,
+                    "Lookup failed".to_string(),
+                )
+            })?;
+
+        return Ok(Json(
+            rows.into_iter()
+                .map(|r| CodeDescription {
+                    code: r.code,
+                    description: r.description,
+                })
+                .collect(),
+        ));
+    }
+
+    Ok(Json(
+        entries
+            .into_iter()
+            .map(|(code, desc)| CodeDescription {
+                code: code.to_string(),
+                description: desc.to_string(),
+            })
+            .collect(),
+    ))
+}
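
--
Reviewer note: a quick sketch of the new API surface once deployed. Values
such as term 202510 and CRN 12345 are placeholders, not taken from this change:

    GET /api/courses/search?term=202510&subject=CS&open_only=true&limit=25
    GET /api/courses/202510/12345
    GET /api/terms
    GET /api/subjects
    GET /api/reference/campus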