Mirror of https://github.com/Xevion/banner.git, synced 2025-12-06 01:14:22 -06:00

Compare commits: c7117f14a3 ... f2bd02c970 (4 commits)

| SHA1 |
|---|
| f2bd02c970 |
| 8cdf969a53 |
| 4764d48ac9 |
| e734e40347 |
Cargo.lock (generated, 44 lines changed)
@@ -175,6 +175,8 @@ dependencies = [
 "chrono-tz",
 "compile-time",
 "diesel",
+"diesel-derive-enum",
+"diesel_derives",
 "dotenvy",
 "figment",
 "fundu",
@@ -573,9 +575,23 @@ dependencies = [
 "diesel_derives",
 "itoa",
 "pq-sys",
+"r2d2",
+"serde_json",
 "uuid",
 ]

+[[package]]
+name = "diesel-derive-enum"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81c5131a2895ef64741dad1d483f358c2a229a3a2d1b256778cdc5e146db64d4"
+dependencies = [
+ "heck 0.4.1",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.106",
+]
+
 [[package]]
 name = "diesel_derives"
 version = "2.2.7"
@@ -642,7 +658,7 @@ checksum = "139ae9aca7527f85f26dd76483eb38533fd84bd571065da1739656ef71c5ff5b"
 dependencies = [
 "darling",
 "either",
-"heck",
+"heck 0.5.0",
 "proc-macro2",
 "quote",
 "syn 2.0.106",
@@ -986,6 +1002,12 @@ dependencies = [
 "foldhash",
 ]

+[[package]]
+name = "heck"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
+
 [[package]]
 name = "heck"
 version = "0.5.0"
@@ -1896,6 +1918,17 @@ version = "5.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"

+[[package]]
+name = "r2d2"
+version = "0.8.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93"
+dependencies = [
+ "log",
+ "parking_lot",
+ "scheduled-thread-pool",
+]
+
 [[package]]
 name = "rand"
 version = "0.8.5"
@@ -2286,6 +2319,15 @@ dependencies = [
 "windows-sys 0.59.0",
 ]

+[[package]]
+name = "scheduled-thread-pool"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19"
+dependencies = [
+ "parking_lot",
+]
+
 [[package]]
 name = "scopeguard"
 version = "1.2.0"
Cargo.toml

@@ -8,7 +8,7 @@ tokio = { version = "1.47.1", features = ["full"] }
 axum = "0.8.4"
 serenity = { version = "0.12.4", features = ["rustls_backend"] }
 reqwest = { version = "0.12.23", features = ["json", "cookies"] }
-diesel = { version = "2.2.12", features = ["chrono", "postgres", "uuid"] }
+diesel = { version = "2.2.12", features = ["chrono", "postgres", "r2d2", "uuid", "serde_json"] }
 redis = { version = "0.32.5", features = ["tokio-comp"] }
 figment = { version = "0.10.19", features = ["toml", "env"] }
 serde_json = "1.0.143"

@@ -30,3 +30,5 @@ url = "2.5"
 compile-time = "0.2.0"
 time = "0.3.41"
 bitflags = { version = "2.9.3", features = ["serde"] }
+diesel_derives = "2.2.7"
+diesel-derive-enum = { version = "2.1.0", features = ["postgres"] }
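The feature change above turns on Diesel's bundled r2d2 connection pooling and Jsonb/serde_json support. As a minimal sketch of what the "r2d2" feature provides (the helper name and pool size are illustrative, not taken from this repository):

use diesel::pg::PgConnection;
use diesel::r2d2::{ConnectionManager, Pool};

// Illustrative: build a pool of Postgres connections using Diesel's
// re-exported r2d2 support, enabled by the "r2d2" feature above.
fn build_pool(database_url: &str) -> Pool<ConnectionManager<PgConnection>> {
    let manager = ConnectionManager::<PgConnection>::new(database_url);
    Pool::builder()
        .max_size(8) // arbitrary size for the sketch
        .build(manager)
        .expect("failed to build connection pool")
}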
bacon.toml (new file, 92 lines)

@@ -0,0 +1,92 @@
# This is a configuration file for the bacon tool
#
# Complete help on configuration: https://dystroy.org/bacon/config/
#
# You may check the current default at
# https://github.com/Canop/bacon/blob/main/defaults/default-bacon.toml

default_job = "check"
env.CARGO_TERM_COLOR = "always"

[jobs.check]
command = ["cargo", "check"]
need_stdout = false

[jobs.check-all]
command = ["cargo", "check", "--all-targets"]
need_stdout = false

# Run clippy on the default target
[jobs.clippy]
command = ["cargo", "clippy"]
need_stdout = false

# Run clippy on all targets
# To disable some lints, you may change the job this way:
#    [jobs.clippy-all]
#    command = [
#        "cargo", "clippy",
#        "--all-targets",
#        "--",
#        "-A", "clippy::bool_to_int_with_if",
#        "-A", "clippy::collapsible_if",
#        "-A", "clippy::derive_partial_eq_without_eq",
#    ]
#    need_stdout = false
[jobs.clippy-all]
command = ["cargo", "clippy", "--all-targets"]
need_stdout = false

# This job lets you run
# - all tests: bacon test
# - a specific test: bacon test -- config::test_default_files
# - the tests of a package: bacon test -- -- -p config
[jobs.test]
command = ["cargo", "test"]
need_stdout = true

[jobs.nextest]
command = [
    "cargo", "nextest", "run",
    "--hide-progress-bar", "--failure-output", "final"
]
need_stdout = true
analyzer = "nextest"

[jobs.doc]
command = ["cargo", "doc", "--no-deps"]
need_stdout = false

# If the doc compiles, then it opens in your browser and bacon switches
# to the previous job
[jobs.doc-open]
command = ["cargo", "doc", "--no-deps", "--open"]
need_stdout = false
on_success = "back" # so that we don't open the browser at each change

[jobs.run]
command = [
    "cargo", "run",
]
need_stdout = true
allow_warnings = true
background = false
on_change_strategy = "kill_then_restart"
# kill = ["pkill", "-TERM", "-P"]

# This parameterized job runs the example of your choice, as soon
# as the code compiles.
# Call it as
#    bacon ex -- my-example
[jobs.ex]
command = ["cargo", "run", "--example"]
need_stdout = true
allow_warnings = true

# You may define here keybindings that would be specific to
# a project, for example a shortcut to launch a specific job.
# Shortcuts to internal functions (scrolling, toggling, etc.)
# should go in your personal global prefs.toml file instead.
[keybindings]
# alt-m = "job:my-job"
c = "job:clippy-all" # comment this to have 'c' run clippy on only the default target
diesel.toml (new file, 9 lines)

@@ -0,0 +1,9 @@
# For documentation on how to configure this file,
# see https://diesel.rs/guides/configuring-diesel-cli

[print_schema]
file = "src/data/schema.rs"
custom_type_derives = ["diesel::query_builder::QueryId", "Clone"]

[migrations_directory]
dir = "migrations"
migrations/.keep (new empty file)
migrations/00000000000000_diesel_initial_setup/down.sql (new file, 6 lines)

@@ -0,0 +1,6 @@
-- This file was automatically created by Diesel to setup helper functions
-- and other internal bookkeeping. This file is safe to edit, any future
-- changes will be added to existing projects as new migrations.

DROP FUNCTION IF EXISTS diesel_manage_updated_at(_tbl regclass);
DROP FUNCTION IF EXISTS diesel_set_updated_at();
migrations/00000000000000_diesel_initial_setup/up.sql (new file, 36 lines)

@@ -0,0 +1,36 @@
-- This file was automatically created by Diesel to setup helper functions
-- and other internal bookkeeping. This file is safe to edit, any future
-- changes will be added to existing projects as new migrations.




-- Sets up a trigger for the given table to automatically set a column called
-- `updated_at` whenever the row is modified (unless `updated_at` was included
-- in the modified columns)
--
-- # Example
--
-- ```sql
-- CREATE TABLE users (id SERIAL PRIMARY KEY, updated_at TIMESTAMP NOT NULL DEFAULT NOW());
--
-- SELECT diesel_manage_updated_at('users');
-- ```
CREATE OR REPLACE FUNCTION diesel_manage_updated_at(_tbl regclass) RETURNS VOID AS $$
BEGIN
    EXECUTE format('CREATE TRIGGER set_updated_at BEFORE UPDATE ON %s
                    FOR EACH ROW EXECUTE PROCEDURE diesel_set_updated_at()', _tbl);
END;
$$ LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION diesel_set_updated_at() RETURNS trigger AS $$
BEGIN
    IF (
        NEW IS DISTINCT FROM OLD AND
        NEW.updated_at IS NOT DISTINCT FROM OLD.updated_at
    ) THEN
        NEW.updated_at := current_timestamp;
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
migrations/2025-08-27-231618_setup/down.sql (new file, 4 lines)

@@ -0,0 +1,4 @@
-- This file should undo anything in `up.sql`
DROP TABLE IF EXISTS "courses";
DROP TABLE IF EXISTS "course_metrics";
DROP TABLE IF EXISTS "course_audits";
migrations/2025-08-27-231618_setup/up.sql (new file, 35 lines)

@@ -0,0 +1,35 @@
-- Your SQL goes here
CREATE TABLE "courses"(
    "id" INT4 NOT NULL PRIMARY KEY,
    "crn" VARCHAR NOT NULL,
    "subject" VARCHAR NOT NULL,
    "course_number" VARCHAR NOT NULL,
    "title" VARCHAR NOT NULL,
    "term_code" VARCHAR NOT NULL,
    "enrollment" INT4 NOT NULL,
    "max_enrollment" INT4 NOT NULL,
    "wait_count" INT4 NOT NULL,
    "wait_capacity" INT4 NOT NULL,
    "last_scraped_at" TIMESTAMPTZ NOT NULL
);

CREATE TABLE "course_metrics"(
    "id" INT4 NOT NULL PRIMARY KEY,
    "course_id" INT4 NOT NULL,
    "timestamp" TIMESTAMPTZ NOT NULL,
    "enrollment" INT4 NOT NULL,
    "wait_count" INT4 NOT NULL,
    "seats_available" INT4 NOT NULL,
    FOREIGN KEY ("course_id") REFERENCES "courses"("id")
);

CREATE TABLE "course_audits"(
    "id" INT4 NOT NULL PRIMARY KEY,
    "course_id" INT4 NOT NULL,
    "timestamp" TIMESTAMPTZ NOT NULL,
    "field_changed" VARCHAR NOT NULL,
    "old_value" TEXT NOT NULL,
    "new_value" TEXT NOT NULL,
    FOREIGN KEY ("course_id") REFERENCES "courses"("id")
);
src/banner/api.rs

@@ -11,15 +11,15 @@ use tracing::{error, info};
 /// Main Banner API client.
 #[derive(Debug)]
 pub struct BannerApi {
-    session_manager: SessionManager,
-    client: Client,
+    sessions: SessionManager,
+    http: Client,
     base_url: String,
 }

 impl BannerApi {
     /// Creates a new Banner API client.
     pub fn new(base_url: String) -> Result<Self> {
-        let client = Client::builder()
+        let http = Client::builder()
             .cookie_store(true)
             .user_agent(user_agent())
             .tcp_keepalive(Some(std::time::Duration::from_secs(60 * 5)))
@@ -29,11 +29,11 @@ impl BannerApi {
             .build()
             .context("Failed to create HTTP client")?;

-        let session_manager = SessionManager::new(base_url.clone(), client.clone());
+        let session_manager = SessionManager::new(base_url.clone(), http.clone());

         Ok(Self {
-            session_manager,
-            client,
+            sessions: session_manager,
+            http,
             base_url,
         })
     }
@@ -41,7 +41,7 @@
     /// Sets up the API client by initializing session cookies.
     pub async fn setup(&self) -> Result<()> {
         info!(base_url = self.base_url, "setting up banner api client");
-        let result = self.session_manager.setup().await;
+        let result = self.sessions.setup().await;
         match &result {
             Ok(()) => info!("banner api client setup completed successfully"),
             Err(e) => error!(error = ?e, "banner api client setup failed"),
@@ -69,7 +69,7 @@
         ];

         let response = self
-            .client
+            .http
             .get(&url)
             .query(&params)
             .send()
@@ -96,7 +96,7 @@
             return Err(anyhow::anyhow!("Offset must be greater than 0"));
         }

-        let session_id = self.session_manager.ensure_session()?;
+        let session_id = self.sessions.ensure_session()?;
         let url = format!("{}/classSearch/get_subject", self.base_url);
         let params = [
             ("searchTerm", search),
@@ -108,7 +108,7 @@
         ];

         let response = self
-            .client
+            .http
             .get(&url)
             .query(&params)
             .send()
@@ -135,7 +135,7 @@
             return Err(anyhow::anyhow!("Offset must be greater than 0"));
         }

-        let session_id = self.session_manager.ensure_session()?;
+        let session_id = self.sessions.ensure_session()?;
         let url = format!("{}/classSearch/get_instructor", self.base_url);
         let params = [
             ("searchTerm", search),
@@ -147,7 +147,7 @@
         ];

         let response = self
-            .client
+            .http
             .get(&url)
             .query(&params)
             .send()
@@ -174,7 +174,7 @@
             return Err(anyhow::anyhow!("Offset must be greater than 0"));
         }

-        let session_id = self.session_manager.ensure_session()?;
+        let session_id = self.sessions.ensure_session()?;
         let url = format!("{}/classSearch/get_campus", self.base_url);
         let params = [
             ("searchTerm", search),
@@ -186,7 +186,7 @@
         ];

         let response = self
-            .client
+            .http
             .get(&url)
             .query(&params)
             .send()
@@ -211,7 +211,7 @@
         let params = [("term", term), ("courseReferenceNumber", crn)];

         let response = self
-            .client
+            .http
             .get(&url)
             .query(&params)
             .send()
@@ -260,9 +260,9 @@
         sort: &str,
         sort_descending: bool,
     ) -> Result<SearchResult> {
-        self.session_manager.reset_data_form().await?;
+        self.sessions.reset_data_form().await?;

-        let session_id = self.session_manager.ensure_session()?;
+        let session_id = self.sessions.ensure_session()?;
         let mut params = query.to_params();

         // Add additional parameters
@@ -278,7 +278,7 @@

         let url = format!("{}/searchResults/searchResults", self.base_url);
         let response = self
-            .client
+            .http
             .get(&url)
             .query(&params)
             .send()
@@ -301,16 +301,16 @@

     /// Selects a term for the current session.
     pub async fn select_term(&self, term: &str) -> Result<()> {
-        self.session_manager.select_term(term).await
+        self.sessions.select_term(term).await
     }

     /// Retrieves a single course by CRN by issuing a minimal search
     pub async fn get_course_by_crn(&self, term: &str, crn: &str) -> Result<Option<Course>> {
-        self.session_manager.reset_data_form().await?;
+        self.sessions.reset_data_form().await?;
         // Ensure session is configured for this term
         self.select_term(term).await?;

-        let session_id = self.session_manager.ensure_session()?;
+        let session_id = self.sessions.ensure_session()?;

         let query = SearchQuery::new()
             .course_reference_number(crn)
@@ -326,7 +326,7 @@

         let url = format!("{}/searchResults/searchResults", self.base_url);
         let response = self
-            .client
+            .http
             .get(&url)
             .query(&params)
             .send()
@@ -366,7 +366,7 @@

         let url = format!("{}/searchResults/getClassDetails", self.base_url);
         let response = self
-            .client
+            .http
             .post(&url)
             .json(&body)
             .send()
src/banner/mod.rs

@@ -11,7 +11,6 @@
 pub mod api;
 pub mod models;
 pub mod query;
-pub mod scraper;
 pub mod session;
 pub mod util;

src/banner/scraper.rs (deleted, 292 lines)

@@ -1,292 +0,0 @@
//! Course scraping functionality for the Banner API.

use crate::banner::{api::BannerApi, models::*, query::SearchQuery};
use anyhow::{Context, Result};
use redis::AsyncCommands;
use std::sync::Arc;
use std::time::Duration;
use tokio::time;
use tracing::{debug, error, info, warn};

/// Priority majors that should be scraped more frequently
const PRIORITY_MAJORS: &[&str] = &["CS", "CPE", "MAT", "EE", "IS"];

/// Maximum number of courses to fetch per page
const MAX_PAGE_SIZE: i32 = 500;

/// Course scraper for Banner API
pub struct CourseScraper {
    api: Arc<BannerApi>,
    redis_client: redis::Client,
}

impl CourseScraper {
    /// Creates a new course scraper
    pub fn new(api: Arc<BannerApi>, redis_url: &str) -> Result<Self> {
        let redis_client =
            redis::Client::open(redis_url).context("Failed to create Redis client")?;

        Ok(Self { api, redis_client })
    }

    /// Scrapes all courses and stores them in Redis
    pub async fn scrape_all(&self, term: &str) -> Result<()> {
        // Get all subjects
        let subjects = self
            .api
            .get_subjects("", term, 1, 100)
            .await
            .context("Failed to get subjects for scraping")?;

        if subjects.is_empty() {
            return Err(anyhow::anyhow!("no subjects found for term {term}"));
        }

        // Categorize subjects
        let (priority_subjects, other_subjects): (Vec<_>, Vec<_>) = subjects
            .into_iter()
            .partition(|subject| PRIORITY_MAJORS.contains(&subject.code.as_str()));

        // Get expired subjects that need scraping
        let mut expired_subjects = Vec::new();
        expired_subjects.extend(self.get_expired_subjects(&priority_subjects, term).await?);
        expired_subjects.extend(self.get_expired_subjects(&other_subjects, term).await?);

        if expired_subjects.is_empty() {
            info!("no expired subjects found, skipping scrape");
            return Ok(());
        }

        info!(
            "scraping {count} subjects for term {term}",
            count = expired_subjects.len()
        );

        // Scrape each expired subject
        for subject in expired_subjects {
            if let Err(e) = self.scrape_subject(&subject.code, term).await {
                error!(
                    "failed to scrape subject {subject}: {e}",
                    subject = subject.code
                );
            }

            // Rate limiting between subjects
            time::sleep(Duration::from_secs(2)).await;
        }

        Ok(())
    }

    /// Gets subjects that have expired and need to be scraped
    async fn get_expired_subjects(&self, subjects: &[Pair], term: &str) -> Result<Vec<Pair>> {
        let mut conn = self
            .redis_client
            .get_multiplexed_async_connection()
            .await
            .context("Failed to get Redis connection")?;

        let mut expired = Vec::new();

        for subject in subjects {
            let key = format!("scraped:{code}:{term}", code = subject.code);
            let scraped: Option<String> = conn
                .get(&key)
                .await
                .context("Failed to check scrape status in Redis")?;

            // If not scraped or marked as expired (empty/0), add to list
            if scraped.is_none() || scraped.as_deref() == Some("0") {
                expired.push(subject.clone());
            }
        }

        Ok(expired)
    }

    /// Scrapes all courses for a specific subject
    pub async fn scrape_subject(&self, subject: &str, term: &str) -> Result<()> {
        let mut offset = 0;
        let mut total_courses = 0;

        loop {
            let query = SearchQuery::new()
                .subject(subject)
                .offset(offset)
                .max_results(MAX_PAGE_SIZE * 2);

            // Ensure session term is selected before searching
            self.api.select_term(term).await?;

            let result = self
                .api
                .search(term, &query, "subjectDescription", false)
                .await
                .with_context(|| {
                    format!("failed to search for subject {subject} at offset {offset}")
                })?;

            if !result.success {
                return Err(anyhow::anyhow!(
                    "search marked unsuccessful for subject {subject}"
                ));
            }

            let course_count = result.data.as_ref().map(|v| v.len() as i32).unwrap_or(0);
            total_courses += course_count;

            debug!(
                "retrieved {count} courses for subject {subject} at offset {offset}",
                count = course_count
            );

            // Store each course in Redis
            for course in result.data.unwrap_or_default() {
                if let Err(e) = self.store_course(&course).await {
                    error!(
                        "failed to store course {crn}: {e}",
                        crn = course.course_reference_number
                    );
                }
            }

            // Check if we got a full page and should continue
            if course_count >= MAX_PAGE_SIZE {
                if course_count > MAX_PAGE_SIZE {
                    warn!(
                        "course count {count} exceeds max page size {max_page_size}",
                        count = course_count,
                        max_page_size = MAX_PAGE_SIZE
                    );
                }

                offset += MAX_PAGE_SIZE;
                debug!("continuing to next page for subject {subject} at offset {offset}");

                // Rate limiting between pages
                time::sleep(Duration::from_secs(3)).await;
                continue;
            }

            break;
        }

        info!(
            "scraped {count} total courses for subject {subject}",
            count = total_courses
        );

        // Mark subject as scraped with expiry
        self.mark_subject_scraped(subject, term, total_courses)
            .await?;

        Ok(())
    }

    /// Stores a course in Redis
    async fn store_course(&self, course: &Course) -> Result<()> {
        let mut conn = self
            .redis_client
            .get_multiplexed_async_connection()
            .await
            .context("Failed to get Redis connection")?;

        let key = format!("class:{crn}", crn = course.course_reference_number);
        let serialized = serde_json::to_string(course).context("Failed to serialize course")?;

        let _: () = conn
            .set(&key, serialized)
            .await
            .context("Failed to store course in Redis")?;

        Ok(())
    }

    /// Marks a subject as scraped with appropriate expiry time
    async fn mark_subject_scraped(
        &self,
        subject: &str,
        term: &str,
        course_count: i32,
    ) -> Result<()> {
        let mut conn = self
            .redis_client
            .get_multiplexed_async_connection()
            .await
            .context("Failed to get Redis connection")?;

        let key = format!("scraped:{subject}:{term}", subject = subject);
        let expiry = self.calculate_expiry(subject, course_count);

        let value = if course_count == 0 { -1 } else { course_count };

        let _: () = conn
            .set_ex(&key, value, expiry.as_secs())
            .await
            .context("Failed to mark subject as scraped")?;

        debug!(
            "marked subject {subject} as scraped with {count} courses, expiry: {expiry:?}",
            subject = subject,
            count = course_count,
            expiry = expiry
        );

        Ok(())
    }

    /// Calculates expiry time for a scraped subject based on various factors
    fn calculate_expiry(&self, subject: &str, course_count: i32) -> Duration {
        // Base calculation: 1 hour per 100 courses
        let mut base_expiry = Duration::from_secs(3600 * (course_count as u64 / 100).max(1));

        // Special handling for subjects with few courses
        if course_count < 50 {
            // Linear interpolation: 1 course = 12 hours, 49 courses = 1 hour
            let hours = 12.0 - ((course_count as f64 - 1.0) / 48.0) * 11.0;
            base_expiry = Duration::from_secs((hours * 3600.0) as u64);
        }

        // Priority subjects get shorter expiry (more frequent updates)
        if PRIORITY_MAJORS.contains(&subject) {
            base_expiry /= 3;
        }

        // Add random variance (±15%)
        let variance = (base_expiry.as_secs() as f64 * 0.15) as u64;
        let random_offset = (rand::random::<f64>() - 0.5) * 2.0 * variance as f64;

        let final_expiry = if random_offset > 0.0 {
            base_expiry + Duration::from_secs(random_offset as u64)
        } else {
            base_expiry.saturating_sub(Duration::from_secs((-random_offset) as u64))
        };

        // Ensure minimum of 1 hour
        final_expiry.max(Duration::from_secs(3600))
    }

    /// Gets a course from Redis cache
    pub async fn get_course(&self, crn: &str) -> Result<Option<Course>> {
        let mut conn = self
            .redis_client
            .get_multiplexed_async_connection()
            .await
            .context("Failed to get Redis connection")?;

        let key = format!("class:{crn}");
        let serialized: Option<String> = conn
            .get(&key)
            .await
            .context("Failed to get course from Redis")?;

        match serialized {
            Some(data) => {
                let course: Course = serde_json::from_str(&data)
                    .context("Failed to deserialize course from Redis")?;
                Ok(Some(course))
            }
            None => Ok(None),
        }
    }
}
@@ -4,7 +4,7 @@ use crate::banner::{Course, DayOfWeek, MeetingScheduleInfo};
 use crate::bot::{Context, Error, utils};
 use chrono::NaiveDate;
 use std::collections::HashMap;
-use tracing::{error, info};
+use tracing::info;
 use url::Url;

 /// Generate a link to create a Google Calendar event for a course
@@ -22,19 +22,12 @@ pub async fn gcal(
     let term = course.term.clone();

     // Get meeting times
-    let meeting_times = match ctx
+    let meeting_times = ctx
         .data()
         .app_state
         .banner_api
         .get_course_meeting_time(&term, &crn.to_string())
-        .await
-    {
-        Ok(meeting_time) => meeting_time,
-        Err(e) => {
-            error!("failed to get meeting times: {}", e);
-            return Err(e);
-        }
-    };
+        .await?;

     struct LinkDetail {
         link: String,
src/data/mod.rs (new file, 4 lines)

@@ -0,0 +1,4 @@
//! Database models and schema.

pub mod models;
pub mod schema;
src/data/models.rs (new file, 123 lines)

@@ -0,0 +1,123 @@
//! Diesel models for the database schema.

use crate::data::schema::{course_audits, course_metrics, courses, scrape_jobs};
use chrono::{DateTime, Utc};
use diesel::{Insertable, Queryable, QueryableByName, Selectable};
use serde_json::Value;

#[derive(Queryable, Selectable)]
#[diesel(table_name = courses)]
pub struct Course {
    pub id: i32,
    pub crn: String,
    pub subject: String,
    pub course_number: String,
    pub title: String,
    pub term_code: String,
    pub enrollment: i32,
    pub max_enrollment: i32,
    pub wait_count: i32,
    pub wait_capacity: i32,
    pub last_scraped_at: DateTime<Utc>,
}

#[derive(Insertable)]
#[diesel(table_name = courses)]
pub struct NewCourse<'a> {
    pub crn: &'a str,
    pub subject: &'a str,
    pub course_number: &'a str,
    pub title: &'a str,
    pub term_code: &'a str,
    pub enrollment: i32,
    pub max_enrollment: i32,
    pub wait_count: i32,
    pub wait_capacity: i32,
    pub last_scraped_at: DateTime<Utc>,
}

#[derive(Queryable, Selectable)]
#[diesel(table_name = course_metrics)]
#[diesel(belongs_to(Course))]
pub struct CourseMetric {
    pub id: i32,
    pub course_id: i32,
    pub timestamp: DateTime<Utc>,
    pub enrollment: i32,
    pub wait_count: i32,
    pub seats_available: i32,
}

#[derive(Insertable)]
#[diesel(table_name = course_metrics)]
pub struct NewCourseMetric {
    pub course_id: i32,
    pub timestamp: DateTime<Utc>,
    pub enrollment: i32,
    pub wait_count: i32,
    pub seats_available: i32,
}

#[derive(Queryable, Selectable)]
#[diesel(table_name = course_audits)]
#[diesel(belongs_to(Course))]
pub struct CourseAudit {
    pub id: i32,
    pub course_id: i32,
    pub timestamp: DateTime<Utc>,
    pub field_changed: String,
    pub old_value: String,
    pub new_value: String,
}

#[derive(Insertable)]
#[diesel(table_name = course_audits)]
pub struct NewCourseAudit<'a> {
    pub course_id: i32,
    pub timestamp: DateTime<Utc>,
    pub field_changed: &'a str,
    pub old_value: &'a str,
    pub new_value: &'a str,
}

/// The priority level of a scrape job.
#[derive(diesel_derive_enum::DbEnum, Copy, Debug, Clone)]
pub enum ScrapePriority {
    Low,
    Medium,
    High,
    Critical,
}

/// The type of target for a scrape job, determining how the payload is interpreted.
#[derive(diesel_derive_enum::DbEnum, Copy, Debug, Clone)]
pub enum TargetType {
    Subject,
    CourseRange,
    CrnList,
    SingleCrn,
}

/// Represents a queryable job from the database.
#[derive(Debug, Clone, Queryable, QueryableByName)]
#[diesel(table_name = scrape_jobs)]
pub struct ScrapeJob {
    pub id: i32,
    pub target_type: TargetType,
    pub target_payload: Value,
    pub priority: ScrapePriority,
    pub execute_at: DateTime<Utc>,
    pub created_at: DateTime<Utc>,
    pub locked_at: Option<DateTime<Utc>>,
}

/// Represents a new job to be inserted into the database.
#[derive(Debug, Clone, Insertable)]
#[diesel(table_name = scrape_jobs)]
pub struct NewScrapeJob {
    pub target_type: TargetType,
    #[diesel(sql_type = diesel::sql_types::Jsonb)]
    pub target_payload: Value,
    pub priority: ScrapePriority,
    pub execute_at: DateTime<Utc>,
}
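A minimal sketch of how the `NewScrapeJob` insertable above might be used to enqueue a job, assuming a live `PgConnection`; the helper name and the {"subject": ...} payload shape are assumptions for illustration, not code from this diff:

use chrono::Utc;
use diesel::prelude::*;
use serde_json::json;

use crate::data::models::{NewScrapeJob, ScrapePriority, TargetType};
use crate::data::schema::scrape_jobs;

// Hypothetical helper: queue a subject-level scrape for immediate execution.
fn enqueue_subject_scrape(conn: &mut PgConnection, subject: &str) -> QueryResult<usize> {
    let job = NewScrapeJob {
        target_type: TargetType::Subject,
        target_payload: json!({ "subject": subject }), // payload shape assumed
        priority: ScrapePriority::Medium,
        execute_at: Utc::now(),
    };

    diesel::insert_into(scrape_jobs::table)
        .values(&job)
        .execute(conn)
}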
src/data/schema.rs (new file, 69 lines)

@@ -0,0 +1,69 @@
pub mod sql_types {
    #[derive(diesel::sql_types::SqlType)]
    #[diesel(postgres_type(name = "scrape_priority"))]
    pub struct ScrapePriority;

    #[derive(diesel::sql_types::SqlType)]
    #[diesel(postgres_type(name = "target_type"))]
    pub struct TargetType;
}

use super::models::{ScrapePriorityMapping, TargetTypeMapping};

diesel::table! {
    use diesel::sql_types::*;
    use super::{ScrapePriorityMapping, TargetTypeMapping};

    scrape_jobs (id) {
        id -> Int4,
        target_type -> TargetTypeMapping,
        target_payload -> Jsonb,
        priority -> ScrapePriorityMapping,
        execute_at -> Timestamptz,
        created_at -> Timestamptz,
        locked_at -> Nullable<Timestamptz>,
    }
}

diesel::table! {
    courses (id) {
        id -> Int4,
        crn -> Varchar,
        subject -> Varchar,
        course_number -> Varchar,
        title -> Varchar,
        term_code -> Varchar,
        enrollment -> Int4,
        max_enrollment -> Int4,
        wait_count -> Int4,
        wait_capacity -> Int4,
        last_scraped_at -> Timestamptz,
    }
}

diesel::table! {
    course_metrics (id) {
        id -> Int4,
        course_id -> Int4,
        timestamp -> Timestamptz,
        enrollment -> Int4,
        wait_count -> Int4,
        seats_available -> Int4,
    }
}

diesel::table! {
    course_audits (id) {
        id -> Int4,
        course_id -> Int4,
        timestamp -> Timestamptz,
        field_changed -> Varchar,
        old_value -> Text,
        new_value -> Text,
    }
}

diesel::joinable!(course_metrics -> courses (course_id));
diesel::joinable!(course_audits -> courses (course_id));

diesel::allow_tables_to_appear_in_same_query!(courses, course_metrics, course_audits, scrape_jobs,);
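Because `joinable!` plus `allow_tables_to_appear_in_same_query!` register the course_metrics -> courses relationship, Diesel can express that join directly. A sketch assuming the `Course` and `CourseMetric` models from src/data/models.rs and a live `PgConnection` (the function name is illustrative):

use diesel::prelude::*;

use crate::data::models::{Course, CourseMetric};
use crate::data::schema::{course_metrics, courses};

// Sketch: load each metric row alongside its parent course.
fn metrics_with_courses(conn: &mut PgConnection) -> QueryResult<Vec<(Course, CourseMetric)>> {
    courses::table
        .inner_join(course_metrics::table)
        .select((Course::as_select(), CourseMetric::as_select()))
        .load(conn)
}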
@@ -1,6 +1,8 @@
 pub mod app_state;
 pub mod banner;
 pub mod bot;
+pub mod data;
 pub mod error;
+pub mod scraper;
 pub mod services;
 pub mod web;
src/main.rs (53 lines changed)
@@ -1,11 +1,10 @@
-use serenity::all::{ClientBuilder, GatewayIntents};
+use serenity::all::{CacheHttp, ClientBuilder, GatewayIntents};
 use tokio::signal;
 use tracing::{error, info, warn};
 use tracing_subscriber::{EnvFilter, FmtSubscriber};

 use crate::app_state::AppState;
 use crate::banner::BannerApi;
-use crate::banner::scraper::CourseScraper;
 use crate::bot::{Data, get_commands};
 use crate::config::Config;
 use crate::services::manager::ServiceManager;
@@ -18,6 +17,7 @@ mod app_state;
 mod banner;
 mod bot;
 mod config;
+mod data;
 mod error;
 mod services;
 mod web;
@@ -79,14 +79,9 @@ async fn main() {
     let app_state = AppState::new(banner_api_arc.clone(), &config.redis_url)
         .expect("Failed to create AppState");

-    // Create CourseScraper for web service
-    let scraper = CourseScraper::new(banner_api_arc.clone(), &config.redis_url)
-        .expect("Failed to create CourseScraper");
-
     // Create BannerState for web service
     let banner_state = BannerState {
         api: banner_api_arc,
-        scraper: Arc::new(scraper),
     };

     // Configure the client with your Discord bot token in the environment
@@ -97,6 +92,50 @@
     let framework = poise::Framework::builder()
         .options(poise::FrameworkOptions {
             commands: get_commands(),
+            pre_command: |ctx| {
+                Box::pin(async move {
+                    let content = match ctx {
+                        poise::Context::Application(_) => ctx.invocation_string(),
+                        poise::Context::Prefix(prefix) => prefix.msg.content.to_string(),
+                    };
+                    let channel_name = ctx
+                        .channel_id()
+                        .name(ctx.http())
+                        .await
+                        .unwrap_or("unknown".to_string());
+
+                    let span = tracing::Span::current();
+                    span.record("command_name", ctx.command().qualified_name.as_str());
+                    span.record("invocation", ctx.invocation_string());
+                    span.record("msg.content", content.as_str());
+                    span.record("msg.author", ctx.author().tag().as_str());
+                    span.record("msg.id", ctx.id());
+                    span.record("msg.channel_id", ctx.channel_id().get());
+                    span.record("msg.channel", &channel_name.as_str());
+
+                    tracing::info!(
+                        command_name = ctx.command().qualified_name.as_str(),
+                        invocation = ctx.invocation_string(),
+                        msg.content = %content,
+                        msg.author = %ctx.author().tag(),
+                        msg.author_id = %ctx.author().id,
+                        msg.id = %ctx.id(),
+                        msg.channel = %channel_name.as_str(),
+                        msg.channel_id = %ctx.channel_id(),
+                        "{} invoked by {}",
+                        ctx.command().name,
+                        ctx.author().tag()
+                    );
+                })
+            },
+            on_error: |error| {
+                Box::pin(async move {
+                    if let Err(e) = poise::builtins::on_error(error).await {
+                        tracing::error!("Fatal error while sending error message: {}", e);
+                    }
+                    // error!(error = ?error, "command error");
+                })
+            },
             ..Default::default()
         })
         .setup(move |ctx, _ready, framework| {
@@ -5,11 +5,12 @@ use serde_json::{Value, json};
 use std::sync::Arc;
 use tracing::info;

+use crate::banner::BannerApi;
+
 /// Shared application state for web server
 #[derive(Clone)]
 pub struct BannerState {
-    pub api: Arc<crate::banner::BannerApi>,
-    pub scraper: Arc<crate::banner::scraper::CourseScraper>,
+    pub api: Arc<BannerApi>,
 }

 /// Creates the web server router