Compare commits

...

2 Commits

10 changed files with 785 additions and 381 deletions

787
Cargo.lock generated
View File

File diff suppressed because it is too large Load Diff

View File

@@ -4,31 +4,31 @@ version = "0.1.0"
edition = "2024"
[dependencies]
tokio = { version = "1.47.1", features = ["full"] }
axum = "0.8.4"
serenity = { version = "0.12.4", features = ["rustls_backend"] }
reqwest = { version = "0.12.23", features = ["json", "cookies"] }
diesel = { version = "2.2.12", features = ["chrono", "postgres", "r2d2", "uuid", "serde_json"] }
redis = { version = "0.32.5", features = ["tokio-comp"] }
figment = { version = "0.10.19", features = ["toml", "env"] }
serde_json = "1.0.143"
serde = { version = "1.0.219", features = ["derive"] }
governor = "0.10.1"
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
dotenvy = "0.15.7"
poise = "0.6.1"
async-trait = "0.1"
fundu = "2.0.1"
anyhow = "1.0.99"
thiserror = "2.0.16"
chrono = { version = "0.4", features = ["serde"] }
chrono-tz = "0.8"
rand = "0.8"
regex = "1.10"
url = "2.5"
compile-time = "0.2.0"
time = "0.3.41"
async-trait = "0.1"
axum = "0.8.4"
bitflags = { version = "2.9.3", features = ["serde"] }
diesel_derives = "2.2.7"
diesel-derive-enum = { version = "2.1.0", features = ["postgres"] }
chrono = { version = "0.4", features = ["serde"] }
chrono-tz = "0.10.4"
compile-time = "0.2.0"
dotenvy = "0.15.7"
figment = { version = "0.10.19", features = ["toml", "env"] }
fundu = "2.0.1"
governor = "0.10.1"
poise = "0.6.1"
rand = "0.9.2"
redis = { version = "0.32.5", features = ["tokio-comp", "r2d2"] }
regex = "1.10"
reqwest = { version = "0.12.23", features = ["json", "cookies"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.143"
serenity = { version = "0.12.4", features = ["rustls_backend"] }
sqlx = { version = "0.8.6", features = ["runtime-tokio-rustls", "postgres", "chrono", "json", "macros"] }
thiserror = "2.0.16"
time = "0.3.41"
tokio = { version = "1.47.1", features = ["full"] }
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.19", features = ["env-filter", "json"] }
url = "2.5"
[dev-dependencies]

View File

View File

@@ -1,6 +0,0 @@
-- This file was automatically created by Diesel to setup helper functions
-- and other internal bookkeeping. This file is safe to edit, any future
-- changes will be added to existing projects as new migrations.
DROP FUNCTION IF EXISTS diesel_manage_updated_at(_tbl regclass);
DROP FUNCTION IF EXISTS diesel_set_updated_at();

View File

@@ -1,36 +0,0 @@
-- This file was automatically created by Diesel to setup helper functions
-- and other internal bookkeeping. This file is safe to edit, any future
-- changes will be added to existing projects as new migrations.
-- Sets up a trigger for the given table to automatically set a column called
-- `updated_at` whenever the row is modified (unless `updated_at` was included
-- in the modified columns)
--
-- # Example
--
-- ```sql
-- CREATE TABLE users (id SERIAL PRIMARY KEY, updated_at TIMESTAMP NOT NULL DEFAULT NOW());
--
-- SELECT diesel_manage_updated_at('users');
-- ```
-- Installs a row-level trigger named `set_updated_at` on the table passed as `_tbl`.
-- The trigger fires BEFORE UPDATE and delegates to diesel_set_updated_at().
-- `_tbl` is a regclass and is spliced into the CREATE TRIGGER statement through
-- format()'s %s placeholder. The trigger name is fixed, so this plain CREATE TRIGGER
-- is issued once per table.
CREATE OR REPLACE FUNCTION diesel_manage_updated_at(_tbl regclass) RETURNS VOID AS $$
BEGIN
EXECUTE format('CREATE TRIGGER set_updated_at BEFORE UPDATE ON %s
FOR EACH ROW EXECUTE PROCEDURE diesel_set_updated_at()', _tbl);
END;
$$ LANGUAGE plpgsql;
-- Trigger function behind the `set_updated_at` trigger.
-- When an UPDATE actually changes the row (NEW IS DISTINCT FROM OLD) but leaves
-- `updated_at` at its previous value, the column is stamped with current_timestamp.
-- If the statement itself assigns a different `updated_at`, that value is kept.
-- The (possibly modified) NEW row is always returned so the update proceeds.
CREATE OR REPLACE FUNCTION diesel_set_updated_at() RETURNS trigger AS $$
BEGIN
IF (
NEW IS DISTINCT FROM OLD AND
NEW.updated_at IS NOT DISTINCT FROM OLD.updated_at
) THEN
NEW.updated_at := current_timestamp;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;

View File

@@ -1,4 +0,0 @@
-- This file should undo anything in `up.sql`
DROP TABLE IF EXISTS "courses";
DROP TABLE IF EXISTS "course_metrics";
DROP TABLE IF EXISTS "course_audits";

View File

@@ -1,35 +0,0 @@
-- Your SQL goes here
-- Course table: one row per stored course, holding its identifying strings,
-- the enrollment and waitlist counters, and the time of the last scrape.
-- NOTE: "id" is a plain INT4 primary key with no DEFAULT or sequence, so every
-- INSERT has to supply the key explicitly.
-- NOTE: nothing here prevents two rows with the same ("crn", "term_code") pair.
CREATE TABLE "courses"(
"id" INT4 NOT NULL PRIMARY KEY,
"crn" VARCHAR NOT NULL,
"subject" VARCHAR NOT NULL,
"course_number" VARCHAR NOT NULL,
"title" VARCHAR NOT NULL,
"term_code" VARCHAR NOT NULL,
"enrollment" INT4 NOT NULL,
"max_enrollment" INT4 NOT NULL,
"wait_count" INT4 NOT NULL,
"wait_capacity" INT4 NOT NULL,
"last_scraped_at" TIMESTAMPTZ NOT NULL
);
-- Timestamped enrollment samples, each tied to a row in "courses".
-- NOTE: "id" has no DEFAULT, so inserts must provide it.
-- The foreign key declares no ON DELETE action, so PostgreSQL's default applies:
-- a course that still has metric rows cannot be deleted.
CREATE TABLE "course_metrics"(
"id" INT4 NOT NULL PRIMARY KEY,
"course_id" INT4 NOT NULL,
"timestamp" TIMESTAMPTZ NOT NULL,
"enrollment" INT4 NOT NULL,
"wait_count" INT4 NOT NULL,
"seats_available" INT4 NOT NULL,
FOREIGN KEY ("course_id") REFERENCES "courses"("id")
);
-- Change log for course rows: which field changed, when, and the old and new
-- values stored as text.
-- NOTE: "id" has no DEFAULT, so inserts must provide it.
-- The foreign key declares no ON DELETE action, so a course that still has
-- audit rows cannot be deleted.
CREATE TABLE "course_audits"(
"id" INT4 NOT NULL PRIMARY KEY,
"course_id" INT4 NOT NULL,
"timestamp" TIMESTAMPTZ NOT NULL,
"field_changed" VARCHAR NOT NULL,
"old_value" TEXT NOT NULL,
"new_value" TEXT NOT NULL,
FOREIGN KEY ("course_id") REFERENCES "courses"("id")
);

View File

@@ -0,0 +1,56 @@
-- Drop all old tables
DROP TABLE IF EXISTS scrape_jobs;
DROP TABLE IF EXISTS course_metrics;
DROP TABLE IF EXISTS course_audits;
DROP TABLE IF EXISTS courses;
-- Enum types used by the scrape_jobs columns `priority` and `target_type`.
-- Labels are spelled in PascalCase; the Rust enums that map to these types use
-- `rename_all = "PascalCase"`, so the spellings have to stay in sync.
-- PostgreSQL orders enum values by declaration order, so comparisons on
-- scrape_priority rank 'Low' < 'Medium' < 'High' < 'Critical'.
-- These are plain CREATE TYPE statements: they fail if the types already exist.
CREATE TYPE scrape_priority AS ENUM ('Low', 'Medium', 'High', 'Critical');
CREATE TYPE target_type AS ENUM ('Subject', 'CourseRange', 'CrnList', 'SingleCrn');
-- Main course data table.
-- `id` is SERIAL, so the database assigns the key on insert.
-- UNIQUE(crn, term_code) allows at most one row per CRN within a term, which
-- also gives upserts a conflict target.
-- The enrollment and waitlist counters and `last_scraped_at` are all NOT NULL.
CREATE TABLE courses (
id SERIAL PRIMARY KEY,
crn VARCHAR NOT NULL,
subject VARCHAR NOT NULL,
course_number VARCHAR NOT NULL,
title VARCHAR NOT NULL,
term_code VARCHAR NOT NULL,
enrollment INTEGER NOT NULL,
max_enrollment INTEGER NOT NULL,
wait_count INTEGER NOT NULL,
wait_capacity INTEGER NOT NULL,
last_scraped_at TIMESTAMPTZ NOT NULL,
UNIQUE(crn, term_code)
);
-- Time-series data for course enrollment.
-- Each row is one timestamped sample belonging to a course.
-- ON DELETE CASCADE: deleting a course also deletes its metric rows.
-- NOTE(review): no index on course_id or timestamp is declared here; confirm
-- whether one is added elsewhere.
CREATE TABLE course_metrics (
id SERIAL PRIMARY KEY,
course_id INTEGER NOT NULL REFERENCES courses(id) ON DELETE CASCADE,
timestamp TIMESTAMPTZ NOT NULL,
enrollment INTEGER NOT NULL,
wait_count INTEGER NOT NULL,
seats_available INTEGER NOT NULL
);
-- Audit trail for changes to course data.
-- Each row names the field that changed and stores both values as TEXT,
-- whatever the field's own column type is.
-- ON DELETE CASCADE: deleting a course also deletes its audit rows.
CREATE TABLE course_audits (
id SERIAL PRIMARY KEY,
course_id INTEGER NOT NULL REFERENCES courses(id) ON DELETE CASCADE,
timestamp TIMESTAMPTZ NOT NULL,
field_changed VARCHAR NOT NULL,
old_value TEXT NOT NULL,
new_value TEXT NOT NULL
);
-- Job queue for the scraper.
-- `target_type` says how `target_payload` (JSONB) is to be interpreted.
-- `created_at` defaults to NOW(), so inserts may omit it.
-- `locked_at` is the only nullable column.
-- NOTE(review): NULL in locked_at presumably means "not claimed by a worker";
-- confirm against the queue code.
CREATE TABLE scrape_jobs (
id SERIAL PRIMARY KEY,
target_type target_type NOT NULL,
target_payload JSONB NOT NULL,
priority scrape_priority NOT NULL,
execute_at TIMESTAMPTZ NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
locked_at TIMESTAMPTZ
);

View File

@@ -1,12 +1,9 @@
//! Diesel models for the database schema.
//! `sqlx` models for the database schema.
use crate::data::schema::{course_audits, course_metrics, courses, scrape_jobs};
use chrono::{DateTime, Utc};
use diesel::{Insertable, Queryable, QueryableByName, Selectable};
use serde_json::Value;
#[derive(Queryable, Selectable)]
#[diesel(table_name = courses)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct Course {
pub id: i32,
pub crn: String,
@@ -21,24 +18,7 @@ pub struct Course {
pub last_scraped_at: DateTime<Utc>,
}
/// Insertable row for the `courses` table.
///
/// All string columns are borrowed for `'a`, so building a value does not
/// copy the underlying text. There is no `id` field, so an insert through
/// this struct never supplies the primary key itself.
#[derive(Insertable)]
#[diesel(table_name = courses)]
pub struct NewCourse<'a> {
pub crn: &'a str,
pub subject: &'a str,
pub course_number: &'a str,
pub title: &'a str,
pub term_code: &'a str,
pub enrollment: i32,
pub max_enrollment: i32,
pub wait_count: i32,
pub wait_capacity: i32,
/// UTC instant written to the `last_scraped_at` column.
pub last_scraped_at: DateTime<Utc>,
}
#[derive(Queryable, Selectable)]
#[diesel(table_name = course_metrics)]
#[diesel(belongs_to(Course))]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct CourseMetric {
pub id: i32,
pub course_id: i32,
@@ -48,19 +28,7 @@ pub struct CourseMetric {
pub seats_available: i32,
}
/// Insertable row for the `course_metrics` table.
///
/// Holds one timestamped sample of a course's counters. There is no `id`
/// field, so an insert through this struct never supplies the primary key.
#[derive(Insertable)]
#[diesel(table_name = course_metrics)]
pub struct NewCourseMetric {
/// Value for the `course_id` column, which references `courses.id`.
pub course_id: i32,
pub timestamp: DateTime<Utc>,
pub enrollment: i32,
pub wait_count: i32,
pub seats_available: i32,
}
#[derive(Queryable, Selectable)]
#[diesel(table_name = course_audits)]
#[diesel(belongs_to(Course))]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct CourseAudit {
pub id: i32,
pub course_id: i32,
@@ -70,18 +38,9 @@ pub struct CourseAudit {
pub new_value: String,
}
/// Insertable row for the `course_audits` table.
///
/// Records a single field change. Both values are carried as borrowed
/// strings, so callers must render non-string fields to text first.
/// There is no `id` field, so an insert never supplies the primary key.
#[derive(Insertable)]
#[diesel(table_name = course_audits)]
pub struct NewCourseAudit<'a> {
/// Value for the `course_id` column, which references `courses.id`.
pub course_id: i32,
pub timestamp: DateTime<Utc>,
/// Name of the field that changed.
pub field_changed: &'a str,
pub old_value: &'a str,
pub new_value: &'a str,
}
/// The priority level of a scrape job.
#[derive(diesel_derive_enum::DbEnum, Copy, Debug, Clone)]
#[derive(sqlx::Type, Copy, Debug, Clone)]
#[sqlx(type_name = "scrape_priority", rename_all = "PascalCase")]
pub enum ScrapePriority {
Low,
Medium,
@@ -90,7 +49,8 @@ pub enum ScrapePriority {
}
/// The type of target for a scrape job, determining how the payload is interpreted.
#[derive(diesel_derive_enum::DbEnum, Copy, Debug, Clone)]
#[derive(sqlx::Type, Copy, Debug, Clone)]
#[sqlx(type_name = "target_type", rename_all = "PascalCase")]
pub enum TargetType {
Subject,
CourseRange,
@@ -99,8 +59,7 @@ pub enum TargetType {
}
/// Represents a queryable job from the database.
#[derive(Debug, Clone, Queryable, QueryableByName)]
#[diesel(table_name = scrape_jobs)]
#[derive(sqlx::FromRow, Debug, Clone)]
pub struct ScrapeJob {
pub id: i32,
pub target_type: TargetType,
@@ -110,14 +69,3 @@ pub struct ScrapeJob {
pub created_at: DateTime<Utc>,
pub locked_at: Option<DateTime<Utc>>,
}
/// Represents a new job to be inserted into the `scrape_jobs` table.
///
/// Only the caller-chosen columns are present; `id`, `created_at` and
/// `locked_at` are left out of the insert.
/// NOTE(review): this relies on the database filling `id` and `created_at`
/// by default; confirm against the migration that creates the table.
#[derive(Debug, Clone, Insertable)]
#[diesel(table_name = scrape_jobs)]
pub struct NewScrapeJob {
/// How `target_payload` is to be interpreted.
pub target_type: TargetType,
/// JSON payload, bound explicitly as PostgreSQL `jsonb`.
#[diesel(sql_type = diesel::sql_types::Jsonb)]
pub target_payload: Value,
pub priority: ScrapePriority,
pub execute_at: DateTime<Utc>,
}

View File

@@ -1,69 +1,69 @@
/// Diesel SQL-type markers for the custom PostgreSQL enum types.
pub mod sql_types {
/// Marker for the PostgreSQL type named `scrape_priority`.
#[derive(diesel::sql_types::SqlType)]
#[diesel(postgres_type(name = "scrape_priority"))]
pub struct ScrapePriority;
// pub mod sql_types {
// #[derive(diesel::sql_types::SqlType)]
// #[diesel(postgres_type(name = "scrape_priority"))]
// pub struct ScrapePriority;
/// Marker for the PostgreSQL type named `target_type`.
#[derive(diesel::sql_types::SqlType)]
#[diesel(postgres_type(name = "target_type"))]
pub struct TargetType;
}
// #[derive(diesel::sql_types::SqlType)]
// #[diesel(postgres_type(name = "target_type"))]
// pub struct TargetType;
// }
use super::models::{ScrapePriorityMapping, TargetTypeMapping};
// use super::models::{ScrapePriorityMapping, TargetTypeMapping};
// Diesel table definition for `scrape_jobs`, keyed by `id`.
// The two enum columns use the `*Mapping` types imported from the parent
// module rather than built-in SQL types; `locked_at` is the only nullable column.
diesel::table! {
use diesel::sql_types::*;
use super::{ScrapePriorityMapping, TargetTypeMapping};
// diesel::table! {
// use diesel::sql_types::*;
// use super::{ScrapePriorityMapping, TargetTypeMapping};
scrape_jobs (id) {
id -> Int4,
target_type -> TargetTypeMapping,
target_payload -> Jsonb,
priority -> ScrapePriorityMapping,
execute_at -> Timestamptz,
created_at -> Timestamptz,
locked_at -> Nullable<Timestamptz>,
}
}
// scrape_jobs (id) {
// id -> Int4,
// target_type -> TargetTypeMapping,
// target_payload -> Jsonb,
// priority -> ScrapePriorityMapping,
// execute_at -> Timestamptz,
// created_at -> Timestamptz,
// locked_at -> Nullable<Timestamptz>,
// }
// }
// Diesel table definition for `courses`, keyed by `id`.
// Every column is non-nullable. Diesel does not check these types against
// the database, so they must be kept in step with the migration by hand.
diesel::table! {
courses (id) {
id -> Int4,
crn -> Varchar,
subject -> Varchar,
course_number -> Varchar,
title -> Varchar,
term_code -> Varchar,
enrollment -> Int4,
max_enrollment -> Int4,
wait_count -> Int4,
wait_capacity -> Int4,
last_scraped_at -> Timestamptz,
}
}
// diesel::table! {
// courses (id) {
// id -> Int4,
// crn -> Varchar,
// subject -> Varchar,
// course_number -> Varchar,
// title -> Varchar,
// term_code -> Varchar,
// enrollment -> Int4,
// max_enrollment -> Int4,
// wait_count -> Int4,
// wait_capacity -> Int4,
// last_scraped_at -> Timestamptz,
// }
// }
// Diesel table definition for `course_metrics`, keyed by `id`.
// `course_id` is the column the `joinable!` declaration in this file uses
// to link a metric row to `courses`.
diesel::table! {
course_metrics (id) {
id -> Int4,
course_id -> Int4,
timestamp -> Timestamptz,
enrollment -> Int4,
wait_count -> Int4,
seats_available -> Int4,
}
}
// diesel::table! {
// course_metrics (id) {
// id -> Int4,
// course_id -> Int4,
// timestamp -> Timestamptz,
// enrollment -> Int4,
// wait_count -> Int4,
// seats_available -> Int4,
// }
// }
// Diesel table definition for `course_audits`, keyed by `id`.
// `old_value` and `new_value` are `Text`, and `field_changed` is `Varchar`.
diesel::table! {
course_audits (id) {
id -> Int4,
course_id -> Int4,
timestamp -> Timestamptz,
field_changed -> Varchar,
old_value -> Text,
new_value -> Text,
}
}
// diesel::table! {
// course_audits (id) {
// id -> Int4,
// course_id -> Int4,
// timestamp -> Timestamptz,
// field_changed -> Varchar,
// old_value -> Text,
// new_value -> Text,
// }
// }
// Declare that `course_metrics` and `course_audits` each join to `courses`
// through their `course_id` column.
diesel::joinable!(course_metrics -> courses (course_id));
diesel::joinable!(course_audits -> courses (course_id));
// diesel::joinable!(course_metrics -> courses (course_id));
// diesel::joinable!(course_audits -> courses (course_id));
// Permit all four tables to be referenced together within a single query.
diesel::allow_tables_to_appear_in_same_query!(courses, course_metrics, course_audits, scrape_jobs,);
// diesel::allow_tables_to_appear_in_same_query!(courses, course_metrics, course_audits, scrape_jobs,);