feat: move scraper into separate module, begin building data models

This commit is contained in:
2025-08-29 11:07:46 -05:00
parent e734e40347
commit 4764d48ac9
10 changed files with 146 additions and 329 deletions

View File

@@ -1,8 +1,9 @@
//! Diesel models for the database schema.
use crate::data::schema::{course_audits, course_metrics, courses};
use crate::data::schema::{course_audits, course_metrics, courses, scrape_jobs};
use chrono::{DateTime, Utc};
use diesel::{Insertable, Queryable, Selectable};
use diesel::{Insertable, Queryable, QueryableByName, Selectable};
use serde_json::Value;
#[derive(Queryable, Selectable)]
#[diesel(table_name = courses)]
@@ -78,3 +79,45 @@ pub struct NewCourseAudit<'a> {
pub old_value: &'a str,
pub new_value: &'a str,
}
/// The priority level of a scrape job.
///
/// Variants are declared from least to most urgent, so the derived
/// [`Ord`] implementation yields `Low < Medium < High < Critical` and
/// priorities can be compared, sorted, or used as map keys directly.
///
/// NOTE(review): the Rust ordering comes from declaration order only; the
/// ordering of the corresponding database enum is defined by its migration,
/// which is not visible here — confirm the two agree before relying on
/// `ORDER BY priority` matching Rust-side comparisons.
#[derive(diesel_derive_enum::DbEnum, Copy, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ScrapePriority {
    /// Least urgent level.
    Low,
    /// Above `Low`, below `High`.
    Medium,
    /// Above `Medium`, below `Critical`.
    High,
    /// Most urgent level.
    Critical,
}
/// The type of target for a scrape job, determining how the payload is interpreted.
///
/// Derives equality and hashing so callers can compare target types with
/// `==` or use them as keys, in addition to the database enum mapping
/// provided by `DbEnum`.
///
/// NOTE(review): the exact payload shape expected for each variant is not
/// defined in this module — confirm against the code that consumes the job.
#[derive(diesel_derive_enum::DbEnum, Copy, Debug, Clone, PartialEq, Eq, Hash)]
pub enum TargetType {
    /// The job targets a subject.
    Subject,
    /// The job targets a range of courses.
    CourseRange,
    /// The job targets a list of CRNs.
    CrnList,
    /// The job targets a single CRN.
    SingleCrn,
}
/// Represents a queryable job from the database.
#[derive(Debug, Clone, Queryable, QueryableByName)]
#[diesel(table_name = scrape_jobs)]
pub struct ScrapeJob {
pub id: i32,
pub target_type: TargetType,
pub target_payload: Value,
pub priority: ScrapePriority,
pub execute_at: DateTime<Utc>,
pub created_at: DateTime<Utc>,
pub locked_at: Option<DateTime<Utc>>,
}
/// Insert payload for a new row in the `scrape_jobs` table.
///
/// Only the caller-supplied columns appear here; `id`, `created_at` and
/// `locked_at` are not part of the insert and are left to the database.
#[derive(Clone, Debug, Insertable)]
#[diesel(table_name = scrape_jobs)]
pub struct NewScrapeJob {
    /// How `target_payload` should be interpreted.
    pub target_type: TargetType,

    /// JSON payload describing the target, written to a `Jsonb` column.
    #[diesel(sql_type = diesel::sql_types::Jsonb)]
    pub target_payload: Value,

    /// Priority level to assign to the job.
    pub priority: ScrapePriority,

    /// Value for the `execute_at` column.
    pub execute_at: DateTime<Utc>,
}

View File

@@ -1,3 +1,30 @@
/// Marker types naming the custom Postgres types used by this schema.
///
/// NOTE(review): the `scrape_jobs` table definition in this file refers to
/// `ScrapePriorityMapping` / `TargetTypeMapping` rather than these markers —
/// confirm whether both sets of SQL types are still needed.
pub mod sql_types {
    /// SQL-side marker for the Postgres type named `scrape_priority`.
    #[derive(diesel::sql_types::SqlType)]
    #[diesel(postgres_type(name = "scrape_priority"))]
    pub struct ScrapePriority;

    /// SQL-side marker for the Postgres type named `target_type`.
    #[derive(diesel::sql_types::SqlType)]
    #[diesel(postgres_type(name = "target_type"))]
    pub struct TargetType;
}
use super::models::{ScrapePriorityMapping, TargetTypeMapping};
// Table definition for `scrape_jobs`, keyed by `id`.
diesel::table! {
    // Built-in SQL types (Int4, Jsonb, Timestamptz, Nullable, ...).
    use diesel::sql_types::*;
    // SQL types for the two enum columns, re-exported by the parent module.
    use super::{ScrapePriorityMapping, TargetTypeMapping};

    scrape_jobs (id) {
        id -> Int4,
        // Enum column; decides how `target_payload` is interpreted.
        target_type -> TargetTypeMapping,
        target_payload -> Jsonb,
        // Enum column holding the job's priority level.
        priority -> ScrapePriorityMapping,
        execute_at -> Timestamptz,
        created_at -> Timestamptz,
        // The only nullable column in this table.
        locked_at -> Nullable<Timestamptz>,
    }
}
diesel::table! {
courses (id) {
id -> Int4,
@@ -39,4 +66,4 @@ diesel::table! {
// Declare that `course_metrics` and `course_audits` each join to `courses`
// through their `course_id` column.
diesel::joinable!(course_metrics -> courses (course_id));
diesel::joinable!(course_audits -> courses (course_id));
// Allow the listed tables to be referenced together in a single query.
// NOTE(review): the next two lines are the before/after versions of the same
// statement as shown by the diff view; the second one adds `scrape_jobs`.
// Only the second should exist in the actual schema file.
diesel::allow_tables_to_appear_in_same_query!(courses, course_metrics, course_audits,);
diesel::allow_tables_to_appear_in_same_query!(courses, course_metrics, course_audits, scrape_jobs,);