feat: extract database operations module and add extensive test suite

2026-01-28 17:32:27 -06:00
parent 992263205c
commit 1733ee5f86
14 changed files with 1539 additions and 80 deletions
+80
@@ -102,3 +102,83 @@ impl JobType {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    // --- Valid dispatch ---
+
+    #[test]
+    fn test_from_target_subject_valid() {
+        let result =
+            JobType::from_target_type_and_payload(TargetType::Subject, json!({"subject": "CS"}));
+        assert!(matches!(result, Ok(JobType::Subject(_))));
+    }
+
+    #[test]
+    fn test_from_target_subject_empty_string() {
+        let result =
+            JobType::from_target_type_and_payload(TargetType::Subject, json!({"subject": ""}));
+        assert!(matches!(result, Ok(JobType::Subject(_))));
+    }
+
+    // --- Invalid JSON ---
+
+    #[test]
+    fn test_from_target_subject_missing_field() {
+        let result = JobType::from_target_type_and_payload(TargetType::Subject, json!({}));
+        assert!(matches!(result, Err(JobParseError::InvalidJson(_))));
+    }
+
+    #[test]
+    fn test_from_target_subject_wrong_type() {
+        let result =
+            JobType::from_target_type_and_payload(TargetType::Subject, json!({"subject": 123}));
+        assert!(matches!(result, Err(JobParseError::InvalidJson(_))));
+    }
+
+    #[test]
+    fn test_from_target_subject_null_payload() {
+        let result = JobType::from_target_type_and_payload(TargetType::Subject, json!(null));
+        assert!(matches!(result, Err(JobParseError::InvalidJson(_))));
+    }
+
+    // --- Unsupported target types ---
+
+    #[test]
+    fn test_from_target_unsupported_variants() {
+        let unsupported = [
+            TargetType::CourseRange,
+            TargetType::CrnList,
+            TargetType::SingleCrn,
+        ];
+        for target_type in unsupported {
+            let result =
+                JobType::from_target_type_and_payload(target_type, json!({"subject": "CS"}));
+            assert!(
+                matches!(result, Err(JobParseError::UnsupportedTargetType(_))),
+                "expected UnsupportedTargetType for {target_type:?}"
+            );
+        }
+    }
+
+    // --- Error Display ---
+
+    #[test]
+    fn test_job_parse_error_display() {
+        let invalid_json_err =
+            JobType::from_target_type_and_payload(TargetType::Subject, json!(null)).unwrap_err();
+        let display = invalid_json_err.to_string();
+        assert!(display.contains("Invalid JSON"), "got: {display}");
+
+        let unsupported_err =
+            JobType::from_target_type_and_payload(TargetType::CrnList, json!({})).unwrap_err();
+        let display = unsupported_err.to_string();
+        assert!(
+            display.contains("Unsupported target type"),
+            "got: {display}"
+        );
+    }
+}
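
Note: the `JobType::from_target_type_and_payload` dispatcher and its error type live earlier in this file and are not part of the hunk. A minimal sketch of the shapes the assertions above imply might look like the following; the `thiserror` derivation and the full message strings are assumptions, since the tests only pin the "Invalid JSON" and "Unsupported target type" prefixes:

// Sketch only: inferred from the test assertions, not the actual module.
use serde::Deserialize;
use serde_json::Value;
use thiserror::Error;

#[derive(Debug, Clone, Copy)]
pub enum TargetType {
    Subject,
    CourseRange,
    CrnList,
    SingleCrn,
}

#[derive(Debug, Deserialize)]
pub struct SubjectJob {
    pub subject: String,
}

#[derive(Debug)]
pub enum JobType {
    Subject(SubjectJob),
}

#[derive(Debug, Error)]
pub enum JobParseError {
    // "Invalid JSON" is the prefix test_job_parse_error_display checks for.
    #[error("Invalid JSON payload: {0}")]
    InvalidJson(#[from] serde_json::Error),
    #[error("Unsupported target type: {0:?}")]
    UnsupportedTargetType(TargetType),
}

impl JobType {
    pub fn from_target_type_and_payload(
        target_type: TargetType,
        payload: Value,
    ) -> Result<Self, JobParseError> {
        match target_type {
            // A missing, mistyped, or null "subject" field surfaces as
            // InvalidJson via the #[from] conversion on serde_json::Error.
            TargetType::Subject => Ok(JobType::Subject(serde_json::from_value(payload)?)),
            other => Err(JobParseError::UnsupportedTargetType(other)),
        }
    }
}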
+12 -27
@@ -1,5 +1,6 @@
 use crate::banner::{BannerApi, Term};
 use crate::data::models::{ScrapePriority, TargetType};
+use crate::data::scrape_jobs;
 use crate::error::Result;
 use crate::scraper::jobs::subject::SubjectJob;
 use serde_json::json;
@@ -123,21 +124,13 @@ impl Scheduler {
             .collect();
 
         // Query existing jobs for all subjects in a single query
-        let existing_jobs: Vec<(serde_json::Value,)> = sqlx::query_as(
-            "SELECT target_payload FROM scrape_jobs
-             WHERE target_type = $1 AND target_payload = ANY($2) AND locked_at IS NULL",
+        let existing_payloads = scrape_jobs::find_existing_job_payloads(
+            TargetType::Subject,
+            &subject_payloads,
+            db_pool,
         )
-        .bind(TargetType::Subject)
-        .bind(&subject_payloads)
-        .fetch_all(db_pool)
         .await?;
 
-        // Convert to a HashSet for efficient lookup
-        let existing_payloads: std::collections::HashSet<String> = existing_jobs
-            .into_iter()
-            .map(|(payload,)| payload.to_string())
-            .collect();
-
         // Filter out subjects that already have jobs and prepare new jobs
         let mut skipped_count = 0;
        let new_jobs: Vec<_> = subjects
@@ -162,24 +155,16 @@ impl Scheduler {
 
         // Insert all new jobs in a single batch
         if !new_jobs.is_empty() {
-            let now = chrono::Utc::now();
-            let mut tx = db_pool.begin().await?;
-
-            for (payload, subject_code) in new_jobs {
-                sqlx::query(
-                    "INSERT INTO scrape_jobs (target_type, target_payload, priority, execute_at) VALUES ($1, $2, $3, $4)"
-                )
-                .bind(TargetType::Subject)
-                .bind(&payload)
-                .bind(ScrapePriority::Low)
-                .bind(now)
-                .execute(&mut *tx)
-                .await?;
-
+            for (_, subject_code) in &new_jobs {
                 debug!(subject = subject_code, "New job enqueued for subject");
             }
-            tx.commit().await?;
+
+            let jobs: Vec<_> = new_jobs
+                .into_iter()
+                .map(|(payload, _)| (payload, TargetType::Subject, ScrapePriority::Low))
+                .collect();
+            scrape_jobs::batch_insert_jobs(&jobs, db_pool).await?;
         }
 
         debug!("Job scheduling complete");
+5 -40
@@ -1,5 +1,6 @@
 use crate::banner::{BannerApi, BannerApiError};
 use crate::data::models::ScrapeJob;
+use crate::data::scrape_jobs;
 use crate::error::Result;
 use crate::scraper::jobs::{JobError, JobType};
 use sqlx::PgPool;
@@ -83,24 +84,7 @@ impl Worker {
     /// This uses a `FOR UPDATE SKIP LOCKED` query to ensure that multiple
     /// workers can poll the queue concurrently without conflicts.
     async fn fetch_and_lock_job(&self) -> Result<Option<ScrapeJob>> {
-        let mut tx = self.db_pool.begin().await?;
-
-        let job = sqlx::query_as::<_, ScrapeJob>(
-            "SELECT * FROM scrape_jobs WHERE locked_at IS NULL AND execute_at <= NOW() ORDER BY priority DESC, execute_at ASC LIMIT 1 FOR UPDATE SKIP LOCKED"
-        )
-        .fetch_optional(&mut *tx)
-        .await?;
-
-        if let Some(ref job) = job {
-            sqlx::query("UPDATE scrape_jobs SET locked_at = NOW() WHERE id = $1")
-                .bind(job.id)
-                .execute(&mut *tx)
-                .await?;
-        }
-
-        tx.commit().await?;
-        Ok(job)
+        scrape_jobs::fetch_and_lock_job(&self.db_pool).await
     }
 
     async fn process_job(&self, job: ScrapeJob) -> Result<(), JobError> {
@@ -139,34 +123,15 @@ impl Worker {
     }
 
     async fn delete_job(&self, job_id: i32) -> Result<()> {
-        sqlx::query("DELETE FROM scrape_jobs WHERE id = $1")
-            .bind(job_id)
-            .execute(&self.db_pool)
-            .await?;
-        Ok(())
+        scrape_jobs::delete_job(job_id, &self.db_pool).await
     }
 
     async fn unlock_job(&self, job_id: i32) -> Result<()> {
-        sqlx::query("UPDATE scrape_jobs SET locked_at = NULL WHERE id = $1")
-            .bind(job_id)
-            .execute(&self.db_pool)
-            .await?;
-        Ok(())
+        scrape_jobs::unlock_job(job_id, &self.db_pool).await
    }
 
     async fn unlock_and_increment_retry(&self, job_id: i32, max_retries: i32) -> Result<bool> {
-        let result = sqlx::query_scalar::<_, Option<i32>>(
-            "UPDATE scrape_jobs
-             SET locked_at = NULL, retry_count = retry_count + 1
-             WHERE id = $1
-             RETURNING CASE WHEN retry_count + 1 < $2 THEN retry_count + 1 ELSE NULL END",
-        )
-        .bind(job_id)
-        .bind(max_retries)
-        .fetch_one(&self.db_pool)
-        .await?;
-
-        Ok(result.is_some())
+        scrape_jobs::unlock_and_increment_retry(job_id, max_retries, &self.db_pool).await
     }
 
     /// Handle shutdown signal received during job processing
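
For completeness, the worker-side helpers delegated to above can be reconstructed almost verbatim from the deleted bodies. `delete_job` and `unlock_job` are single-statement wrappers; the two below carry the interesting logic. This extends the `scrape_jobs` sketch shown earlier (add `use crate::data::models::ScrapeJob;`), assuming the module keeps the removed SQL unchanged:

/// Atomically claims the next due job. FOR UPDATE SKIP LOCKED lets multiple
/// workers poll concurrently without blocking on each other's claims.
pub async fn fetch_and_lock_job(pool: &PgPool) -> Result<Option<ScrapeJob>> {
    let mut tx = pool.begin().await?;
    let job = sqlx::query_as::<_, ScrapeJob>(
        "SELECT * FROM scrape_jobs
         WHERE locked_at IS NULL AND execute_at <= NOW()
         ORDER BY priority DESC, execute_at ASC
         LIMIT 1 FOR UPDATE SKIP LOCKED",
    )
    .fetch_optional(&mut *tx)
    .await?;
    if let Some(ref job) = job {
        sqlx::query("UPDATE scrape_jobs SET locked_at = NOW() WHERE id = $1")
            .bind(job.id)
            .execute(&mut *tx)
            .await?;
    }
    tx.commit().await?;
    Ok(job)
}

/// Unlocks the job and bumps its retry count in one round trip; returns true
/// while the job is still within its retry budget.
pub async fn unlock_and_increment_retry(
    job_id: i32,
    max_retries: i32,
    pool: &PgPool,
) -> Result<bool> {
    let remaining = sqlx::query_scalar::<_, Option<i32>>(
        "UPDATE scrape_jobs
         SET locked_at = NULL, retry_count = retry_count + 1
         WHERE id = $1
         RETURNING CASE WHEN retry_count + 1 < $2 THEN retry_count + 1 ELSE NULL END",
    )
    .bind(job_id)
    .bind(max_retries)
    .fetch_one(pool)
    .await?;
    Ok(remaining.is_some())
}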