mirror of
https://github.com/Xevion/banner.git
synced 2026-01-31 04:23:34 -06:00
feat: extract database operations module and add extensive test suite
This commit is contained in:
@@ -102,3 +102,83 @@ impl JobType {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Runs `JobType::from_target_type_and_payload` for the `Subject` target
    /// type, which is what most tests in this module exercise.
    fn parse_subject(
        payload: serde_json::Value,
    ) -> std::result::Result<JobType, JobParseError> {
        JobType::from_target_type_and_payload(TargetType::Subject, payload)
    }

    // --- Valid dispatch ---

    /// A payload carrying a string `subject` is expected to yield `JobType::Subject`.
    #[test]
    fn test_from_target_subject_valid() {
        let result = parse_subject(json!({"subject": "CS"}));
        assert!(matches!(result, Ok(JobType::Subject(_))));
    }

    /// An empty `subject` string is still expected to parse into `JobType::Subject`.
    #[test]
    fn test_from_target_subject_empty_string() {
        let result = parse_subject(json!({"subject": ""}));
        assert!(matches!(result, Ok(JobType::Subject(_))));
    }

    // --- Invalid JSON ---

    /// An object without the `subject` key is expected to be rejected as invalid JSON.
    #[test]
    fn test_from_target_subject_missing_field() {
        let result = parse_subject(json!({}));
        assert!(matches!(result, Err(JobParseError::InvalidJson(_))));
    }

    /// A numeric `subject` is expected to be rejected as invalid JSON.
    #[test]
    fn test_from_target_subject_wrong_type() {
        let result = parse_subject(json!({"subject": 123}));
        assert!(matches!(result, Err(JobParseError::InvalidJson(_))));
    }

    /// A `null` payload is expected to be rejected as invalid JSON.
    #[test]
    fn test_from_target_subject_null_payload() {
        let result = parse_subject(json!(null));
        assert!(matches!(result, Err(JobParseError::InvalidJson(_))));
    }

    // --- Unsupported target types ---

    /// Every target type other than `Subject` is expected to report
    /// `UnsupportedTargetType`, even when handed an otherwise valid payload.
    #[test]
    fn test_from_target_unsupported_variants() {
        let payload = json!({"subject": "CS"});

        for target_type in [
            TargetType::CourseRange,
            TargetType::CrnList,
            TargetType::SingleCrn,
        ] {
            let result = JobType::from_target_type_and_payload(target_type, payload.clone());
            assert!(
                matches!(result, Err(JobParseError::UnsupportedTargetType(_))),
                "expected UnsupportedTargetType for {target_type:?}"
            );
        }
    }

    // --- Error Display ---

    /// The `Display` output of each error variant is expected to contain its
    /// human-readable prefix.
    #[test]
    fn test_job_parse_error_display() {
        let display = parse_subject(json!(null)).unwrap_err().to_string();
        assert!(display.contains("Invalid JSON"), "got: {display}");

        let display = JobType::from_target_type_and_payload(TargetType::CrnList, json!({}))
            .unwrap_err()
            .to_string();
        assert!(
            display.contains("Unsupported target type"),
            "got: {display}"
        );
    }
}
|
||||
|
||||
+12
-27
@@ -1,5 +1,6 @@
|
||||
use crate::banner::{BannerApi, Term};
|
||||
use crate::data::models::{ScrapePriority, TargetType};
|
||||
use crate::data::scrape_jobs;
|
||||
use crate::error::Result;
|
||||
use crate::scraper::jobs::subject::SubjectJob;
|
||||
use serde_json::json;
|
||||
@@ -123,21 +124,13 @@ impl Scheduler {
|
||||
.collect();
|
||||
|
||||
// Query existing jobs for all subjects in a single query
|
||||
let existing_jobs: Vec<(serde_json::Value,)> = sqlx::query_as(
|
||||
"SELECT target_payload FROM scrape_jobs
|
||||
WHERE target_type = $1 AND target_payload = ANY($2) AND locked_at IS NULL",
|
||||
let existing_payloads = scrape_jobs::find_existing_job_payloads(
|
||||
TargetType::Subject,
|
||||
&subject_payloads,
|
||||
db_pool,
|
||||
)
|
||||
.bind(TargetType::Subject)
|
||||
.bind(&subject_payloads)
|
||||
.fetch_all(db_pool)
|
||||
.await?;
|
||||
|
||||
// Convert to a HashSet for efficient lookup
|
||||
let existing_payloads: std::collections::HashSet<String> = existing_jobs
|
||||
.into_iter()
|
||||
.map(|(payload,)| payload.to_string())
|
||||
.collect();
|
||||
|
||||
// Filter out subjects that already have jobs and prepare new jobs
|
||||
let mut skipped_count = 0;
|
||||
let new_jobs: Vec<_> = subjects
|
||||
@@ -162,24 +155,16 @@ impl Scheduler {
|
||||
|
||||
// Insert all new jobs in a single batch
|
||||
if !new_jobs.is_empty() {
|
||||
let now = chrono::Utc::now();
|
||||
let mut tx = db_pool.begin().await?;
|
||||
|
||||
for (payload, subject_code) in new_jobs {
|
||||
sqlx::query(
|
||||
"INSERT INTO scrape_jobs (target_type, target_payload, priority, execute_at) VALUES ($1, $2, $3, $4)"
|
||||
)
|
||||
.bind(TargetType::Subject)
|
||||
.bind(&payload)
|
||||
.bind(ScrapePriority::Low)
|
||||
.bind(now)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
|
||||
for (_, subject_code) in &new_jobs {
|
||||
debug!(subject = subject_code, "New job enqueued for subject");
|
||||
}
|
||||
|
||||
tx.commit().await?;
|
||||
let jobs: Vec<_> = new_jobs
|
||||
.into_iter()
|
||||
.map(|(payload, _)| (payload, TargetType::Subject, ScrapePriority::Low))
|
||||
.collect();
|
||||
|
||||
scrape_jobs::batch_insert_jobs(&jobs, db_pool).await?;
|
||||
}
|
||||
|
||||
debug!("Job scheduling complete");
|
||||
|
||||
+5
-40
@@ -1,5 +1,6 @@
|
||||
use crate::banner::{BannerApi, BannerApiError};
|
||||
use crate::data::models::ScrapeJob;
|
||||
use crate::data::scrape_jobs;
|
||||
use crate::error::Result;
|
||||
use crate::scraper::jobs::{JobError, JobType};
|
||||
use sqlx::PgPool;
|
||||
@@ -83,24 +84,7 @@ impl Worker {
|
||||
/// This uses a `FOR UPDATE SKIP LOCKED` query to ensure that multiple
|
||||
/// workers can poll the queue concurrently without conflicts.
|
||||
// NOTE(review): this is a diff hunk rendered without +/- markers. Going by the
// hunk header (`-83,24 +84,7`), the inline transaction below appears to be the
// removed body and the trailing `scrape_jobs::fetch_and_lock_job` call its
// replacement; confirm against the repository.
async fn fetch_and_lock_job(&self) -> Result<Option<ScrapeJob>> {
|
||||
// Open a transaction so the select and the lock update are committed together.
let mut tx = self.db_pool.begin().await?;
|
||||
|
||||
// Select at most one job that is unlocked and due (`execute_at <= NOW()`),
// ordered by priority descending, then by `execute_at` ascending.
let job = sqlx::query_as::<_, ScrapeJob>(
|
||||
"SELECT * FROM scrape_jobs WHERE locked_at IS NULL AND execute_at <= NOW() ORDER BY priority DESC, execute_at ASC LIMIT 1 FOR UPDATE SKIP LOCKED"
|
||||
)
|
||||
.fetch_optional(&mut *tx)
|
||||
.await?;
|
||||
|
||||
// If a job was selected, stamp `locked_at` on it inside the same transaction.
if let Some(ref job) = job {
|
||||
sqlx::query("UPDATE scrape_jobs SET locked_at = NOW() WHERE id = $1")
|
||||
.bind(job.id)
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
}
|
||||
|
||||
tx.commit().await?;
|
||||
|
||||
// `None` here means no row matched the select above.
Ok(job)
|
||||
// Delegates to the `scrape_jobs` module, passing this worker's pool.
scrape_jobs::fetch_and_lock_job(&self.db_pool).await
|
||||
}
|
||||
|
||||
async fn process_job(&self, job: ScrapeJob) -> Result<(), JobError> {
|
||||
@@ -139,34 +123,15 @@ impl Worker {
|
||||
}
|
||||
|
||||
// Deletes the `scrape_jobs` row whose `id` equals `job_id`.
// NOTE(review): diff hunk rendered without +/- markers. Going by the hunk
// header (`-139,34 +123,15`), the inline `sqlx::query` chain and `Ok(())`
// appear to be the removed body and the `scrape_jobs::delete_job` call its
// replacement; confirm against the repository.
async fn delete_job(&self, job_id: i32) -> Result<()> {
|
||||
sqlx::query("DELETE FROM scrape_jobs WHERE id = $1")
|
||||
.bind(job_id)
|
||||
.execute(&self.db_pool)
|
||||
.await?;
|
||||
Ok(())
|
||||
// Delegates to the `scrape_jobs` module, passing the id and this worker's pool.
scrape_jobs::delete_job(job_id, &self.db_pool).await
|
||||
}
|
||||
|
||||
// Clears `locked_at` on the `scrape_jobs` row whose `id` equals `job_id`.
// NOTE(review): diff hunk rendered without +/- markers. Going by the hunk
// header (`-139,34 +123,15`), the inline `sqlx::query` chain and `Ok(())`
// appear to be the removed body and the `scrape_jobs::unlock_job` call its
// replacement; confirm against the repository.
async fn unlock_job(&self, job_id: i32) -> Result<()> {
|
||||
sqlx::query("UPDATE scrape_jobs SET locked_at = NULL WHERE id = $1")
|
||||
.bind(job_id)
|
||||
.execute(&self.db_pool)
|
||||
.await?;
|
||||
Ok(())
|
||||
// Delegates to the `scrape_jobs` module, passing the id and this worker's pool.
scrape_jobs::unlock_job(job_id, &self.db_pool).await
|
||||
}
|
||||
|
||||
// Clears `locked_at` and increments `retry_count` for the job with id `job_id`,
// returning `Ok(true)` when the query's RETURNING expression is non-NULL and
// `Ok(false)` when it is NULL.
// NOTE(review): diff hunk rendered without +/- markers. Going by the hunk
// header (`-139,34 +123,15`), the inline `sqlx::query_scalar` block and
// `Ok(result.is_some())` appear to be the removed body and the
// `scrape_jobs::unlock_and_increment_retry` call its replacement; confirm
// against the repository.
async fn unlock_and_increment_retry(&self, job_id: i32, max_retries: i32) -> Result<bool> {
|
||||
// `$1` is bound to `job_id` and `$2` to `max_retries`.
// NOTE(review): PostgreSQL evaluates RETURNING against the updated row, so
// `retry_count` inside the CASE is presumably the already-incremented value;
// verify the comparison against `$2` is the intended threshold.
let result = sqlx::query_scalar::<_, Option<i32>>(
|
||||
"UPDATE scrape_jobs
|
||||
SET locked_at = NULL, retry_count = retry_count + 1
|
||||
WHERE id = $1
|
||||
RETURNING CASE WHEN retry_count + 1 < $2 THEN retry_count + 1 ELSE NULL END",
|
||||
)
|
||||
.bind(job_id)
|
||||
.bind(max_retries)
|
||||
.fetch_one(&self.db_pool)
|
||||
.await?;
|
||||
|
||||
// A non-NULL scalar maps to `true`, NULL to `false`.
Ok(result.is_some())
|
||||
// Delegates to the `scrape_jobs` module with the same id, retry limit, and pool.
scrape_jobs::unlock_and_increment_retry(job_id, max_retries, &self.db_pool).await
|
||||
}
|
||||
|
||||
/// Handle shutdown signal received during job processing
|
||||
|
||||
Reference in New Issue
Block a user