feat: extract database operations module and add extensive test suite

2026-01-31 02:23:34 -06:00 · 2026-01-28 17:32:27 -06:00
parent 992263205c
commit 1733ee5f86
14 changed files with 1539 additions and 80 deletions
@@ -0,0 +1,212 @@
+mod helpers;
+
+use banner::data::batch::batch_upsert_courses;
+use sqlx::PgPool;
+
+/// Upserting an empty slice must succeed and leave the `courses` table empty.
+#[sqlx::test]
+async fn test_batch_upsert_empty_slice(pool: PgPool) {
+    batch_upsert_courses(&[], &pool).await.unwrap();
+
+    let total_rows: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM courses")
+        .fetch_one(&pool)
+        .await
+        .unwrap();
+
+    assert_eq!(total_rows, 0);
+}
+
+/// Upserts two previously unseen courses and reads them back ordered by CRN.
+///
+/// Every selected column is checked for the first row; the second row is only
+/// checked for its CRN and subject.
+#[sqlx::test]
+async fn test_batch_upsert_inserts_new_courses(pool: PgPool) {
+    // Columns in the order they are selected below.
+    type CourseRow = (String, String, String, String, i32, i32, i32, i32);
+
+    let intro_cs =
+        helpers::make_course("10001", "202510", "CS", "1083", "Intro to CS", 25, 30, 0, 5);
+    let calculus =
+        helpers::make_course("10002", "202510", "MAT", "1214", "Calculus I", 40, 45, 3, 10);
+
+    batch_upsert_courses(&[intro_cs, calculus], &pool)
+        .await
+        .unwrap();
+
+    let stored: Vec<CourseRow> = sqlx::query_as(
+        "SELECT crn, subject, course_number, title, enrollment, max_enrollment, wait_count, wait_capacity
+         FROM courses ORDER BY crn",
+    )
+    .fetch_all(&pool)
+    .await
+    .unwrap();
+
+    assert_eq!(stored.len(), 2);
+
+    let first = &stored[0];
+    assert_eq!(first.0, "10001");
+    assert_eq!(first.1, "CS");
+    assert_eq!(first.2, "1083");
+    assert_eq!(first.3, "Intro to CS");
+    assert_eq!(first.4, 25);
+    assert_eq!(first.5, 30);
+    assert_eq!(first.6, 0);
+    assert_eq!(first.7, 5);
+
+    let second = &stored[1];
+    assert_eq!(second.0, "10002");
+    assert_eq!(second.1, "MAT");
+}
+
+/// Upserting the same CRN and term twice must update the existing row in place
+/// rather than inserting a second one.
+#[sqlx::test]
+async fn test_batch_upsert_updates_existing(pool: PgPool) {
+    // Same section each time; only enrollment and wait count vary between snapshots.
+    let snapshot = |enrollment, wait_count| {
+        helpers::make_course(
+            "20001",
+            "202510",
+            "CS",
+            "3443",
+            "App Programming",
+            enrollment,
+            35,
+            wait_count,
+            5,
+        )
+    };
+
+    batch_upsert_courses(&[snapshot(10, 0)], &pool)
+        .await
+        .unwrap();
+
+    // Second pass carries the updated enrollment figures for the same CRN+term.
+    batch_upsert_courses(&[snapshot(30, 2)], &pool)
+        .await
+        .unwrap();
+
+    let total_rows: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM courses")
+        .fetch_one(&pool)
+        .await
+        .unwrap();
+    assert_eq!(total_rows, 1, "upsert should not create a duplicate row");
+
+    let stored: (i32, i32) =
+        sqlx::query_as("SELECT enrollment, wait_count FROM courses WHERE crn = '20001'")
+            .fetch_one(&pool)
+            .await
+            .unwrap();
+    let (stored_enrollment, stored_wait_count) = stored;
+    assert_eq!(stored_enrollment, 30);
+    assert_eq!(stored_wait_count, 2);
+}
+
+/// A single batch that contains two already-stored courses and one new course
+/// must update the former and insert the latter.
+#[sqlx::test]
+async fn test_batch_upsert_mixed_insert_and_update(pool: PgPool) {
+    // Fixture builders for the two courses that appear in both batches.
+    let intro_cs = |enrollment, wait_count| {
+        helpers::make_course(
+            "30001",
+            "202510",
+            "CS",
+            "1083",
+            "Intro to CS",
+            enrollment,
+            30,
+            wait_count,
+            5,
+        )
+    };
+    let architecture = |enrollment| {
+        helpers::make_course(
+            "30002",
+            "202510",
+            "CS",
+            "2073",
+            "Computer Architecture",
+            enrollment,
+            30,
+            0,
+            5,
+        )
+    };
+
+    batch_upsert_courses(&[intro_cs(10, 0), architecture(20)], &pool)
+        .await
+        .unwrap();
+
+    // Second batch: both existing courses with new numbers, plus a brand-new one.
+    let calculus =
+        helpers::make_course("30003", "202510", "MAT", "1214", "Calculus I", 40, 45, 3, 10);
+    batch_upsert_courses(&[intro_cs(15, 1), architecture(25), calculus], &pool)
+        .await
+        .unwrap();
+
+    let total_rows: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM courses")
+        .fetch_one(&pool)
+        .await
+        .unwrap();
+    assert_eq!(
+        total_rows, 3,
+        "should have 2 updated + 1 new = 3 total rows"
+    );
+
+    // The two pre-existing rows carry the updated enrollment.
+    let intro_enrollment: i32 =
+        sqlx::query_scalar("SELECT enrollment FROM courses WHERE crn = '30001'")
+            .fetch_one(&pool)
+            .await
+            .unwrap();
+    assert_eq!(intro_enrollment, 15);
+
+    let architecture_enrollment: i32 =
+        sqlx::query_scalar("SELECT enrollment FROM courses WHERE crn = '30002'")
+            .fetch_one(&pool)
+            .await
+            .unwrap();
+    assert_eq!(architecture_enrollment, 25);
+
+    // The third course was inserted.
+    let new_subject: String =
+        sqlx::query_scalar("SELECT subject FROM courses WHERE crn = '30003'")
+            .fetch_one(&pool)
+            .await
+            .unwrap();
+    assert_eq!(new_subject, "MAT");
+}
+
+/// The same CRN under two different term codes must be stored as two rows,
+/// each keeping its own enrollment.
+#[sqlx::test]
+async fn test_batch_upsert_unique_constraint_crn_term(pool: PgPool) {
+    // Identical CRN; only the term code (and enrollment) differ.
+    let first_term =
+        helpers::make_course("40001", "202510", "CS", "1083", "Intro to CS", 25, 30, 0, 5);
+    let second_term =
+        helpers::make_course("40001", "202520", "CS", "1083", "Intro to CS", 10, 30, 0, 5);
+
+    batch_upsert_courses(&[first_term, second_term], &pool)
+        .await
+        .unwrap();
+
+    let matching_rows: i64 =
+        sqlx::query_scalar("SELECT COUNT(*) FROM courses WHERE crn = '40001'")
+            .fetch_one(&pool)
+            .await
+            .unwrap();
+    assert_eq!(
+        matching_rows, 2,
+        "same CRN with different term codes should be separate rows"
+    );
+
+    let per_term: Vec<(String, i32)> = sqlx::query_as(
+        "SELECT term_code, enrollment FROM courses WHERE crn = '40001' ORDER BY term_code",
+    )
+    .fetch_all(&pool)
+    .await
+    .unwrap();
+
+    let (earlier_code, earlier_enrollment) = &per_term[0];
+    assert_eq!(earlier_code, "202510");
+    assert_eq!(*earlier_enrollment, 25);
+
+    let (later_code, later_enrollment) = &per_term[1];
+    assert_eq!(later_code, "202520");
+    assert_eq!(*later_enrollment, 10);
+}
@@ -0,0 +1,435 @@
+mod helpers;
+
+use banner::data::models::{ScrapePriority, TargetType};
+use banner::data::scrape_jobs;
+use serde_json::json;
+use sqlx::PgPool;
+
+// ── fetch_and_lock_job ──────────────────────────────────────────────
+
+/// Fetching from an empty `scrape_jobs` table succeeds and yields no job.
+#[sqlx::test]
+async fn fetch_and_lock_empty_queue(pool: PgPool) {
+    let fetched = scrape_jobs::fetch_and_lock_job(&pool).await.unwrap();
+
+    assert!(fetched.is_none());
+}
+
+/// Inserts one unlocked job and expects `fetch_and_lock_job` to return it and
+/// to persist a `locked_at` timestamp on the stored row.
+#[sqlx::test]
+async fn fetch_and_lock_returns_job_and_sets_locked_at(pool: PgPool) {
+    let payload = json!({"subject": "CS"});
+    let inserted_id = helpers::insert_scrape_job(
+        &pool,
+        TargetType::Subject,
+        payload.clone(),
+        ScrapePriority::Medium,
+        false, // not locked
+        0,
+        3,
+    )
+    .await;
+
+    let fetched = scrape_jobs::fetch_and_lock_job(&pool)
+        .await
+        .unwrap()
+        .expect("should return a job");
+
+    assert_eq!(fetched.id, inserted_id);
+    assert!(matches!(fetched.target_type, TargetType::Subject));
+    assert_eq!(fetched.target_payload, payload);
+
+    // The lock has to be visible in the table itself, not only on the returned value.
+    let locked_at: Option<chrono::DateTime<chrono::Utc>> =
+        sqlx::query_scalar("SELECT locked_at FROM scrape_jobs WHERE id = $1")
+            .bind(inserted_id)
+            .fetch_one(&pool)
+            .await
+            .unwrap();
+    assert!(locked_at.is_some(), "locked_at should be set after fetch");
+}
+
+/// A job that is already locked must not be handed out again.
+#[sqlx::test]
+async fn fetch_and_lock_skips_locked_jobs(pool: PgPool) {
+    let already_locked = true;
+    let _locked_job_id = helpers::insert_scrape_job(
+        &pool,
+        TargetType::Subject,
+        json!({"subject": "CS"}),
+        ScrapePriority::Medium,
+        already_locked,
+        0,
+        3,
+    )
+    .await;
+
+    let fetched = scrape_jobs::fetch_and_lock_job(&pool).await.unwrap();
+
+    assert!(fetched.is_none(), "locked jobs should be skipped");
+}
+
+/// A job whose `execute_at` lies one hour in the future must not be fetched.
+#[sqlx::test]
+async fn fetch_and_lock_skips_future_execute_at(pool: PgPool) {
+    // Raw SQL is used here so that `execute_at` can be set relative to the database clock.
+    let insert_future_job = sqlx::query(
+        "INSERT INTO scrape_jobs (target_type, target_payload, priority, execute_at)
+         VALUES ('Subject', '{\"subject\": \"CS\"}', 'Medium', NOW() + INTERVAL '1 hour')",
+    );
+    insert_future_job.execute(&pool).await.unwrap();
+
+    let fetched = scrape_jobs::fetch_and_lock_job(&pool).await.unwrap();
+
+    assert!(fetched.is_none(), "future execute_at jobs should be skipped");
+}
+
+/// With a Low and a Critical job both available, the Critical one is fetched
+/// first even though it was inserted second.
+#[sqlx::test]
+async fn fetch_and_lock_priority_desc_ordering(pool: PgPool) {
+    // Insertion order matters: the low-priority job goes in first.
+    let queued = [
+        ("LOW", ScrapePriority::Low),
+        ("CRIT", ScrapePriority::Critical),
+    ];
+    for (label, priority) in queued {
+        helpers::insert_scrape_job(
+            &pool,
+            TargetType::Subject,
+            json!({"subject": label}),
+            priority,
+            false,
+            0,
+            3,
+        )
+        .await;
+    }
+
+    let fetched = scrape_jobs::fetch_and_lock_job(&pool)
+        .await
+        .unwrap()
+        .expect("should return a job");
+
+    assert_eq!(
+        fetched.target_payload,
+        json!({"subject": "CRIT"}),
+        "Critical priority should be fetched before Low"
+    );
+}
+
+/// Among jobs of equal priority, the one with the earlier `execute_at` is
+/// fetched first.
+#[sqlx::test]
+async fn fetch_and_lock_execute_at_asc_ordering(pool: PgPool) {
+    // Two Medium jobs, both already due: one scheduled two hours ago, one an hour ago.
+    let inserts = [
+        "INSERT INTO scrape_jobs (target_type, target_payload, priority, execute_at)
+         VALUES ('Subject', '{\"subject\": \"OLDER\"}', 'Medium', NOW() - INTERVAL '2 hours')",
+        "INSERT INTO scrape_jobs (target_type, target_payload, priority, execute_at)
+         VALUES ('Subject', '{\"subject\": \"NEWER\"}', 'Medium', NOW() - INTERVAL '1 hour')",
+    ];
+    for statement in inserts {
+        sqlx::query(statement).execute(&pool).await.unwrap();
+    }
+
+    let fetched = scrape_jobs::fetch_and_lock_job(&pool)
+        .await
+        .unwrap()
+        .expect("should return a job");
+
+    assert_eq!(
+        fetched.target_payload,
+        json!({"subject": "OLDER"}),
+        "Older execute_at should be fetched first"
+    );
+}
+
+// ── delete_job ──────────────────────────────────────────────────────
+
+/// Deleting an existing job removes its row from `scrape_jobs`.
+#[sqlx::test]
+async fn delete_job_removes_row(pool: PgPool) {
+    let job_id = helpers::insert_scrape_job(
+        &pool,
+        TargetType::SingleCrn,
+        json!({"crn": "12345"}),
+        ScrapePriority::High,
+        false,
+        0,
+        3,
+    )
+    .await;
+
+    scrape_jobs::delete_job(job_id, &pool).await.unwrap();
+
+    let remaining: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM scrape_jobs WHERE id = $1")
+        .bind(job_id)
+        .fetch_one(&pool)
+        .await
+        .unwrap();
+    assert_eq!(remaining, 0, "row should be deleted");
+}
+
+/// Deleting an ID that was never inserted is expected to succeed silently.
+#[sqlx::test]
+async fn delete_job_nonexistent_id_no_error(pool: PgPool) {
+    // Nothing has been inserted into this test database, so this ID cannot exist.
+    let missing_id = 999_999;
+
+    scrape_jobs::delete_job(missing_id, &pool).await.unwrap();
+}
+
+// ── unlock_job ──────────────────────────────────────────────────────
+
+/// Unlocking a locked job resets its `locked_at` column to NULL.
+#[sqlx::test]
+async fn unlock_job_clears_locked_at(pool: PgPool) {
+    let already_locked = true;
+    let job_id = helpers::insert_scrape_job(
+        &pool,
+        TargetType::CrnList,
+        json!({"crns": [1, 2, 3]}),
+        ScrapePriority::Medium,
+        already_locked,
+        0,
+        3,
+    )
+    .await;
+
+    scrape_jobs::unlock_job(job_id, &pool).await.unwrap();
+
+    let locked_at: Option<chrono::DateTime<chrono::Utc>> =
+        sqlx::query_scalar("SELECT locked_at FROM scrape_jobs WHERE id = $1")
+            .bind(job_id)
+            .fetch_one(&pool)
+            .await
+            .unwrap();
+    assert!(locked_at.is_none(), "locked_at should be cleared");
+}
+
+// ── unlock_and_increment_retry ──────────────────────────────────────
+
+/// Starting from `retry_count = 0` with a limit of 3, one failed attempt leaves
+/// retries available, bumps the counter to 1, and releases the lock.
+#[sqlx::test]
+async fn unlock_and_increment_retry_has_retries_remaining(pool: PgPool) {
+    let job_id = helpers::insert_scrape_job(
+        &pool,
+        TargetType::Subject,
+        json!({"subject": "CS"}),
+        ScrapePriority::Medium,
+        true, // locked
+        0,    // retry_count
+        3,    // max_retries
+    )
+    .await;
+
+    let retries_left = scrape_jobs::unlock_and_increment_retry(job_id, 3, &pool)
+        .await
+        .unwrap();
+    assert!(retries_left, "should have retries remaining (0→1, max=3)");
+
+    // Confirm what was actually written to the row.
+    let stored: (i32, Option<chrono::DateTime<chrono::Utc>>) =
+        sqlx::query_as("SELECT retry_count, locked_at FROM scrape_jobs WHERE id = $1")
+            .bind(job_id)
+            .fetch_one(&pool)
+            .await
+            .unwrap();
+    let (stored_retry_count, stored_locked_at) = stored;
+    assert_eq!(stored_retry_count, 1);
+    assert!(stored_locked_at.is_none(), "should be unlocked");
+}
+
+/// Starting from `retry_count = 2` with a limit of 3, the increment reaches the
+/// limit, so no retries remain and the stored counter becomes 3.
+#[sqlx::test]
+async fn unlock_and_increment_retry_exhausted(pool: PgPool) {
+    let job_id = helpers::insert_scrape_job(
+        &pool,
+        TargetType::Subject,
+        json!({"subject": "CS"}),
+        ScrapePriority::Medium,
+        true, // locked
+        2,    // retry_count
+        3,    // max_retries
+    )
+    .await;
+
+    let retries_left = scrape_jobs::unlock_and_increment_retry(job_id, 3, &pool)
+        .await
+        .unwrap();
+    assert!(
+        !retries_left,
+        "should NOT have retries remaining (2→3, max=3)"
+    );
+
+    let stored_retry_count: i32 =
+        sqlx::query_scalar("SELECT retry_count FROM scrape_jobs WHERE id = $1")
+            .bind(job_id)
+            .fetch_one(&pool)
+            .await
+            .unwrap();
+    assert_eq!(stored_retry_count, 3);
+}
+
+/// A job whose `retry_count` (5) is already beyond the limit (3) still gets its
+/// counter incremented to 6 and reports that no retries remain.
+#[sqlx::test]
+async fn unlock_and_increment_retry_already_exceeded(pool: PgPool) {
+    let job_id = helpers::insert_scrape_job(
+        &pool,
+        TargetType::Subject,
+        json!({"subject": "CS"}),
+        ScrapePriority::Medium,
+        true, // locked
+        5,    // retry_count, already past the limit
+        3,    // max_retries
+    )
+    .await;
+
+    let retries_left = scrape_jobs::unlock_and_increment_retry(job_id, 3, &pool)
+        .await
+        .unwrap();
+    assert!(
+        !retries_left,
+        "should NOT have retries remaining (5→6, max=3)"
+    );
+
+    let stored_retry_count: i32 =
+        sqlx::query_scalar("SELECT retry_count FROM scrape_jobs WHERE id = $1")
+            .bind(job_id)
+            .fetch_one(&pool)
+            .await
+            .unwrap();
+    assert_eq!(stored_retry_count, 6);
+}
+
+// ── find_existing_job_payloads ──────────────────────────────────────
+
+/// Only payloads stored under the requested target type are reported as
+/// existing: two `Subject` jobs match, a `SingleCrn` job with a candidate
+/// payload does not.
+#[sqlx::test]
+async fn find_existing_payloads_returns_matching(pool: PgPool) {
+    let cs = json!({"subject": "CS"});
+    let mat = json!({"subject": "MAT"});
+    let eng = json!({"subject": "ENG"});
+
+    // CS and MAT are stored as Subject jobs; ENG is stored under a different target type.
+    let stored_jobs = [
+        (TargetType::Subject, &cs),
+        (TargetType::Subject, &mat),
+        (TargetType::SingleCrn, &eng),
+    ];
+    for (target_type, payload) in stored_jobs {
+        helpers::insert_scrape_job(
+            &pool,
+            target_type,
+            payload.clone(),
+            ScrapePriority::Medium,
+            false,
+            0,
+            3,
+        )
+        .await;
+    }
+
+    let candidates = vec![cs.clone(), mat.clone(), eng.clone()];
+    let found = scrape_jobs::find_existing_job_payloads(TargetType::Subject, &candidates, &pool)
+        .await
+        .unwrap();
+
+    assert!(found.contains(&cs.to_string()));
+    assert!(found.contains(&mat.to_string()));
+    // ENG was stored as SingleCrn, so a Subject lookup must not report it.
+    assert!(!found.contains(&eng.to_string()));
+}
+
+/// A locked job's payload must not be reported as existing.
+#[sqlx::test]
+async fn find_existing_payloads_ignores_locked(pool: PgPool) {
+    let cs = json!({"subject": "CS"});
+    let already_locked = true;
+
+    helpers::insert_scrape_job(
+        &pool,
+        TargetType::Subject,
+        cs.clone(),
+        ScrapePriority::Medium,
+        already_locked,
+        0,
+        3,
+    )
+    .await;
+
+    let candidates = vec![cs.clone()];
+    let found = scrape_jobs::find_existing_job_payloads(TargetType::Subject, &candidates, &pool)
+        .await
+        .unwrap();
+
+    assert!(found.is_empty(), "locked jobs should be ignored");
+}
+
+/// An empty candidate list yields an empty result even when the table holds jobs.
+#[sqlx::test]
+async fn find_existing_payloads_empty_candidates(pool: PgPool) {
+    // Seed one job so an empty result cannot be explained by an empty table.
+    let _seeded_id = helpers::insert_scrape_job(
+        &pool,
+        TargetType::Subject,
+        json!({"subject": "CS"}),
+        ScrapePriority::Medium,
+        false,
+        0,
+        3,
+    )
+    .await;
+
+    let found = scrape_jobs::find_existing_job_payloads(TargetType::Subject, &[], &pool)
+        .await
+        .unwrap();
+
+    assert!(
+        found.is_empty(),
+        "empty candidates should return empty result"
+    );
+}
+
+// ── batch_insert_jobs ───────────────────────────────────────────────
+
+/// Inserting three jobs in one batch results in three rows in `scrape_jobs`.
+#[sqlx::test]
+async fn batch_insert_jobs_inserts_multiple(pool: PgPool) {
+    // Each entry is (payload, target type, priority).
+    let high_subject = (
+        json!({"subject": "CS"}),
+        TargetType::Subject,
+        ScrapePriority::High,
+    );
+    let medium_subject = (
+        json!({"subject": "MAT"}),
+        TargetType::Subject,
+        ScrapePriority::Medium,
+    );
+    let low_single_crn = (
+        json!({"crn": "12345"}),
+        TargetType::SingleCrn,
+        ScrapePriority::Low,
+    );
+    let batch = vec![high_subject, medium_subject, low_single_crn];
+
+    scrape_jobs::batch_insert_jobs(&batch, &pool).await.unwrap();
+
+    let total_rows: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM scrape_jobs")
+        .fetch_one(&pool)
+        .await
+        .unwrap();
+    assert_eq!(total_rows, 3);
+}
+
+/// Inserting an empty batch succeeds and leaves `scrape_jobs` empty.
+#[sqlx::test]
+async fn batch_insert_jobs_empty_slice(pool: PgPool) {
+    scrape_jobs::batch_insert_jobs(&[], &pool).await.unwrap();
+
+    let total_rows: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM scrape_jobs")
+        .fetch_one(&pool)
+        .await
+        .unwrap();
+
+    assert_eq!(total_rows, 0);
+}
@@ -0,0 +1,88 @@
+use banner::banner::Course;
+use banner::data::models::{ScrapePriority, TargetType};
+use chrono::Utc;
+use sqlx::PgPool;
+
+/// Build a test `Course` (Banner API model) with sensible defaults.
+///
+/// Only the fields used by `batch_upsert_courses` need meaningful values;
+/// the rest are filled with harmless placeholders.
+///
+/// Several fields are derived from the arguments rather than passed in:
+/// - `seats_available` is `max_enrollment - enrollment`
+/// - `open_section` is `enrollment < max_enrollment`
+/// - `subject_course` is `subject` immediately followed by `course_number`
+/// - `subject_description` reuses `subject`
+///
+/// `crn` is stored as `course_reference_number` and `title` as `course_title`.
+//
+// `dead_code`: every integration-test file that declares `mod helpers;` compiles its own
+// copy of this module, and test files that never build a `Course` would otherwise warn.
+// `too_many_arguments`: the flat positional list is deliberate so fixtures stay one-liners.
+#[allow(dead_code, clippy::too_many_arguments)]
+pub fn make_course(
+    crn: &str,
+    term: &str,
+    subject: &str,
+    course_number: &str,
+    title: &str,
+    enrollment: i32,
+    max_enrollment: i32,
+    wait_count: i32,
+    wait_capacity: i32,
+) -> Course {
+    Course {
+        id: 0,
+        term: term.to_owned(),
+        term_desc: String::new(),
+        course_reference_number: crn.to_owned(),
+        part_of_term: "1".to_owned(),
+        course_number: course_number.to_owned(),
+        subject: subject.to_owned(),
+        subject_description: subject.to_owned(),
+        sequence_number: "001".to_owned(),
+        campus_description: "Main Campus".to_owned(),
+        schedule_type_description: "Lecture".to_owned(),
+        course_title: title.to_owned(),
+        credit_hours: Some(3),
+        maximum_enrollment: max_enrollment,
+        enrollment,
+        // Kept consistent with the two enrollment figures instead of being a free parameter.
+        seats_available: max_enrollment - enrollment,
+        wait_capacity,
+        wait_count,
+        cross_list: None,
+        cross_list_capacity: None,
+        cross_list_count: None,
+        cross_list_available: None,
+        credit_hour_high: None,
+        credit_hour_low: None,
+        credit_hour_indicator: None,
+        open_section: enrollment < max_enrollment,
+        link_identifier: None,
+        is_section_linked: false,
+        subject_course: format!("{subject}{course_number}"),
+        reserved_seat_summary: None,
+        instructional_method: "FF".to_owned(),
+        instructional_method_description: "Face to Face".to_owned(),
+        section_attributes: vec![],
+        faculty: vec![],
+        meetings_faculty: vec![],
+    }
+}
+
+/// Insert a scrape job row directly via SQL, returning the generated ID.
+///
+/// `execute_at` is always set to the database's `NOW()`. When `locked` is
+/// `true`, `locked_at` is set to the current UTC time taken on the client;
+/// otherwise it is stored as NULL. `retry_count` and `max_retries` are written
+/// as given.
+///
+/// # Panics
+///
+/// Panics with `"insert_scrape_job failed"` if the insert does not succeed.
+//
+// `dead_code`: every integration-test file that declares `mod helpers;` compiles its own
+// copy of this module, and test files that never insert a job would otherwise warn.
+#[allow(dead_code)]
+pub async fn insert_scrape_job(
+    pool: &PgPool,
+    target_type: TargetType,
+    payload: serde_json::Value,
+    priority: ScrapePriority,
+    locked: bool,
+    retry_count: i32,
+    max_retries: i32,
+) -> i32 {
+    // A locked job is represented by a non-NULL `locked_at` timestamp.
+    let locked_at = locked.then(Utc::now);
+
+    let (id,): (i32,) = sqlx::query_as(
+        "INSERT INTO scrape_jobs (target_type, target_payload, priority, execute_at, locked_at, retry_count, max_retries)
+         VALUES ($1, $2, $3, NOW(), $4, $5, $6)
+         RETURNING id",
+    )
+    .bind(target_type)
+    .bind(payload)
+    .bind(priority)
+    .bind(locked_at)
+    .bind(retry_count)
+    .bind(max_retries)
+    .fetch_one(pool)
+    .await
+    .expect("insert_scrape_job failed");
+
+    id
+}