feat: implement worker timeout protection and crash recovery for job queue

Add a JOB_TIMEOUT constant to fail stuck jobs after 5 minutes and a
LOCK_EXPIRY constant to reclaim abandoned locks after 10 minutes.
Introduce force_unlock_all to recover orphaned jobs at startup. Fix an
off-by-one error in the retry-limit check and update deduplication to
include locked jobs.
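
Below is a minimal sketch of how the new pieces might fit together. The
JOB_TIMEOUT, LOCK_EXPIRY, and force_unlock_all names come from this
commit; the scrape_jobs table and its locked_at/locked_by columns are
assumptions inferred from the tests, not confirmed by the diff.

use std::time::Duration;
use sqlx::PgPool;

/// Jobs running longer than this are failed as stuck.
const JOB_TIMEOUT: Duration = Duration::from_secs(5 * 60);

/// Locks older than this are treated as abandoned and may be reclaimed.
const LOCK_EXPIRY: Duration = Duration::from_secs(10 * 60);

/// Startup recovery: clear every lock so jobs orphaned by a crashed
/// worker become claimable again. Table and column names are assumed.
async fn force_unlock_all(pool: &PgPool) -> Result<u64, sqlx::Error> {
    let result = sqlx::query(
        "UPDATE scrape_jobs
         SET locked_at = NULL, locked_by = NULL
         WHERE locked_at IS NOT NULL",
    )
    .execute(pool)
    .await?;
    Ok(result.rows_affected())
}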
2026-01-29 15:50:09 -06:00
parent db0ec1e69d
commit e880126281
4 changed files with 74 additions and 18 deletions
@@ -241,7 +241,7 @@ async fn unlock_and_increment_retry_exhausted(pool: PgPool) {
         json!({"subject": "CS"}),
         ScrapePriority::Medium,
         true,
-        2, // retry_count
+        3, // retry_count (already used all 3 retries)
         3, // max_retries
     )
     .await;
@@ -251,7 +251,7 @@ async fn unlock_and_increment_retry_exhausted(pool: PgPool) {
         .unwrap();
     assert!(
         !has_retries,
-        "should NOT have retries remaining (2→3, max=3)"
+        "should NOT have retries remaining (3→4, max=3)"
     );
 
     let (retry_count,): (i32,) =
@@ -260,7 +260,7 @@ async fn unlock_and_increment_retry_exhausted(pool: PgPool) {
         .fetch_one(&pool)
         .await
         .unwrap();
-    assert_eq!(retry_count, 3);
+    assert_eq!(retry_count, 4);
 }
 
 #[sqlx::test]
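
The three hunks above pin down the fixed retry semantics: a job whose
retry_count already equals max_retries is incremented past the limit and
reported exhausted, where previously the check tripped one retry early.
A hedged sketch of that reading; the unlock_and_increment name and the
SQL shape are assumptions, not confirmed by this diff:

// Sketch: unlock a failed job, bump its retry counter, and report
// whether retries remain. Per the updated test, retry_count 3 with
// max_retries 3 increments to 4 and reports exhausted, while 2 -> 3
// would still have retries remaining.
async fn unlock_and_increment(
    pool: &sqlx::PgPool,
    job_id: i64,
) -> Result<bool, sqlx::Error> {
    let (retry_count, max_retries): (i32, i32) = sqlx::query_as(
        "UPDATE scrape_jobs
         SET locked_at = NULL, locked_by = NULL,
             retry_count = retry_count + 1
         WHERE id = $1
         RETURNING retry_count, max_retries",
    )
    .bind(job_id)
    .fetch_one(pool)
    .await?;
    // The off-by-one fix: exhausted only once the count passes max_retries.
    Ok(retry_count <= max_retries)
}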
@@ -346,7 +346,7 @@ async fn find_existing_payloads_returns_matching(pool: PgPool) {
 }
 
 #[sqlx::test]
-async fn find_existing_payloads_ignores_locked(pool: PgPool) {
+async fn find_existing_payloads_includes_locked(pool: PgPool) {
     let payload = json!({"subject": "CS"});
     helpers::insert_scrape_job(
@@ -365,7 +365,10 @@ async fn find_existing_payloads_ignores_locked(pool: PgPool) {
         .await
         .unwrap();
-    assert!(existing.is_empty(), "locked jobs should be ignored");
+    assert!(
+        existing.contains(&payload.to_string()),
+        "locked jobs should be included in deduplication"
+    );
 }
 
 #[sqlx::test]
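
The last two hunks flip the deduplication contract: payloads of locked
(in-flight) jobs now count as existing, so a duplicate cannot be
enqueued while a worker is processing the original. A sketch of the
query this implies; the SQL and the jsonb payload column are
assumptions:

// Sketch: return the payload text of queued jobs matching any
// candidate, with no `locked_at IS NULL` filter, so in-flight jobs
// also suppress duplicates. Assumes sqlx's jsonb[] binding for
// Vec<serde_json::Value>.
async fn find_existing_payloads(
    pool: &sqlx::PgPool,
    candidates: Vec<serde_json::Value>,
) -> Result<Vec<String>, sqlx::Error> {
    sqlx::query_scalar(
        "SELECT payload::text FROM scrape_jobs WHERE payload = ANY($1)",
    )
    .bind(candidates)
    .fetch_all(pool)
    .await
}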