Mirror of https://github.com/Xevion/banner.git, synced 2026-01-31 08:23:35 -06:00
refactor: implement comprehensive graceful shutdown across all services
Implements graceful shutdown with broadcast channels and proper timeout handling for scraper workers, the scheduler, the bot service, and status update tasks. Introduces centralized shutdown utilities and improves the service manager to shut services down in parallel with per-service timeouts instead of a shared timeout budget (a sketch of this shape follows the change list).

Key changes:
- Add utils module with shutdown helper functions
- Update ScraperService to return errors on shutdown failures
- Refactor scheduler with cancellable work tasks and a 5s grace period
- Extract worker shutdown logic into helper methods for clarity
- Add broadcast channel shutdown support to BotService and the status task
- Improve ServiceManager to shut down services in parallel with individual timeouts
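A minimal, self-contained sketch of that shutdown shape, assuming the tokio runtime; the service names, task bodies, and the 5-second grace period are illustrative stand-ins, not this repository's actual ServiceManager API:

use std::time::Duration;
use tokio::sync::broadcast;
use tokio::time::timeout;

async fn run_service(name: &'static str, mut shutdown_rx: broadcast::Receiver<()>) {
    // Stand-in service body: idle until the shutdown broadcast arrives.
    let _ = shutdown_rx.recv().await;
    println!("{name} shut down cleanly");
}

#[tokio::main]
async fn main() {
    let (shutdown_tx, _) = broadcast::channel::<()>(1);

    // Every service subscribes to the same broadcast channel.
    let services = ["scraper", "scheduler", "bot", "status"].map(|name| {
        let rx = shutdown_tx.subscribe();
        (name, tokio::spawn(run_service(name, rx)))
    });

    let _ = shutdown_tx.send(()); // one send reaches every subscriber

    // Await each service under its own deadline, in parallel, rather than
    // draining one shared timeout budget that a slow service could exhaust.
    let waiters = services.map(|(name, handle)| {
        tokio::spawn(async move {
            if timeout(Duration::from_secs(5), handle).await.is_err() {
                eprintln!("{name} missed its shutdown deadline");
            }
        })
    });
    for waiter in waiters {
        let _ = waiter.await;
    }
}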
+48 -66
@@ -30,28 +30,25 @@ impl Worker {
     /// Runs the worker's main loop.
     pub async fn run(&self, mut shutdown_rx: broadcast::Receiver<()>) {
-        info!(worker_id = self.id, "Worker started.");
+        info!(worker_id = self.id, "Worker started");

         loop {
             // Fetch and lock a job, racing against shutdown signal
             let job = tokio::select! {
                 _ = shutdown_rx.recv() => {
-                    info!(worker_id = self.id, "Worker received shutdown signal");
-                    info!(worker_id = self.id, "Worker exiting gracefully");
+                    info!(worker_id = self.id, "Worker received shutdown signal, exiting gracefully");
                     break;
                 }
                 result = self.fetch_and_lock_job() => {
                     match result {
                         Ok(Some(job)) => job,
                         Ok(None) => {
                             // No job found, wait for a bit before polling again
                             trace!(worker_id = self.id, "No jobs available, waiting");
                             time::sleep(Duration::from_secs(5)).await;
                             continue;
                         }
                         Err(e) => {
-                            warn!(worker_id = self.id, error = ?e, "Failed to fetch job");
-                            // Wait before retrying to avoid spamming errors
+                            warn!(worker_id = self.id, error = ?e, "Failed to fetch job, waiting");
                             time::sleep(Duration::from_secs(10)).await;
                             continue;
                         }
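For context on the hunk above: run takes its own broadcast::Receiver, so a pool hands each worker a fresh receiver via subscribe() before spawning it. A hedged sketch of that wiring, with a stub Worker standing in for the real type:

use tokio::sync::broadcast;

// Stub standing in for the Worker type in the hunk above.
struct Worker {
    id: usize,
}

impl Worker {
    async fn run(&self, mut shutdown_rx: broadcast::Receiver<()>) {
        // Stand-in for the real poll loop: block until the broadcast arrives.
        let _ = shutdown_rx.recv().await;
        println!("worker {} exiting gracefully", self.id);
    }
}

#[tokio::main]
async fn main() {
    let (shutdown_tx, _) = broadcast::channel::<()>(1);

    // A receiver only sees messages sent after it subscribes, so each
    // worker subscribes before the shutdown signal can possibly be sent.
    let handles: Vec<_> = (0..4)
        .map(|id| {
            let rx = shutdown_tx.subscribe();
            tokio::spawn(async move { Worker { id }.run(rx).await })
        })
        .collect();

    let _ = shutdown_tx.send(()); // all four workers observe this one send
    for handle in handles {
        let _ = handle.await;
    }
}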
@@ -65,63 +62,14 @@ impl Worker {
         // Process the job, racing against shutdown signal
         let process_result = tokio::select! {
             _ = shutdown_rx.recv() => {
-                info!(worker_id = self.id, job_id, "Shutdown received during job processing");
-
-                // Unlock the job so it can be retried
-                if let Err(e) = self.unlock_job(job_id).await {
-                    warn!(
-                        worker_id = self.id,
-                        job_id,
-                        error = ?e,
-                        "Failed to unlock job during shutdown"
-                    );
-                } else {
-                    debug!(worker_id = self.id, job_id, "Job unlocked during shutdown");
-                }
-
-                info!(worker_id = self.id, "Worker exiting gracefully");
+                self.handle_shutdown_during_processing(job_id).await;
                 break;
             }
-            result = self.process_job(job) => {
-                result
-            }
+            result = self.process_job(job) => result
         };

         // Handle the job processing result
-        match process_result {
-            Ok(()) => {
-                debug!(worker_id = self.id, job_id, "Job completed");
-                // If successful, delete the job
-                if let Err(delete_err) = self.delete_job(job_id).await {
-                    error!(
-                        worker_id = self.id,
-                        job_id,
-                        ?delete_err,
-                        "Failed to delete job"
-                    );
-                }
-            }
-            Err(JobError::Recoverable(e)) => {
-                self.handle_recoverable_error(job_id, e).await;
-            }
-            Err(JobError::Unrecoverable(e)) => {
-                error!(
-                    worker_id = self.id,
-                    job_id,
-                    error = ?e,
-                    "Job corrupted, deleting"
-                );
-                // Parse errors are unrecoverable - delete the job
-                if let Err(delete_err) = self.delete_job(job_id).await {
-                    error!(
-                        worker_id = self.id,
-                        job_id,
-                        ?delete_err,
-                        "Failed to delete corrupted job"
-                    );
-                }
-            }
-        }
+        self.handle_job_result(job_id, process_result).await;
     }
 }
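The behavior this hunk preserves: when shutdown wins the tokio::select! race, the in-flight process_job future is dropped mid-flight, so the worker must unlock the half-processed job before exiting so another worker can retry it. A condensed sketch of that race-then-cleanup shape, where process and unlock are stand-ins for the repository's process_job and unlock_job:

use std::time::Duration;
use tokio::sync::broadcast;

// Stand-in for the real job-processing call.
async fn process(_job_id: i32) -> Result<(), String> {
    tokio::time::sleep(Duration::from_secs(1)).await;
    Ok(())
}

// Stand-in for releasing the job's lock so another worker can retry it.
async fn unlock(_job_id: i32) {}

async fn run_one(job_id: i32, mut shutdown_rx: broadcast::Receiver<()>) {
    tokio::select! {
        _ = shutdown_rx.recv() => {
            // Losing the race drops the process() future mid-flight, so the
            // half-done job must be unlocked before the worker exits.
            unlock(job_id).await;
        }
        res = process(job_id) => {
            if let Err(e) = res {
                eprintln!("job {job_id} failed: {e}");
            }
        }
    }
}

#[tokio::main]
async fn main() {
    let (shutdown_tx, shutdown_rx) = broadcast::channel::<()>(1);
    let task = tokio::spawn(run_one(42, shutdown_rx));
    let _ = shutdown_tx.send(()); // interrupt processing immediately
    let _ = task.await;
}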
@@ -191,24 +139,58 @@ impl Worker {
         Ok(())
     }

+    /// Handle shutdown signal received during job processing
+    async fn handle_shutdown_during_processing(&self, job_id: i32) {
+        info!(worker_id = self.id, job_id, "Shutdown received during job processing");
+
+        if let Err(e) = self.unlock_job(job_id).await {
+            warn!(
+                worker_id = self.id,
+                job_id,
+                error = ?e,
+                "Failed to unlock job during shutdown"
+            );
+        } else {
+            debug!(worker_id = self.id, job_id, "Job unlocked during shutdown");
+        }
+
+        info!(worker_id = self.id, "Worker exiting gracefully");
+    }
+
+    /// Handle the result of job processing
+    async fn handle_job_result(&self, job_id: i32, result: Result<(), JobError>) {
+        match result {
+            Ok(()) => {
+                debug!(worker_id = self.id, job_id, "Job completed successfully");
+                if let Err(e) = self.delete_job(job_id).await {
+                    error!(worker_id = self.id, job_id, error = ?e, "Failed to delete completed job");
+                }
+            }
+            Err(JobError::Recoverable(e)) => {
+                self.handle_recoverable_error(job_id, e).await;
+            }
+            Err(JobError::Unrecoverable(e)) => {
+                error!(worker_id = self.id, job_id, error = ?e, "Job corrupted, deleting");
+                if let Err(e) = self.delete_job(job_id).await {
+                    error!(worker_id = self.id, job_id, error = ?e, "Failed to delete corrupted job");
+                }
+            }
+        }
+    }
+
     /// Handle recoverable errors by logging appropriately and unlocking the job
     async fn handle_recoverable_error(&self, job_id: i32, e: anyhow::Error) {
         if let Some(BannerApiError::InvalidSession(_)) = e.downcast_ref::<BannerApiError>() {
             warn!(
                 worker_id = self.id,
-                job_id, "Invalid session detected. Forcing session refresh."
+                job_id, "Invalid session detected, forcing session refresh"
             );
         } else {
             error!(worker_id = self.id, job_id, error = ?e, "Failed to process job");
         }

-        if let Err(unlock_err) = self.unlock_job(job_id).await {
-            error!(
-                worker_id = self.id,
-                job_id,
-                ?unlock_err,
-                "Failed to unlock job"
-            );
+        if let Err(e) = self.unlock_job(job_id).await {
+            error!(worker_id = self.id, job_id, error = ?e, "Failed to unlock job");
         }
     }
 }
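handle_recoverable_error special-cases one failure mode by downcasting the anyhow::Error back to its concrete type. A minimal illustration of that downcast_ref pattern, with MyApiError standing in for BannerApiError:

use std::fmt;

// Hypothetical error type standing in for BannerApiError.
#[derive(Debug)]
enum MyApiError {
    InvalidSession(String),
}

impl fmt::Display for MyApiError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            MyApiError::InvalidSession(s) => write!(f, "invalid session: {s}"),
        }
    }
}

impl std::error::Error for MyApiError {}

fn classify(e: &anyhow::Error) -> &'static str {
    // downcast_ref peeks at the concrete type behind the anyhow wrapper
    // without consuming it, so the fallback branch can still log `e`.
    if let Some(MyApiError::InvalidSession(_)) = e.downcast_ref::<MyApiError>() {
        "invalid session: force a session refresh, then unlock for retry"
    } else {
        "unknown failure: log it, then unlock for retry"
    }
}

fn main() {
    let err = anyhow::Error::new(MyApiError::InvalidSession("expired token".into()));
    println!("{}", classify(&err));
}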