From dcc9618e1f88ef7de51d03d01d87739fc28c2b2e Mon Sep 17 00:00:00 2001
From: liquidpurple <67183976+liquidpurple@users.noreply.github.com>
Date: Sat, 21 Mar 2026 14:49:14 -0700
Subject: [PATCH] Fix: stop_crawl() overwrites "completed" status
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Race condition: the cleanup thread calls stop_crawl() after a crawl has
already been marked "completed", changing its status back to "stopped".
Added a status check before overwriting — if the crawl is already
"completed", leave it alone.
---
 src/crawler.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/crawler.py b/src/crawler.py
index ebf871f..79ba622 100644
--- a/src/crawler.py
+++ b/src/crawler.py
@@ -331,8 +331,11 @@ def stop_crawl(self):
         # Save final data to database
         if self.db_save_enabled and self.crawl_id:
             self._save_batch_to_db(force=True)
-            from src.crawl_db import set_crawl_status
-            set_crawl_status(self.crawl_id, 'stopped')
+            from src.crawl_db import get_crawl_by_id, set_crawl_status
+            crawl = get_crawl_by_id(self.crawl_id)
+            # Don't overwrite 'completed' status (e.g. from cleanup thread)
+            if crawl and crawl['status'] != 'completed':
+                set_crawl_status(self.crawl_id, 'stopped')
 
         # Clean up JavaScript resources if enabled
         if self.js_renderer: