Gets rid of FetchController checkRestart code

Chris Pollett [2023-09-03 16:??:??]

Gets rid of FetchController checkRestart code

Filenames
src/controllers/FetchController.php
src/models/CrawlModel.php

diff --git a/src/controllers/FetchController.php b/src/controllers/FetchController.php
index c35805c07..b33e9cc9f 100755
--- a/src/controllers/FetchController.php
+++ b/src/controllers/FetchController.php
@@ -159,17 +159,7 @@ class FetchController extends Controller implements CrawlConstants
                     $upload_data, $messages_bundle);
             }
         }
-        if (empty($message)) {
-            /*  check if scheduler part of queue server went down
-                and needs to be restarted with current crawl time.
-                Idea is fetcher has recently spoken with name server
-                so knows the crawl time. queue server knows time
-                only by file messages never by making curl requests
-             */
-            $this->checkRestart(self::WEB_CRAWL);
-        } else {
-            $data['MESSAGE'] = $message;
-        }
+        $data['MESSAGE'] = $message;
         $this->displayView($view, $data);
     }
     /**
@@ -211,7 +201,6 @@ class FetchController extends Controller implements CrawlConstants
                 $fetch_pages = false;
                 $info = [];
             }
-            $this->checkRestart(self::ARCHIVE_CRAWL);
         } else {
             $fetch_pages = false;
             $info = [];
@@ -285,63 +274,6 @@ class FetchController extends Controller implements CrawlConstants
         $data['MESSAGE'] = $info_string;
         $this->displayView($view, $data);
     }
-    /**
-     * Checks if the queue server crawl needs to be restarted
-     * Called when a fetcher sends info that invokes the FetchController's
-     * update method (on sending schedule, index, robot, etag, etc data).
-     * If the expected to be running crawl is closed on this queue server,
-     * and the check_crawl_time (last time fetcher checked name server to see
-     * what the active crawl was) is more recent than the time at which
-     * it was closed, restart the crawl on the current queue server.
-     *
-     * @param string $crawl_type if it does use restart the crawl as a crawl
-     *     of this type. For example, self::WEB_CRAWL or self::ARCHIVE_CRAWL
-     */
-    public function checkRestart($crawl_type)
-    {
-        if (isset($_REQUEST['crawl_time'])) {;
-            $crawl_time = substr($this->clean($_REQUEST['crawl_time'], 'int'),
-                0, C\TIMESTAMP_LEN);
-            if (isset($_REQUEST['check_crawl_time'])) {
-                $check_crawl_time = substr($this->clean(
-                    $_REQUEST['check_crawl_time'], 'int'), 0, C\TIMESTAMP_LEN);
-            }
-        } else {
-            $crawl_time = 0;
-            $check_crawl_time = 0;
-        }
-        $channel = $this->getChannel();
-        $index_schedule_file = C\SCHEDULES_DIR . "/" .
-            self::index_closed_name . $crawl_time . ".txt";
-        $queue_server_messages_file_name = CrawlDaemon::getMessageFileName(
-            "QueueServer", $channel);
-        if ($crawl_time > 0 && file_exists($index_schedule_file) &&
-            $check_crawl_time > intval(fileatime($index_schedule_file)) &&
-            !file_exists($queue_server_messages_file_name) ) {
-            $restart = true;
-            if (file_exists($this->crawl_status_file_name)) {
-                $crawl_status = unserialize(file_get_contents(
-                    $this->crawl_status_file_name));
-                if (!empty($crawl_status['CRAWL_TIME'])) {
-                    $restart = false;
-                }
-            }
-            if ($restart == true && file_exists(C\CACHE_DIR . '/'.
-                self::index_data_base_name . $crawl_time)) {
-                $crawl_params = [];
-                $crawl_params[self::STATUS] = "RESUME_CRAWL";
-                $crawl_params[self::CRAWL_TIME] = $crawl_time;
-                $crawl_params[self::CRAWL_TYPE] = $crawl_type;
-                /*
-                    we only set crawl time. Other data such as allowed sites
-                    should come from index.
-                */
-                $this->model("crawl")->sendStartCrawlMessage($crawl_params,
-                    fetcher_queue_server_ratio: $this->model("machine")
-                        ->getFetchersQueueServerRatio());
-            }
-        }
-    }
     /**
      * Processes Robot, To Crawl, and Index data sent from a fetcher
      * Acknowledge to the fetcher if this data was received okay.
@@ -364,9 +296,6 @@ class FetchController extends Controller implements CrawlConstants
         }
         $crawl_type = $this->clean($_REQUEST['crawl_type'] ?? "", 'string');
         $arc_type = $this->clean($_REQUEST['arc_type'] ?? "", 'string');
-        if (!empty($crawl_type)) {
-            $this->checkRestart($crawl_type);
-        }
         $crawl_time = substr($this->clean($_REQUEST['crawl_time'] ?? "0",'int'),
                 0, C\TIMESTAMP_LEN);
         if ($part_flag &&
diff --git a/src/models/CrawlModel.php b/src/models/CrawlModel.php
index 895deb5fc..fe01bcc7f 100755
--- a/src/models/CrawlModel.php
+++ b/src/models/CrawlModel.php
@@ -1358,7 +1358,7 @@ EOT;
      * one field of which has the boolean data concerning stalled statis
      *
      * @param string $data_field field of $stall_statuses to use for data
-     *     if null then each element of $stall_statuses is a wen encoded
+     *     if null then each element of $stall_statuses is a web encoded
      *     serialized boolean
      * @return array
      */

ViewGit