Add a debug field to processes that are managed by CrawlDaemon to make it easier to send them messages for debugging; fixes restart bug for Indexer, a=chris

Chris Pollett [2019-07-08]
Add a debug field to processes that are managed by CrawlDaemon to make it easier to send them messages for debugging; fixes restart bug for Indexer, a=chris
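
With this change, a one-shot debug message can be sent from the command line to a running daemon via the new "debug" subcommand handled in CrawlDaemon::init() below. As a sketch of the invocation (the subcommand and the message names come from this diff; the channel argument 0 and running from src/executables are assumptions):

    php QueueServer.php debug 0 NOT_RUNNING   # force the dead-process restart check
    php QueueServer.php debug 0 FORCE_SWAP    # force a repeating-crawl index swap
    php QueueServer.php debug 0 EXCEED_MEMORY # simulate the Indexer memory threshold

The last argument is serialized under the new CrawlConstants::DEBUG key into the process's messages file; the QueueServer and Fetcher hunks below read it from there into their new $debug fields.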
Filename
src/executables/Fetcher.php
src/executables/QueueServer.php
src/library/CrawlConstants.php
src/library/CrawlDaemon.php
diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php
index 9a6a5ed3f..88d48d3dd 100755
--- a/src/executables/Fetcher.php
+++ b/src/executables/Fetcher.php
@@ -402,6 +402,12 @@ class Fetcher implements CrawlConstants
      * @var array
      */
     public $proxy_servers;
+    /**
+     * Holds the value of a debug message that might have been sent from
+     * the command line during the current execution of loop();
+     * @var string
+     */
+    public $debug;
     /**
      * Before receiving any data from a queue server's web app this is
      * the default assumed post_max_size in bytes
@@ -485,6 +491,7 @@ class Fetcher implements CrawlConstants
         $this->crawl_order = self::PAGE_IMPORTANCE;
         $this->max_depth = -1;
         $this->summarizer_option = self::BASIC_SUMMARIZER;
+        $this->debug = "";
     }
     /**
      * Return the fetcher's copy of a page processor for the given
@@ -594,6 +601,11 @@ class Fetcher implements CrawlConstants
                 "/schedules/{$prefix}FetcherMessages.txt";
             if (file_exists($fetcher_message_file)) {
                 $info = unserialize(file_get_contents($fetcher_message_file));
+                if (isset($info[self::DEBUG])) {
+                    $this->debug = $info[self::DEBUG];
+                    unlink($fetcher_message_file);
+                    continue;
+                }
                 unlink($fetcher_message_file);
                 if (isset($info[self::STATUS]) &&
                     $info[self::STATUS] == self::STOP_STATE) {
@@ -736,7 +748,11 @@ class Fetcher implements CrawlConstants
                     "Ensure minimum loop time by sleeping..." . $sleep_time);
                 sleep($sleep_time);
             }
-        } //end while
+            if (!empty($this->debug)) {
+                L\crawlLog("Debug Message: {$this->debug} has been processed");
+                $this->debug = "";
+            }
+        } //end while
         L\crawlLog("Fetcher shutting down!!");
     }
     /**
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 96a14b448..3daea00e0 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -34,12 +34,10 @@ use seekquarry\yioop\configs as C;
 use seekquarry\yioop\library as L;
 use seekquarry\yioop\library\CrawlConstants;
 use seekquarry\yioop\library\CrawlDaemon;
-use seekquarry\yioop\library\FetchUrl;
 use seekquarry\yioop\library\IndexShard;
 use seekquarry\yioop\library\IndexArchiveBundle;
 use seekquarry\yioop\library\Join;
 use seekquarry\yioop\library\processors\PageProcessor;
-use seekquarry\yioop\library\PhraseParser;
 use seekquarry\yioop\library\DoubleIndexBundle;
 use seekquarry\yioop\library\UrlParser;
 use seekquarry\yioop\library\WebQueueBundle;
@@ -52,7 +50,7 @@ if (!defined("seekquarry\\yioop\\configs\\UNIT_TEST_MODE")) {
 }
 ini_set("memory_limit","2500M"); //so have enough memory to crawl big pages
 /**  For crawlHash function and Yioop constants */
-require_once __DIR__."/../library/Utility.php";
+require_once __DIR__ . "/../library/Utility.php";
 if (!C\PROFILE) {
     echo "Please configure the search engine instance ".
         "by visiting its web interface on localhost.\n";
@@ -284,6 +282,12 @@ class QueueServer implements CrawlConstants, Join
      * @var string
      */
     public $process_name;
+    /**
+     * Holds the value of a debug message that might have been sent from
+     * the command line during the current execution of loop();
+     * @var string
+     */
+    public $debug;
     /**
      * A mapping between class field names and parameters which might
      * be sent to a queue server via an info associative array.
@@ -348,6 +352,7 @@ class QueueServer implements CrawlConstants, Join
         $this->waiting_hosts = [];
         $this->server_name = "IndexerAndScheduler";
         $this->process_name = "0-QueueServer";
+        $this->debug = "";
     }
     /**
      * This is the function that should be called to get the queue server
@@ -518,13 +523,15 @@ class QueueServer implements CrawlConstants, Join
         static $first_check;
         static $last_check;
         $time = time();
-        if (!isset($first_check)) {
-            $first_check = $time;
-            $last_check = $time;
-        }
-        if ($time - $last_check < C\LOG_TIMEOUT ||
-            $time - $first_check < C\PROCESS_TIMEOUT ) {
-            return;
+        if ($this->debug != "NOT_RUNNING") {
+            if (!isset($first_check)) {
+                $first_check = $time;
+                $last_check = $time;
+            }
+            if ($time - $last_check < C\LOG_TIMEOUT ||
+                $time - $first_check < C\PROCESS_TIMEOUT ) {
+                return;
+            }
         }
         $last_check = $time;
         L\crawlLog("Checking if both processes still running ...");
@@ -534,7 +541,7 @@ class QueueServer implements CrawlConstants, Join
             $lines_to_check);
         $time = time(); // just in case took time to compute lines
         L\crawlLog("...Got " . $this->process_name . ".log lines");
-        if (count($lines) < $lines_to_check) {
+        if ($this->debug != "NOT_RUNNING" && count($lines) < $lines_to_check) {
             L\crawlLog("...Too few log lines to check if both processes " .
                 "running.  Assume still running.");
             return;
@@ -564,10 +571,12 @@ class QueueServer implements CrawlConstants, Join
         }
         L\crawlLog("...difference last timestamp and current time ".
             ($time - $last_process_timestamp));
-        if ($time - $last_process_timestamp < C\PROCESS_TIMEOUT ) {
+        if ($this->debug != "NOT_RUNNING" &&
+            $time - $last_process_timestamp < C\PROCESS_TIMEOUT ) {
             L\crawlLog("...done check. Both processes still running.");
             return;
         }
+        $this->debug = "";
         L\crawlLog( "$process seems to have died restarting...");
         $process_lines = array_slice($process_lines, -10);
         $time_string = date("r", time());
@@ -589,7 +598,7 @@ class QueueServer implements CrawlConstants, Join
         } else {
             file_put_contents($error_log, $out_msg, FILE_APPEND);
         }
-        $init_args = ["QueueServer.php", "start", $this->channel, $process];
+        $init_args = ["QueueServer.php", "start", "{$this->channel}", $process];
         L\crawlLog( "!!!!Writing to $error_log ".
             "crash message about $process...");
         CrawlDaemon::init($init_args, "QueueServer", -3);
@@ -620,7 +629,7 @@ class QueueServer implements CrawlConstants, Join
     {
         L\crawlLog("Repeating Crawl Check for Swap...");
         if (empty($this->repeat_type) || $this->repeat_type <= 0) {
-            L\crawlLog("...not a repeating crawl, no swap needed," .
+            L\crawlLog("SW...not a repeating crawl, no swap needed, " .
                 "continuing crawl.");
             return;
         }
@@ -631,7 +640,7 @@ class QueueServer implements CrawlConstants, Join
             $this->crawl_time . ".txt";
         if ($this->isAScheduler() && file_exists($start_swap_file) &&
             !file_exists($finish_swap_file)) {
-            L\crawlLog("...performing scheduler swap activities.");
+            L\crawlLog("SW...performing scheduler swap activities.");
             // Delete everything associated with the queue
             $delete_files = [
                 C\CRAWL_DIR . "/cache/" . self::network_base_name .
@@ -672,23 +681,27 @@ class QueueServer implements CrawlConstants, Join
             DoubleIndexBundle::getStartSchedule($dir_name, $this->channel);
             file_put_contents($finish_swap_file, time());
             unlink($start_swap_file);
-            L\crawlLog("...done scheduler swap activities!!");
+            L\crawlLog("SW...done scheduler swap activities!!");
             return;
         }
-        if ($this->isAIndexer() && $this->index_archive->swapTimeReached()) {
+        if ($this->isAIndexer() && ($this->index_archive->swapTimeReached()
+            || $this->debug == 'FORCE_SWAP')) {
             if (!file_exists($start_swap_file) &&
                 !file_exists($finish_swap_file)) {
-                L\crawlLog("...swapping live and search crawl!!");
-                L\crawlLog("...writing StartSwap file for scheduler !!");
-                L\crawlLog("...indexer waits for scheduler to do swap");
+                L\crawlLog("SW...swapping live and search crawl!!");
+                L\crawlLog("SW...writing StartSwap file for scheduler !!");
+                L\crawlLog("SW...indexer waits for scheduler to do swap");
                 file_put_contents($start_swap_file, time());
             }
             if (!file_exists($start_swap_file) &&
                 file_exists($finish_swap_file)) {
-                L\crawlLog("...indexer performing swap activities");
+                L\crawlLog("SW...indexer performing swap activities");
                 $this->index_archive->swapActiveBundle();
                 unlink($finish_swap_file);
-                    L\crawlLog("...done indexer swap activities!!");
+                L\crawlLog("SW...done indexer swap activities!!");
+            }
+            if ($this->debug == 'FORCE_SWAP') {
+                $this->debug = "";
             }
             return;
         }
@@ -773,7 +786,7 @@ class QueueServer implements CrawlConstants, Join
             case self::ARCHIVE_CRAWL:
                 if ($this->isAIndexer()) {
                     $this->processRecrawlRobotUrls();
-                    if (!file_exists(C\CRAWL_DIR."/schedules/".
+                    if (!file_exists(C\CRAWL_DIR . "/schedules/".
                         self::schedule_name . $this->crawl_time . ".txt")) {
                         $this->writeArchiveCrawlInfo();
                     }
@@ -919,6 +932,12 @@ class QueueServer implements CrawlConstants, Join
             "...");
         if (file_exists($message_file)) {
             $info = unserialize(file_get_contents($message_file));
+            if (isset($info[self::DEBUG])) {
+                $this->debug = $info[self::DEBUG];
+            L\crawlLog("Found debug message: " . $this->debug);
+                unlink($message_file);
+                return $old_info;
+            }
             if (empty($info[$this->server_type])) {
                 $info[$this->server_type] = true;
                 if ($this->server_type == self::BOTH ||
@@ -946,6 +965,7 @@ class QueueServer implements CrawlConstants, Join
                         $this->stopCrawl();
                     }
                     $this->startCrawl($info);
+                    $info[self::STATUS] = self::CONTINUE_STATE;
                     if (!$is_scheduler) {
                         L\crawlLog("Starting new crawl. Timestamp:" .
                             $this->crawl_time);
@@ -1004,6 +1024,7 @@ class QueueServer implements CrawlConstants, Join
                             $this->stopCrawl();
                         }
                         $this->startCrawl($info);
+                        $info[self::STATUS] = self::CONTINUE_STATE;
                         if (!$is_scheduler) {
                             L\crawlLog("Resuming crawl");
                             $this->writeAdminMessage("RESUME_CRAWL");
@@ -1117,10 +1138,10 @@ class QueueServer implements CrawlConstants, Join
             $this->writeAdminMessage("SHUTDOWN_QUEUE");
         }
         if (!isset($this->web_queue->to_crawl_queue)) {
-            L\crawlLog("URL queue appears to be empty or null");
+            L\crawlLog("DQ URL queue appears to be empty or null");
             return;
         }
-        L\crawlLog("Writing queue contents back to schedules...");
+        L\crawlLog("DQ Writing queue contents back to schedules...");
         $dir = C\CRAWL_DIR . "/schedules/" . self::schedule_data_base_name .
             $this->crawl_time;
         if (!file_exists($dir)) {
@@ -1152,8 +1173,8 @@ class QueueServer implements CrawlConstants, Join
         $schedule_data[self::TO_CRAWL] = [];
         $fh = $this->web_queue->openUrlArchive();
         for ($time = 1; $time < $count; $time++) {
-            L\crawlTimeoutLog("..have written %s urls of %s urls so far", $time,
-                $count);
+            L\crawlTimeoutLog("DQ..have written %s urls of %s urls so far",
+                $time, $count);
             $tmp =  $this->web_queue->peekQueue($time, $fh);
             list($url, $weight, , ) = $tmp;
             // if queue error skip
@@ -1454,7 +1475,7 @@ class QueueServer implements CrawlConstants, Join
         $count = $this->web_queue->to_crawl_queue->count;
         $fh = $this->web_queue->openUrlArchive();
         for ($i = $count; $i > 0; $i--) {
-            L\crawlTimeoutLog("..Scheduler: Removing least url %s of %s ".
+            L\crawlTimeoutLog("CW..Scheduler: Removing least url %s of %s ".
                 "from queue.", ($count - $i), floor($count/2));
             $tmp = $this->web_queue->peekQueue($i, $fh);
             list($url, $weight, $flag, $probe) = $tmp;
@@ -1918,7 +1939,8 @@ class QueueServer implements CrawlConstants, Join
     {
         $memory_limit = L\metricToInt(ini_get("memory_limit"));
         $current_usage = memory_get_usage();
-        if ((0.7 * $memory_limit) < $current_usage) {
+        if ((0.7 * $memory_limit) < $current_usage ||
+            in_array($this->debug, ['EXCEED_MEMORY', 'EXCEED_MEMORY_HARD'])) {
             L\crawlLog("Indexer memory usage threshold exceeded!!!");
             L\crawlLog("...Threshold is: " . (0.7 * $memory_limit));
             L\crawlLog("...Current usage is: " . $current_usage);
@@ -1934,7 +1956,11 @@ class QueueServer implements CrawlConstants, Join
             $current_usage = memory_get_usage();
             L\crawlLog("Done index bundle reset, current memory usage is: ".
                 $current_usage);
-            if ((0.7 * $memory_limit) < $current_usage) {
+            if ($this->debug == 'EXCEED_MEMORY') {
+                $this->debug = "";
+            }
+            if ((0.7 * $memory_limit) < $current_usage ||
+                $this->debug == 'EXCEED_MEMORY_HARD') {
                 L\crawlLog("!!!Indexer usage still exceeds threshold, exiting");
                 exit();
             }
@@ -1991,9 +2017,9 @@ class QueueServer implements CrawlConstants, Join
                 $this->web_queue->addGotRobotTxtFilter($robot_host);
                 $scheme = UrlParser::getScheme($robot_host);
                 if ($scheme == "gopher") {
-                    $robot_url = $robot_host."/0/robots.txt";
+                    $robot_url = $robot_host . "/0/robots.txt";
                 } else {
-                    $robot_url = $robot_host."/robots.txt";
+                    $robot_url = $robot_host . "/robots.txt";
                 }
                 if ($this->web_queue->containsUrlQueue($robot_url)) {
                     L\crawlLog("Scheduler Removing $robot_url from queue");
@@ -2145,7 +2171,7 @@ class QueueServer implements CrawlConstants, Join
                     unset($this->waiting_hosts[$hash_host]);
                         //allows crawl-delayed host to be scheduled again
                 }
-                L\crawlLog("Scheduler Done removing host delayed for schedule ".
+                L\crawlLog("Scheduler done removing host delayed for schedule ".
                     $sites[self::SCHEDULE_TIME]);
                 $now = time(); /* no schedule should take more than one hour
                     on the other hand schedule data might be waiting for days
@@ -2449,9 +2475,9 @@ class QueueServer implements CrawlConstants, Join
     {
         $i = 1; // array implementation of priority queue starts at 1 not 0
         $fetch_size = 0;
-        L\crawlLog("Scheduler: Start Produce Fetch Batch.");
-        L\crawlLog("Crawl Time is: ". $this->crawl_time);
-        L\crawlLog("Memory usage is " . memory_get_usage() );
+        L\crawlLog("FB Scheduler: Start Produce Fetch Batch.");
+        L\crawlLog("FB Crawl Time is: ". $this->crawl_time);
+        L\crawlLog("FB Memory usage is " . memory_get_usage() );
         $count = $this->web_queue->to_crawl_queue->count;
         $schedule_time = time();
         $first_line = $this->calculateScheduleMetaInfo($schedule_time);
@@ -2461,7 +2487,7 @@ class QueueServer implements CrawlConstants, Join
         $time_per_request_guess = C\MINIMUM_FETCH_LOOP_TIME ;
             // it would be impressive if we can achieve this speed
         $current_crawl_index = -1;
-        L\crawlLog("Scheduler: Trying to Produce Fetch Batch; " .
+        L\crawlLog("FB Scheduler: Trying to Produce Fetch Batch; " .
             "Queue Size $count");
         $start_time = microtime(true);
         $fh = $this->web_queue->openUrlArchive();
@@ -2477,7 +2503,7 @@ class QueueServer implements CrawlConstants, Join
         $max_queue_size =  C\NUM_URLS_QUEUE_RAM -
             C\SEEN_URLS_BEFORE_UPDATE_SCHEDULER * $max_links;
         while ($i <= $count && $fetch_size < C\MAX_FETCH_SIZE) {
-            L\crawlTimeoutLog("..Scheduler: still producing fetch batch. ".
+            L\crawlTimeoutLog("FB..Scheduler: still producing fetch batch. ".
                 "Examining location %s in queue of %s.", $i, $count);
             //look in queue for url and its weight
             $tmp = $this->web_queue->peekQueue($i, $fh);
@@ -2485,7 +2511,7 @@ class QueueServer implements CrawlConstants, Join
             // if queue error remove entry any loop
             if ($tmp === false || strcmp($url, "LOOKUP ERROR") == 0) {
                 $delete_urls[$i] = false;
-                L\crawlLog("Scheduler: Removing lookup error at".
+                L\crawlLog("FB Scheduler: Removing lookup error at".
                     " $i during produce fetch");
                 $i++;
                 continue;
@@ -2672,17 +2698,17 @@ class QueueServer implements CrawlConstants, Join
         } //end while
         $this->web_queue->closeUrlArchive($fh);
         $new_time = microtime(true);
-        L\crawlLog("...Scheduler: Done selecting URLS for fetch batch time ".
+        L\crawlLog("FB...Scheduler: Done selecting URLS for fetch batch time ".
             "so far:". L\changeInMicrotime($start_time));
-        L\crawlLog("...Scheduler: Examined urls while making fetch batch:" .
+        L\crawlLog("FB...Scheduler: Examined urls while making fetch batch:" .
             ($i - 1));
-        L\crawlLog("...Scheduler: Number of waiting urls seen in queue:" .
+        L\crawlLog("FB...Scheduler: Number of waiting urls seen in queue:" .
             $num_waiting_urls);
         $num_deletes = count($delete_urls);
         $k = 0;
         foreach ($delete_urls as $delete_url) {
             $k++;
-            L\crawlTimeoutLog("..Scheduler: Removing selected url %s of %s ".
+            L\crawlTimeoutLog("FB..Scheduler: Removing selected url %s of %s ".
                 "from queue.", $k, $num_deletes);
             if ($delete_url) {
                 $this->web_queue->removeQueue($delete_url);
@@ -2692,7 +2718,7 @@ class QueueServer implements CrawlConstants, Join
                 $this->web_queue->to_crawl_queue->poll($k);
             }
         }
-        L\crawlLog("...Scheduler: Removed $k URLS for fetch batch from ".
+        L\crawlLog("FB...Scheduler: Removed $k URLS for fetch batch from ".
             "queue in time: " . L\changeInMicrotime($new_time));
         $new_time = microtime(true);
         if (isset($sites) && count($sites) > 0 ) {
@@ -2722,8 +2748,9 @@ class QueueServer implements CrawlConstants, Join
             $num_sites = count($sites);
             $k = 0;
             foreach ($sites as $site) {
-                L\crawlTimeoutLog("..Scheduler: Still Writing fetch schedule" .
-                    " %s of %s.", $k, $num_sites);
+                L\crawlTimeoutLog(
+                    "FB..Scheduler: Still Writing fetch schedule %s of %s.",
+                    $k, $num_sites);
                 $k++;
                 $extracted_etag = null;
                 list($url, $weight, $delay) = $site;
@@ -2777,17 +2804,17 @@ class QueueServer implements CrawlConstants, Join
                 fwrite($fh, $out_string);
             }
             fclose($fh);
-            L\crawlLog("...Scheduler: Sort URLS and write schedule time: ".
+            L\crawlLog("FB...Scheduler: Sort URLS and write schedule time: ".
                 L\changeInMicrotime($new_time));
-            L\crawlLog("Scheduler: End Produce Fetch Batch Memory usage".
+            L\crawlLog("FB Scheduler: End Produce Fetch Batch Memory usage: ".
                 memory_get_usage() );
-            L\crawlLog("Scheduler: Created fetch batch of size $num_sites.".
+            L\crawlLog("FB Scheduler: Created fetch batch of size $num_sites.".
                 " $num_deletes urls were deleted.".
                 " Queue size is now ". $this->web_queue->to_crawl_queue->count.
                 "...Total Time to create batch: ".
                 L\changeInMicrotime($start_time));
         } else {
-            L\crawlLog("Scheduler: No fetch batch created!! " .
+            L\crawlLog("FB Scheduler: No fetch batch created!! " .
                 "Time failing to make a fetch batch:" .
                 L\changeInMicrotime($start_time).". Loop properties:$i $count".
                 " $num_deletes urls were deleted in failed attempt.");
@@ -2795,9 +2822,9 @@ class QueueServer implements CrawlConstants, Join
             if ($num_deletes < 5 && $i >= $count &&
                     $count >= C\NUM_URLS_QUEUE_RAM -
                     C\SEEN_URLS_BEFORE_UPDATE_SCHEDULER * $max_links) {
-                L\crawlLog("Scheduler: Queue Full and Couldn't produce Fetch ".
-                    "Batch!! Or Delete any URLS!!!");
-                L\crawlLog("Scheduler: Rescheduling Queue Contents ".
+                L\crawlLog("FB Scheduler: Queue Full and Couldn't produce ".
+                    "Fetch Batch!! Or Delete any URLS!!!");
+                L\crawlLog("FB Scheduler: Rescheduling Queue Contents ".
                     "(not marking seen) to try to unjam!");
                 $this->dumpQueueToSchedules(true);
                 $this->clearWebQueue();
diff --git a/src/library/CrawlConstants.php b/src/library/CrawlConstants.php
index 52768208e..28f065b3f 100755
--- a/src/library/CrawlConstants.php
+++ b/src/library/CrawlConstants.php
@@ -111,6 +111,7 @@ interface CrawlConstants
     const HEIGHT = 'B';
     const WIDTH = 'C';
     const ROBOTS_TXT = 'D';
+    const DEBUG = 'E';
     // codes available here
     const DOC_DEPTH = 'M';
     const DOC_RANK = 'N';
diff --git a/src/library/CrawlDaemon.php b/src/library/CrawlDaemon.php
index 654b8552a..b99d5f3bc 100644
--- a/src/library/CrawlDaemon.php
+++ b/src/library/CrawlDaemon.php
@@ -111,7 +111,7 @@ class CrawlDaemon implements CrawlConstants
             $name_string = CrawlDaemon::getNameString(self::$name,
                 self::$subname);
             if (($now - $time) > C\PROCESS_TIMEOUT) {
-                crawlLog($name_string.": ".($now - $time) .
+                crawlLog($name_string . ": ".($now - $time) .
                     " seconds has elapsed since processHandler last called.",
                     null, true);
                 crawlLog("Timeout exceeded...", null, true);
@@ -129,7 +129,7 @@ class CrawlDaemon implements CrawlConstants
      * Used to send a message the given daemon or run the program in the
      * foreground.
      *
-     * @param array $argv an array of command line arguments. The argument
+     * @param array $init_argv an array of command line arguments. The argument
      *     start will check if the process control functions exists if these
      *     do they will fork and detach a child process to act as a daemon.
      *     a lock file will be created to prevent additional daemons from
@@ -144,7 +144,7 @@ class CrawlDaemon implements CrawlConstants
      *     to see if already running before starting
      * @param string $use_message echo'd if incorrect parameters sent
      */
-    public static function init($argv, $name, $exit_type = 1,
+    public static function init($init_argv, $name, $exit_type = 1,
         $use_message = "")
     {
         $use_message = ($use_message) ? $use_message :
@@ -157,8 +157,8 @@ class CrawlDaemon implements CrawlConstants
             "Additional arguments are described in Yioop documentation.\n";

         self::$name = $name;
-        if (isset($argv[2]) && $argv[2] != "none") {
-            self::$subname = $argv[2];
+        if (isset($init_argv[2]) && $init_argv[2] != "none") {
+            self::$subname = $init_argv[2];
         } else {
             self::$subname = "";
         }
@@ -168,24 +168,51 @@ class CrawlDaemon implements CrawlConstants
             echo "BAD REQUEST";
             exit();
         }
-        if (!isset($argv[1])) {
+        if (!isset($init_argv[1])) {
             echo $use_message;
             exit();
         }
         $messages_file = self::getMesssageFileName(self::$name, self::$subname);
-        switch ($argv[1]) {
+        switch ($init_argv[1]) {
+            case "child":
+                self::$mode = 'daemon';
+                $info = [];
+                $info[self::STATUS] = self::WAITING_START_MESSAGE_STATE;
+                if ($name != 'index') {
+                    file_put_contents($messages_file, serialize($info));
+                    chmod($messages_file, 0777);
+                }
+                $_SERVER["LOG_TO_FILES"] = true;
+                    // if false log messages are sent to the console
+                break;
+            case "debug":
+                $num_args = count($init_argv);
+                if ($num_args <= 3) {
+                    echo "Too few args. Might need to specify channel.\n";
+                } else {
+                    $last_arg = $init_argv[$num_args - 1];
+                    echo "Writing debug message to $messages_file\n";
+                    $info = [];
+                    $info[self::DEBUG] = $last_arg;
+                    file_put_contents($messages_file, serialize($info));
+                    chmod($messages_file, 0777);
+                }
+                exit();
             case "start":
                 $options = "";
                 $quote = (strstr(PHP_OS, "WIN")) ? '' : '"';
-                for ($i = 3; $i < count($argv); $i++) {
-                    $options .= $quote . $argv[$i]. $quote . " ";
+                for ($i = 3; $i < count($init_argv); $i++) {
+                    $options .= $quote . $init_argv[$i]. $quote . " ";
                 }
                 $options = trim($options);
-                $subname = (!isset($argv[2]) || $argv[2] == 'none') ?
+                $subname = (!isset($init_argv[2]) || $init_argv[2] == 'none') ?
                     'none' :self::$subname;
-                $name_prefix = (isset($argv[3])) ? $argv[3] : self::$subname;
+                $name_prefix = (isset($init_argv[3])) ? $init_argv[3] :
+                    self::$subname;
                 $name_string = CrawlDaemon::getNameString($name, $name_prefix);
-                echo "Starting $name_string...\n";
+                self::daemonLog("Starting $name_string...", $exit_type);
+                self::daemonLog("options: $name, $subname, $options",
+                    $exit_type);
                 CrawlDaemon::start($name, $subname, $options, $exit_type);
                 break;
             case "stop":
@@ -201,22 +228,31 @@ class CrawlDaemon implements CrawlConstants
                 }
                 $_SERVER["LOG_TO_FILES"] = false;
                 break;
-            case "child":
-                self::$mode = 'daemon';
-                $info = [];
-                $info[self::STATUS] = self::WAITING_START_MESSAGE_STATE;
-                if ($name != 'index') {
-                    file_put_contents($messages_file, serialize($info));
-                    chmod($messages_file, 0777);
-                }
-                $_SERVER["LOG_TO_FILES"] = true;
-                    // if false log messages are sent to the console
-                break;
             default:
                 echo $use_message;
                 exit();
         }
     }
+    /**
+     * Used to print a log message in a way that aids debugging of
+     * CrawlDaemon tasks where crawlLog() might not yet be set up.
+     * Sends the message to standard out if crawlLog is not set up;
+     * otherwise, sends it to crawlLog().
+     *
+     * @param string $msg string to log to either standard out or
+     *  to Yioop's crawlLog
+     * @param int $exit_type the exit_type used by init() and start();
+     *  values of absolute value > 2 are only used if crawlLog has
+     *  already been set up
+     */
+    public static function daemonLog($msg, $exit_type)
+    {
+        if (in_array($exit_type, [-2, -1, 0, 1, 2])) {
+            echo "$msg\n";
+        } else {
+            crawlLog($msg);
+        }
+    }
     /**
      * Used to start a daemon running in the background
      *
@@ -399,7 +435,7 @@ class CrawlDaemon implements CrawlConstants
      */
     public static function getNameString($name, $subname)
     {
-        return ($subname == "") ? $name : $subname . "-" . $name;
+        return ($subname === "") ? $name : $subname . "-" . $name;
     }
     /**
      * Returns the statuses of the running daemons
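
Taken together, the pieces above implement a simple file-based round trip between the command line and a running daemon. A minimal standalone sketch (not code from this commit; the file path is hypothetical, and the 'E' key is the value of the new CrawlConstants::DEBUG constant):

    <?php
    // Sender side, as in CrawlDaemon::init()'s new "debug" case: write a
    // one-key serialized array to the daemon's messages file.
    $messages_file = "/tmp/0-QueueServerMessages.txt"; // hypothetical path
    file_put_contents($messages_file, serialize(['E' => "FORCE_SWAP"]));
    chmod($messages_file, 0777);
    // Receiver side, as in the QueueServer and Fetcher hunks above: on each
    // pass of loop(), read and delete the file, stashing the message.
    $info = unserialize(file_get_contents($messages_file));
    if (isset($info['E'])) {
        $debug = $info['E']; // later triggers FORCE_SWAP etc., then is reset
        unlink($messages_file);
    }

Because the receiver clears its $debug field after acting on it (see the NOT_RUNNING, FORCE_SWAP, and EXCEED_MEMORY branches above), each debug message is intended to affect at most one pass through the loop.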