diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php
index 9a6a5ed3f..88d48d3dd 100755
--- a/src/executables/Fetcher.php
+++ b/src/executables/Fetcher.php
@@ -402,6 +402,12 @@ class Fetcher implements CrawlConstants
* @var array
*/
public $proxy_servers;
+ /**
+ * Holds the value of a debug message that might have been sent from
+     * the command line during the current execution of loop()
+ * @var string
+ */
+ public $debug;
/**
* Before receiving any data from a queue server's web app this is
* the default assumed post_max_size in bytes
@@ -485,6 +491,7 @@ class Fetcher implements CrawlConstants
$this->crawl_order = self::PAGE_IMPORTANCE;
$this->max_depth = -1;
$this->summarizer_option = self::BASIC_SUMMARIZER;
+ $this->debug = "";
}
/**
* Return the fetcher's copy of a page processor for the given
@@ -594,6 +601,11 @@ class Fetcher implements CrawlConstants
"/schedules/{$prefix}FetcherMessages.txt";
if (file_exists($fetcher_message_file)) {
$info = unserialize(file_get_contents($fetcher_message_file));
+ if (isset($info[self::DEBUG])) {
+ $this->debug = $info[self::DEBUG];
+                unlink($fetcher_message_file);
+ continue;
+ }
unlink($fetcher_message_file);
if (isset($info[self::STATUS]) &&
$info[self::STATUS] == self::STOP_STATE) {
@@ -736,7 +748,11 @@ class Fetcher implements CrawlConstants
"Ensure minimum loop time by sleeping..." . $sleep_time);
sleep($sleep_time);
}
- } //end while
+ if (!empty($this->debug)) {
+ L\crawlLog("Debug Message: {$this->debug} has been processed");
+ $this->debug = "";
+ }
+        } //end while
L\crawlLog("Fetcher shutting down!!");
}
/**
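
With these hunks, each message-file check in the fetcher now recognizes a
DEBUG entry, stores it in $this->debug, and logs at the end of the loop()
iteration once the directive has been processed. A minimal sketch, assuming
a serialized-array messages file as above, of planting such an entry by hand
(the path, channel prefix, and directive value are placeholders; the "debug"
subcommand added to CrawlDaemon::init() further below writes the same kind
of payload):

    <?php
    // Hypothetical sketch: plant a DEBUG directive for a running fetcher.
    // "E" mirrors the new CrawlConstants::DEBUG code; the path, prefix,
    // and directive value below are placeholders, not part of this patch.
    $prefix = "0-"; // channel prefix, as in "{$prefix}FetcherMessages.txt"
    $file = "/path/to/crawl_dir/schedules/{$prefix}FetcherMessages.txt";
    file_put_contents($file, serialize(["E" => "SOME_DEBUG_DIRECTIVE"]));
    chmod($file, 0777); // match the permissions CrawlDaemon itself sets
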
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 96a14b448..3daea00e0 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -34,12 +34,10 @@ use seekquarry\yioop\configs as C;
use seekquarry\yioop\library as L;
use seekquarry\yioop\library\CrawlConstants;
use seekquarry\yioop\library\CrawlDaemon;
-use seekquarry\yioop\library\FetchUrl;
use seekquarry\yioop\library\IndexShard;
use seekquarry\yioop\library\IndexArchiveBundle;
use seekquarry\yioop\library\Join;
use seekquarry\yioop\library\processors\PageProcessor;
-use seekquarry\yioop\library\PhraseParser;
use seekquarry\yioop\library\DoubleIndexBundle;
use seekquarry\yioop\library\UrlParser;
use seekquarry\yioop\library\WebQueueBundle;
@@ -52,7 +50,7 @@ if (!defined("seekquarry\\yioop\\configs\\UNIT_TEST_MODE")) {
}
ini_set("memory_limit","2500M"); //so have enough memory to crawl big pages
/** For crawlHash function and Yioop constants */
-require_once __DIR__."/../library/Utility.php";
+require_once __DIR__ . "/../library/Utility.php";
if (!C\PROFILE) {
echo "Please configure the search engine instance ".
"by visiting its web interface on localhost.\n";
@@ -284,6 +282,12 @@ class QueueServer implements CrawlConstants, Join
* @var string
*/
public $process_name;
+ /**
+ * Holds the value of a debug message that might have been sent from
+     * the command line during the current execution of loop()
+ * @var string
+ */
+ public $debug;
/**
* A mapping between class field names and parameters which might
* be sent to a queue server via an info associative array.
@@ -348,6 +352,7 @@ class QueueServer implements CrawlConstants, Join
$this->waiting_hosts = [];
$this->server_name = "IndexerAndScheduler";
$this->process_name = "0-QueueServer";
+ $this->debug = "";
}
/**
* This is the function that should be called to get the queue server
@@ -518,13 +523,15 @@ class QueueServer implements CrawlConstants, Join
static $first_check;
static $last_check;
$time = time();
- if (!isset($first_check)) {
- $first_check = $time;
- $last_check = $time;
- }
- if ($time - $last_check < C\LOG_TIMEOUT ||
- $time - $first_check < C\PROCESS_TIMEOUT ) {
- return;
+ if ($this->debug != "NOT_RUNNING") {
+ if (!isset($first_check)) {
+ $first_check = $time;
+ $last_check = $time;
+ }
+ if ($time - $last_check < C\LOG_TIMEOUT ||
+ $time - $first_check < C\PROCESS_TIMEOUT ) {
+ return;
+ }
}
$last_check = $time;
L\crawlLog("Checking if both processes still running ...");
@@ -534,7 +541,7 @@ class QueueServer implements CrawlConstants, Join
$lines_to_check);
$time = time(); // just in case took time to compute lines
L\crawlLog("...Got " . $this->process_name . ".log lines");
- if (count($lines) < $lines_to_check) {
+ if ($this->debug != "NOT_RUNNING" && count($lines) < $lines_to_check) {
L\crawlLog("...Too few log lines to check if both processes " .
"running. Assume still running.");
return;
@@ -564,10 +571,12 @@ class QueueServer implements CrawlConstants, Join
}
L\crawlLog("...difference last timestamp and current time ".
($time - $last_process_timestamp));
- if ($time - $last_process_timestamp < C\PROCESS_TIMEOUT ) {
+ if ($this->debug != "NOT_RUNNING" &&
+ $time - $last_process_timestamp < C\PROCESS_TIMEOUT ) {
L\crawlLog("...done check. Both processes still running.");
return;
}
+ $this->debug = "";
L\crawlLog( "$process seems to have died restarting...");
$process_lines = array_slice($process_lines, -10);
$time_string = date("r", time());
@@ -589,7 +598,7 @@ class QueueServer implements CrawlConstants, Join
} else {
file_put_contents($error_log, $out_msg, FILE_APPEND);
}
- $init_args = ["QueueServer.php", "start", $this->channel, $process];
+ $init_args = ["QueueServer.php", "start", "{$this->channel}", $process];
L\crawlLog( "!!!!Writing to $error_log ".
"crash message about $process...");
CrawlDaemon::init($init_args, "QueueServer", -3);
@@ -620,7 +629,7 @@ class QueueServer implements CrawlConstants, Join
{
L\crawlLog("Repeating Crawl Check for Swap...");
if (empty($this->repeat_type) || $this->repeat_type <= 0) {
- L\crawlLog("...not a repeating crawl, no swap needed," .
+ L\crawlLog("SW...not a repeating crawl, no swap needed," .
"continuing crawl.");
return;
}
@@ -631,7 +640,7 @@ class QueueServer implements CrawlConstants, Join
$this->crawl_time . ".txt";
if ($this->isAScheduler() && file_exists($start_swap_file) &&
!file_exists($finish_swap_file)) {
- L\crawlLog("...performing scheduler swap activities.");
+ L\crawlLog("SW...performing scheduler swap activities.");
// Delete everything associated with the queue
$delete_files = [
C\CRAWL_DIR . "/cache/" . self::network_base_name .
@@ -672,23 +681,27 @@ class QueueServer implements CrawlConstants, Join
DoubleIndexBundle::getStartSchedule($dir_name, $this->channel);
file_put_contents($finish_swap_file, time());
unlink($start_swap_file);
- L\crawlLog("...done scheduler swap activities!!");
+ L\crawlLog("SW...done scheduler swap activities!!");
return;
}
- if ($this->isAIndexer() && $this->index_archive->swapTimeReached()) {
+ if ($this->isAIndexer() && ($this->index_archive->swapTimeReached()
+ || $this->debug == 'FORCE_SWAP')) {
if (!file_exists($start_swap_file) &&
!file_exists($finish_swap_file)) {
- L\crawlLog("...swapping live and search crawl!!");
- L\crawlLog("...writing StartSwap file for scheduler !!");
- L\crawlLog("...indexer waits for scheduler to do swap");
+ L\crawlLog("SW...swapping live and search crawl!!");
+ L\crawlLog("SW...writing StartSwap file for scheduler !!");
+ L\crawlLog("SW...indexer waits for scheduler to do swap");
file_put_contents($start_swap_file, time());
}
if (!file_exists($start_swap_file) &&
file_exists($finish_swap_file)) {
- L\crawlLog("...indexer performing swap activities");
+ L\crawlLog("SW...indexer performing swap activities");
$this->index_archive->swapActiveBundle();
unlink($finish_swap_file);
- L\crawlLog("...done indexer swap activities!!");
+ L\crawlLog("SW...done indexer swap activities!!");
+ }
+ if ($this->debug == 'FORCE_SWAP') {
+ $this->debug = "";
}
return;
}
@@ -773,7 +786,7 @@ class QueueServer implements CrawlConstants, Join
case self::ARCHIVE_CRAWL:
if ($this->isAIndexer()) {
$this->processRecrawlRobotUrls();
- if (!file_exists(C\CRAWL_DIR."/schedules/".
+ if (!file_exists(C\CRAWL_DIR . "/schedules/".
self::schedule_name . $this->crawl_time . ".txt")) {
$this->writeArchiveCrawlInfo();
}
@@ -919,6 +932,12 @@ class QueueServer implements CrawlConstants, Join
"...");
if (file_exists($message_file)) {
$info = unserialize(file_get_contents($message_file));
+ if (isset($info[self::DEBUG])) {
+ $this->debug = $info[self::DEBUG];
+ L\crawlLog("The following debug message found: ". $this->debug);
+ unlink($message_file);
+ return $old_info;
+ }
if (empty($info[$this->server_type])) {
$info[$this->server_type] = true;
if ($this->server_type == self::BOTH ||
@@ -946,6 +965,7 @@ class QueueServer implements CrawlConstants, Join
$this->stopCrawl();
}
$this->startCrawl($info);
+                    $info[self::STATUS] = self::CONTINUE_STATE;
if (!$is_scheduler) {
L\crawlLog("Starting new crawl. Timestamp:" .
$this->crawl_time);
@@ -1004,6 +1024,7 @@ class QueueServer implements CrawlConstants, Join
$this->stopCrawl();
}
$this->startCrawl($info);
+                    $info[self::STATUS] = self::CONTINUE_STATE;
if (!$is_scheduler) {
L\crawlLog("Resuming crawl");
$this->writeAdminMessage("RESUME_CRAWL");
@@ -1117,10 +1138,10 @@ class QueueServer implements CrawlConstants, Join
$this->writeAdminMessage("SHUTDOWN_QUEUE");
}
if (!isset($this->web_queue->to_crawl_queue)) {
- L\crawlLog("URL queue appears to be empty or null");
+ L\crawlLog("DQ URL queue appears to be empty or null");
return;
}
- L\crawlLog("Writing queue contents back to schedules...");
+ L\crawlLog("DQ Writing queue contents back to schedules...");
$dir = C\CRAWL_DIR . "/schedules/" . self::schedule_data_base_name .
$this->crawl_time;
if (!file_exists($dir)) {
@@ -1152,8 +1173,8 @@ class QueueServer implements CrawlConstants, Join
$schedule_data[self::TO_CRAWL] = [];
$fh = $this->web_queue->openUrlArchive();
for ($time = 1; $time < $count; $time++) {
- L\crawlTimeoutLog("..have written %s urls of %s urls so far", $time,
- $count);
+ L\crawlTimeoutLog("DQ..have written %s urls of %s urls so far",
+ $time, $count);
$tmp = $this->web_queue->peekQueue($time, $fh);
list($url, $weight, , ) = $tmp;
// if queue error skip
@@ -1454,7 +1475,7 @@ class QueueServer implements CrawlConstants, Join
$count = $this->web_queue->to_crawl_queue->count;
$fh = $this->web_queue->openUrlArchive();
for ($i = $count; $i > 0; $i--) {
- L\crawlTimeoutLog("..Scheduler: Removing least url %s of %s ".
+ L\crawlTimeoutLog("CW..Scheduler: Removing least url %s of %s ".
"from queue.", ($count - $i), floor($count/2));
$tmp = $this->web_queue->peekQueue($i, $fh);
list($url, $weight, $flag, $probe) = $tmp;
@@ -1918,7 +1939,8 @@ class QueueServer implements CrawlConstants, Join
{
$memory_limit = L\metricToInt(ini_get("memory_limit"));
$current_usage = memory_get_usage();
- if ((0.7 * $memory_limit) < $current_usage) {
+ if ((0.7 * $memory_limit) < $current_usage ||
+ in_array($this->debug, ['EXCEED_MEMORY', 'EXCEED_MEMORY_HARD'])) {
L\crawlLog("Indexer memory usage threshold exceeded!!!");
L\crawlLog("...Threshold is: " . (0.7 * $memory_limit));
L\crawlLog("...Current usage is: " . $current_usage);
@@ -1934,7 +1956,11 @@ class QueueServer implements CrawlConstants, Join
$current_usage = memory_get_usage();
L\crawlLog("Done index bundle reset, current memory usage is: ".
$current_usage);
- if ((0.7 * $memory_limit) < $current_usage) {
+ if ($this->debug == 'EXCEED_MEMORY') {
+ $this->debug = "";
+ }
+ if ((0.7 * $memory_limit) < $current_usage ||
+ $this->debug == 'EXCEED_MEMORY_HARD') {
L\crawlLog("!!!Indexer usage still exceeds threshold, exiting");
exit();
}
@@ -1991,9 +2017,9 @@ class QueueServer implements CrawlConstants, Join
$this->web_queue->addGotRobotTxtFilter($robot_host);
$scheme = UrlParser::getScheme($robot_host);
if ($scheme == "gopher") {
- $robot_url = $robot_host."/0/robots.txt";
+ $robot_url = $robot_host . "/0/robots.txt";
} else {
- $robot_url = $robot_host."/robots.txt";
+ $robot_url = $robot_host . "/robots.txt";
}
if ($this->web_queue->containsUrlQueue($robot_url)) {
L\crawlLog("Scheduler Removing $robot_url from queue");
@@ -2145,7 +2171,7 @@ class QueueServer implements CrawlConstants, Join
unset($this->waiting_hosts[$hash_host]);
//allows crawl-delayed host to be scheduled again
}
- L\crawlLog("Scheduler Done removing host delayed for schedule ".
+ L\crawlLog("Scheduler done removing host delayed for schedule ".
$sites[self::SCHEDULE_TIME]);
$now = time(); /* no schedule should take more than one hour
on the other hand schedule data might be waiting for days
@@ -2449,9 +2475,9 @@ class QueueServer implements CrawlConstants, Join
{
$i = 1; // array implementation of priority queue starts at 1 not 0
$fetch_size = 0;
- L\crawlLog("Scheduler: Start Produce Fetch Batch.");
- L\crawlLog("Crawl Time is: ". $this->crawl_time);
- L\crawlLog("Memory usage is " . memory_get_usage() );
+ L\crawlLog("FB Scheduler: Start Produce Fetch Batch.");
+ L\crawlLog("FB Crawl Time is: ". $this->crawl_time);
+ L\crawlLog("FB Memory usage is " . memory_get_usage() );
$count = $this->web_queue->to_crawl_queue->count;
$schedule_time = time();
$first_line = $this->calculateScheduleMetaInfo($schedule_time);
@@ -2461,7 +2487,7 @@ class QueueServer implements CrawlConstants, Join
$time_per_request_guess = C\MINIMUM_FETCH_LOOP_TIME ;
// it would be impressive if we can achieve this speed
$current_crawl_index = -1;
- L\crawlLog("Scheduler: Trying to Produce Fetch Batch; " .
+ L\crawlLog("FB Scheduler: Trying to Produce Fetch Batch; " .
"Queue Size $count");
$start_time = microtime(true);
$fh = $this->web_queue->openUrlArchive();
@@ -2477,7 +2503,7 @@ class QueueServer implements CrawlConstants, Join
$max_queue_size = C\NUM_URLS_QUEUE_RAM -
C\SEEN_URLS_BEFORE_UPDATE_SCHEDULER * $max_links;
while ($i <= $count && $fetch_size < C\MAX_FETCH_SIZE) {
- L\crawlTimeoutLog("..Scheduler: still producing fetch batch. ".
+ L\crawlTimeoutLog("FB..Scheduler: still producing fetch batch. ".
"Examining location %s in queue of %s.", $i, $count);
//look in queue for url and its weight
$tmp = $this->web_queue->peekQueue($i, $fh);
@@ -2485,7 +2511,7 @@ class QueueServer implements CrawlConstants, Join
// if queue error remove entry any loop
if ($tmp === false || strcmp($url, "LOOKUP ERROR") == 0) {
$delete_urls[$i] = false;
- L\crawlLog("Scheduler: Removing lookup error at".
+ L\crawlLog("FB Scheduler: Removing lookup error at".
" $i during produce fetch");
$i++;
continue;
@@ -2672,17 +2698,17 @@ class QueueServer implements CrawlConstants, Join
} //end while
$this->web_queue->closeUrlArchive($fh);
$new_time = microtime(true);
- L\crawlLog("...Scheduler: Done selecting URLS for fetch batch time ".
+ L\crawlLog("FB...Scheduler: Done selecting URLS for fetch batch time ".
"so far:". L\changeInMicrotime($start_time));
- L\crawlLog("...Scheduler: Examined urls while making fetch batch:" .
+ L\crawlLog("FB...Scheduler: Examined urls while making fetch batch:" .
($i - 1));
- L\crawlLog("...Scheduler: Number of waiting urls seen in queue:" .
+ L\crawlLog("FB...Scheduler: Number of waiting urls seen in queue:" .
$num_waiting_urls);
$num_deletes = count($delete_urls);
$k = 0;
foreach ($delete_urls as $delete_url) {
$k++;
- L\crawlTimeoutLog("..Scheduler: Removing selected url %s of %s ".
+ L\crawlTimeoutLog("FB..Scheduler: Removing selected url %s of %s ".
"from queue.", $k, $num_deletes);
if ($delete_url) {
$this->web_queue->removeQueue($delete_url);
@@ -2692,7 +2718,7 @@ class QueueServer implements CrawlConstants, Join
$this->web_queue->to_crawl_queue->poll($k);
}
}
- L\crawlLog("...Scheduler: Removed $k URLS for fetch batch from ".
+ L\crawlLog("FB...Scheduler: Removed $k URLS for fetch batch from ".
"queue in time: " . L\changeInMicrotime($new_time));
$new_time = microtime(true);
if (isset($sites) && count($sites) > 0 ) {
@@ -2722,8 +2748,9 @@ class QueueServer implements CrawlConstants, Join
$num_sites = count($sites);
$k = 0;
foreach ($sites as $site) {
- L\crawlTimeoutLog("..Scheduler: Still Writing fetch schedule" .
- " %s of %s.", $k, $num_sites);
+ L\crawlTimeoutLog(
+ "FB..Scheduler: Still Writing fetch schedule %s of %s.",
+ $k, $num_sites);
$k++;
$extracted_etag = null;
list($url, $weight, $delay) = $site;
@@ -2777,17 +2804,17 @@ class QueueServer implements CrawlConstants, Join
fwrite($fh, $out_string);
}
fclose($fh);
- L\crawlLog("...Scheduler: Sort URLS and write schedule time: ".
+ L\crawlLog("FB...Scheduler: Sort URLS and write schedule time: ".
L\changeInMicrotime($new_time));
- L\crawlLog("Scheduler: End Produce Fetch Batch Memory usage".
+ L\crawlLog("FB Scheduler: End Produce Fetch Batch Memory usage: ".
memory_get_usage() );
- L\crawlLog("Scheduler: Created fetch batch of size $num_sites.".
+ L\crawlLog("FB Scheduler: Created fetch batch of size $num_sites.".
" $num_deletes urls were deleted.".
" Queue size is now ". $this->web_queue->to_crawl_queue->count.
"...Total Time to create batch: ".
L\changeInMicrotime($start_time));
} else {
- L\crawlLog("Scheduler: No fetch batch created!! " .
+ L\crawlLog("FB Scheduler: No fetch batch created!! " .
"Time failing to make a fetch batch:" .
L\changeInMicrotime($start_time).". Loop properties:$i $count".
" $num_deletes urls were deleted in failed attempt.");
@@ -2795,9 +2822,9 @@ class QueueServer implements CrawlConstants, Join
if ($num_deletes < 5 && $i >= $count &&
$count >= C\NUM_URLS_QUEUE_RAM -
C\SEEN_URLS_BEFORE_UPDATE_SCHEDULER * $max_links) {
- L\crawlLog("Scheduler: Queue Full and Couldn't produce Fetch ".
- "Batch!! Or Delete any URLS!!!");
- L\crawlLog("Scheduler: Rescheduling Queue Contents ".
+ L\crawlLog("FB Scheduler: Queue Full and Couldn't produce ".
+ "Fetch Batch!! Or Delete any URLS!!!");
+ L\crawlLog("FB Scheduler: Rescheduling Queue Contents ".
"(not marking seen) to try to unjam!");
$this->dumpQueueToSchedules(true);
$this->clearWebQueue();
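
Taken together, the QueueServer hunks above act on four debug directives:
NOT_RUNNING bypasses the liveness short-circuits in checkProcessRunning()
so a restart of the co-process is forced; FORCE_SWAP triggers a
repeating-crawl swap even when swapTimeReached() is false; EXCEED_MEMORY
and EXCEED_MEMORY_HARD simulate a soft and a hard trip of the 70% memory
threshold (the soft directive is cleared after the index bundle reset, the
hard one still forces an exit). Given the "debug" case added to
CrawlDaemon::init() below, they could be sent roughly as follows (channel
0 is a placeholder):

    php src/executables/QueueServer.php debug 0 FORCE_SWAP
    php src/executables/QueueServer.php debug 0 EXCEED_MEMORY_HARD
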
diff --git a/src/library/CrawlConstants.php b/src/library/CrawlConstants.php
index 52768208e..28f065b3f 100755
--- a/src/library/CrawlConstants.php
+++ b/src/library/CrawlConstants.php
@@ -111,6 +111,7 @@ interface CrawlConstants
const HEIGHT = 'B';
const WIDTH = 'C';
const ROBOTS_TXT = 'D';
+    const DEBUG = 'E';
// codes available here
const DOC_DEPTH = 'M';
const DOC_RANK = 'N';
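
The new DEBUG code takes 'E', the next unused letter before the "codes
available here" marker; like the other single-character constants it is
used as a key in the serialized arrays exchanged through messages files.
A sketch of the payload the new "debug" case in CrawlDaemon::init()
produces (the directive value is a placeholder):

    <?php
    use seekquarry\yioop\library\CrawlConstants;
    $payload = serialize([CrawlConstants::DEBUG => "EXCEED_MEMORY"]);
    // $payload is now 'a:1:{s:1:"E";s:13:"EXCEED_MEMORY";}'
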
diff --git a/src/library/CrawlDaemon.php b/src/library/CrawlDaemon.php
index 654b8552a..b99d5f3bc 100644
--- a/src/library/CrawlDaemon.php
+++ b/src/library/CrawlDaemon.php
@@ -111,7 +111,7 @@ class CrawlDaemon implements CrawlConstants
$name_string = CrawlDaemon::getNameString(self::$name,
self::$subname);
if (($now - $time) > C\PROCESS_TIMEOUT) {
- crawlLog($name_string.": ".($now - $time) .
+            crawlLog($name_string . ": " . ($now - $time) .
" seconds has elapsed since processHandler last called.",
null, true);
crawlLog("Timeout exceeded...", null, true);
@@ -129,7 +129,7 @@ class CrawlDaemon implements CrawlConstants
* Used to send a message the given daemon or run the program in the
* foreground.
*
- * @param array $argv an array of command line arguments. The argument
+ * @param array $init_argv an array of command line arguments. The argument
* start will check if the process control functions exists if these
* do they will fork and detach a child process to act as a daemon.
* a lock file will be created to prevent additional daemons from
@@ -144,7 +144,7 @@ class CrawlDaemon implements CrawlConstants
* to see if already running before starting
* @param string $use_message echo'd if incorrect parameters sent
*/
- public static function init($argv, $name, $exit_type = 1,
+ public static function init($init_argv, $name, $exit_type = 1,
$use_message = "")
{
$use_message = ($use_message) ? $use_message :
@@ -157,8 +157,8 @@ class CrawlDaemon implements CrawlConstants
"Additional arguments are described in Yioop documentation.\n";
self::$name = $name;
- if (isset($argv[2]) && $argv[2] != "none") {
- self::$subname = $argv[2];
+ if (isset($init_argv[2]) && $init_argv[2] != "none") {
+ self::$subname = $init_argv[2];
} else {
self::$subname = "";
}
@@ -168,24 +168,51 @@ class CrawlDaemon implements CrawlConstants
echo "BAD REQUEST";
exit();
}
- if (!isset($argv[1])) {
+ if (!isset($init_argv[1])) {
echo $use_message;
exit();
}
$messages_file = self::getMesssageFileName(self::$name, self::$subname);
- switch ($argv[1]) {
+ switch ($init_argv[1]) {
+ case "child":
+ self::$mode = 'daemon';
+ $info = [];
+ $info[self::STATUS] = self::WAITING_START_MESSAGE_STATE;
+ if ($name != 'index') {
+ file_put_contents($messages_file, serialize($info));
+ chmod($messages_file, 0777);
+ }
+ $_SERVER["LOG_TO_FILES"] = true;
+ // if false log messages are sent to the console
+ break;
+ case "debug":
+ $num_args = count($init_argv);
+ if ($num_args <= 3) {
+ echo "Too few args. Might need to specify channel.\n";
+            } else {
+                $last_arg = $init_argv[$num_args - 1];
+                echo $messages_file . "\n";
+ $info = [];
+ $info[self::DEBUG] = $last_arg;
+ file_put_contents($messages_file, serialize($info));
+ chmod($messages_file, 0777);
+ }
+ exit();
case "start":
$options = "";
$quote = (strstr(PHP_OS, "WIN")) ? '' : '"';
- for ($i = 3; $i < count($argv); $i++) {
- $options .= $quote . $argv[$i]. $quote . " ";
+ for ($i = 3; $i < count($init_argv); $i++) {
+ $options .= $quote . $init_argv[$i]. $quote . " ";
}
$options = trim($options);
- $subname = (!isset($argv[2]) || $argv[2] == 'none') ?
+ $subname = (!isset($init_argv[2]) || $init_argv[2] == 'none') ?
'none' :self::$subname;
- $name_prefix = (isset($argv[3])) ? $argv[3] : self::$subname;
+ $name_prefix = (isset($init_argv[3])) ? $init_argv[3] :
+ self::$subname;
$name_string = CrawlDaemon::getNameString($name, $name_prefix);
- echo "Starting $name_string...\n";
+ self::daemonLog("Starting $name_string...", $exit_type);
+ self::daemonLog("options: $name, $subname, $options",
+ $exit_type);
CrawlDaemon::start($name, $subname, $options, $exit_type);
break;
case "stop":
@@ -201,22 +228,31 @@ class CrawlDaemon implements CrawlConstants
}
$_SERVER["LOG_TO_FILES"] = false;
break;
- case "child":
- self::$mode = 'daemon';
- $info = [];
- $info[self::STATUS] = self::WAITING_START_MESSAGE_STATE;
- if ($name != 'index') {
- file_put_contents($messages_file, serialize($info));
- chmod($messages_file, 0777);
- }
- $_SERVER["LOG_TO_FILES"] = true;
- // if false log messages are sent to the console
- break;
default:
echo $use_message;
exit();
}
}
+ /**
+     * Used to print a log message in a way that helps with debugging
+     * CrawlDaemon tasks where crawlLog() might not yet be set up.
+     * Sends the message to standard out if crawlLog is not set up;
+     * otherwise, sends it to crawlLog().
+     *
+     * @param string $msg string to log to either standard out or
+     * to Yioop's crawlLog
+     * @param int $exit_type the exit_type used by init() and start();
+     * values of absolute value > 2 imply crawlLog has already
+     * been set up
+ */
+ public static function daemonLog($msg, $exit_type)
+ {
+ if (in_array($exit_type, [-2, -1, 0, 1, 2])) {
+ echo "$msg\n";
+ } else {
+ crawlLog($msg);
+ }
+ }
/**
* Used to start a daemon running in the background
*
@@ -399,7 +435,7 @@ class CrawlDaemon implements CrawlConstants
*/
public static function getNameString($name, $subname)
{
- return ($subname == "") ? $name : $subname . "-" . $name;
+ return ($subname === "") ? $name : $subname . "-" . $name;
}
/**
* Returns the statuses of the running daemons
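
The new daemonLog() helper keys off the exit_type convention that init()
and start() already use: magnitudes of 2 or less occur before crawlLog()
has been configured, so the message is echoed to the console; larger
magnitudes, such as the -3 this patch passes on QueueServer's
crash-restart path, are taken to mean file logging is live. A hedged
usage sketch:

    <?php
    use seekquarry\yioop\library\CrawlDaemon;
    // |exit_type| <= 2: logging may not be set up yet, so echo to stdout
    CrawlDaemon::daemonLog("Starting 0-QueueServer...", 1);
    // |exit_type| > 2 (e.g. the -3 used on the crash-restart path):
    // crawlLog() is assumed to be configured, so the message goes there
    CrawlDaemon::daemonLog("restarting crashed Indexer...", -3);
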