Fix crash when Fetcher web_archive null, a=chris
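Skip page download and processing in the Fetcher loop when web_archive is empty; default a missing 'n' request field to ""; prefix QueueServer memory log lines with "Indexer"; check that $score[$index - 1] is set before comparing in StochasticTermSegmenter, a=chris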
diff --git a/src/controllers/ResourceController.php b/src/controllers/ResourceController.php
index 80a4ccc8f..b79966e77 100644
--- a/src/controllers/ResourceController.php
+++ b/src/controllers/ResourceController.php
@@ -186,7 +186,7 @@ class ResourceController extends Controller implements CrawlConstants
*/
public function getNameAndBaseFolder($is_src_folder = false)
{
- $name = $this->clean($_REQUEST['n'], "file_name");
+ $name = $this->clean($_REQUEST['n'] ?? "", "file_name");
$type = UrlParser::getDocumentType($name);
if (isset($_REQUEST['feed']) ||
(!empty($_REQUEST['t']) && $_REQUEST['t'] == 'feed')) {
diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php
index 1021c0053..34acbe75d 100755
--- a/src/executables/Fetcher.php
+++ b/src/executables/Fetcher.php
@@ -742,32 +742,37 @@ class Fetcher implements CrawlConstants
continue;
}
}
- switch ($this->crawl_type) {
- case self::WEB_CRAWL:
- $downloaded_pages = $this->downloadPagesWebCrawl();
- break;
- case self::ARCHIVE_CRAWL:
- if (isset($info[self::ARC_DATA])) {
- $downloaded_pages = $info[self::ARC_DATA];
- } else {
- $downloaded_pages = $this->downloadPagesArchiveCrawl();
- }
- break;
+ if (empty($this->web_archive)) {
+ L\crawlLog("Fetcher web_archive empty skipping page download");
+ } else {
+ switch ($this->crawl_type) {
+ case self::WEB_CRAWL:
+ $downloaded_pages = $this->downloadPagesWebCrawl();
+ break;
+ case self::ARCHIVE_CRAWL:
+ if (isset($info[self::ARC_DATA])) {
+ $downloaded_pages = $info[self::ARC_DATA];
+ } else {
+ $downloaded_pages =
+ $this->downloadPagesArchiveCrawl();
+ }
+ break;
+ }
+ if (isset($downloaded_pages["NO_PROCESS"])) {
+ unset($downloaded_pages["NO_PROCESS"]);
+ $summarized_site_pages = array_values($downloaded_pages);
+ $this->no_process_links = true;
+ } else {
+ $summarized_site_pages =
+ $this->processFetchPages($downloaded_pages);
+ $this->no_process_links = false;
+ }
+ L\crawlLog("Number of summarized pages ".
+ count($summarized_site_pages));
+ $force_send = (isset($info[self::END_ITERATOR]) &&
+ $info[self::END_ITERATOR]) ? true : false;
+ $this->updateFoundSites($summarized_site_pages, $force_send);
}
- if (isset($downloaded_pages["NO_PROCESS"])) {
- unset($downloaded_pages["NO_PROCESS"]);
- $summarized_site_pages = array_values($downloaded_pages);
- $this->no_process_links = true;
- } else{
- $summarized_site_pages =
- $this->processFetchPages($downloaded_pages);
- $this->no_process_links = false;
- }
- L\crawlLog("Number of summarized pages ".
- count($summarized_site_pages));
- $force_send = (isset($info[self::END_ITERATOR]) &&
- $info[self::END_ITERATOR]) ? true : false;
- $this->updateFoundSites($summarized_site_pages, $force_send);
$sleep_time = max(0, ceil($this->minimum_fetch_loop_time
- L\changeInMicrotime($start_time)));
if ($sleep_time > 0) {
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 05472dddd..3b2541a87 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -1997,9 +1997,9 @@ class QueueServer implements CrawlConstants, Join
if ((0.7 * $memory_limit) < $current_usage ||
in_array($this->debug, ['EXCEED_MEMORY', 'EXCEED_MEMORY_HARD'])) {
L\crawlLog("Indexer memory usage threshold exceeded!!!");
- L\crawlLog("...Threshold is: " . (0.7 * $memory_limit));
- L\crawlLog("...Current usage is: " . $current_usage);
- L\crawlLog("...Trying to free memory by resetting " .
+ L\crawlLog("...Indexer Threshold is: " . (0.7 * $memory_limit));
+ L\crawlLog("...Indexer Current usage is: " . $current_usage);
+ L\crawlLog("...Indexer trying to free memory by resetting " .
"index bundle.");
$this->index_archive->forceSave();
$this->index_archive = null;
diff --git a/src/library/StochasticTermSegmenter.php b/src/library/StochasticTermSegmenter.php
index 4d64658d9..089cfaa05 100644
--- a/src/library/StochasticTermSegmenter.php
+++ b/src/library/StochasticTermSegmenter.php
@@ -485,7 +485,8 @@ class StochasticTermSegmenter
}
$subdic = $subdic[$characters[$j]];
if (isset($subdic['$']) && (!isset($score[$j]) ||
- $score[$index - 1] + $subdic['$'] < $score[$j])) {
+ (isset($score[$index - 1]) &&
+ $score[$index - 1] + $subdic['$'] < $score[$j]))) {
$score[$j] = $score[$index - 1] +
$this->getScore($subdic['$']);
$path[$j] = $index - 1;
@@ -501,7 +502,8 @@ class StochasticTermSegmenter
}
$subdic = $subdic[$characters[$j]];
if (isset($subdic['$']) && (!isset($score[$j]) ||
- $score[$index - 1] + $subdic['$'] < $score[$j])) {
+ (isset($score[$index - 1]) &&
+ $score[$index - 1] + $subdic['$'] < $score[$j]))) {
$score[$j] = $score[$index - 1] +
$this->getScore($subdic['$']);
$path[$j] = $index - 1;