diff --git a/src/configs/Config.php b/src/configs/Config.php
index aa2be2f1c..4fe1275b3 100755
--- a/src/configs/Config.php
+++ b/src/configs/Config.php
@@ -172,7 +172,7 @@ nsdefine('MIN_AD_VERSION', 36);
nsdefine('RESOURCES_WIKI_VERSION', 5);
/**
* nsdefine's the BASE_URL constant for this script
- * if run from the command line as part of index.php HTTP server scrip
+ * if run from the command line as part of index.php HTTP server script
* set the current working directory as well
*/
function initializeBaseUrlAndCurrentWorkingDirectory()
@@ -224,14 +224,17 @@ function initializeBaseUrlAndCurrentWorkingDirectory()
$port = ( ($http == "http://" && ($server_port != 80) ||
($http == "https://" && $server_port != 443))) ?
":" . $server_port : "";
- if (nsdefined('SERVER_CONTEXT')) {;
- $context = SERVER_CONTEXT;
- if (!empty($context['SERVER_NAME'])) {
- $_SERVER['SERVER_NAME'] = $context['SERVER_NAME'];
- }
+ if (nsdefined('SERVER_CONTEXT')) {
+ $context = SERVER_CONTEXT;
+ if (!empty($context['SERVER_NAME'])) {
+ $_SERVER['SERVER_NAME'] = $context['SERVER_NAME'];
}
+ }
$server_name = isset($_SERVER['SERVER_NAME']) ? $_SERVER['SERVER_NAME'] :
"localhost";
+ if (nsdefined('NAME_SERVER') && NAME_SERVER == "www." . $server_name) {
+ $server_name = NAME_SERVER;
+ }
if (strpos($server_name, ":") !== false && $server_name[0] != '[') {
$server_name = "[$server_name]"; //guessing ipv6 address
}
@@ -266,7 +269,6 @@ if (file_exists(BASE_DIR . "/configs/LocalConfig.php")) {
way to set work directory) */
require_once(BASE_DIR . "/configs/LocalConfig.php");
}
-initializeBaseUrlAndCurrentWorkingDirectory();
/** Yioop Namespace*/
nsdefine('NS', "seekquarry\\yioop\\");
/** controllers sub-namespace */
@@ -346,7 +348,9 @@ nsconddefine('MAINTENANCE_MESSAGE', <<<EOD
This Yioop! installation is undergoing maintenance, please come back later!
EOD
);
-if (MAINTENANCE_MODE && $_SERVER["SERVER_ADDR"] != $_SERVER["REMOTE_ADDR"]) {
+if (MAINTENANCE_MODE && !empty($_SERVER["SERVER_ADDR"]) &&
+ !empty($_SERVER["REMOTE_ADDR"]) &&
+ $_SERVER["SERVER_ADDR"] != $_SERVER["REMOTE_ADDR"]) {
echo MAINTENANCE_MESSAGE;
exit();
}
@@ -408,6 +412,7 @@ if (file_exists(WORK_DIRECTORY . PROFILE_FILE_NAME)) {
file_put_contents(WORK_DIRECTORY . PROFILE_FILE_NAME, $new_profile);
}
require_once WORK_DIRECTORY . PROFILE_FILE_NAME;
+ initializeBaseUrlAndCurrentWorkingDirectory();
nsdefine('PROFILE', true);
nsdefine('CRAWL_DIR', WORK_DIRECTORY);
if (is_dir(APP_DIR."/locale")) {
@@ -430,6 +435,7 @@ if (file_exists(WORK_DIRECTORY . PROFILE_FILE_NAME)) {
nsdefine("FIX_NAME_SERVER", true);
}
} else {
+ initializeBaseUrlAndCurrentWorkingDirectory();
if ((!isset( $_SERVER['SERVER_NAME']) ||
$_SERVER['SERVER_NAME']!=='localhost')
&& !nsdefined("NO_LOCAL_CHECK") && !nsdefined("WORK_DIRECTORY")
diff --git a/src/controllers/FetchController.php b/src/controllers/FetchController.php
index 6c1d2d0a8..beb10ac37 100755
--- a/src/controllers/FetchController.php
+++ b/src/controllers/FetchController.php
@@ -299,7 +299,7 @@ class FetchController extends Controller implements CrawlConstants
if (file_exists($this->crawl_status_file_name)) {
$crawl_status = unserialize(file_get_contents(
$this->crawl_status_file_name));
- if ($crawl_status['CRAWL_TIME'] != 0) {
+ if (!empty($crawl_status['CRAWL_TIME'])) {
$restart = false;
}
}
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 3daea00e0..30de04c36 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -1773,7 +1773,7 @@ class QueueServer implements CrawlConstants, Join
static $blocked = false;
if ($blocking && $blocked) {
L\crawlLog("Indexer waiting for merge tiers to ".
- "complete before write partition. B");
+ "complete before write partition.");
return;
}
if (!$blocking) {
@@ -1785,10 +1785,10 @@ class QueueServer implements CrawlConstants, Join
L\crawlLog("Indexer: Processing index data in $file...");
$start_time = microtime(true);
$start_total_time = microtime(true);
- $pre_sites = L\webdecode(file_get_contents($file));
- $len_urls = L\unpackInt(substr($pre_sites, 0, 4));
- $seen_urls_string = substr($pre_sites, 4, $len_urls);
- $pre_sites = substr($pre_sites, 4 + $len_urls);
+ $pre_sites_and_index = L\webdecode(file_get_contents($file));
+ $len_urls = L\unpackInt(substr($pre_sites_and_index, 0, 4));
+ $seen_urls_string = substr($pre_sites_and_index, 4, $len_urls);
+ $pre_sites_and_index = substr($pre_sites_and_index, 4 + $len_urls);
$sites[self::SEEN_URLS] = [];
$pos = 0;
$num = 0;
@@ -1828,12 +1828,30 @@ class QueueServer implements CrawlConstants, Join
return;
}
L\crawlLog("A. Indexer Load SEEN_URLS. Memory usage:".
- memory_get_usage() ." time: ".L\changeInMicrotime($start_time));
+ memory_get_usage() ." time: " . L\changeInMicrotime($start_time));
$sites[self::INVERTED_INDEX] = IndexShard::load("fetcher_shard",
- $pre_sites);
- unset($pre_sites);
+ $pre_sites_and_index);
+ if (empty($sites[self::INVERTED_INDEX])) {
+ L\crawlLog("Index data file inverted index empty or corrupt.");
+ L\crawlLog("Indexer Done Index Processing File: $file. " .
+ "Total time: " . L\changeInMicrotime($start_total_time));
+ unlink($file);
+ return;
+ }
+ $index_shard = $sites[self::INVERTED_INDEX];
+ $generation = $this->index_archive->initGenerationToAdd(
+ $index_shard->num_docs, $this, $blocking);
+ if ($generation == -1) {
+ L\crawlLog("Indexer waiting for merge tiers to ".
+ "complete before write partition. A");
+ $blocked = true;
+ // In this case, if we block, we will end up reprocessing the file
+ return; /* if we don't return here, we can process the rest of
+ the method */
+ }
+ unset($pre_sites_and_index);
L\crawlLog("B. Indexer Load Sent shard. Memory usage:".
- memory_get_usage() ." time: ".(L\changeInMicrotime($start_time)));
+ memory_get_usage() ." time: " . (L\changeInMicrotime($start_time)));
$start_time = microtime(true);
//do deduplication of summaries
if (isset($sites[self::SEEN_URLS]) &&
@@ -1860,7 +1878,7 @@ class QueueServer implements CrawlConstants, Join
L\crawlHash($link_url_parts[1], true)
. L\crawlHash($seen_sites[$i][self::URL], true)
. $reftype . substr(L\crawlHash(
- UrlParser::getHost($link_url_parts[5]) . "/", true), 1);
+ UrlParser::getHost($link_url_parts[5]) . "/", true), 1);
$seen_sites[$i][self::IS_DOC] = false;
} else {
$seen_sites[$i][self::IS_DOC] = true;
@@ -1872,40 +1890,29 @@ class QueueServer implements CrawlConstants, Join
$recent_urls_count++;
}
}
- if (isset($sites[self::INVERTED_INDEX])) {
- $index_shard = $sites[self::INVERTED_INDEX];
- $generation = $this->index_archive->initGenerationToAdd(
- $index_shard->num_docs, $this, $blocking);
- if ($generation == -1) {
- L\crawlLog("Indexer waiting for merge tiers to ".
- "complete before write partition. A");
- $blocked = true;
- return;
- }
- $summary_offsets = [];
- if (isset($seen_sites)) {
- $this->index_archive->addPages(
- $generation, self::SUMMARY_OFFSET, $seen_sites,
- $visited_urls_count);
- foreach ($seen_sites as $site) {
- if ($site[self::IS_DOC]) { // so not link
- $site_url = str_replace('|', "%7C", $site[self::URL]);
- $host = UrlParser::getHost($site_url);
- $hash = L\crawlHash($site_url, true).
- $site[self::HASH] .
- "d". substr(L\crawlHash($host."/", true), 1);
- } else {
- $hash = $site[self::HASH_URL];
- }
- $summary_offsets[$hash] = $site[self::SUMMARY_OFFSET];
+ $summary_offsets = [];
+ if (!empty($seen_sites)) {
+ $this->index_archive->addPages($generation, self::SUMMARY_OFFSET,
+ $seen_sites, $visited_urls_count);
+ foreach ($seen_sites as $site) {
+ if ($site[self::IS_DOC]) { // so not link
+ $site_url = str_replace('|', "%7C", $site[self::URL]);
+ $host = UrlParser::getHost($site_url);
+ $hash = L\crawlHash($site_url, true) . $site[self::HASH] .
+ "d". substr(L\crawlHash($host . "/", true), 1);
+ } else {
+ $hash = $site[self::HASH_URL];
}
- unset($seen_sites);
+ $summary_offsets[$hash] = $site[self::SUMMARY_OFFSET];
}
- L\crawlLog("C. Indexer init local shard, store ".
- "Summaries memory usage: ". memory_get_usage() .
- " time: " . L\changeInMicrotime($start_time));
- $start_time = microtime(true);
- // added summary offset info to inverted index data
+ unset($seen_sites);
+ }
+ L\crawlLog("C. Indexer init local shard, store " .
+ "Summaries memory usage: ". memory_get_usage() .
+ " time: " . L\changeInMicrotime($start_time));
+ $start_time = microtime(true);
+ // added summary offset info to inverted index data
+ if (!empty($summary_offsets)) {
$index_shard->changeDocumentOffsets($summary_offsets);
L\crawlLog("D. Indexer Update shard offsets. Memory usage: ".
memory_get_usage() . " time: " .
@@ -1913,16 +1920,16 @@ class QueueServer implements CrawlConstants, Join
$start_time = microtime(true);
$this->index_archive->addIndexData($index_shard);
$this->index_dirty = true;
+ L\crawlLog("E. Indexer Add index shard. Memory usage: ".
+ memory_get_usage() . " time: " .
+ L\changeInMicrotime($start_time));
}
- L\crawlLog("E. Indexer Add index shard. Memory usage: ".
- memory_get_usage() . " time: " .
- L\changeInMicrotime($start_time));
- L\crawlLog("Indexer Done Index Processing File: $file. Total time: ".
- L\changeInMicrotime($start_total_time));
if (isset($recent_urls)) {
$sites[self::RECENT_URLS] = $recent_urls;
$this->writeCrawlStatus($sites);
}
+ L\crawlLog("Indexer Done Index Processing File: $file. Total time: ".
+ L\changeInMicrotime($start_total_time));
if (file_exists($file)) {
//Haven't tracked down yet, but can try to delete twice giving warn
unlink($file);
diff --git a/src/locale/en_US/configure.ini b/src/locale/en_US/configure.ini
index 8939419f3..8b6be01b0 100644
--- a/src/locale/en_US/configure.ini
+++ b/src/locale/en_US/configure.ini
@@ -1444,7 +1444,7 @@ trending_element_term = "Term"
trending_element_score = "Score"
trending_element_date = "Computed %s"
trending_element_hourly_trend = "Hourly Trend Score for '%s' for Last Day"
-trending_element_daily_trend = "Dailly Trend Score for '%s' for Last Week"
+trending_element_daily_trend = "Daily Trend Score for '%s' for Last Week"
;
; ManageusersElement.php
manageusers_element_users = "User List"
diff --git a/src/models/SearchfiltersModel.php b/src/models/SearchfiltersModel.php
index 04fbec6da..bcd9a893f 100644
--- a/src/models/SearchfiltersModel.php
+++ b/src/models/SearchfiltersModel.php
@@ -60,8 +60,7 @@ class SearchfiltersModel extends Model
$this->dir_name = C\CRAWL_DIR . "/search_filters";
if (!file_exists(C\CRAWL_DIR . "/search_filters")) {
mkdir($this->dir_name);
- $this->db->setWorldPermissionsRecursive(
- $this->dir_name, true);
+ $this->db->setWorldPermissionsRecursive($this->dir_name, true);
}
}
/**