Try to make cache evictions work better, more parameter tuning, a=chris
Try to make cache evictions work better, more parameter tuning, a=chris
diff --git a/src/configs/Config.php b/src/configs/Config.php
index 1f77572da..a21749b87 100755
--- a/src/configs/Config.php
+++ b/src/configs/Config.php
@@ -757,7 +757,7 @@ nsconddefine('URL_FILTER_SIZE', MEMORY_PROFILE * 5000000);
*/
nsconddefine('NUM_URLS_QUEUE_RAM', MEMORY_PROFILE * 80000);
/** number of documents before next gen */
-nsconddefine('NUM_DOCS_PER_PARTITION', 4 * MEMORY_PROFILE * 10000);
+nsconddefine('NUM_DOCS_PER_PARTITION', 8 * MEMORY_PROFILE * 10000);
/** precision to round floating points document scores */
nsconddefine('PRECISION', 10);
/** maximum number of links to extract from a page on an initial pass*/
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 8b2d955ce..36a6fb081 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -515,6 +515,7 @@ class QueueServer implements CrawlConstants
if (($wake_up =
L\checkTimeInterval($this->sleep_start,
$this->sleep_duration)) > 0) {
+ L\crawlLog("SM: Scheduler message!!");
L\crawlLog("SM: CRAWL IN QUIESCENT/SLEEP MODE!!");
L\crawlLog("SM: Will continue processing crawl data, but");
L\crawlLog("SM: fetchers will stop downloading.");
diff --git a/src/library/BPlusTree.php b/src/library/BPlusTree.php
index 119a7ca11..4fa6ec0c8 100644
--- a/src/library/BPlusTree.php
+++ b/src/library/BPlusTree.php
@@ -47,6 +47,10 @@ class BPlusTree
* more)
*/
const MAX_KEYS = 501;
+ /**
+ * Maximum number of entries allowed in any cache used by this BPlusTree
+ */
+ const MAX_CACHE_SIZE = 200;
/**
*
*/
@@ -232,12 +236,15 @@ class BPlusTree
if (count($insert_node) > $this->parameters["MAX_KEYS"]) {
$this->flushLastPutNode();
$this->splitRecordsInLeaf($insert_node_path, $insert_node);
- $insert_cache = [];
+ $this->insert_node_cache = [];
$this->tree_path_cache = [];
$parent_path = $this->getParentFolder($insert_node_path);
$this->updateNodePath($parent_path);
return true;
}
+ if (count($insert_cache) >= self::MAX_CACHE_SIZE) {
+ $this->insert_node_cache = [];
+ }
$insert_cache[$insert_node_path] = $insert_node;
$this->last_insert_node_path = $insert_node_path;
return true;
@@ -556,6 +563,9 @@ class BPlusTree
if ($nodes == $current_folder) {
$return_folder = $current_folder;
} else if (file_exists($current_folder)) {
+ if (count($cache) >= self::MAX_CACHE_SIZE) {
+ $this->tree_path_cache = [];
+ }
$cache[$current_folder] = $current_folder;
$return_folder = $current_folder;
}
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index 4c8997e92..25af4ef5b 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -249,7 +249,7 @@ class IndexDocumentBundle implements CrawlConstants
$before_usage);
$advanced_partition = false;
while ($next_partition < $save_partition) {
- crawlLog("Adding Partition to dictionary...");
+ crawlLog("Indexer adding Partition to dictionary...");
crawlLog("...because save partition changed");
$switch_time = microtime(true);
// Save current shard dictionary to main dictionary
diff --git a/src/library/PartitionDocumentBundle.php b/src/library/PartitionDocumentBundle.php
index 0cd68cb38..9af277352 100644
--- a/src/library/PartitionDocumentBundle.php
+++ b/src/library/PartitionDocumentBundle.php
@@ -74,7 +74,7 @@ class PartitionDocumentBundle
* How many parition index files (.ix files - files without blob items)
* can be held cached in memory at one time
*/
- const INDEX_CACHE_SIZE = 10;
+ const INDEX_CACHE_SIZE = 50;
/**
* Extension for PartitionDocumentBundle partition files used to contain
* records
@@ -103,10 +103,6 @@ class PartitionDocumentBundle
*
*/
public $add_archive_cache = [null, "", -1];
- /**
- *
- */
- public $add_index_cache = ["", [], -1];
/**
*
*/
@@ -345,17 +341,17 @@ class PartitionDocumentBundle
public function loadPartitionIndex($partition, $force_load = false)
{
$index_file_name = $this->getPartitionIndex($partition);
- $this->index_cache['time'] ??= 0;
- $this->index_cache['time']++;
if (!empty($this->index_cache[$partition]) && !$force_load) {
$index = $this->index_cache[$partition][0];
$this->index_cache[$partition][1] = $this->index_cache['time'];
} else {
if (count($this->index_cache) > $this->index_cache_size) {
$oldest_partition = -1;
- $oldest_time = -1;
+ $oldest_time = 2 * time();
foreach ($this->index_cache as $index_partition => $cache_info){
- if (!empty($cache_info[1]) &&
+ if (empty($cache_info[1])) {
+ unset($this->index_cache[$index_partition]);
+ } else if (!empty($cache_info[1]) &&
$cache_info[1] < $oldest_time) {
$oldest_time = $cache_info[1];
$oldest_partition = $index_partition;
@@ -368,7 +364,7 @@ class PartitionDocumentBundle
}
$this->index_cache[$partition] = [
$this->table_tools->load($index_file_name),
- $this->index_cache['time']];
+ time()];
$index = $this->index_cache[$partition][0];
}
if (empty($index)) {
@@ -515,7 +511,7 @@ class PartitionDocumentBundle
$this->saveParameters();
}
/**
- *
+ *
*/
public function saveParameters()
{