Try to make cache evictions work better, more parameter tuning, a=chris

Chris Pollett [2021-09-12, hour 23 (minutes/seconds not recoverable)]
Try to make cache evictions work better, more parameter tuning, a=chris
Filename
src/configs/Config.php
src/executables/QueueServer.php
src/library/BPlusTree.php
src/library/IndexDocumentBundle.php
src/library/PartitionDocumentBundle.php
diff --git a/src/configs/Config.php b/src/configs/Config.php
index 1f77572da..a21749b87 100755
--- a/src/configs/Config.php
+++ b/src/configs/Config.php
@@ -757,7 +757,7 @@ nsconddefine('URL_FILTER_SIZE', MEMORY_PROFILE * 5000000);
  */
 nsconddefine('NUM_URLS_QUEUE_RAM', MEMORY_PROFILE * 80000);
 /** number of documents before next gen */
-nsconddefine('NUM_DOCS_PER_PARTITION', 4 * MEMORY_PROFILE * 10000);
+nsconddefine('NUM_DOCS_PER_PARTITION', 8 * MEMORY_PROFILE * 10000);
 /** precision to round floating points document scores */
 nsconddefine('PRECISION', 10);
 /** maximum number of links to extract from a page on an initial pass*/
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 8b2d955ce..36a6fb081 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -515,6 +515,7 @@ class QueueServer implements CrawlConstants
                 if (($wake_up =
                     L\checkTimeInterval($this->sleep_start,
                     $this->sleep_duration)) > 0) {
+                    L\crawlLog("SM: Scheduler message!!");
                     L\crawlLog("SM: CRAWL IN QUIESCENT/SLEEP MODE!!");
                     L\crawlLog("SM: Will continue processing crawl data, but");
                     L\crawlLog("SM: fetchers will stop downloading.");
diff --git a/src/library/BPlusTree.php b/src/library/BPlusTree.php
index 119a7ca11..4fa6ec0c8 100644
--- a/src/library/BPlusTree.php
+++ b/src/library/BPlusTree.php
@@ -47,6 +47,10 @@ class BPlusTree
      * more)
      */
     const MAX_KEYS = 501;
+    /**
+     * Maximum size of a cache of any type used by this BPlusTree
+     */
+    const MAX_CACHE_SIZE = 200;
     /**
      *
      */
@@ -232,12 +236,15 @@ class BPlusTree
         if (count($insert_node) > $this->parameters["MAX_KEYS"]) {
             $this->flushLastPutNode();
             $this->splitRecordsInLeaf($insert_node_path, $insert_node);
-            $insert_cache = [];
+            $this->insert_node_cache = [];
             $this->tree_path_cache = [];
             $parent_path = $this->getParentFolder($insert_node_path);
             $this->updateNodePath($parent_path);
             return true;
         }
+        if (count($insert_cache) >= self::MAX_CACHE_SIZE) {
+            $this->insert_node_cache = [];
+        }
         $insert_cache[$insert_node_path] = $insert_node;
         $this->last_insert_node_path = $insert_node_path;
         return true;
@@ -556,6 +563,9 @@ class BPlusTree
         if ($nodes == $current_folder) {
             $return_folder =  $current_folder;
         } else if (file_exists($current_folder)) {
+            if (count($cache) >= self::MAX_CACHE_SIZE) {
+                $this->tree_path_cache = [];
+            }
             $cache[$current_folder] = $current_folder;
             $return_folder =  $current_folder;
         }
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index 4c8997e92..25af4ef5b 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -249,7 +249,7 @@ class IndexDocumentBundle implements CrawlConstants
             $before_usage);
         $advanced_partition = false;
         while ($next_partition < $save_partition) {
-            crawlLog("Adding Partition to dictionary...");
+            crawlLog("Indexer adding Partition to dictionary...");
             crawlLog("...because save partition changed");
             $switch_time = microtime(true);
             // Save current shard dictionary to main dictionary
diff --git a/src/library/PartitionDocumentBundle.php b/src/library/PartitionDocumentBundle.php
index 0cd68cb38..9af277352 100644
--- a/src/library/PartitionDocumentBundle.php
+++ b/src/library/PartitionDocumentBundle.php
@@ -74,7 +74,7 @@ class PartitionDocumentBundle
      * How many parition index files (.ix files - files without blob items)
      * can be held cached in memory at one time
      */
-    const INDEX_CACHE_SIZE = 10;
+    const INDEX_CACHE_SIZE = 50;
     /**
      * Extension for PartitionDocumentBundle partition files used to contain
      * records
@@ -103,10 +103,6 @@ class PartitionDocumentBundle
      *
      */
     public $add_archive_cache = [null, "", -1];
-    /**
-     *
-     */
-    public $add_index_cache = ["", [], -1];
     /**
      *
      */
@@ -345,17 +341,17 @@ class PartitionDocumentBundle
     public function loadPartitionIndex($partition, $force_load = false)
     {
         $index_file_name = $this->getPartitionIndex($partition);
-        $this->index_cache['time'] ??= 0;
-        $this->index_cache['time']++;
         if (!empty($this->index_cache[$partition]) && !$force_load) {
             $index = $this->index_cache[$partition][0];
             $this->index_cache[$partition][1] = $this->index_cache['time'];
         } else {
             if (count($this->index_cache) > $this->index_cache_size) {
                 $oldest_partition = -1;
-                $oldest_time = -1;
+                $oldest_time = 2 * time();
                 foreach ($this->index_cache as $index_partition => $cache_info){
-                    if (!empty($cache_info[1]) &&
+                    if (empty($cache_info[1])) {
+                        unset($this->index_cache[$index_partition]);
+                    } else if (!empty($cache_info[1]) &&
                         $cache_info[1] < $oldest_time) {
                         $oldest_time = $cache_info[1];
                         $oldest_partition = $index_partition;
@@ -368,7 +364,7 @@ class PartitionDocumentBundle
             }
             $this->index_cache[$partition] = [
                 $this->table_tools->load($index_file_name),
-                $this->index_cache['time']];
+                time()];
             $index = $this->index_cache[$partition][0];
         }
         if (empty($index)) {
@@ -515,7 +511,7 @@ class PartitionDocumentBundle
         $this->saveParameters();
     }
     /**
-     *
+     *
      */
     public function saveParameters()
     {
ViewGit