Try to make addPartitionPostingsDictionary more memory efficient, a=chris

Chris Pollett [2022-08-09 02:Aug:th]
Try to make addPartitionPostingsDictionary more memory efficient, a=chris
Filename
src/configs/Config.php
src/library/IndexDocumentBundle.php
diff --git a/src/configs/Config.php b/src/configs/Config.php
index ac9d72811..eb09cada4 100755
--- a/src/configs/Config.php
+++ b/src/configs/Config.php
@@ -792,7 +792,7 @@ nsconddefine('MIRROR_MEMORY_LIMIT', ceil(MEMORY_PROFILE/2) ."000M");
 /** Max memory a ClassifierTrainer can use */
 nsconddefine('CLASSIFIER_TRAINER_LIMIT', ceil(MEMORY_PROFILE/4) ."000M");
 /** Max memory an ArcTool can use */
-nsconddefine('ARC_TOOL_MEMORY_LIMIT', (3 * MEMORY_PROFILE) . "000M");
+nsconddefine('ARC_TOOL_MEMORY_LIMIT', (2 * MEMORY_PROFILE) . "000M");
 /** Max memory a TokenTool can use */
 nsconddefine('TOKEN_TOOL_MEMORY_LIMIT', ceil(MEMORY_PROFILE/2) . "000M");
 /** Used to control fraction of memory filled of current process
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index 2df17689f..346146b12 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -447,25 +447,23 @@ class IndexDocumentBundle implements CrawlConstants
         }
         crawlLog("Start Adding Partition Posting Info to Dictionary");
         $start_time = microtime(true);
-        $markers = $postings_tools->getEntryMarkers($postings_filename);
-        $hash_postings_name = crawlHash($postings_filename);
-        $postings_string = $postings_tools->table_cache[$hash_postings_name];
+        $postings_string = $postings_tools->load($postings_filename,
+            PackedTableTools::AS_STRING_MODE, true);
         $temp_postings_filename = $base_folder . "/" .
             self::TEMP_POSTINGS_FILENAME;
         rename($postings_filename, $temp_postings_filename);
-        unset($postings_tools->table_cache[$hash_postings_name]);
         $posting_files_len = strlen($postings_string);
         //add a marker for the end of the file as a string
         $key_len = $this->postings_tools->key_len;
         $this->last_entries = $last_entries_tools->load($last_entries_filename);
-        $num_postings = count($markers) + 1;
+        $num_postings = substr_count($postings_string, "\xFF") + 1;
         $last_marker = 0;
         $out_postings = "";
         $postings_offset = 0;
         $fh = fopen($postings_filename, "w");
         for ($i = 0; $i < $num_postings; $i++) {
-            $cur_marker = $markers[$i] ?? null;
-            $diff = ($cur_marker === null) ? $cur_marker :
+            $cur_marker = strpos($postings_string, "\xFF", $last_marker);
+            $diff = ($cur_marker === false) ? null :
                 $cur_marker - $last_marker;
             $pre_row = substr($postings_string, $last_marker, $diff);
             $last_marker = $cur_marker + 1;
ViewGit