Try to make addPartitionPostingsDictionary more memory efficient, a=chris
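Scan the postings string for "\xFF" entry markers with substr_count()/strpos() instead of building a marker array via getEntryMarkers(), and lower ARC_TOOL_MEMORY_LIMIT from 3x to 2x MEMORY_PROFILE, a=chris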
diff --git a/src/configs/Config.php b/src/configs/Config.php
index ac9d72811..eb09cada4 100755
--- a/src/configs/Config.php
+++ b/src/configs/Config.php
@@ -792,7 +792,7 @@ nsconddefine('MIRROR_MEMORY_LIMIT', ceil(MEMORY_PROFILE/2) ."000M");
/** Max memory a ClassifierTrainer can use */
nsconddefine('CLASSIFIER_TRAINER_LIMIT', ceil(MEMORY_PROFILE/4) ."000M");
/** Max memory a QueueServer can use */
-nsconddefine('ARC_TOOL_MEMORY_LIMIT', (3 * MEMORY_PROFILE) . "000M");
+nsconddefine('ARC_TOOL_MEMORY_LIMIT', (2 * MEMORY_PROFILE) . "000M");
/** Max memory a TokenTool can use */
nsconddefine('TOKEN_TOOL_MEMORY_LIMIT', ceil(MEMORY_PROFILE/2) . "000M");
/** Used to control fraction of memory filled of current process
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index 2df17689f..346146b12 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -447,25 +447,23 @@ class IndexDocumentBundle implements CrawlConstants
}
crawlLog("Start Adding Partition Posting Info to Dictionary");
$start_time = microtime(true);
- $markers = $postings_tools->getEntryMarkers($postings_filename);
- $hash_postings_name = crawlHash($postings_filename);
- $postings_string = $postings_tools->table_cache[$hash_postings_name];
+ $postings_string = $postings_tools->load($postings_filename,
+ PackedTableTools::AS_STRING_MODE, true);
$temp_postings_filename = $base_folder . "/" .
self::TEMP_POSTINGS_FILENAME;
rename($postings_filename, $temp_postings_filename);
- unset($postings_tools->table_cache[$hash_postings_name]);
$posting_files_len = strlen($postings_string);
//add a marker for the end of the file as a string
$key_len = $this->postings_tools->key_len;
$this->last_entries = $last_entries_tools->load($last_entries_filename);
- $num_postings = count($markers) + 1;
+ $num_postings = substr_count($postings_string, "\xFF") + 1;
$last_marker = 0;
$out_postings = "";
$postings_offset = 0;
$fh = fopen($postings_filename, "w");
for ($i = 0; $i < $num_postings; $i++) {
- $cur_marker = $markers[$i] ?? null;
- $diff = ($cur_marker === null) ? $cur_marker :
+ $cur_marker = strpos($postings_string, "\xFF", $last_marker);
+ $diff = ($cur_marker === false) ? null :
$cur_marker - $last_marker;
$pre_row = substr($postings_string, $last_marker, $diff);
$last_marker = $cur_marker + 1;