Take 2 on last

Chris Pollett [2024-01-23]
Take 2 on last
Filename
src/executables/ArcTool.php
src/library/IndexDocumentBundle.php
src/library/IndexManager.php
src/library/index_bundle_iterators/WordIterator.php
diff --git a/src/executables/ArcTool.php b/src/executables/ArcTool.php
index 2c8345806..da30b797f 100755
--- a/src/executables/ArcTool.php
+++ b/src/executables/ArcTool.php
@@ -1476,6 +1476,8 @@ EOD;
             $recent_log_times[$i] = time();
         }
         $rebuild_dones = [];
+        $save_partition = 10;
+        echo "$next_partition < $save_partition\n";
         while ($next_partition < $save_partition) {
             if ($old_next_partition != $next_partition) {
                 $old_next_partition = $next_partition;
@@ -1548,7 +1550,6 @@ EOD;
               */
             $this->rebuildIndexBundle($archive_path, $start_generation, false);
         }
-        $index_archive->forceSave();
         echo "\nIndex $rebuild complete!\n";
     }
     /**
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index d565177ae..ed65f7a05 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -1539,7 +1539,7 @@ class IndexDocumentBundle implements CrawlConstants
         static $memory_limit = 0;
         if (!$memory_limit) {
             $memory_limit =
-                C\INDEX_FILE_MEMORY_LIMIT * C\MEMORY_FILL_FACTOR;
+                metricToInt(C\INDEX_FILE_MEMORY_LIMIT) * C\MEMORY_FILL_FACTOR;
         }
         if (memory_get_usage() > $memory_limit ||
             count($file_handles) > self::MAX_POSTING_CACHE_ITEMS) {
diff --git a/src/library/IndexManager.php b/src/library/IndexManager.php
index 56a23aecb..ab867c8e9 100644
--- a/src/library/IndexManager.php
+++ b/src/library/IndexManager.php
@@ -286,15 +286,15 @@ class IndexManager implements CrawlConstants
             return $tmp;
         }
         $index = self::getIndex($index_name);
-        $pre_info = [];
         $start_generation = ($start_generation < 0) ? 0 : $start_generation;
-        $info_cache[$lookup_hash] = $index->getWordInfo($term_id,
+        $word_info = $index->getWordInfo($term_id,
             $threshold, $start_generation, $num_distinct_generations,
             $with_remaining_total);
+        $info_cache[$lookup_hash] = $word_info;
         if (count($info_cache) >= self::INFO_CACHE_SIZE) {
             array_shift($info_cache);
         }
-        return $info_cache[$lookup_hash];
+        return $word_info;
     }
     /**
      * Finds posting info related to the most recent version
diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php
index da752e575..11d6bbd22 100644
--- a/src/library/index_bundle_iterators/WordIterator.php
+++ b/src/library/index_bundle_iterators/WordIterator.php
@@ -128,12 +128,6 @@ class WordIterator extends IndexBundleIterator
      * @var int
      */
     public $next_offset;
-    /**
-     * Used to keep track of whether getWordInfo might still get more
-     * data on the search terms as advance generations
-     * @var bool
-     */
-    public $no_more_generations;
     /**
      * The total number of shards that have data for this word
      * @var int
@@ -329,41 +323,31 @@ class WordIterator extends IndexBundleIterator
         $this->index_version = IndexManager::getVersion($index_name);
         $word_info = IndexManager::getWordInfo($index_name, $word_key, -1, -1,
             C\NUM_DISTINCT_GENERATIONS, true);
-        if ($this->index_version < 3) {
-            list($this->num_docs, $this->dictionary_info) = $word_info;
-        } else {
-            $this->total_num_docs = $word_info['TOTAL_NUM_DOCS'] ?? 0;
-            $this->total_num_docs_and_links =
-                $word_info['TOTAL_NUM_LINKS_AND_DOCS'] ?? 0;
-            $this->max_items_per_partition =
-                $word_info['MAX_ITEMS_PER_PARTITION'] ??
-                PartitionDocumentBundle::MAX_ITEMS_PER_FILE;
-            $this->avg_items_per_partition =
-                $word_info['AVG_ITEMS_PER_PARTITION'] ??
-                PartitionDocumentBundle::MAX_ITEMS_PER_FILE;
-            $this->total_number_of_partitions =
-                $word_info['TOTAL_NUMBER_OF_PARTITIONS'] ?? 0;
-            $this->num_docs = $word_info['TOTAL_COUNT'] ?? 0;
-            $this->num_occurrences = $word_info['TOTAL_OCCURRENCES'] ?? 0;
-            $this->dictionary_info = $word_info['ROWS'] ?? [];
-            $this->threshold_exceeded = $word_info['THESHOLD_EXCEEDED'] ??
-                false;
-            $this->archive_file = $word_info['ARCHIVE_FILE'] ?? "";
-        }
+        $this->total_num_docs = $word_info['TOTAL_NUM_DOCS'] ?? 0;
+        $this->total_num_docs_and_links =
+            $word_info['TOTAL_NUM_LINKS_AND_DOCS'] ?? 0;
+        $this->max_items_per_partition =
+            $word_info['MAX_ITEMS_PER_PARTITION'] ??
+            PartitionDocumentBundle::MAX_ITEMS_PER_FILE;
+        $this->avg_items_per_partition =
+            $word_info['AVG_ITEMS_PER_PARTITION'] ??
+            PartitionDocumentBundle::MAX_ITEMS_PER_FILE;
+        $this->total_number_of_partitions =
+            $word_info['TOTAL_NUMBER_OF_PARTITIONS'] ?? 0;
+        $this->num_docs = $word_info['TOTAL_COUNT'] ?? 0;
+        $this->num_occurrences = $word_info['TOTAL_OCCURRENCES'] ?? 0;
+        $this->dictionary_info = $word_info['ROWS'] ?? [];
+        $this->threshold_exceeded = $word_info['THESHOLD_EXCEEDED'] ??
+            false;
+        $this->archive_file = $word_info['ARCHIVE_FILE'] ?? "";
         if (empty($this->dictionary_info)) {
             $this->empty = true;
             $this->num_generations = 0;
         } else {
-            if ($this->index_version < 3) {
-                ksort($this->dictionary_info);
-                $this->dictionary_info = array_values($this->dictionary_info);
-            }
             $this->num_generations = count($this->dictionary_info);
             $this->empty = ($this->num_generations == 0);
         }
         $this->term_info_computed = true;
-        $this->no_more_generations = $this->index_version >= 3
-            || count($info) < C\NUM_DISTINCT_GENERATIONS;
     }
     /**
      * Hook function used by currentDocsWithWord to return the current block
@@ -1059,35 +1043,6 @@ class WordIterator extends IndexBundleIterator
                 $this->current_offset = ($is_ascending) ? $this->start_offset:
                     $this->last_offset;
             }
-            if (!$this->no_more_generations) {
-                $gen_check = ($is_ascending) ?
-                    ($this->current_generation < $generation &&
-                    $this->generation_pointer >= $this->num_generations) :
-                    ($this->current_generation > $generation &&
-                    $this->generation_pointer <= 0);
-                if ($gen_check) {
-                    $index_info = IndexManager::getWordInfo($this->index_name,
-                        $this->word_key, 0, $this->num_generations,
-                        C\NUM_DISTINCT_GENERATIONS, true);
-                    list($estimated_remaining_total, $info) = $index_info;
-                    if (count($info) > 0) {
-                        $this->num_docs = $this->seen_docs +
-                            $estimated_remaining_total;
-                        ksort($info);
-                        $this->dictionary_info = array_merge(
-                            $this->dictionary_info, array_values($info));
-                        $this->num_generations = count($this->dictionary_info);
-                        $this->no_more_generations = $this->index_version >= 3
-                            || count($info) < C\NUM_DISTINCT_GENERATIONS;
-                        //will increment back to where were next loop
-                        if ($is_ascending) {
-                            $this->generation_pointer--;
-                        } else {
-                            $this->generation_pointer++;
-                        }
-                    }
-                }
-            }
             $gen_check = ($is_ascending) ?
                 ($this->current_generation < $generation &&
                 $this->generation_pointer < $this->num_generations) :
ViewGit