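Take 2 on last commit: drop pre-version-3 index handling from WordIterator, convert INDEX_FILE_MEMORY_LIMIT with metricToInt, return getWordInfo result directly instead of re-reading the cache after eviction, adjust ArcTool rebuild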
diff --git a/src/executables/ArcTool.php b/src/executables/ArcTool.php
index 2c8345806..da30b797f 100755
--- a/src/executables/ArcTool.php
+++ b/src/executables/ArcTool.php
@@ -1476,6 +1476,8 @@ EOD;
$recent_log_times[$i] = time();
}
$rebuild_dones = [];
+ $save_partition = 10;
+ echo "$next_partition < $save_partition\n";
while ($next_partition < $save_partition) {
if ($old_next_partition != $next_partition) {
$old_next_partition = $next_partition;
@@ -1548,7 +1550,6 @@ EOD;
*/
$this->rebuildIndexBundle($archive_path, $start_generation, false);
}
- $index_archive->forceSave();
echo "\nIndex $rebuild complete!\n";
}
/**
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index d565177ae..ed65f7a05 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -1539,7 +1539,7 @@ class IndexDocumentBundle implements CrawlConstants
static $memory_limit = 0;
if (!$memory_limit) {
$memory_limit =
- C\INDEX_FILE_MEMORY_LIMIT * C\MEMORY_FILL_FACTOR;
+ metricToInt(C\INDEX_FILE_MEMORY_LIMIT) * C\MEMORY_FILL_FACTOR;
}
if (memory_get_usage() > $memory_limit ||
count($file_handles) > self::MAX_POSTING_CACHE_ITEMS) {
diff --git a/src/library/IndexManager.php b/src/library/IndexManager.php
index 56a23aecb..ab867c8e9 100644
--- a/src/library/IndexManager.php
+++ b/src/library/IndexManager.php
@@ -286,15 +286,15 @@ class IndexManager implements CrawlConstants
return $tmp;
}
$index = self::getIndex($index_name);
- $pre_info = [];
$start_generation = ($start_generation < 0) ? 0 : $start_generation;
- $info_cache[$lookup_hash] = $index->getWordInfo($term_id,
+ $word_info = $index->getWordInfo($term_id,
$threshold, $start_generation, $num_distinct_generations,
$with_remaining_total);
+ $info_cache[$lookup_hash] = $word_info;
if (count($info_cache) >= self::INFO_CACHE_SIZE) {
array_shift($info_cache);
}
- return $info_cache[$lookup_hash];
+ return $word_info;
}
/**
* Finds posting info related to the most recent version
diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php
index da752e575..11d6bbd22 100644
--- a/src/library/index_bundle_iterators/WordIterator.php
+++ b/src/library/index_bundle_iterators/WordIterator.php
@@ -128,12 +128,6 @@ class WordIterator extends IndexBundleIterator
* @var int
*/
public $next_offset;
- /**
- * Used to keep track of whether getWordInfo might still get more
- * data on the search terms as advance generations
- * @var bool
- */
- public $no_more_generations;
/**
* The total number of shards that have data for this word
* @var int
@@ -329,41 +323,31 @@ class WordIterator extends IndexBundleIterator
$this->index_version = IndexManager::getVersion($index_name);
$word_info = IndexManager::getWordInfo($index_name, $word_key, -1, -1,
C\NUM_DISTINCT_GENERATIONS, true);
- if ($this->index_version < 3) {
- list($this->num_docs, $this->dictionary_info) = $word_info;
- } else {
- $this->total_num_docs = $word_info['TOTAL_NUM_DOCS'] ?? 0;
- $this->total_num_docs_and_links =
- $word_info['TOTAL_NUM_LINKS_AND_DOCS'] ?? 0;
- $this->max_items_per_partition =
- $word_info['MAX_ITEMS_PER_PARTITION'] ??
- PartitionDocumentBundle::MAX_ITEMS_PER_FILE;
- $this->avg_items_per_partition =
- $word_info['AVG_ITEMS_PER_PARTITION'] ??
- PartitionDocumentBundle::MAX_ITEMS_PER_FILE;
- $this->total_number_of_partitions =
- $word_info['TOTAL_NUMBER_OF_PARTITIONS'] ?? 0;
- $this->num_docs = $word_info['TOTAL_COUNT'] ?? 0;
- $this->num_occurrences = $word_info['TOTAL_OCCURRENCES'] ?? 0;
- $this->dictionary_info = $word_info['ROWS'] ?? [];
- $this->threshold_exceeded = $word_info['THESHOLD_EXCEEDED'] ??
- false;
- $this->archive_file = $word_info['ARCHIVE_FILE'] ?? "";
- }
+ $this->total_num_docs = $word_info['TOTAL_NUM_DOCS'] ?? 0;
+ $this->total_num_docs_and_links =
+ $word_info['TOTAL_NUM_LINKS_AND_DOCS'] ?? 0;
+ $this->max_items_per_partition =
+ $word_info['MAX_ITEMS_PER_PARTITION'] ??
+ PartitionDocumentBundle::MAX_ITEMS_PER_FILE;
+ $this->avg_items_per_partition =
+ $word_info['AVG_ITEMS_PER_PARTITION'] ??
+ PartitionDocumentBundle::MAX_ITEMS_PER_FILE;
+ $this->total_number_of_partitions =
+ $word_info['TOTAL_NUMBER_OF_PARTITIONS'] ?? 0;
+ $this->num_docs = $word_info['TOTAL_COUNT'] ?? 0;
+ $this->num_occurrences = $word_info['TOTAL_OCCURRENCES'] ?? 0;
+ $this->dictionary_info = $word_info['ROWS'] ?? [];
+ $this->threshold_exceeded = $word_info['THESHOLD_EXCEEDED'] ??
+ false;
+ $this->archive_file = $word_info['ARCHIVE_FILE'] ?? "";
if (empty($this->dictionary_info)) {
$this->empty = true;
$this->num_generations = 0;
} else {
- if ($this->index_version < 3) {
- ksort($this->dictionary_info);
- $this->dictionary_info = array_values($this->dictionary_info);
- }
$this->num_generations = count($this->dictionary_info);
$this->empty = ($this->num_generations == 0);
}
$this->term_info_computed = true;
- $this->no_more_generations = $this->index_version >= 3
- || count($info) < C\NUM_DISTINCT_GENERATIONS;
}
/**
* Hook function used by currentDocsWithWord to return the current block
@@ -1059,35 +1043,6 @@ class WordIterator extends IndexBundleIterator
$this->current_offset = ($is_ascending) ? $this->start_offset:
$this->last_offset;
}
- if (!$this->no_more_generations) {
- $gen_check = ($is_ascending) ?
- ($this->current_generation < $generation &&
- $this->generation_pointer >= $this->num_generations) :
- ($this->current_generation > $generation &&
- $this->generation_pointer <= 0);
- if ($gen_check) {
- $index_info = IndexManager::getWordInfo($this->index_name,
- $this->word_key, 0, $this->num_generations,
- C\NUM_DISTINCT_GENERATIONS, true);
- list($estimated_remaining_total, $info) = $index_info;
- if (count($info) > 0) {
- $this->num_docs = $this->seen_docs +
- $estimated_remaining_total;
- ksort($info);
- $this->dictionary_info = array_merge(
- $this->dictionary_info, array_values($info));
- $this->num_generations = count($this->dictionary_info);
- $this->no_more_generations = $this->index_version >= 3
- || count($info) < C\NUM_DISTINCT_GENERATIONS;
- //will increment back to where were next loop
- if ($is_ascending) {
- $this->generation_pointer--;
- } else {
- $this->generation_pointer++;
- }
- }
- }
- }
$gen_check = ($is_ascending) ?
($this->current_generation < $generation &&
$this->generation_pointer < $this->num_generations) :