Improve estimates of num results
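Scale the IntersectIterator and UnionIterator num_docs estimates by max_solution_docs (derived from the constituent iterators' num_docs) instead of the index-wide total_num_docs; UnionIterator no longer takes a $total_num_docs constructor argument.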
diff --git a/src/library/index_bundle_iterators/IntersectIterator.php b/src/library/index_bundle_iterators/IntersectIterator.php
index 857c9eefb..c5426849a 100644
--- a/src/library/index_bundle_iterators/IntersectIterator.php
+++ b/src/library/index_bundle_iterators/IntersectIterator.php
@@ -52,6 +52,10 @@ class IntersectIterator extends IndexBundleIterator
* @var array
*/
public $index_bundle_iterators;
+ /**
+ * @var int doc count that advance() scales into the num_docs estimate
+ */
+ public $max_solution_docs;
/**
* Number of elements in $this->index_bundle_iterators
* @var int
@@ -136,6 +140,7 @@ class IntersectIterator extends IndexBundleIterator
$this->num_words = count($word_iterator_map);
$this->num_iterators = count($index_bundle_iterators);
$this->num_docs = 40000000000; // a really big number
+ $this->max_solution_docs = 40000000000;
$this->quote_positions = $quote_positions;
$this->weight = $weight;
$this->results_per_block = 1;
@@ -160,8 +165,11 @@ class IntersectIterator extends IndexBundleIterator
}
$this->index_bundle_iterators[$i]->setResultsPerBlock(1);
}
- $this->total_num_docs = $this->index_bundle_iterators[0]->total_num_docs
- ?? $this->num_docs;
+ $this->total_num_docs =
+ $this->index_bundle_iterators[0]->total_num_docs ?? $this->num_docs;
+ $this->max_solution_docs =
+ $this->index_bundle_iterators[$this->least_num_doc_index
+ ]->num_docs ?? $this->num_docs;
}
/**
* Returns the iterators to the first document block that it could iterate
@@ -420,13 +428,13 @@ class IntersectIterator extends IndexBundleIterator
public function advance($gen_doc_offset = null)
{
$this->current_block_fresh = false;
- $this->seen_docs += 1;
$i = $this->least_num_doc_index;
$this->seen_docs_unfiltered =
$this->index_bundle_iterators[$i]->seen_docs;
if ($this->seen_docs_unfiltered > 0) {
+ $this->seen_docs += 1;
$this->num_docs =
- floor(($this->seen_docs * $this->total_num_docs) /
+ floor(($this->seen_docs * $this->max_solution_docs) /
$this->seen_docs_unfiltered);
}
$this->index_bundle_iterators[0]->advance($gen_doc_offset);
diff --git a/src/library/index_bundle_iterators/UnionIterator.php b/src/library/index_bundle_iterators/UnionIterator.php
index 069118087..538e4fd47 100644
--- a/src/library/index_bundle_iterators/UnionIterator.php
+++ b/src/library/index_bundle_iterators/UnionIterator.php
@@ -47,19 +47,20 @@ class UnionIterator extends IndexBundleIterator
* @var array
*/
public $index_bundle_iterators;
+ /**
+ * @var int sum of the constituent iterators' num_docs; scales num_docs
+ */
+ public $max_solution_docs;
/**
* Number of elements in $this->index_bundle_iterators
* @var int
*/
public $num_iterators;
/**
- * The number of documents in the current block before filtering
- * by restricted words
* @var int
*/
public $count_block_unfiltered;
/**
- * The number of iterated docs before the restriction test
* @var int
*/
public $seen_docs_unfiltered;
@@ -68,21 +69,15 @@ class UnionIterator extends IndexBundleIterator
* @var string
*/
public $index_name;
- /**
- * The total count of indexed documents in the current index
- * @var int
- */
- public $total_num_docs;
/**
* Creates a union iterator with the given parameters.
*
* @param object $index_bundle_iterators to use as a source of documents
* to iterate over
* @param string $index_name time_stamp of the index to use
- * @param int $total_num_docs total number of documents in the current index
*/
public function __construct($index_bundle_iterators,
- $index_name, $total_num_docs)
+ $index_name)
{
/*
estimate number of results by sum of all iterator counts,
@@ -100,24 +95,17 @@ class UnionIterator extends IndexBundleIterator
$this->seen_docs = 0;
$this->seen_docs_unfiltered = 0;
$this->index_name = $index_name;
- $this->total_num_docs = $total_num_docs;
$num_smaller = array_fill(0, $num_iterators, 0);
+ $this->max_solution_docs = 0; // sum of iterator num_docs, added below
for ($i = 0; $i < $num_iterators; $i++) {
$index_bundle_iterators[$i]->setResultsPerBlock(1);
$num_docs = $index_bundle_iterators[$i]->num_docs;
- $this->num_docs += $num_docs;
+ $this->max_solution_docs += $num_docs;
for ($j = 0; $j < $i; $j++) {
if ($num_docs < $index_bundle_iterators[$j]->num_docs) {
$num_smaller[$j]++;
}
}
- $this->seen_docs += $index_bundle_iterators[$i]->seen_docs;
- if (isset($this->index_bundle_iterators[$i]->seen_docs_unfiltered)){
- $this->seen_docs_unfiltered +=
- $this->index_bundle_iterators[$i]->seen_docs_unfiltered;
- } else {
- $this->seen_docs_unfiltered += $this->seen_docs;
- }
}
asort($num_smaller);
$i = 0;
@@ -305,7 +293,7 @@ class UnionIterator extends IndexBundleIterator
$this->seen_docs += $this->count_block;
$this->seen_docs_unfiltered += $this->count_block_unfiltered;
$this->num_docs =
- floor(($this->seen_docs * $this->total_num_docs) /
+ floor(($this->seen_docs * $this->max_solution_docs) /
$this->seen_docs_unfiltered);
if ($gen_doc_offset != null) {
foreach ($this->index_bundle_iterators as $iterator) {
diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php
index d1f99df9a..cdb3c075f 100755
--- a/src/models/PhraseModel.php
+++ b/src/models/PhraseModel.php
@@ -2002,7 +2002,7 @@ class PhraseModel extends ParallelModel
$index = IndexManager::getIndex($actual_index_name);
$index_info = $index->getArchiveInfo($index->dir_name);
$union_iterator = new I\UnionIterator($iterators,
- $actual_index_name, $index_info['VISITED_URLS_COUNT']);
+ $actual_index_name);
}
$raw = intval($raw);
if ($raw > 0) {