Revise proximity normalization calc, a=chris
Revise proximity normalization calc, a=chris
diff --git a/src/library/index_bundle_iterators/IntersectIterator.php b/src/library/index_bundle_iterators/IntersectIterator.php
index 6791db79c..a48040827 100644
--- a/src/library/index_bundle_iterators/IntersectIterator.php
+++ b/src/library/index_bundle_iterators/IntersectIterator.php
@@ -225,7 +225,8 @@ class IntersectIterator extends IndexBundleIterator
$this->checkQuotes($position_lists)) {
$docs[$key][self::PROXIMITY] =
$this->computeProximity($position_lists, $len_lists,
- ($docs[$key][self::IS_DOC] ?? false));
+ ($docs[$key][self::IS_DOC] ?? false),
+ $docs[$key][self::DOC_LEN]);
} else {
$docs = [];
}
@@ -331,27 +332,20 @@ class IntersectIterator extends IndexBundleIterator
* @param array &$word_len_lists length for each item of its position list
* @param bool $is_doc whether this is the position list of a document
* or a link
+ * @param int $doc_len document length used to normalize the score (min 1)
* @return sum of inverse of all covers computed by plane sweep algorithm
*/
public function computeProximity(&$word_position_lists, &$word_len_lists,
- $is_doc)
+ $is_doc, $doc_len)
{
$num_iterators = $this->num_iterators;
if ($num_iterators < 1) {
return 0;
}
$covers = [];
- $max_covers = 1;
- foreach ($word_position_lists as $positions) {
- if (!empty($positions)) {
- $last_pos = $positions[count($positions) - 1];
- $max_covers = max($max_covers, $last_pos);
- }
- }
$position_list = $word_position_lists;
$interval = [];
$num_words = count($position_list);
- $max_covers = max(1, $max_covers - $num_words);
for ($i = 0; $i < $num_words; $i++) {
$min = (!empty($position_list[$i])) ?
array_shift($position_list[$i]) : null;
@@ -408,7 +402,7 @@ class IntersectIterator extends IndexBundleIterator
foreach ($covers as $cover) {
$score += (1/($cover[1] - $cover[0] + 1));
}
- $score = ($num_words * $score)/$max_covers;
+ $score = ($num_words * $score)/max($doc_len, 1);
// this will ensure the score is less than 1
return $score;
}