Revise proximity normalization calc, a=chris

Chris Pollett [2022-07-30 03h]
Revise proximity normalization calc, a=chris
Filename
src/library/index_bundle_iterators/IntersectIterator.php
diff --git a/src/library/index_bundle_iterators/IntersectIterator.php b/src/library/index_bundle_iterators/IntersectIterator.php
index 6791db79c..a48040827 100644
--- a/src/library/index_bundle_iterators/IntersectIterator.php
+++ b/src/library/index_bundle_iterators/IntersectIterator.php
@@ -225,7 +225,8 @@ class IntersectIterator extends IndexBundleIterator
                     $this->checkQuotes($position_lists)) {
                     $docs[$key][self::PROXIMITY] =
                         $this->computeProximity($position_lists, $len_lists,
-                            ($docs[$key][self::IS_DOC] ?? false));
+                            ($docs[$key][self::IS_DOC] ?? false),
+                            $docs[$key][self::DOC_LEN]);
                 } else {
                     $docs = [];
                 }
@@ -331,27 +332,20 @@ class IntersectIterator extends IndexBundleIterator
      * @param array &$word_len_lists length for each item of its position list
      * @param bool $is_doc whether this is the position list of a document
      *     or a link
+     * @param int $doc_len the length of the document
      * @return sum of inverse of all covers computed by plane sweep algorithm
      */
     public function computeProximity(&$word_position_lists, &$word_len_lists,
-        $is_doc)
+        $is_doc, $doc_len)
     {
         $num_iterators = $this->num_iterators;
         if ($num_iterators < 1) {
             return 0;
         }
         $covers = [];
-        $max_covers = 1;
-        foreach ($word_position_lists as $positions) {
-            if (!empty($positions)) {
-                $last_pos = $positions[count($positions) - 1];
-                $max_covers = max($max_covers, $last_pos);
-            }
-        }
         $position_list = $word_position_lists;
         $interval = [];
         $num_words = count($position_list);
-        $max_covers = max(1, $max_covers - $num_words);
         for ($i = 0; $i < $num_words; $i++) {
             $min = (!empty($position_list[$i])) ?
                 array_shift($position_list[$i]) : null;
@@ -408,7 +402,7 @@ class IntersectIterator extends IndexBundleIterator
         foreach ($covers as $cover) {
             $score += (1/($cover[1] - $cover[0] + 1));
         }
-        $score = ($num_words * $score)/$max_covers;
+        $score = ($num_words * $score)/max($doc_len, 1);
             // this will ensure the score is less than 1
         return $score;
     }
ViewGit