get rid of debugging echo, rename variables in checkQuote for clarity, a=chris

Chris Pollett [2022-07-09 20:Jul:th]
get rid of debugging echo, rename variables in checkQuote for clarity, a=chris
Filename
src/library/index_bundle_iterators/IntersectIterator.php
src/models/PhraseModel.php
diff --git a/src/library/index_bundle_iterators/IntersectIterator.php b/src/library/index_bundle_iterators/IntersectIterator.php
index cf0094087..6b04c5c64 100644
--- a/src/library/index_bundle_iterators/IntersectIterator.php
+++ b/src/library/index_bundle_iterators/IntersectIterator.php
@@ -206,7 +206,6 @@ class IntersectIterator extends IndexBundleIterator
                     $docs[$key][self::RELEVANCE] +=
                         $docs[$key][self::RELEVANCE];
                 } else {
-                    echo "yo";
                     // first occurrence of term case
                     $i_docs = $this->index_bundle_iterators[
                         $this->word_iterator_map[$i]]->currentDocsWithWord();
@@ -259,8 +258,10 @@ class IntersectIterator extends IndexBundleIterator
      */
     public function checkQuotes(&$position_lists)
     {
-        foreach ($this->quote_positions as $qp) {
-            if ($this->checkQuote($position_lists, 0, "*", $qp) < 1) {
+        foreach ($this->quote_positions as
+            $ngram_positions_within_quoted_query) {
+            if ($this->checkQuote($position_lists, 0, "*",
+                $ngram_positions_within_quoted_query) < 1) {
                 return false;
             }
         }
@@ -276,27 +277,31 @@ class IntersectIterator extends IndexBundleIterator
      * @param mixed $next_pos * or int if * next_pos must be >= $cur_pos
      *     +len_search_term. $next_pos represents the position the next
      *     quoted term should be at
-     * @param array $qp $position_list_index => $len_of_list_term pairs
+     * @param array $ngram_positions_within_quoted_query pairs:
+     *      $ngram_position_within_quoted_query => $len_of_ngram or "*"
      * @return int -1 on failure, 0 on backtrack, 1 on success
      */
-    public function checkQuote(&$position_lists, $cur_pos, $next_pos, $qp)
+    public function checkQuote(&$position_lists, $cur_pos, $next_pos,
+        $ngram_positions_within_quoted_query)
     {
-        if ($qp == [] || $qp == null) {
+        if ($ngram_positions_within_quoted_query == [] ||
+            $ngram_positions_within_quoted_query == null) {
             return 1;
         }
-        $list_index = key($qp);
-        $len = $qp[$list_index];
-        unset($qp[$list_index]);
+        $ngram_index = key($ngram_positions_within_quoted_query);
+        $len = $ngram_positions_within_quoted_query[$ngram_index];
+        unset($ngram_positions_within_quoted_query[$ngram_index]);
         if (strcmp($len, "*") == 0) {
-            return $this->checkQuote($position_lists, $cur_pos, "*", $qp);
+            return $this->checkQuote($position_lists, $cur_pos, "*",
+                $ngram_positions_within_quoted_query);
         }
-        $list = $position_lists[$list_index];
+        $ngram_position_list = $position_lists[$ngram_index];
         $is_star = (strcmp($next_pos, "*") == 0);
         $next_pos = ($is_star) ? $cur_pos + $len: $next_pos;
         while(true) {
             $found = false;
-            foreach ($list as $elt) {
-                if ($elt >= $next_pos) {
+            foreach ($ngram_position_list as $occurrence_position) {
+                if ($occurrence_position >= $next_pos) {
                     $found = true;
                     break;
                 }
@@ -304,13 +309,14 @@ class IntersectIterator extends IndexBundleIterator
             if (!$found) {
                 return -1;
             }
-            if ($is_star || $elt == $next_pos) {
-                $check = $this->checkQuote($position_lists, $elt,
-                    $elt + $len, $qp);
+            if ($is_star || $occurrence_position == $next_pos) {
+                $check = $this->checkQuote($position_lists,
+                    $occurrence_position, $occurrence_position + $len,
+                    $ngram_positions_within_quoted_query);
                 if ($check != 0) {
                     return $check;
                 }
-                $next_pos = $elt + $len;
+                $next_pos = $occurrence_position + $len;
             } else {
                 return 0;
             }
diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php
index 0a2f84661..42b09abd3 100755
--- a/src/models/PhraseModel.php
+++ b/src/models/PhraseModel.php
@@ -562,33 +562,46 @@ class PhraseModel extends ParallelModel
         $phrase_parts = explode('"', $phrase_string);
         $base_words = [];
         $num_words = 0;
+        /*
+           array of arrays, one for each quoted set of terms in the query
+         */
         $quote_positions = [];
         foreach ($phrase_parts as $phrase_part) {
             if (empty(trim($phrase_part))) {
                 $quote_state = ($quote_state) ? false : true;
                 continue;
             }
-            /*still use original phrase string here to handle
+            /* still use original phrase string here to handle
                acronyms abbreviations and the like that use periods */
             if ($quote_state) {
                 $sub_parts = explode('*', $phrase_part);
                 $first_part = true;
-                $quote_position = [];
+                /*
+                   $term_positions_within_quoted_query is an associative array
+                   of pairs: position_of_quoted_ngram_in_original_query =>
+                   number_of_terms_the_ngram_corresponds_to, or "*" .
+                   An ngram can span more than one term, but is usually just
+                   1 term, so often this is an array of term positions each
+                   mapped to 1. A mapping "*n" => "*" indicates that a
+                   wildcard occurred before position n
+                 */
+                $term_positions_within_quoted_query = [];
                 foreach ($sub_parts as $sub_part) {
                     if (!$first_part) {
-                        $quote_position["*$num_words"] = "*";
+                        $term_positions_within_quoted_query["*$num_words"] =
+                            "*";
                     }
                     $new_words = PhraseParser::extractPhrases(
                         $sub_part, $locale_tag, $index_name, true);
                     $base_words = array_merge($base_words, $new_words);
                     foreach ($new_words as $new_word) {
                         $len = substr_count($new_word, " ") + 1;
-                        $quote_position[$num_words] = $len;
+                        $term_positions_within_quoted_query[$num_words] = $len;
                         $num_words++;
                     }
                     $first_part = false;
                 }
-                $quote_positions[] = $quote_position;
+                $quote_positions[] = $term_positions_within_quoted_query;
             } else {
                 $new_words =
                     PhraseParser::extractPhrases($phrase_part, $locale_tag,
ViewGit