diff --git a/src/library/index_bundle_iterators/IntersectIterator.php b/src/library/index_bundle_iterators/IntersectIterator.php index cf0094087..6b04c5c64 100644 --- a/src/library/index_bundle_iterators/IntersectIterator.php +++ b/src/library/index_bundle_iterators/IntersectIterator.php @@ -206,7 +206,6 @@ class IntersectIterator extends IndexBundleIterator $docs[$key][self::RELEVANCE] += $docs[$key][self::RELEVANCE]; } else { - echo "yo"; // first occurrence of term case $i_docs = $this->index_bundle_iterators[ $this->word_iterator_map[$i]]->currentDocsWithWord(); @@ -259,8 +258,10 @@ class IntersectIterator extends IndexBundleIterator */ public function checkQuotes(&$position_lists) { - foreach ($this->quote_positions as $qp) { - if ($this->checkQuote($position_lists, 0, "*", $qp) < 1) { + foreach ($this->quote_positions as + $ngram_positions_within_quoted_query) { + if ($this->checkQuote($position_lists, 0, "*", + $ngram_positions_within_quoted_query) < 1) { return false; } } @@ -276,27 +277,31 @@ class IntersectIterator extends IndexBundleIterator * @param mixed $next_pos * or int if * next_pos must be >= $cur_pos * +len_search_term. 
$next_pos represents the position the next * quoted term should be at - * @param array $qp $position_list_index => $len_of_list_term pairs + * @param array $ngram_positions_within_quoted_query pairs: + * $ngram_position_within_quoted_query => $len_of_ngram * @return int -1 on failure, 0 on backtrack, 1 on success */ - public function checkQuote(&$position_lists, $cur_pos, $next_pos, $qp) + public function checkQuote(&$position_lists, $cur_pos, $next_pos, + $ngram_positions_within_quoted_query) { - if ($qp == [] || $qp == null) { + if ($ngram_positions_within_quoted_query == [] || + $ngram_positions_within_quoted_query == null) { return 1; } - $list_index = key($qp); - $len = $qp[$list_index]; - unset($qp[$list_index]); + $ngram_index = key($ngram_positions_within_quoted_query); + $len = $ngram_positions_within_quoted_query[$ngram_index]; + unset($ngram_positions_within_quoted_query[$ngram_index]); if (strcmp($len, "*") == 0) { - return $this->checkQuote($position_lists, $cur_pos, "*", $qp); + return $this->checkQuote($position_lists, $cur_pos, "*", + $ngram_positions_within_quoted_query); } - $list = $position_lists[$list_index]; + $ngram_position_list = $position_lists[$ngram_index]; $is_star = (strcmp($next_pos, "*") == 0); $next_pos = ($is_star) ? 
$cur_pos + $len: $next_pos; while(true) { $found = false; - foreach ($list as $elt) { - if ($elt >= $next_pos) { + foreach ($ngram_position_list as $occurrence_position) { + if ($occurrence_position >= $next_pos) { $found = true; break; } @@ -304,13 +309,14 @@ class IntersectIterator extends IndexBundleIterator if (!$found) { return -1; } - if ($is_star || $elt == $next_pos) { - $check = $this->checkQuote($position_lists, $elt, - $elt + $len, $qp); + if ($is_star || $occurrence_position == $next_pos) { + $check = $this->checkQuote($position_lists, + $occurrence_position, $occurrence_position + $len, + $ngram_positions_within_quoted_query); if ($check != 0) { return $check; } - $next_pos = $elt + $len; + $next_pos = $occurrence_position + $len; } else { return 0; } diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php index 0a2f84661..42b09abd3 100755 --- a/src/models/PhraseModel.php +++ b/src/models/PhraseModel.php @@ -562,33 +562,46 @@ class PhraseModel extends ParallelModel $phrase_parts = explode('"', $phrase_string); $base_words = []; $num_words = 0; + /* + array of arrays, one for each quoted set of terms in the query + */ $quote_positions = []; foreach ($phrase_parts as $phrase_part) { if (empty(trim($phrase_part))) { $quote_state = ($quote_state) ? false : true; continue; } - /*still use original phrase string here to handle + /* still use original phrase string here to handle acronyms abbreviations and the like that use periods */ if ($quote_state) { $sub_parts = explode('*', $phrase_part); $first_part = true; - $quote_position = []; + /* + $term_positions_within_quoted_query is an associative array + of pairs: position_of_quoted_ngram_in_original_query => + number_of_terms_ngrams_corresponds_to_or_star . + ngrams can be more than one term, but are usually just + 1 term, so often this is an array of term positions each + mapped to 1. 
A mapping of the form "*n" => "*" indicates that a wildcard occurred before position n + */ + $term_positions_within_quoted_query = []; foreach ($sub_parts as $sub_part) { if (!$first_part) { - $quote_position["*$num_words"] = "*"; + $term_positions_within_quoted_query["*$num_words"] = + "*"; } $new_words = PhraseParser::extractPhrases( $sub_part, $locale_tag, $index_name, true); $base_words = array_merge($base_words, $new_words); foreach ($new_words as $new_word) { $len = substr_count($new_word, " ") + 1; - $quote_position[$num_words] = $len; + $term_positions_within_quoted_query[$num_words] = $len; $num_words++; } $first_part = false; } - $quote_positions[] = $quote_position; + $quote_positions[] = $term_positions_within_quoted_query; } else { $new_words = PhraseParser::extractPhrases($phrase_part, $locale_tag,