Fixes a bug in how materialized metas were checked in IndexManager; makes it so spell correction and thesaurus lookups are not done on meta words, a=chris
Fixes a bug in how materialized metas were checked in IndexManager; makes it so spell correction and thesaurus lookups are not done on meta words, a=chris
diff --git a/controllers/search_controller.php b/controllers/search_controller.php
index 72a9553b6..9e99f69e2 100755
--- a/controllers/search_controller.php
+++ b/controllers/search_controller.php
@@ -515,7 +515,13 @@ class SearchController extends Controller implements CrawlConstants
if($view == "search" && $data["RAW"] == 0 && isset($data['PAGES'])) {
$data['PAGES'] = $this->makeMediaGroups($data['PAGES']);
}
- $data['INCLUDE_SCRIPTS'] = array("suggest");
+ /* Only set up spell correction if single conjunctive query
+ without meta words
+ */
+ if(isset($data['QUERY']) &&
+ !preg_match('/(\%7C|\%3A)/u', $data['QUERY'])) {
+ $data['INCLUDE_SCRIPTS'] = array("suggest");
+ }
if(!isset($data['SCRIPT'])) {
$data['SCRIPT'] = "";
}
diff --git a/lib/index_bundle_iterators/disjoint_iterator.php b/lib/index_bundle_iterators/disjoint_iterator.php
index 1e577b52f..e5d28e529 100644
--- a/lib/index_bundle_iterators/disjoint_iterator.php
+++ b/lib/index_bundle_iterators/disjoint_iterator.php
@@ -231,6 +231,7 @@ class DisjointIterator extends IndexBundleIterator
$this->seen_docs += 1;
$this->seen_docs_unfiltered = 0;
$least= $this->least_offset_index;
+ if(!isset($this->index_bundle_iterators[$least])) { return; }
$this->seen_docs_unfiltered +=
$this->index_bundle_iterators[$least]->seen_docs;
$total_num_docs += $this->index_bundle_iterators[$least]->num_docs;
diff --git a/lib/index_dictionary.php b/lib/index_dictionary.php
index bdce48c30..5a32e9276 100644
--- a/lib/index_dictionary.php
+++ b/lib/index_dictionary.php
@@ -737,7 +737,7 @@ class IndexDictionary implements CrawlConstants
$k = 0;
$old_k = 0;
while(($k = strpos($mask, "\xFF", $old_k)) !== false) {
- $loc = $k + 9;
+ $loc = $k + 8;
if(isset($id[$loc]) && $id[$loc] != $word_id[$loc]) {
$add_flag = false;
break;
diff --git a/lib/index_manager.php b/lib/index_manager.php
index d8b6b83a7..f36fb3aa7 100644
--- a/lib/index_manager.php
+++ b/lib/index_manager.php
@@ -88,9 +88,8 @@ class IndexManager implements CrawlConstants
}
} else {
$index_archive_name = self::index_data_base_name . $index_name;
- $tmp =
- new IndexArchiveBundle(
- CRAWL_DIR.'/cache/'.$index_archive_name);
+ $tmp = new IndexArchiveBundle(
+ CRAWL_DIR.'/cache/'.$index_archive_name);
if(!$tmp) {
return false;
}
@@ -181,7 +180,7 @@ class IndexManager implements CrawlConstants
$add_flag = true;
if($mask != "") {
for($k = 0; $k < $len; $k++) {
- $loc = 9 + $k;
+ $loc = 8 + $k;
if(ord($mask[$k]) > 0 && isset($id[$loc]) &&
$id[$loc] != $hash[$loc]) {
$add_flag = false;
diff --git a/locale/en-US/statistics.txt b/locale/en-US/statistics.txt
index b6bef56f0..5a165df53 100755
--- a/locale/en-US/statistics.txt
+++ b/locale/en-US/statistics.txt
@@ -1 +1 @@
-d:99;
\ No newline at end of file
+d:100;
\ No newline at end of file
diff --git a/models/phrase_model.php b/models/phrase_model.php
index 58c76978e..5d291b9d5 100755
--- a/models/phrase_model.php
+++ b/models/phrase_model.php
@@ -686,7 +686,7 @@ class PhraseModel extends ParallelModel
return array($word_struct, $format_words);
}
/**
- * Given a query string extracts meta word, which of these are
+ * Given a query string, this method extracts meta words, which of these are
* "materialized" (i.e., should be encoded as part of word ids),
* disallowed phrases, the query string after meta words removed
* and ampersand substitution applied, the query string with meta words
@@ -1293,7 +1293,9 @@ class PhraseModel extends ParallelModel
$results['TIME'] = time();
$lang = guessLocaleFromString($original_query);
$tokenizer = PhraseParser::getTokenizer($lang);
- if($tokenizer && method_exists($tokenizer, "scoredThesaurusMatches")
+ //only use tokenizer if no meta word or disjuncts in query
+ if(!preg_match('/(\||\:)/u', $original_query) &&
+ $tokenizer && method_exists($tokenizer, "scoredThesaurusMatches")
&& method_exists($tokenizer, "tagPartsOfSpeechPhrase")
&& isset($tokenizer->use_thesaurus)) {
$results = $this->sortByThesaurusScore($results, $original_query,
diff --git a/views/search_view.php b/views/search_view.php
index 753b9603f..78190993e 100755
--- a/views/search_view.php
+++ b/views/search_view.php
@@ -173,7 +173,7 @@ class SearchView extends View implements CrawlConstants
*/
function renderSearchResults($data)
{ ?>
- <div <?php if(WORD_SUGGEST) { e('id="spell-check"'); } ?>
+ <div <?php if(WORD_SUGGEST) { e('id="spell-check"'); } ?>
class="spell"><span class="hidden"
> </span></div>
<h2 class="serp-stats"><?php