Fixes a bug in how materialized metas were checked in IndexManager, makes it so spell correction and thesaurus lookups are not done on meta words, a=chris

Chris Pollett [2014-06-14 20:Jun:th]
Fixes a bug in how materialized metas were checked in IndexManager, makes it so spell correction and thesaurus lookups are not done on meta words, a=chris
Filename
controllers/search_controller.php
lib/index_bundle_iterators/disjoint_iterator.php
lib/index_dictionary.php
lib/index_manager.php
locale/en-US/statistics.txt
models/phrase_model.php
views/search_view.php
diff --git a/controllers/search_controller.php b/controllers/search_controller.php
index 72a9553b6..9e99f69e2 100755
--- a/controllers/search_controller.php
+++ b/controllers/search_controller.php
@@ -515,7 +515,13 @@ class SearchController extends Controller implements CrawlConstants
         if($view == "search" && $data["RAW"] == 0 && isset($data['PAGES'])) {
             $data['PAGES'] = $this->makeMediaGroups($data['PAGES']);
         }
-        $data['INCLUDE_SCRIPTS'] = array("suggest");
+        /*  Only set up spell correction if single conjunctive query
+            without meta words
+         */
+        if(isset($data['QUERY']) &&
+            !preg_match('/(\%7C|\%3A)/u', $data['QUERY'])) {
+            $data['INCLUDE_SCRIPTS'] = array("suggest");
+        }
         if(!isset($data['SCRIPT'])) {
             $data['SCRIPT'] = "";
         }
diff --git a/lib/index_bundle_iterators/disjoint_iterator.php b/lib/index_bundle_iterators/disjoint_iterator.php
index 1e577b52f..e5d28e529 100644
--- a/lib/index_bundle_iterators/disjoint_iterator.php
+++ b/lib/index_bundle_iterators/disjoint_iterator.php
@@ -231,6 +231,7 @@ class DisjointIterator extends IndexBundleIterator
             $this->seen_docs += 1;
             $this->seen_docs_unfiltered = 0;
             $least= $this->least_offset_index;
+            if(!isset($this->index_bundle_iterators[$least])) { return; }
             $this->seen_docs_unfiltered +=
                 $this->index_bundle_iterators[$least]->seen_docs;
             $total_num_docs += $this->index_bundle_iterators[$least]->num_docs;
diff --git a/lib/index_dictionary.php b/lib/index_dictionary.php
index bdce48c30..5a32e9276 100644
--- a/lib/index_dictionary.php
+++ b/lib/index_dictionary.php
@@ -737,7 +737,7 @@ class IndexDictionary implements CrawlConstants
             $k = 0;
             $old_k = 0;
             while(($k = strpos($mask, "\xFF", $old_k)) !== false) {
-                $loc = $k + 9;
+                $loc = $k + 8;
                 if(isset($id[$loc]) && $id[$loc] != $word_id[$loc]) {
                     $add_flag = false;
                     break;
diff --git a/lib/index_manager.php b/lib/index_manager.php
index d8b6b83a7..f36fb3aa7 100644
--- a/lib/index_manager.php
+++ b/lib/index_manager.php
@@ -88,9 +88,8 @@ class IndexManager implements CrawlConstants
                 }
             } else {
                 $index_archive_name = self::index_data_base_name . $index_name;
-                $tmp =
-                    new IndexArchiveBundle(
-                        CRAWL_DIR.'/cache/'.$index_archive_name);
+                $tmp = new IndexArchiveBundle(
+                    CRAWL_DIR.'/cache/'.$index_archive_name);
                 if(!$tmp) {
                     return false;
                 }
@@ -181,7 +180,7 @@ class IndexManager implements CrawlConstants
                            $add_flag = true;
                            if($mask != "") {
                                for($k = 0; $k < $len; $k++) {
-                                   $loc = 9 + $k;
+                                   $loc = 8 + $k;
                                    if(ord($mask[$k]) > 0 && isset($id[$loc]) &&
                                        $id[$loc] != $hash[$loc]) {
                                        $add_flag = false;
diff --git a/locale/en-US/statistics.txt b/locale/en-US/statistics.txt
index b6bef56f0..5a165df53 100755
--- a/locale/en-US/statistics.txt
+++ b/locale/en-US/statistics.txt
@@ -1 +1 @@
-d:99;
\ No newline at end of file
+d:100;
\ No newline at end of file
diff --git a/models/phrase_model.php b/models/phrase_model.php
index 58c76978e..5d291b9d5 100755
--- a/models/phrase_model.php
+++ b/models/phrase_model.php
@@ -686,7 +686,7 @@ class PhraseModel extends ParallelModel
         return array($word_struct, $format_words);
     }
     /**
-     * Given a query string extracts meta word, which of these are
+     * Given a query string, this method extracts meta words, which of these are
      * "materialized" (i.e., should be encoded as part of word ids),
      * disallowed phrases, the query string after meta words removed
      * and ampersand substitution applied, the query string with meta words
@@ -1293,7 +1293,9 @@ class PhraseModel extends ParallelModel
         $results['TIME'] = time();
         $lang = guessLocaleFromString($original_query);
         $tokenizer = PhraseParser::getTokenizer($lang);
-        if($tokenizer && method_exists($tokenizer, "scoredThesaurusMatches")
+        //only use tokenizer if no meta word or disjuncts in query
+        if(!preg_match('/(\||\:)/u', $original_query) &&
+            $tokenizer && method_exists($tokenizer, "scoredThesaurusMatches")
             && method_exists($tokenizer, "tagPartsOfSpeechPhrase")
             && isset($tokenizer->use_thesaurus)) {
             $results = $this->sortByThesaurusScore($results, $original_query,
diff --git a/views/search_view.php b/views/search_view.php
index 753b9603f..78190993e 100755
--- a/views/search_view.php
+++ b/views/search_view.php
@@ -173,7 +173,7 @@ class SearchView extends View implements CrawlConstants
      */
     function renderSearchResults($data)
     { ?>
-        <div <?php if(WORD_SUGGEST) { e('id="spell-check"'); } ?>
+        <div <?php if(WORD_SUGGEST) { e('id="spell-check"'); } ?>
             class="spell"><span class="hidden"
         >&nbsp;</span></div>
         <h2 class="serp-stats"><?php
ViewGit