fix guess query locale issue, a=chris

Chris Pollett [2019-06-07 23:Jun:th]
fix guess query locale issue, a=chris
Filename
src/configs/Config.php
src/controllers/FetchController.php
src/controllers/SearchController.php
src/executables/Fetcher.php
src/library/LocaleFunctions.php
src/library/archive_bundle_iterators/TextArchiveBundleIterator.php
src/models/PhraseModel.php
diff --git a/src/configs/Config.php b/src/configs/Config.php
index f3c8fa999..2097fc733 100755
--- a/src/configs/Config.php
+++ b/src/configs/Config.php
@@ -872,7 +872,9 @@ nsconddefine('ENABLE_QUESTION_ANSWERING', true);
     query over those terms
  */
 nsconddefine("SUFFIX_PHRASES", false);
-/** Number of words until to switch from bag of words to phrase lookup */
+/** Number of words until to switch from bag of words to phrase lookup
+ * if SUFFIX_PHRASES is true
+ */
 nsconddefine('PHRASE_THRESHOLD', 3);
 /** default number of search results to display per page */
 nsconddefine('NUM_RESULTS_PER_PAGE', 10);
diff --git a/src/controllers/FetchController.php b/src/controllers/FetchController.php
index 76876724a..1ef5a5783 100755
--- a/src/controllers/FetchController.php
+++ b/src/controllers/FetchController.php
@@ -292,7 +292,7 @@ class FetchController extends Controller implements CrawlConstants
             self::index_closed_name . $crawl_time . ".txt";
         if ($crawl_time > 0 && file_exists($index_schedule_file) &&
             $check_crawl_time > intval(fileatime($index_schedule_file)) &&
-            !file_exists(C\CRAWL_DIR.
+            !file_exists(C\CRAWL_DIR .
                 "/schedules/QueueServerMessages.txt") ) {
             $restart = true;
             if (file_exists($this->crawl_status_file_name)) {
diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php
index dab0de7ca..cc6c56274 100755
--- a/src/controllers/SearchController.php
+++ b/src/controllers/SearchController.php
@@ -535,7 +535,7 @@ class SearchController extends Controller implements CrawlConstants
                 }
             }
         }
-        if (isset($_REQUEST['save_timestamp'])){
+        if (isset($_REQUEST['save_timestamp'])) {
             $save_timestamp = substr($this->clean(
                 $_REQUEST['save_timestamp'], 'int'), 0, C\TIMESTAMP_LEN);
         } else {
diff --git a/src/executables/Fetcher.php b/src/executables/Fetcher.php
index 5e12b6271..d9adcaa2c 100755
--- a/src/executables/Fetcher.php
+++ b/src/executables/Fetcher.php
@@ -581,8 +581,8 @@ class Fetcher implements CrawlConstants
         static $last_record_time = 0;
         L\crawlLog("In Fetch Loop");
         L\crawlLog("PHP Version in use: " . phpversion());
-        $prefix = $this->fetcher_num."-";
-        if (!file_exists(C\CRAWL_DIR."/{$prefix}temp")) {
+        $prefix = $this->fetcher_num . "-";
+        if (!file_exists(C\CRAWL_DIR . "/{$prefix}temp")) {
             mkdir(C\CRAWL_DIR . "/{$prefix}temp");
         }
         $info[self::STATUS] = self::CONTINUE_STATE;
@@ -765,8 +765,8 @@ class Fetcher implements CrawlConstants
                 - L\changeInMicrotime($start_time))));
             return [];
         }
-        $prefix = $this->fetcher_num."-";
-        $tmp_dir = C\CRAWL_DIR."/{$prefix}temp";
+        $prefix = $this->fetcher_num . "-";
+        $tmp_dir = C\CRAWL_DIR . "/{$prefix}temp";
         $filtered_sites = [];
         $site_pages = [];
         foreach ($sites as $site) {
@@ -867,9 +867,9 @@ class Fetcher implements CrawlConstants
      */
     public function downloadPagesArchiveCrawl()
     {
-        $prefix = $this->fetcher_num."-";
+        $prefix = $this->fetcher_num . "-";
         $arc_name = "$prefix" . self::archive_base_name . $this->crawl_index;
-        $base_name = C\CRAWL_DIR."/cache/$arc_name";
+        $base_name = C\CRAWL_DIR . "/cache/$arc_name";
         $pages = [];
         if (!isset($this->archive_iterator->iterate_timestamp) ||
             $this->archive_iterator->iterate_timestamp != $this->crawl_index ||
@@ -1106,7 +1106,7 @@ class Fetcher implements CrawlConstants
             if (L\generalIsA(C\NS_ARCHIVE . $this->arc_type . "Iterator",
                 C\NS_ARCHIVE . "TextArchiveBundleIterator")) {
                 $result_dir = C\WORK_DIRECTORY . "/schedules/" .
-                    $prefix.self::fetch_archive_iterator . $this->crawl_time;
+                    $prefix . self::fetch_archive_iterator . $this->crawl_time;
                 $iterator_name = C\NS_ARCHIVE . $this->arc_type . "Iterator";
                 $this->archive_iterator = new $iterator_name(
                     $info[self::CRAWL_INDEX],
@@ -1252,14 +1252,15 @@ class Fetcher implements CrawlConstants
         $name_server = $this->name_server;
         $time = time();
         $session = md5($time . C\AUTH_KEY);
-        $prefix = $this->fetcher_num."-";
+        $prefix = $this->fetcher_num . "-" . $this->channel . "-";
         $request =
             $name_server."?c=fetch&a=archiveSchedule&time=$time".
-            "&session=$session&robot_instance=".$prefix.C\ROBOT_INSTANCE.
+            "&session=$session&robot_instance=" . $prefix . C\ROBOT_INSTANCE.
             "&machine_uri=".C\WEB_URI."&crawl_time=".$this->crawl_time.
-            "&check_crawl_time=".$this->check_crawl_time;
+            "&check_crawl_time=" . $this->check_crawl_time;
         L\crawlLog($request);
         $response_string = FetchUrl::getPage($request, null, true);
+        echo $response_string;
         if ($response_string === false) {
             L\crawlLog("Request failed!");
             return false;
diff --git a/src/library/LocaleFunctions.php b/src/library/LocaleFunctions.php
index ec294565c..b85aee8fc 100755
--- a/src/library/LocaleFunctions.php
+++ b/src/library/LocaleFunctions.php
@@ -101,16 +101,18 @@ function guessLocale()
 function guessLocaleFromString($phrase_string, $locale_tag = null)
 {
     $len = strlen($phrase_string);
-    foreach (['ar', 'bn', 'de', 'en-US', 'es', 'fa', 'fr-FR', 'he', 'hi',
-        'in-ID', 'it', 'ja', 'kn', 'ko', 'nl', 'pl', 'pt', 'ru', 'te', 'th',
-        'vi-VN', 'zh-CN'] as $lang) {
-        $tokenizer = PhraseParser::getTokenizer($lang);
-        if ($tokenizer) {
-            $test_len =
-                strlen($tokenizer->stopwordsRemover($phrase_string));
-            if ($test_len < $len) {
-                $len = $test_len;
-                $locale_tag = $lang;
+    if (!$locale_tag || $len >= C\NAME_LEN) {
+        foreach (['ar', 'bn', 'de', 'en-US', 'es', 'fa', 'fr-FR', 'he', 'hi',
+            'in-ID', 'it', 'ja', 'kn', 'ko', 'nl', 'pl', 'pt', 'ru', 'te', 'th',
+            'vi-VN', 'zh-CN'] as $lang) {
+            $tokenizer = PhraseParser::getTokenizer($lang);
+            if ($tokenizer) {
+                $test_len =
+                    strlen($tokenizer->stopwordsRemover($phrase_string));
+                if ($test_len < $len) {
+                    $len = $test_len;
+                    $locale_tag = $lang;
+                }
             }
         }
     }
diff --git a/src/library/archive_bundle_iterators/TextArchiveBundleIterator.php b/src/library/archive_bundle_iterators/TextArchiveBundleIterator.php
index 0d694dabb..a2d2d1135 100644
--- a/src/library/archive_bundle_iterators/TextArchiveBundleIterator.php
+++ b/src/library/archive_bundle_iterators/TextArchiveBundleIterator.php
@@ -206,7 +206,7 @@ class TextArchiveBundleIterator extends ArchiveBundleIterator
         }
         $this->num_partitions = count($this->partitions);
         $this->status_filename = "{$this->result_dir}/iterate_status.txt";
-        $this->buffer_filename = $this->result_dir."/buffer.txt";
+        $this->buffer_filename = $this->result_dir . "/buffer.txt";

         if (file_exists($this->status_filename)) {
             $this->restoreCheckpoint();
diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php
index c782e9809..58dbcbc61 100755
--- a/src/models/PhraseModel.php
+++ b/src/models/PhraseModel.php
@@ -538,7 +538,8 @@ class PhraseModel extends ParallelModel
             $query_string = $query;
             $this->program_indicator = false;
         }
-        $locale_tag = L\guessLocaleFromString($query_string);
+        $locale_tag = L\guessLocale();
+        $locale_tag = L\guessLocaleFromString($query_string, $locale_tag);
         $quote_state = false;
         $phrase_parts = explode('"', $phrase_string);
         $base_words = [];
@@ -593,7 +594,7 @@ class PhraseModel extends ParallelModel
             $this->query_info['QUERY'] .= "$in3<i>Index</i>: ".
                 $index_name."<br />";
             $this->query_info['QUERY'] .= "$in3<i>LocaleTag</i>: ".
-                $locale_tag."<br />";
+                $locale_tag ."<br />";
             $this->query_info['QUERY'] .=
                 "$in3<i>Stemmed/Char-grammed Words</i>:<br />";
             foreach ($base_words as $word) {
ViewGit