Make search results adhere more closely to the query language; use safe search by default, a=chris

Chris Pollett [2019-06-09 01h]
Make search results adhere more closely to the query language; use safe search by default, a=chris
Filename
src/controllers/SearchController.php
src/index.php
src/library/LocaleFunctions.php
src/models/LocaleModel.php
src/models/PhraseModel.php
diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php
index cc6c56274..7aad041dc 100755
--- a/src/controllers/SearchController.php
+++ b/src/controllers/SearchController.php
@@ -178,7 +178,7 @@ class SearchController extends Controller implements CrawlConstants
             } else {
                 $out_data = [];
                 $out_data["language"] = L\getLocaleTag();
-                $out_data["link"] = C\NAME_SERVER.
+                $out_data["link"] = C\NAME_SERVER .
                     "?f=json&q={$data['QUERY']}";
                 $out_data["totalResults"] = $data['TOTAL_ROWS'];
                 $out_data["startIndex"] = $data['LIMIT'];
@@ -1925,7 +1925,7 @@ class SearchController extends Controller implements CrawlConstants
         $terms, $hist_ui_open, $url)
     {
         //Guess locale for date localization
-        $locale_type = L\guessLocale();
+        $locale_type = L\getLocaleTag();
         //Create data structure that stores years months and associated links
         list($time_ds, $years, $months) = $this->
             createHistoryDataStructure($all_crawl_times, $locale_type, $url);
diff --git a/src/index.php b/src/index.php
index 840b91695..205f62467 100644
--- a/src/index.php
+++ b/src/index.php
@@ -155,7 +155,7 @@ function bootstrap($web_site = null, $start_new_session = true)
     }
     if (C\PROFILE && L\upgradeLocalesCheck($locale_tag)) {
         L\upgradeLocales();
-        /* upgrade manipulations might mess with globale local,
+        /* upgrade manipulations might mess with global locale,
             so set it back here
          */
         L\setLocaleObject($locale_tag);
diff --git a/src/library/LocaleFunctions.php b/src/library/LocaleFunctions.php
index b85aee8fc..28ce714af 100755
--- a/src/library/LocaleFunctions.php
+++ b/src/library/LocaleFunctions.php
@@ -100,8 +100,11 @@ function guessLocale()
  */
 function guessLocaleFromString($phrase_string, $locale_tag = null)
 {
+    if (!$locale_tag) {
+        $locale_tag = getLocaleTag();
+    }
     $len = strlen($phrase_string);
-    if (!$locale_tag || $len >= C\NAME_LEN) {
+    if ($len >= C\NAME_LEN) {
         foreach (['ar', 'bn', 'de', 'en-US', 'es', 'fa', 'fr-FR', 'he', 'hi',
             'in-ID', 'it', 'ja', 'kn', 'ko', 'nl', 'pl', 'pt', 'ru', 'te', 'th',
             'vi-VN', 'zh-CN'] as $lang) {
diff --git a/src/models/LocaleModel.php b/src/models/LocaleModel.php
index 581bf0889..dd83f4822 100755
--- a/src/models/LocaleModel.php
+++ b/src/models/LocaleModel.php
@@ -187,7 +187,8 @@ class LocaleModel extends Model
     public function initialize($locale_tag)
     {
         $old_style_dir = C\LOCALE_DIR . "/" . $locale_tag;
-        $new_style_dir = C\LOCALE_DIR . "/".str_replace("-", "_", $locale_tag);
+        $new_style_dir = C\LOCALE_DIR . "/" .
+            str_replace("-", "_", $locale_tag);
         // renames locale using old style locale dir naming
         if (file_exists($old_style_dir)) {
             $get_update_dirs = glob(C\LOCALE_DIR . "/*",  GLOB_ONLYDIR);
diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php
index 58dbcbc61..761fdf95f 100755
--- a/src/models/PhraseModel.php
+++ b/src/models/PhraseModel.php
@@ -527,7 +527,8 @@ class PhraseModel extends ParallelModel
         $phrase = $this->parseIfConditions($phrase);
         $phrase_string = $phrase;
         list($found_metas, $disallow_phrases,
-            $phrase_string, $query_string, $index_name, $weight) =
+            $phrase_string, $query_string, $index_name, $weight,
+            $locale_tag) =
             $this->extractMetaWordInfo($phrase);
         /*
             we search using the stemmed/char-grammed words, but we format
@@ -538,15 +539,13 @@ class PhraseModel extends ParallelModel
             $query_string = $query;
             $this->program_indicator = false;
         }
-        $locale_tag = L\guessLocale();
-        $locale_tag = L\guessLocaleFromString($query_string, $locale_tag);
         $quote_state = false;
         $phrase_parts = explode('"', $phrase_string);
         $base_words = [];
         $num_words = 0;
         $quote_positions = [];
         foreach ($phrase_parts as $phrase_part) {
-            if (trim($phrase_part) == "") {
+            if (empty(trim($phrase_part))) {
                 $quote_state = ($quote_state) ? false : true;
                 continue;
             }
@@ -708,7 +707,8 @@ class PhraseModel extends ParallelModel
      * disallowed phrases, the query string after meta words removed
      * and ampersand substitution applied, the query string with meta words
      * but apersand substitution applied, the index and the weights found
-     * as part of the query string.
+     * as part of the query string. Finally, it guesses the locale tag for
+     * the query from the phrase string with meta words removed.
      *
      * @param string $phrase the query string
      * @return array containing items listed above in the description of this
@@ -728,6 +728,8 @@ class PhraseModel extends ParallelModel
                 $this->additional_meta_words));
         }
         $materialized_match_conflict = false;
+        $found_lang = false;
+        $found_safe = false;
         foreach ($meta_words as $meta_word) {
             $pattern = "/(\s)($meta_word(\S)+)/";
             preg_match_all($pattern, $phrase, $matches);
@@ -750,14 +752,31 @@ class PhraseModel extends ParallelModel
                     $weight = substr($matches[2][0], strlen($meta_word));
                 }
             }
+            if ($meta_word == 'safe:' && !empty($matches[0])) {
+                $found_safe = true;
+            }
+            if ($meta_word == 'lang:' && !empty($matches[0])) {
+                $found_lang = true;
+            }
             $phrase_string = preg_replace($pattern, "", $phrase_string);
         }
+        $locale_tag = L\guessLocaleFromString($phrase_string);
         if ($materialized_match_conflict) {
             $found_metas = [];
             $disallow_phrases = [];
             $phrase_string = "";
         }
         $found_metas = array_unique($found_metas);
+        if (!empty($found_metas) || !empty($phrase_string)) {
+            if (!$found_lang) {
+                $lang_parts = explode("-", $locale_tag);
+                $found_metas[] = "lang:" . $lang_parts[0];
+            }
+            if (!$found_safe) {
+                $found_metas[] = (empty($_SESSION['safe'])) ? "safe:true" :
+                    "safe:" . $_SESSION['safe'];
+            }
+        }
         $disallow_phrases = array_unique($disallow_phrases);
         $phrase_string = mb_ereg_replace("&", "_and_", $phrase_string);
         $query_string = mb_ereg_replace(C\PUNCT, " ", $phrase_string);
@@ -765,7 +784,7 @@ class PhraseModel extends ParallelModel
         $query_string = mb_ereg_replace('_and_', '&', $query_string);
         $phrase_string = mb_ereg_replace('_and_', '&', $phrase_string);
         return [$found_metas, $disallow_phrases, $phrase_string, $query_string,
-            $index_name, $weight];
+            $index_name, $weight, $locale_tag];
     }
     /**
      * Ideally, this function tries to guess from the query what the
ViewGit