Tighter bot checking on search queries

Chris Pollett [2024-04-06]
Tighter bot checking on search queries
Filename
src/controllers/SearchController.php
diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php
index 7cba5ab9d..3403e4401 100755
--- a/src/controllers/SearchController.php
+++ b/src/controllers/SearchController.php
@@ -967,15 +967,34 @@ class SearchController extends Controller implements CrawlConstants
      */
     public function restrictQueryByUserAgent($query)
     {
-        $bots = ["googlebot", "baidu", "naver", "sogou"];
-        $query_okay = true;
-        foreach ($bots as $bot) {
-            if (isset($_SERVER["HTTP_USER_AGENT"]) &&
-                stristr($_SERVER["HTTP_USER_AGENT"], $bot)) {
-                $query_okay = false;
+        if (empty($query)) {
+            return $query;
+        }
+        $user_agent = strtolower(
+            ($_SERVER["HTTP_USER_AGENT"] ?? "") . " " .
+            ($_SERVER["SERVER_PROTOCOL"] ?? "") . " " .
+            ($_REQUEST["f"] ?? "")
+            );
+        $parameter_checks = [
+            [["googlebot", "baidu", "naver", "sogou", "yisouspider"], 0],
+            [["windows", "apple", "linux"], 1],
+            [["api", "http/1.0", "http/1.1"], 1],
+            [["chrome", "safari", "edge", "firefox"], 1],
+            ];
+        foreach ($parameter_checks as $parameter_check) {
+            $check_count = 0;
+            list($checks, $max_count) = $parameter_check;
+            foreach ($checks as $check) {
+                if (str_contains($user_agent, $check)) {
+                    $check_count++;
+                }
+                if ($check_count > $max_count) {
+                    include(C\BASE_DIR . "/error.php");
+                    \seekquarry\yioop\library\webExit();
+                }
             }
         }
-        return ($query_okay) ? $query : "";
+        return $query;
     }
     /**
      * Prepares the array $data so the SearchView can draw search results
@@ -1856,7 +1875,9 @@ EOD;
     public function relatedRequest($url, $results_per_page, $limit = 0,
         $crawl_time = 0, $grouping = 0, $save_timestamp = 0)
     {
-        if (!C\API_ACCESS) {return null; }
+        if (!C\API_ACCESS) {
+            return null;
+        }
         $grouping = ($grouping > 0 ) ? 2 : 0;
         $data = [];
         $this->processQuery($data, "", "related", $url, $results_per_page,
ViewGit