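Tighter bot checking on search queries

restrictQueryByUserAgent() now builds one lowercased string from the
User-Agent header, the server protocol, and the "f" request parameter, and
checks it against several groups of substrings. When a group matches more
often than allowed, error.php is included and webExit() is called, rather
than returning an empty query as before. "yisouspider" is added to the
rejected crawler names. Also reformats the API_ACCESS guard in
relatedRequest().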
diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php
index 7cba5ab9d..3403e4401 100755
--- a/src/controllers/SearchController.php
+++ b/src/controllers/SearchController.php
@@ -967,15 +967,34 @@ class SearchController extends Controller implements CrawlConstants
*/
public function restrictQueryByUserAgent($query)
{
- $bots = ["googlebot", "baidu", "naver", "sogou"];
- $query_okay = true;
- foreach ($bots as $bot) {
- if (isset($_SERVER["HTTP_USER_AGENT"]) &&
- stristr($_SERVER["HTTP_USER_AGENT"], $bot)) {
- $query_okay = false;
+ if (empty($query)) { // nothing to restrict; note empty() is also true for "0"
+ return $query; // returned as is, none of the checks below run
+ }
+ $user_agent = strtolower( // lowercased text that every check below searches
+ ($_SERVER["HTTP_USER_AGENT"] ?? "") . " " .
+ ($_SERVER["SERVER_PROTOCOL"] ?? "") . " " .
+ ($_REQUEST["f"] ?? "") // request parameter "f"; missing values become ""
+ );
+ $parameter_checks = [ // each entry: [substrings to look for, max matches allowed]
+ [["googlebot", "baidu", "naver", "sogou", "yisouspider"], 0], // max 0: any match is rejected
+ [["windows", "apple", "linux"], 1], // NOTE(review): "apple" also matches "applewebkit" -- confirm intended
+ [["api", "http/1.0", "http/1.1"], 1], // rejected on 2+ matches, e.g. "f" is "api" and protocol is http/1.1
+ [["chrome", "safari", "edge", "firefox"], 1], // NOTE(review): Chrome agents usually also contain "safari" -- confirm
+ ];
+ foreach ($parameter_checks as $parameter_check) {
+ $check_count = 0; // matches found so far within the current group
+ list($checks, $max_count) = $parameter_check;
+ foreach ($checks as $check) {
+ if (str_contains($user_agent, $check)) {
+ $check_count++;
+ }
+ if ($check_count > $max_count) { // tested after every substring, so it fires as soon as the limit is passed
+ include(C\BASE_DIR . "/error.php"); // over the limit: pull in the error page
+ \seekquarry\yioop\library\webExit(); // presumably ends the request here -- confirm webExit() semantics
+ }
}
}
- return ($query_okay) ? $query : "";
+ return $query; // no group exceeded its limit: query is returned unchanged
}
/**
* Prepares the array $data so the SearchView can draw search results
@@ -1856,7 +1875,9 @@ EOD;
public function relatedRequest($url, $results_per_page, $limit = 0,
$crawl_time = 0, $grouping = 0, $save_timestamp = 0)
{
- if (!C\API_ACCESS) {return null; }
+ if (!C\API_ACCESS) {
+ return null;
+ }
$grouping = ($grouping > 0 ) ? 2 : 0;
$data = [];
$this->processQuery($data, "", "related", $url, $results_per_page,