More improvements to SERP favicon handling, ac=chris

Chris Pollett [2022-07-10 18:Jul:th]
More improvements to SERP favicon handling, ac=chris
Filename
src/controllers/SearchController.php
src/index.php
src/views/elements/SearchElement.php
diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php
index f5fd719da..a87de871c 100755
--- a/src/controllers/SearchController.php
+++ b/src/controllers/SearchController.php
@@ -1845,12 +1845,30 @@ EOD;
             true : false;
         $hash_key = L\crawlHash(
             $terms . $url . serialize($ui_flags) . serialize($crawl_time));
-        if (!empty($_SERVER["USE_CACHE"])) {
+        if (!empty($_SERVER["USE_CACHE"]) && $ui_flags['field'] != 'favicon') {
             if ($new_doc = $cache->get($hash_key)) {
                 echo $new_doc;
                 return;
             }
         }
+        if (!empty($ui_flags['field']) && $ui_flags['field'] == 'favicon') {
+            $this->web_site->header("Cache-control: public");
+            $expires = gmdate( "D, d M Y H:i:s", time() +
+                intval(C\RESOURCE_CACHE_TIME))." GMT";
+            $this->web_site->header("Expires: $expires");
+            $etag = $crawl_time;
+            $this->web_site->header("Etag: $etag");
+            $modified_since = (isset( $_SERVER["HTTP_IF_MODIFIED_SINCE"]))
+                ? strtotime($_SERVER["HTTP_IF_MODIFIED_SINCE"]) : false;
+            $etag_header = (isset($_SERVER["HTTP_IF_NONE_MATCH"])) ?
+                trim($_SERVER["HTTP_IF_NONE_MATCH"]) : false;
+            //very crude etag
+            if ($modified_since === $crawl_time ||
+                $etag === $etag_header) {
+                $this->web_site->header("HTTP/1.1 304 Not Modified");
+                return true;
+            }
+        }
         $queue_servers = $this->model("machine")->getQueueServerUrls();
         if ($crawl_time == 0) {
             $crawl_time = $crawl_model->getCurrentIndexDatabaseName();
@@ -2073,9 +2091,7 @@ EOD;
                     ob_end_clean();
                 }
             }
-            if (!empty($image_type)) {
-                $this->web_site->header("Content-Type: $image_type");
-            }
+            $this->web_site->header("Content-Type: $image_type");
         }
         echo $crawl_item[$request_field] ?? "";
         return;
diff --git a/src/index.php b/src/index.php
index 531ec4559..307f054cf 100644
--- a/src/index.php
+++ b/src/index.php
@@ -251,6 +251,7 @@ function configureRewrites($web_site)
         'user' => 'routeFeeds',
         'user_messages' => 'routeUserMessages',
         's' => "routeSubsearch",
+        'serp_icon' => 'routeSerpIcon',
         'suggest' => 'routeSuggest',
         'p' => 'routeWiki'
     ];
@@ -657,7 +658,6 @@ function controllerUrl($name, $with_delim = false)
     $base_url = C\SHORT_BASE_URL;
     if (C\REDIRECTS_ON) {
         $delim = ($with_delim) ? "?" : "";
-        $_REQUEST['route']['c'] = true;
         return $base_url . $name . $delim;
     } else {
         $delim = ($with_delim) ? "&" : "";
@@ -707,6 +707,47 @@ function subsearchUrl($name, $with_delim = false)
         return "$base_url?s=$name$delim";
     }
 }
+/**
+ * Used to route requests for favicons for pages in search results
+ *
+ * @param array $route_args of url parts (split on slash).
+ * @return bool whether it was able to compute a route or not
+ */
+function routeSerpIcon($route_args)
+{
+    $_REQUEST['c'] = "search";
+    $_REQUEST['a'] = "cache";
+    $_REQUEST['field'] = "favicon";
+    $_REQUEST['arg'] = L\webdecode($route_args[2] ?? "");
+    $_REQUEST['its'] = $route_args[1] ?? 0;
+    $_REQUEST['route']['c'] = true;
+    $_REQUEST['route']['a'] = true;
+    $_REQUEST['route']['arg'] = true;
+    $_REQUEST['route']['field'] = true;
+    $_REQUEST['route']['its'] = true;
+    return true;
+}
+/**
+ * Return the url to request the favicon for a page in the search results,
+ * making use of the defined variable REDIRECTS_ON.
+ *
+ * @param bool $with_delim whether it should be terminated with nothing or
+ *      ? or &
+ * @return string url for the page in question
+ */
+function serpIconUrl($url, $crawl_time, $with_delim = false)
+{
+    $base_url = C\SHORT_BASE_URL;
+    if (C\REDIRECTS_ON) {
+        $delim = ($with_delim) ? "?" : "";
+        return $base_url . "serp_icon/$crawl_time/" . L\webencode($url) .
+            "$delim";
+    } else {
+        $delim = ($with_delim) ? "&" : "";
+        return "$base_url?c=search&a=cache&field=favicon&its=$crawl_time&".
+            "arg=" . urlencode($url) . $delim;
+    }
+}
 /**
  * Used to route requests for the suggest-a-url link on the tools page.
  * If redirects on, then /suggest routes to this suggest-a-url page.
@@ -732,8 +773,6 @@ function suggestUrl($with_delim = false)
 {
     $base_url = C\SHORT_BASE_URL;
     if (C\REDIRECTS_ON) {
-        $_REQUEST['route']['c'] = true;
-        $_REQUEST['route']['a'] = true;
         $delim = ($with_delim) ? "?" : "";
         return $base_url ."suggest$delim";
     } else {
diff --git a/src/views/elements/SearchElement.php b/src/views/elements/SearchElement.php
index d41bb5826..7b30b10de 100644
--- a/src/views/elements/SearchElement.php
+++ b/src/views/elements/SearchElement.php
@@ -227,10 +227,8 @@ class SearchElement extends Element implements CrawlConstants
             ?><div class='result'>
             <div class="result-details"><?php
             if (!empty($page[self::FAVICON_URL])) {
-                ?><img class="result-fav" src="<?=$base_query .
-                "&amp;a=cache&amp;field=favicon&amp;arg=" . urlencode(
-                    $page[self::FAVICON_URL]). "&amp;its=".
-                    $page[self::CRAWL_TIME]
+                ?><img class="result-fav" src="<?=B\serpIconUrl(
+                    $page[self::FAVICON_URL], $page[self::CRAWL_TIME]);
                 ?>" /><?php
             }?>
             <span class="echo-link" <?=$subtitle ?>><?=
ViewGit