Improve query caching, a=chris

Chris Pollett [2015-12-12 17h]
Improve query caching, a=chris
Filename
src/configs/Config.php
src/library/FileCache.php
src/models/PhraseModel.php
diff --git a/src/configs/Config.php b/src/configs/Config.php
index f8d6dd5b5..4a279634b 100755
--- a/src/configs/Config.php
+++ b/src/configs/Config.php
@@ -542,7 +542,7 @@ nsdefine('URL_FILTER_SIZE', MEMORY_PROFILE * 5000000);
  */
 nsdefine('NUM_URLS_QUEUE_RAM', MEMORY_PROFILE * 80000);
 /** number of documents before next gen */
-nsdefine('NUM_DOCS_PER_GENERATION', MEMORY_PROFILE *10000);
+nsdefine('NUM_DOCS_PER_GENERATION', MEMORY_PROFILE * 10000);
 /** precision to round floating points document scores */
 nsdefine('PRECISION', 10);
 /** maximum number of links to extract from a page on an initial pass*/
@@ -688,11 +688,16 @@ nsdefine ('NUM_RESULTS_PER_PAGE', 10);
 /** Number of recently crawled urls to display on admin screen */
 nsdefine ('NUM_RECENT_URLS_TO_DISPLAY', 10);
 /** Maximum time a set of results can stay in query cache before it is
-    invalidated */
-nsdefine ('MAX_QUERY_CACHE_TIME', 2 * ONE_DAY); //two days
+    invalidated. If zero or negative, then never use time to kick something
+    out of cache. */
+if(!nsdefined("MAX_QUERY_CACHE_TIME")) {
+    nsdefine ('MAX_QUERY_CACHE_TIME', 2 * ONE_DAY); //two days
+}
 /** Minimum time a set of results can stay in query cache before it is
-    invalidated */
-nsdefine ('MIN_QUERY_CACHE_TIME', ONE_HOUR); //one hour
+    invalidated (used for active crawl or feed results) */
+if(!nsdefined("MIN_QUERY_CACHE_TIME")) {
+    nsdefine ('MIN_QUERY_CACHE_TIME', ONE_HOUR); //one hour
+}
 /**
  * Default number of items to page through for users,roles, mixes, etc
  * on the admin screens
diff --git a/src/library/FileCache.php b/src/library/FileCache.php
index 2fc7844b6..78848f808 100644
--- a/src/library/FileCache.php
+++ b/src/library/FileCache.php
@@ -53,7 +53,7 @@ class FileCache
     /**
      * Maximum number of files in a bin
      */
-    const MAX_FILES_IN_A_BIN = 1000;
+    const MAX_FILES_IN_A_BIN = 10000;
     /**
      * Creates the directory for the file cache, sets how frequently
      * all items in the cache expire
@@ -111,17 +111,19 @@ class FileCache
         if (!isset($data['last_expired'])) {
             $data = ['last_expired' => time(), 'count' => 0];
         }
-        if ((time() - $data['last_expired'] > C\MIN_QUERY_CACHE_TIME &&
-            $data['count'] > self::MAX_FILES_IN_A_BIN) ||
-            $data['count'] > 10 * self::MAX_FILES_IN_A_BIN) {
-            $db_class = ucfirst(C\DBMS)."Manager";
+        if ($data['count'] > self::MAX_FILES_IN_A_BIN) {
+            $db_class = ucfirst(C\DBMS) . "Manager";
             $db = new $db_class();
             $db->unlinkRecursive($checksum_dir);
         }
-        $data['count']++;
         if (!file_exists($checksum_dir)) {
             mkdir($checksum_dir);
-            $data['last_expired'] = time();
+            $data['last_expired'] = time(); /* currently count is used rather
+                than time, but we store time anyway.
+                */
+        }
+        if (!file_exists($cache_file)) {
+            $data['count']++;
         }
         file_put_contents("$checksum_dir/last_expired.txt",
             serialize($data));
diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php
index fe3f4fc66..8be310a1f 100755
--- a/src/models/PhraseModel.php
+++ b/src/models/PhraseModel.php
@@ -1104,7 +1104,8 @@ class PhraseModel extends ParallelModel
                 } else {
                     $cached_time = $time;
                 }
-                if ($cached_time > C\MAX_QUERY_CACHE_TIME) {
+                if (C\MAX_QUERY_CACHE_TIME > 0 &&
+                    $cached_time > C\MAX_QUERY_CACHE_TIME) {
                     $results = false;
                 }
                 if (isset($results['PAGES'])) {
ViewGit