Last commit for src/library/FileCache.php: 2addb500315b7393a90fe66431d7832b1e7386c7

Adjust copyrights years

Chris Pollett [2024-01-03 21:Jan:rd]
Adjust copyrights years
<?php
/**
 * SeekQuarry/Yioop --
 * Open Source Pure PHP Search Engine, Crawler, and Indexer
 *
 * Copyright (C) 2009 - 2023  Chris Pollett chris@pollett.org
 *
 * LICENSE:
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * END LICENSE
 *
 * @author Chris Pollett chris@pollett.org
 * @license https://www.gnu.org/licenses/ GPL3
 * @link https://www.seekquarry.com/
 * @copyright 2009 - 2023
 * @filesource
 */
namespace seekquarry\yioop\library;

use seekquarry\yioop\configs as C;
use seekquarry\yioop\models\datasources as D;

/** For Yioop global defines */
require_once __DIR__ . "/../configs/Config.php";
/**
 * Library of functions used to implement a simple file cache
 *
 * @author Chris Pollett
 */
class FileCache
{
    /**
     * File used to serve last cache request
     * @var string
     */
    public $cache_file;
    /**
     * Folder name to use for this FileCache
     * @var string
     */
    public $dir_name;
    /**
     * Optional object used to serve web pages when Yioop is run in CLI mode.
     * When non-empty, file reads and writes are delegated to its
     * fileGetContents and filePutContents methods. Declared explicitly
     * because creating dynamic properties is deprecated as of PHP 8.2.
     * @var object|null
     */
    public $web_site;
    /**
     * Total number of bins to cycle between
     */
    const NUMBER_OF_BINS = 50;
    /**
     * Maximum number of files in a bin
     */
    const MAX_FILES_IN_A_BIN = 5000;
    /**
     * Creates the directory for the file cache (if it does not already
     * exist) and remembers the optional WebSite object used for file I/O
     *
     * @param string $dir_name folder name of where to put the file cache
     * @param object $web_site an optional WebSite object that might be used
     *      to serve webpages when Yioop run in CLI mode. This object has
     *      fileGetContents and filePutContents methods which allow RAM
     *      caching of files.
     */
    public function __construct($dir_name, $web_site = null)
    {
        $this->dir_name = $dir_name;
        if (!is_dir($this->dir_name)) {
            mkdir($this->dir_name);
            chmod($this->dir_name, 0777);
            $db_class = C\NS_DATASOURCES . ucfirst(C\DBMS) . "Manager";
            $db = new $db_class();
            $db->setWorldPermissionsRecursive($this->dir_name, true);
        }
        $this->web_site = $web_site;
    }
    /**
     * Either a wrapper for file_get_contents, or if a WebSite object is being
     * used to serve pages, delegates to that object's fileGetContents method
     * (which may serve the contents from a RAM cache).
     * Note this function assumes that only the web server is performing I/O
     * with this file. filemtime() can be used to see if a file on disk has
     * been changed and then you can use $force_read = true below to force
     * re-reading the file into the cache
     *
     * @param string $filename name of file to get contents of
     * @param bool $force_read whether to force the file to be read from
     *      persistent storage rather than the cache. Only has an effect
     *      when a WebSite object is in use
     * @return string contents of the file given by $filename
     */
    public function fileGetContents($filename, $force_read = false)
    {
        if (!empty($this->web_site)) {
            return $this->web_site->fileGetContents($filename, $force_read);
        }
        return file_get_contents($filename);
    }
    /**
     * Either a wrapper for file_put_contents, or if a WebSite object is being
     * used to serve pages, delegates the write of $data to the file named
     * $filename to that object's filePutContents method.
     *
     * @param string $filename name of file to write to persistent storages
     * @param string $data string of data to store in file
     * @return mixed whatever the underlying write call returns
     */
    public function filePutContents($filename, $data)
    {
        if (!empty($this->web_site)) {
            return $this->web_site->filePutContents($filename, $data);
        }
        return file_put_contents($filename, $data);
    }
    /**
     * Retrieve data associated with a key that has been put in the cache
     *
     * @param string $key the key to look up
     * @return mixed the data associated with the key if it exists, false
     *     otherwise
     */
    public function get($key)
    {
        $checksum_dir = $this->dir_name . "/" . $this->checksum($key);
        $this->cache_file = $checksum_dir . "/c" . webencode($key);
        if (!file_exists($this->cache_file)) {
            return false;
        }
        $this->updateCache($key);
        $contents = $this->fileGetContents($this->cache_file);
        if ($contents === false) {
            // file vanished or could not be read; treat it as a cache miss
            return false;
        }
        return unserialize($contents);
    }
    /**
     * Stores in the file cache a key-value pair
     *
     * @param string $key to associate with value
     * @param mixed $value to store
     */
    public function set($key, $value)
    {
        $checksum_dir = $this->dir_name . "/" . $this->checksum($key);
        if (!file_exists($checksum_dir)) {
            mkdir($checksum_dir);
            chmod($checksum_dir, 0777);
        }
        $cache_file = $checksum_dir . "/c" . webencode($key);
        $this->updateCache($key);
        $this->filePutContents($cache_file, serialize($value));
    }
    /**
     * Maps the provided key to a bin number by summing the byte values of
     * the key and reducing modulo self::NUMBER_OF_BINS
     *
     * @param string $key to convert to a bin number
     * @return int value between 0 and self::NUMBER_OF_BINS - 1
     */
    public function checksum($key)
    {
        $len = strlen($key);
        $value = 0;
        for ($i = 0; $i < $len; $i++) {
            $value += ord($key[$i]);
        }
        return ($value % self::NUMBER_OF_BINS);
    }
    /**
     * Deletes cache key value files and ram copies of key values stored in
     * this file cache
     */
    public function clear()
    {
        if (!empty($this->web_site)) {
            $this->web_site->clearFileCache();
        }
        if (is_dir($this->dir_name)) {
            $db_class = C\NS_DATASOURCES . ucfirst(C\DBMS) . "Manager";
            $db = new $db_class();
            $db->unlinkRecursive($this->dir_name, false);
        }
    }
    /**
     * Used to mark a cache item, and keep track of rounds according to the
     * marker algorithm. This function determines if the bin for $key is too
     * full and if so ejects randomly chosen unmarked items. When the bin is
     * over capacity and more than C\MIN_QUERY_CACHE_TIME seconds have passed
     * since the last purge, cache files in the bin that are no longer
     * tracked in the marker file are also deleted.
     *
     * @param string $key that was just read from or written to. Might need
     *  to be marked according to Marker algorithm
     */
    protected function updateCache($key)
    {
        $checksum_dir = $this->dir_name . "/" . $this->checksum($key);
        $marker_file = "$checksum_dir/cache_markers.txt";
        if (!file_exists($checksum_dir)) {
            mkdir($checksum_dir);
            chmod($checksum_dir, 0777);
        }
        $data = [];
        if (file_exists($marker_file)) {
            $raw = $this->fileGetContents($marker_file);
            $data = is_string($raw) ? unserialize($raw) : [];
            if (!is_array($data)) {
                // unreadable or corrupt marker file: start bookkeeping over
                $data = [];
            }
        }
        if (empty($data['MARKED']) || !is_array($data['MARKED'])) {
            $data['MARKED'] = [];
        }
        if (empty($data['UNMARKED']) || !is_array($data['UNMARKED'])) {
            $data['UNMARKED'] = [];
        }
        $now = time();
        if (empty($data['TIME'])) {
            $data['TIME'] = $now;
        }
        // the key that was just touched always ends up marked
        unset($data['UNMARKED'][$key]);
        $data['MARKED'][$key] = true;
        if (count($data['MARKED']) > self::MAX_FILES_IN_A_BIN) {
            /* Everything is marked, so a new round begins: unmark all items.
               The union operator is used rather than array_merge because
               array_merge renumbers integer-like keys, which would lose
               cache keys such as "123". The key just requested stays marked
               so it cannot be ejected by the very call that touched it.
             */
            $data['UNMARKED'] = $data['UNMARKED'] + $data['MARKED'];
            unset($data['UNMARKED'][$key]);
            $data['MARKED'] = [$key => true];
        }
        // counts are taken after any round reset so nothing is counted twice
        $num_unmarked = count($data['UNMARKED']);
        $total_in_cache = count($data['MARKED']) + $num_unmarked;
        if ($total_in_cache > self::MAX_FILES_IN_A_BIN) {
            $num_delete = $total_in_cache - self::MAX_FILES_IN_A_BIN;
            $num_unmarked_delete = min($num_unmarked, $num_delete);
            if ($num_unmarked_delete > 0) {
                // array_rand gives back a single key when asked for just one
                $eject_keys = (array)array_rand($data['UNMARKED'],
                    $num_unmarked_delete);
                foreach ($eject_keys as $eject_key) {
                    unset($data['UNMARKED'][$eject_key]);
                    // cache files are named "c" followed by the encoded key
                    $delete_file = $checksum_dir . "/c" .
                        webencode((string)$eject_key);
                    if (file_exists($delete_file)) {
                        unlink($delete_file);
                    }
                }
            }
            if ($now - $data['TIME'] > C\MIN_QUERY_CACHE_TIME) {
                $found_files = glob($checksum_dir . "/c*");
                if ($found_files === false) {
                    $found_files = [];
                }
                $untracked_files = array_flip($found_files);
                // the marker file also matches c*; it must not be purged
                unset($untracked_files[$marker_file]);
                $tracked_groups = [$data['MARKED'], $data['UNMARKED']];
                foreach ($tracked_groups as $tracked_group) {
                    foreach (array_keys($tracked_group) as $check_key) {
                        $check_file = $checksum_dir . "/c" .
                            webencode((string)$check_key);
                        unset($untracked_files[$check_file]);
                    }
                }
                foreach (array_keys($untracked_files) as $to_delete) {
                    if (file_exists($to_delete)) {
                        unlink($to_delete);
                    }
                }
                $data['TIME'] = $now;
            }
        }
        $this->filePutContents($marker_file, serialize($data));
    }
}
ViewGit