<?php /** * SeekQuarry/Yioop -- * Open Source Pure PHP Search Engine, Crawler, and Indexer * * Copyright (C) 2009 - 2021 Chris Pollett chris@pollett.org * * LICENSE: * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <https://www.gnu.org/licenses/>. * * END LICENSE * * @author Chris Pollett chris@pollett.org * @license https://www.gnu.org/licenses/ GPL3 * @link https://www.seekquarry.com/ * @copyright 2009 - 2021 * @filesource */ namespace seekquarry\yioop\library; use seekquarry\yioop\configs as C; /** * Loads crawlLog functions if needed */ require_once __DIR__ . "/Utility.php"; /** * * * @author Chris Pollett */ class LinearHashTable { /** * */ const ACTIVE_INDEX = "active"; /** * Compression strategy used to compress blob and serial columns */ const DEFAULT_COMPRESSOR = C\NS_COMPRESSORS . "NonCompressor"; /** * Default parameters to use when constructing a LinearHashTable */ const DEFAULT_PARAMETERS = ["COMPRESSOR" => self::DEFAULT_COMPRESSOR, "COUNT" => 0, "PARTITION_SIZE_THRESHOLD" => self::PARTITION_SIZE_THRESHOLD, "FORMAT" => ["PRIMARY KEY" => "KEY", "VALUE" => "BLOB"], "MAX_ITEMS_PER_FILE" => self::MAX_ITEMS_PER_FILE, "SAVE_PARTITION" => 0, "ACTIVE_COUNT" => 0 ]; /** * */ const HASH_INDEX_EXTENSION = ".hix"; /** * */ const INDEX_CACHE_SIZE = 100; /** * */ const KEY_PREFIX = "key_"; /** * Default maximum number of records to store in a hash table partition */ const MAX_ITEMS_PER_FILE = 16384; /** * File name of file used to store the parameters of this * LinearHashTable */ const PARAMETERS_FILE = "lht_parameters.txt"; /** * Prefix to file names of LinearHashTable partition files */ const PARTITION_PREFIX = "partition_"; /** * Maximum number of bytes a partition can have before a split. * Notice this implies a maximum file size to store * in BLOB columns */ const PARTITION_SIZE_THRESHOLD = 2147483648; /** * */ public $active_index; /** * */ public $add_archive_cache = [null, "", -1]; /** * */ public $add_key_archive_cache = [null, "", -1]; /** * */ public $add_index_cache = ["", [], -1]; /** * */ public $get_archive_cache = [null, "", -1]; /** * Array of column names for the columns in a BPlusTree which * are of type BLOB or SERIAL * @var array */ public $blob_columns; /** * The seekquarry\yioop\library\compressors\Compressor object used to * compress record files and blob items. * @var object */ public $compressor; /** * Folder path where the LinearHashTable is stored * @var string */ public $folder; /** * */ public $index_buffer; /** * */ public $instance_time; /** * Name of primary key column for records * @var string */ public $key_field; /** * Stores the constructor parameters used to create this * LinearHashTable * @var array */ public $parameters; /** * Array of column names for the columns in a PartitionDocumentBundle which * are of type SERIAL * @var array */ public $serial_columns; /** * The PackedTableTools object used to pack and unpack records in * LinearHashTable * @var object */ public $table_tools; /** * */ public function __construct($folder, $format = self::DEFAULT_PARAMETERS["FORMAT"], $max_items_per_file = self::MAX_ITEMS_PER_FILE, $partition_size_threshold = self::PARTITION_SIZE_THRESHOLD, $compressor_type = self::DEFAULT_COMPRESSOR) { $initial_parameters = self::DEFAULT_PARAMETERS; $initial_parameters["PARTITION_SIZE_THRESHOLD"] = $partition_size_threshold; $initial_parameters["MAX_ITEMS_PER_FILE"] = $max_items_per_file; $initial_parameters["COMPRESSOR"] = $compressor_type; $this->compressor = new $compressor_type(); $initial_parameters["FORMAT"] = $format; $this->instance_time = hrtime(true); $this->folder = $folder; $folder_paths = [$folder]; $changed_parameters = false; foreach ($folder_paths as $folder_path) { if (!file_exists($folder_path)) { $changed_parameters = true; if(!mkdir($folder_path)) { return null; } } } $this->parameters = self::getParameterInfo($folder); foreach (self::DEFAULT_PARAMETERS as $field => $value) { if (!isset($this->parameters[$field])) { $this->parameters[$field] = $initial_parameters[$field]; $changed_parameters = true; } } $format = $this->parameters["FORMAT"]; $packed_table_format = []; $this->blob_columns = []; $this->serial_columns = []; $this->index_buffer = []; $this->key_field = null; foreach ($format as $field_name => $type) { $upper_field_name = strtoupper($field_name); $upper_type = strtoupper($type); if ($upper_field_name == "PRIMARY KEY") { $packed_table_format["PRIMARY KEY"] = $type; $this->key_field = $type; } else if (in_array($upper_type, ["BLOB", "SERIAL"])) { $this->blob_columns[] = $field_name; if ($upper_type == "SERIAL") { $this->serial_columns[] = $field_name; } $packed_table_format[$field_name] = "INT"; } else if (isset(PackedTableTools::TYPE_SYNONYMS[$upper_type])) { $packed_table_format[$field_name] = PackedTableTools::TYPE_SYNONYMS[$upper_type]; } else { return null; } } if (empty($this->key_field)) { return null; } if (!empty($this->blob_columns)) { $packed_table_format["LAST_BLOB_LEN"] = "INT"; $packed_table_format["PARTITION"] = "INT"; } $this->table_tools = new PackedTableTools($packed_table_format); if ($changed_parameters) { $this->saveParameters(); } } /** * */ public function get($key, $fields = [], $is_hash_key = false) { $hash_key = ($is_hash_key) ? $key : $this->hashKey($key); $table_tools = $this->table_tools; $active_info = $this->getIndexInfo(self::ACTIVE_INDEX); $index_data_raw = false; if (!empty($active_info) && !empty($active_info[1])) { list($index_path, $index) = $active_info; $index_data_raw = $table_tools->find($index, $hash_key); } if (empty($index_data_raw)) { $index_info = $this->getIndexInfo($hash_key); if (empty($index_info) || empty($index_info[1])) { return false; } list($index_path, $index) = $index_info; $index_data_raw = $table_tools->find($index, $hash_key); } if (empty($index_data_raw)) { return false; } if (!empty($fields)) { if (isset($fields[0]) && $fields == array_values($fields)) { $fields = array_combine($fields, $fields); } } $index_data = $table_tools->unpack($index_data_raw)[0]; if (!empty($this->blob_columns)) { $num_blob_columns = count($this->blob_columns); $offset = intval($index_data[$this->blob_columns[0]]); $partition = $index_data["PARTITION"]; $partition_filename = $this->getPartition($partition); for ($i = 0; $i < $num_blob_columns; $i++) { $column_name = $this->blob_columns[$i]; $len = ($i + 1 < $num_blob_columns) ? intval($index_data[$this->blob_columns[$i + 1]]) : $index_data["LAST_BLOB_LEN"]; if (empty($fields) || isset($fields[$column_name])) { $index_data[$column_name] = ($len == 0) ? "" : $this->getArchive($partition_filename, $offset, $len); } $offset += $len; } unset($index_data["LAST_BLOB_LEN"], $index_data["PARTITION"]); } if (empty($fields)) { foreach ($this->serial_columns as $field_name) { $index_data[$field_name] = unserialize( $index_data[$field_name]); } $index_data[$this->key_field] = $key; $out_data = $index_data; } else { $out_data = []; if (isset($fields[$this->key_field])) { $out_data[$fields[$this->key_field]] = $key; unset($fields[$this->key_field]); } foreach ($fields as $in_name => $out_name) { if (isset($index_data[$in_name])) { if (in_array($in_name, $this->serial_columns)) { $out_data[$out_name] = unserialize( $index_data[$in_name]); } else { $out_data[$out_name] = $index_data[$in_name]; } } } } return $out_data; } /** * */ public function getArchive($archive_filename, $offset, $len) { list($fh, $previous_archive_filename, $previous_instance_time) = $this->get_archive_cache; $compressor = $this->compressor; if (!$fh || $previous_archive_filename != $archive_filename || $previous_instance_time != $this->instance_time) { if ($fh) { fclose($fh); } $fh = fopen($archive_filename , "r"); $previous_archive_filename = $archive_filename; $previous_instance_time = $this->instance_time; } $value = false; if (fseek($fh, $offset) == 0) { $compressed_file = fread($fh, $len); $value = $compressor->uncompress($compressed_file); } $this->get_archive_cache = [$fh, $previous_archive_filename, $previous_instance_time]; return $value; } /** * */ public function getPartition($i) { return $this->folder . "/" . self::PARTITION_PREFIX . $i . $this->compressor->fileExtension(); } /** * */ public function getKeyPartition($i) { return $this->folder . "/" . self::KEY_PREFIX . $i . self::HASH_INDEX_EXTENSION; } /** * */ public function getActiveIndex() { return $this->folder . "/" . self::ACTIVE_INDEX . self::HASH_INDEX_EXTENSION; } /** * Checks if a key exists in the linear hash table * @param string $key key to check * @param bool $compute_hash whether the key has already had the linear * hash table's hash function applied or not * @param bool $check_active whether to check the active index of * buffered key values that have not yet been put into the main table * @return bool whether the $key exists in the linear hash table */ public function exists($key, $compute_hash = true, $check_active = true) { if ($compute_hash) { $hash_key = $this->hashKey($key); } else { $hash_key = $key; } $table_tools = $this->table_tools; if ($check_active) { $active_info = $this->getIndexInfo(self::ACTIVE_INDEX); if (!empty($active_info) && !empty($active_info[1])) { list($index_path, $index) = $active_info; $row = $table_tools->find($index, $hash_key); if ($row != null) { return true; } } } list($hash_path, $index) = $this->getIndexInfo($hash_key); if ($index) { $row = $table_tools->find($index, $hash_key); if ($row != null) { return true; } } return false; } /** * * @param array $row_or_rows * @param bool $is_hash_key * @param bool $allow_duplicates */ public function put($row_or_rows, $is_hash_key = false, $allow_duplicates = true) { $rows = (empty($row_or_rows[$this->key_field])) ? $row_or_rows : [$row_or_rows]; $table_tools = $this->table_tools; $num_rows = count($rows); $i = 0; foreach ($rows as $row) { crawlTimeoutLog("..still adding pages linear hash table. Added %s" . " of %s.", $i, $num_rows); $key = $row[$this->key_field] ?? false; if ($key === false) { crawlLog("LinearHashtable Put Failed A"); return false; } unset($row[$this->key_field]); $value = $row; $hash_key = ($is_hash_key) ? $key : $this->hashKey($key); if (!$allow_duplicates && $this->exists($hash_key, false)) { crawlLog("LinearHashtable Put Failed B"); return false; } $save_partition = $this->parameters["SAVE_PARTITION"]; $save_partition_name = $this->getPartition($save_partition); $active_index_name = $this->getActiveIndex(); if (($this->parameters['ACTIVE_COUNT'] > $this->parameters["MAX_ITEMS_PER_FILE"]) || ( file_exists($save_partition_name) && filesize($save_partition_name) > $this->parameters["PARTITION_SIZE_THRESHOLD"])) { $this->advanceSavePartition(); } $blob_columns = $this->blob_columns ?? []; $serial_columns = $this->serial_columns ?? []; $format = $this->parameters["FORMAT"]; unset($format['PRIMARY KEY']); $format_keys = array_keys($format); $out_value = []; foreach ($format_keys as $format_key) { $out_value[$format_key] = $value[$format_key] ?? null; } if (!empty($blob_columns)) { $old_offset = 0; $first = true; foreach ($blob_columns as $blob_column) { $blob = $out_value[$blob_column] ?? ""; if (in_array($blob_column, $serial_columns)) { $blob = serialize($blob); } if (($add_info = $this->addArchive($blob)) === false) { crawlLog("LinearHashtable Put Failed C"); return false; } list($offset, $len, $partition) = $add_info; if ($first) { $this->addKeyArchive($key); $first = false; } $out_value[$blob_column] = $offset - $old_offset; $old_offset = $offset; } $out_value["LAST_BLOB_LEN"] = $len; $out_value["PARTITION"] = $partition; } $out_value = $table_tools->pack($out_value); if (empty($this->save_partition_handle) || !is_resource( $this->save_partition_handle)) { $this->save_partition_handle = fopen($active_index_name, "c+"); fseek($this->save_partition_handle, 0, SEEK_END); } $this->active_index = []; /* the above forces getIndexInfo to reload active index on a look up */ if ($table_tools->add($this->save_partition_handle, $hash_key, $out_value, PackedTableTools::ADD_FILE_HANDLE)) { $this->parameters['ACTIVE_COUNT']++; } else { crawlLog("LinearHashtable Put Failed D"); return false; } $i++; } $this->saveParameters(); return true; } /** * */ public function advanceSavePartition($new_save_partition = 0) { $save_partition = $this->parameters["SAVE_PARTITION"]; $new_save_partition = (($new_save_partition) > 0) ? $new_save_partition : $save_partition + 1; if ($new_save_partition <= $save_partition) { return false; } $active_index_name = $this->getActiveIndex(); $active_index_info = $this->getIndexInfo(self::ACTIVE_INDEX); if (!empty($active_index_info) && !empty($active_index_info[1])) { list(, $active_index) = $active_index_info; foreach ($active_index as $active_key => $active_row) { $this->putIndex($active_key, $active_row, true, 0, true); } $this->addIndex("", ""); } if (!empty($this->save_partition_handle) && is_resource($this->save_partition_handle)) { fclose($this->save_partition_handle); } if (file_exists($active_index_name)) { unlink($active_index_name); } $save_partition = $new_save_partition; $this->parameters["SAVE_PARTITION"] = $save_partition; $this->parameters['ACTIVE_COUNT'] = 0; return true; } /** * */ public function delete($key_or_keys, $is_hash_key = false) { $keys = (is_array($key_or_keys)) ? $key_or_keys : [$key_or_keys]; $table_tools = $this->table_tools; foreach ($keys as $key) { $hash_key = ($is_hash_key) ? $key : $this->hashKey($key); $active_info = $this->getIndexInfo(self::ACTIVE_INDEX); $is_active = false; $index_data_raw = false; $found = false; if (!empty($active_info) && !empty($active_info[1])) { list($hash_path, $index) = $active_info; $index_data_row = $table_tools->find($index, $hash_key); } if (!empty($index_data_row)) { if (!empty($this->save_partition_handle) && is_resource($this->save_partition_handle)) { fclose($this->save_partition_handle); } $is_active = true; } else { $index_info = $this->getIndexInfo($hash_key); if (empty($index_info) || empty($index_info[1])) { return false; } list($hash_path, $index) = $index_info; if ($index) { $index_data_row = $table_tools->find($index, $hash_key); } if (empty($index_data_row)) { return false; } } $table_tools->delete($index, $hash_key); if (empty($index)) { $this->unlinkHashPath($hash_path); } else { $table_tools->save($hash_path . self::HASH_INDEX_EXTENSION, $index); } if ($is_active) { $this->active_index = $index; $this->parameters["SAVE_PARTITION"]--; } else { $this->index_buffer[$hash_path] = $index; $max_items_per_file = $this->parameters["MAX_ITEMS_PER_FILE"]; if ($this->checkSplitMerge( $this->parameters['COUNT'] - 1) < 0) { $this->mergeMigrate(); } $this->parameters['COUNT']--; } } $this->saveParameters(); return true; } /** * */ public function hashKey($key) { return md5($key, true); } /** * */ public function getHashPath($hash_key, $count = -1, $max_items_per_file = -1, $mkdir_if_not_exists = false) { static $old_hash_key = null; static $old_count = -1; static $old_max_items_per_file = -1; static $old_mkdir_if_not_exists = false; static $old_path = ""; if ($old_hash_key == $hash_key && $old_count == $count && $old_max_items_per_file == $max_items_per_file && (!$mkdir_if_not_exists || $old_mkdir_if_not_exists == $mkdir_if_not_exists)) { return $old_path; } $old_hash_key = $hash_key; $old_count = $count; $old_max_items_per_file = $max_items_per_file; $old_mkdir_if_not_exists = $mkdir_if_not_exists; $pack_hash = str_pad(substr($hash_key, 0, 8), 8, "\0", STR_PAD_LEFT); $hash_int = unpack("J", $pack_hash)[1]; $count = ($count == -1) ? $this->parameters['COUNT'] : $count; $max_items_per_file = max(1, ($max_items_per_file == -1) ? $this->parameters['MAX_ITEMS_PER_FILE'] : $max_items_per_file); list($num_files, $max_num_bits, $pow_max, $threshold) = $this->bitStatistics($count, $max_items_per_file); $masked_int = ($hash_int & ($pow_max - 1)); $num_bits = (($masked_int < $threshold)) ? $max_num_bits : $max_num_bits - 1; $mask = ($num_bits == 0) ? 0 : (1 << ($num_bits)) - 1; $init_prefix = ""; if ($num_files == 1 && $max_items_per_file != 1) { $init_prefix = "z"; } $prefix = $this->folder ?? "."; for ($i = 56, $j = 0; $i > 0; $i -= 8, $j++) { if ($num_bits > $i) { $prefix .= "/" . ord($pack_hash[$j]); if ($mkdir_if_not_exists && !file_exists($prefix) ) { mkdir($prefix); } } } $old_path = $prefix . "/" . $init_prefix . sprintf("%'.0{$num_bits}b",($hash_int & $mask)); return $old_path; } /** * */ public function bitStatistics($count, $max_items_per_file) { $num_files = max(1, ceil($count/$max_items_per_file)); $max_num_bits = ceil(log($num_files + 1, 2)); $pow_max = 1 << ($max_num_bits - 1); $threshold = $num_files - $pow_max; return [$num_files, $max_num_bits, $pow_max, $threshold]; } /** * Creates a new counter $field to be maintained * * @param string $field field of info struct to add a counter for */ public function initCountIfNotExists($field = "COUNT") { $this->parameters[$field] ??= 0; $this->saveParameters(); } /** * Add $num to maintained counter $field * * @param int $num number of items to add to current count * @param string $field field of info struct to add to the count of */ public function addCount($num, $field = "COUNT") { $this->parameters[$field] ??= 0; $this->parameters[$field] += $num; $this->saveParameters(); } /** * */ public function saveParameters() { $parameter_path = $this->folder . "/" . self::PARAMETERS_FILE; file_put_contents($parameter_path, serialize($this->parameters), LOCK_EX); } /** * */ public static function getParameterInfo($folder) { $parameter_path = $folder . "/" . self::PARAMETERS_FILE; if(file_exists($parameter_path)) { return unserialize(file_get_contents($parameter_path)); } else { return []; } } /** * */ protected function putIndex($key, $value, $is_hash_key = false, $change_count = 0, $bulk_insert = false) { $hash_key = ($is_hash_key) ? $key : $this->hashKey($key); $split = false; if ($change_count == 0) { $count = $this->parameters['COUNT']; if ($count > 1 && $this->checkSplitMerge($count + 1) > 0) { $this->addIndex("", ""); $this->splitMigrate(); } $this->parameters['COUNT']++; $count = $this->parameters['COUNT']; } else { $count = $change_count; } //the next line actually creates directories if needed $hash_path = $this->getHashPath($hash_key, $count, -1, true); if ($this->addIndex($hash_key, $value, $count, $bulk_insert)) { return true; } return false; } /** * */ protected function mergeMigrate() { list(,$migrate_to_path_low, $migrate_to_path_high) = $this->computeMigratePaths(); $new_count = $this->parameters['COUNT'] - 1; $this->insertRecordsFromIndex($migrate_to_path_low, $new_count); $this->insertRecordsFromIndex($migrate_to_path_high, $new_count); $this->unlinkHashPath($migrate_to_path_low); $this->unlinkHashPath($migrate_to_path_high); } /** * */ protected function splitMigrate() { list($migrate_from_path,) = $this->computeMigratePaths(); $this->insertRecordsFromIndex($migrate_from_path); $this->unlinkHashPath($migrate_from_path); } /** * */ protected function unlinkHashPath($hash_path) { $index_file_name = $hash_path . self::HASH_INDEX_EXTENSION; if (file_exists($index_file_name)) { unlink($index_file_name); } } /** * */ protected function computeMigratePaths($count = -1, $max_items_per_file = -1) { $count = ($count == -1) ? $this->parameters['COUNT'] : $count; $max_items_per_file = max(1, ($max_items_per_file == -1) ? $this->parameters['MAX_ITEMS_PER_FILE'] : $max_items_per_file); list($num_files, $max_num_bits, $pow_max, $threshold) = $this->bitStatistics($count, $max_items_per_file); $migrate_from_path = ($threshold >= 0) ? $this->getHashPath(pack("J", $threshold), $count, $max_items_per_file) : false; $migrate_to_path_high = $this->getHashPath(pack("J", $pow_max + $threshold - 1), $count + 1, $max_items_per_file); if ($threshold == 0) { $migrate_to_path_low = $this->getHashPath( pack("J", ($pow_max -1) >>1), $count + 1, $max_items_per_file); } else { $migrate_to_path_low = $this->getHashPath(pack("J", $threshold - 1), $count + 1, $max_items_per_file); } return [$migrate_from_path, $migrate_to_path_low, $migrate_to_path_high]; } /** * */ protected function insertRecordsFromIndex($hash_path, $new_count = -1) { $new_count = ($new_count == -1) ? $this->parameters['COUNT'] + 1 : $new_count; $index_path = $hash_path . self::HASH_INDEX_EXTENSION; $table_tools = $this->table_tools; $index = (!empty($this->index_buffer[$hash_path])) ? $this->index_buffer[$hash_path] : $table_tools->load($index_path); unset($this->index_buffer[$hash_path]); if (!empty($index)) { foreach ($index as $hash_key => $value) { $hash_path = $this->getHashPath($hash_key, $new_count, -1, true); $this->addIndex($hash_key, $value, $new_count, true); } $this->addIndex("", ""); } } /** * */ protected function addArchive($value) { list($fh, $previous_partition_filename, $previous_instance_time) = $this->add_archive_cache; $compressor = $this->compressor; $save_partition = $this->parameters["SAVE_PARTITION"]; $partition_filename = $this->getPartition($save_partition); if (!is_resource($fh) || $partition_filename != $previous_partition_filename || $previous_instance_time != $this->instance_time) { if (!empty($fh) && is_resource($fh)) { fclose($fh); } $fh = fopen($partition_filename , "c+"); $previous_partition_filename = $partition_filename; $previous_instance_time = $this->instance_time; } fseek($fh, 0, SEEK_END); $offset = ftell($fh); $compressed_value = $compressor->compress($value); $len = strlen($compressed_value); fwrite($fh, $compressed_value, $len); $this->add_archive_cache = [$fh, $previous_partition_filename, $previous_instance_time]; return [$offset, $len, $save_partition]; } /** * */ protected function addKeyArchive($key) { list($fh, $previous_key_filename, $previous_instance_time) = $this->add_key_archive_cache; $save_partition = $this->parameters["SAVE_PARTITION"]; $key_filename = $this->getKeyPartition($save_partition); $separator = "\xFF"; if (!is_resource($fh) || $key_filename != $previous_key_filename || $previous_instance_time != $this->instance_time) { if (is_resource($fh)) { fclose($fh); }; if (!file_exists($key_filename)) { $separator = ""; } $fh = fopen($key_filename , "c+"); $previous_key_filename = $key_filename; $previous_instance_time = $this->instance_time; } fseek($fh, 0, SEEK_END); $encode = $separator . encode255($key); $this->add_key_archive_cache = [$fh, $previous_key_filename, $previous_instance_time]; fwrite($fh, $encode, strlen($encode)); } /** * */ protected function addIndex($hash_key, $value, $count = -1, $bulk_mode = false) { list($previous_hash_path, $previous_index, $previous_instance_time) = $this->add_index_cache; $hash_path = $this->getHashPath($hash_key, $count); $table_tools = $this->table_tools; if (!empty($previous_hash_path)) { if ($previous_hash_path == $hash_path && $hash_key !== "" && $previous_instance_time == $this->instance_time) { $index = $previous_index; } else { $table_tools->save($previous_hash_path . self::HASH_INDEX_EXTENSION, $previous_index); $previous_index = []; $previous_hash_path = ""; $previous_instance_time = $this->instance_time; } } if (empty($hash_key)) { // this case used to flush old value $this->add_index_cache = [$previous_hash_path, $previous_index, $previous_instance_time]; return true; } if (empty($index)) { $index_info = $this->getIndexInfo($hash_key, $count); list($hash_path, $index) = $index_info; } if ($index) { $hash_key_info = $table_tools->find($index, $hash_key); } else { $index = []; $hash_key_info = null; } if (!$hash_key_info) { $table_tools->add($index, $hash_key, $value); if ($bulk_mode) { $previous_index = $index; $previous_hash_path = $hash_path; } else { $table_tools->save($hash_path . self::HASH_INDEX_EXTENSION, $index); $previous_index = []; $previous_hash_path = ""; } $this->index_buffer[$hash_path] = $index; $this->add_index_cache = [$previous_hash_path, $previous_index, $previous_instance_time]; return true; } $this->add_index_cache = [$previous_hash_path, $previous_index, $previous_instance_time]; return false; } /** * */ protected function getIndexInfo($hash_key, $count = -1) { if ($hash_key == self::ACTIVE_INDEX) { $index_path = $this->getActiveIndex(); $hash_path = substr($index_path, 0, -4); if (!empty($this->active_index)) { return [$hash_path, $this->active_index]; } } else { $count = ($count == -1) ? $this->parameters['COUNT'] : $count; $hash_path = $this->getHashPath($hash_key, $count); $index_path = $hash_path . self::HASH_INDEX_EXTENSION; if (!empty($this->index_buffer[$hash_path])) { return [$hash_path, $this->index_buffer[$hash_path]]; } } if (!file_exists($index_path)) { return [$hash_path, false]; } $index_data = $this->table_tools->load($index_path); if ($hash_key == self::ACTIVE_INDEX) { $this->active_index = $index_data; return [$hash_path, $this->active_index]; } else { if (count($this->index_buffer) > self::INDEX_CACHE_SIZE) { $this->index_buffer = []; } $this->index_buffer[$hash_path] = $index_data; } return [$hash_path, $index_data]; } /** * */ protected function checkSplitMerge($new_count) { $count = $this->parameters['COUNT']; $max_items_per_file = $this->parameters['MAX_ITEMS_PER_FILE']; $current_expected_num_files = ceil($count/$max_items_per_file); $next_expected_num_files = ceil($new_count/$max_items_per_file); return $next_expected_num_files - $current_expected_num_files; } }