diff --git a/src/library/BPlusTree.php b/src/library/BPlusTree.php index 879890d72..0526c17dc 100644 --- a/src/library/BPlusTree.php +++ b/src/library/BPlusTree.php @@ -91,11 +91,13 @@ class BPlusTree */ const PARAMETERS_FILE = "bpt_parameters.txt"; /** - * + * Field variable used as a cache for the file handle, file name, and + * time of the last archive file for a B+-tree node added to */ public $add_archive_cache = [null, "", -1]; /** - * + * Field variable used as a cache for the file handle, file name, and + * time of the last archive file for a B+-tree node accessed for a value */ public $get_archive_cache = [null, "", -1]; /** @@ -116,11 +118,14 @@ class BPlusTree */ public $folder; /** - * + * A cache for contents of nodes that have been recently added to. + * Consists of path_to_node => node_content pairs + * @var array */ public $insert_node_cache = []; /** - * + * Used to keep track of whether the file_handle to the archive cache has changed + * since the last get operation */ public $instance_time; /** @@ -157,8 +162,10 @@ class BPlusTree * @param string $folder is the folder for storing the B+-Tree files * @param array $format the column names, keys and types for this * B+-Tree object - * @param int $max_keys - * @param string $compressor + * @param int $max_keys the maximum number of keys a node is allowed to hold + * @param object $compressor_type + * seekquarry\yioop\library\compressors\Compressor object used to + * compress index node files and blob items. */ public function __construct($folder, $format = self::DEFAULT_PARAMETERS["FORMAT"], $max_keys = self::MAX_KEYS, @@ -228,9 +235,21 @@ class BPlusTree } } /** - * @param array $row - * @param bool $is_encoded_key - * @param int $mode + * Adds an entry to the BPlusTree. Entries added might not be fully + * on disk until flushLastPutNode is called, as this method tries to + * avoid disk writes. 
+ * + * @param array $row associative array of field_name => values for each + * column field this BPlusTree's signature has + * @param bool $is_encoded_key whether in $row the primary key field has + * already been encoded (rawurlencode) or not + * @param int $mode says what to do in the event that an entry with the + * given key already exists in the BPlusTree. Possibilities are either + * PackedTableTools::APPEND_MODE or PackedTableTools::REPLACE_MODE + * The former appends as a subrecord the nonkey fields of the entry + * to the existing list of sub-records associated to the given key, the + * latter replaces the key value pair. + * @return bool success (true) or failure (false) */ public function put($row, $is_encoded_key = true, $mode = PackedTableTools::APPEND_MODE) @@ -273,8 +292,10 @@ class BPlusTree return true; } /** + * Saves the BPlusTree $insert_node_path (if not empty) node to disk. + * If $insert_node_path == "" then the last node put to will be saved. * - * @param string $insert_node_path + * @param string $insert_node_path name of node to save to disk. */ public function flushLastPutNode($insert_node_path = "") { @@ -289,11 +310,26 @@ class BPlusTree } } /** - * @param array $row - * @param array& $node - * @param string $archive_filename - * @param bool $is_encoded_key - * @param int $mode + * Given an entry to add to a BPlusTree node, the index file of the node + * (for non blob or serial fields), and the name of the archive file for + * the node (for blob or serial fields) stores the entry to the node. 
+ * + * @param array $row associative array of field_name => values for each + * column field this BPlusTree's signature has + * @param bool $is_encoded_key whether in $row the primary key field has + * already been encoded (rawurlencode) or not + * @param array& $node an in-memory copy of a node (will be modified by + * this method) + * @param string $archive_filename the name of the archive file associated + * with this node used for blob or serial fields. + * @param bool $is_encoded_key whether in $row the primary key field has + * already been encoded (rawurlencode) or not + * @param int $mode says what to do in the event that an entry with the + * given key already exists in the BPlusTree. Possibilities are either + * PackedTableTools::APPEND_MODE or PackedTableTools::REPLACE_MODE + * The former appends as a subrecord the nonkey fields of the entry + * to the existing list of sub-records associated to the given key, the + * latter replaces the key value pair. */ public function putNode($row, &$node, $archive_filename, $is_encoded_key = true, $mode = PackedTableTools::APPEND_MODE) @@ -306,8 +342,11 @@ class BPlusTree PackedTableTools::ADD_MEM_TABLE, $mode); } /** + * For a non-leaf node checks if it needs to be split, and if so splits + * and fixes keys up the tree to restore BPlus-Tree properties. * - * @param string $folder + * @param string $folder to check if needs split + * @return bool success (true) or failure (false) */ public function updateNodePath($folder) { @@ -362,7 +401,13 @@ class BPlusTree return $this->updateNodePath($parent_folder); } /** + * This method handles splitting the root node of a BPlusTree when it gets too + * full. Unlike for other nodes where we can add a key in the parent, for the + * root, we make a temporary folder and move all the root node's nodes to it + * then rename this to the least node of the root, and call updateNodePath + * on this to restore the BPlusTree property. 
* + * @return bool success (true) or failure (false) */ public function splitRootNode() { @@ -408,14 +453,14 @@ class BPlusTree return $this->updateNodePath($least_node); } /** - * Splits BPlusTree $node with path $node_path into two nodes each with an - * equal number of keys. + * Splits BPlusTree leaf $node with path $node_path into two nodes each + * with an equal number of keys. * - * @param string $node_path path to file used to store a BPlusTree node - * Used to name files after split. - * @param array BPlusTree node (associative array of pairs key => - * record for key stored according to BPlusTree signature using - * PackedTableTools) + * @param string $node_path path to file used to store a BPlusTree node + * Used to name files after split. + * @param array $node BPlusTree node (associative array of pairs key => + * record for key stored according to BPlusTree signature using + * PackedTableTools) */ public function splitRecordsInLeaf($node_path, $node) { @@ -500,7 +545,22 @@ class BPlusTree return $rows; } /** + * Returns records associated with $key in $key_node and the file + * associated with $archive_filename (for blob and serial column values) + * if it exists. 
* + * @param string $key the key to look up + * @param array $key_node the BPlusTree index node in which to look up the + * key + * @param string $archive_filename file name of archive data (blob and + * serial columns) associated with $key_node + * @param bool $is_encoded_key whether the key is rawurlencoded or not + * @param bool $decode whether to decode the records or leave them packed + * @param bool $look_up_blobs whether to look up blob and serial columns + * @param int $offset index of the first record associated with $key to + * return + * @param int $limit maximum number of records associated with key to return + * @return array of value records associated with $key */ public function getFromNode($key, $key_node, $archive_filename, $is_encoded_key = true, $decode = true, @@ -601,7 +661,11 @@ class BPlusTree return $return_folder; } /** + * Given the path to an index node file in the BPlusTree, compute the + * path to the corresponding archive node file. * + * @param string $node_filename of index node path + * @return string associated archive node path */ public function archiveFilenameFromNodeFilename($node_filename) { @@ -616,7 +680,14 @@ class BPlusTree substr($node_name, strlen(self::NODE_PREFIX)); } /** + * Returns the blob item from $archive_filename at $offset of length $len, + * uncompressing the result * + * @param string $archive_filename path to an archive node file for this + * BPlusTree + * @param int $offset byte offset into archive node file + * @param int $len length of blob item + * @return string uncompressed blob item from $archive_filename */ public function getArchive($archive_filename, $offset, $len) { @@ -646,7 +717,14 @@ class BPlusTree return $value; } /** - * + * Given an encoded hash_key to be used in the name of + * an archive file for a BPlusTree node and a folder where that + * node should be stored, return the path name for an archive file for + * a node + * @param string $current_folder to store archive file 
in + * @param string $encode_key encode key value to be used in the file + * name of a BPlusTree node archive file + * @return string archive file path */ protected function getArchiveName($current_folder, $encode_key) { @@ -655,7 +733,12 @@ class BPlusTree return $archive_filename; } /** + * Write the blob and serial columns from a row to insert into BPlusTree + * and then pack the rest of the row as a string * + * @param string $archive_filename name of archive file for a BPlusTree node + * @param array $row BPlusTree row to pack Blob columns for + * @return string packed row */ protected function writeBlobsAndPack($archive_filename, $row) { @@ -689,7 +772,9 @@ class BPlusTree return $out_value; } /** - * + * Add a value to an archive file $archive_filename of a B+-tree node + * @param string $archive_filename name of archive file + * @param string $value value to add */ protected function addArchive($archive_filename, $value) { diff --git a/src/library/LinearHashTable.php b/src/library/LinearHashTable.php index d3ff440c8..c4d7ff8a2 100644 --- a/src/library/LinearHashTable.php +++ b/src/library/LinearHashTable.php @@ -37,14 +37,16 @@ use seekquarry\yioop\configs as C; */ require_once __DIR__ . 
"/Utility.php"; /** - * + * This class implements a linear hash table for storing records that use + * PackedTableTools for their format * * @author Chris Pollett */ class LinearHashTable { /** - * + * Name of staging partition used for rows before they are added to the + * main linear hash table */ const ACTIVE_INDEX = "active"; /** @@ -62,11 +64,11 @@ class LinearHashTable "SAVE_PARTITION" => 0, "ACTIVE_COUNT" => 0 ]; /** - * + * File extension to use for hash index files for partitions */ const HASH_INDEX_EXTENSION = ".hix"; /** - * + * Maximum number of indexes to partitions to cache for reads */ const INDEX_CACHE_SIZE = 100; /** @@ -333,7 +335,10 @@ class LinearHashTable return $value; } /** + * Returns the path to the archive file for the partition $i + * (file used to store blob and serial columns) * + * @return string path to partition $i archive file */ public function getPartition($i) { @@ -341,7 +346,10 @@ class LinearHashTable $i . $this->compressor->fileExtension(); } /** + * Returns the path to the key index file for the partition $i + * (file used to store blob and serial columns) * + * @return string path to partition $i key index file */ public function getKeyPartition($i) { @@ -349,7 +357,10 @@ class LinearHashTable $i . 
self::HASH_INDEX_EXTENSION; } /** + * Returns the path to the index file for the active partition (the + * key value pairs not yet stored in the main LinearHashTable) * + * @return string path to active partition */ public function getActiveIndex() { @@ -632,7 +643,8 @@ class LinearHashTable return $old_path; } /** - * + * @param int $count + * @param int $max_items_per_file */ public function bitStatistics($count, $max_items_per_file) { @@ -665,7 +677,7 @@ class LinearHashTable $this->saveParameters(); } /** - * + * Save the operating parameters of this LinearHashTable */ public function saveParameters() { @@ -674,7 +686,9 @@ class LinearHashTable LOCK_EX); } /** - * + * Returns the parameters (such as its signature, max number of + * documents per partition and counts) used to configure the + * LinearHashTable stored at $folder */ public static function getParameterInfo($folder) { @@ -686,7 +700,11 @@ class LinearHashTable } } /** - * + * @param string $key + * @param string $value + * @param bool $is_hash_key + * @param int $change_count + * @param bool $bulk_insert */ protected function putIndex($key, $value, $is_hash_key = false, $change_count = 0, $bulk_insert = false) @@ -736,7 +754,7 @@ class LinearHashTable $this->unlinkHashPath($migrate_from_path); } /** - * + * @param string $hash_path */ protected function unlinkHashPath($hash_path) { @@ -746,7 +764,8 @@ class LinearHashTable } } /** - * + * @param int $count + * @param int $max_items_per_file */ protected function computeMigratePaths($count = -1, $max_items_per_file = -1) @@ -772,7 +791,8 @@ class LinearHashTable $migrate_to_path_high]; } /** - * + * @param string $hash_path + * @param int $new_count */ protected function insertRecordsFromIndex($hash_path, $new_count = -1) { @@ -793,7 +813,7 @@ class LinearHashTable } } /** - * + * @param string $value */ protected function addArchive($value) { @@ -822,7 +842,7 @@ class LinearHashTable return [$offset, $len, $save_partition]; } /** - * + * @param string $key 
*/ protected function addKeyArchive($key) { @@ -850,7 +870,11 @@ class LinearHashTable fwrite($fh, $encode, strlen($encode)); } /** - * + * @param string $hash_key + * @param string $value + * @param int $count + * @param bool $bulk_mode + * @return bool */ protected function addIndex($hash_key, $value, $count = -1, $bulk_mode = false) @@ -907,7 +931,15 @@ class LinearHashTable return false; } /** + * Computes the path to the hash partition that would contain $hash_key + * as well as the contents of that partition * + * @param string $hash_key key to find partition for + * @param int $count number of items assumed to be stored in table, if -1 + * uses $this->parameters['COUNT'] + * @return array [hash_partition_path, hash_partition_contents] if + * $count is not -1, then the partition might not exist in which case + * the second component might be -1 */ protected function getIndexInfo($hash_key, $count = -1) { @@ -941,7 +973,12 @@ class LinearHashTable return [$hash_path, $index_data]; } /** - * + * Checks if $new_count many stored items would entail splitting + * one of the hash partitions of this linear hash table (> 0), + * require no change in the number of hash partitions (0), or + * require two partitions to be merged (< 0) + * @param int $new_count new item count + * @return int change (<0 merge, >0 split) or no change required (0).