Adds rest of documentation for BPlusTree, add more documentation for LinearHashTable, a=chris

Chris Pollett [2021-09-20 00:Sep:th]
Adds rest of documentation for BPlusTree, add more documentation for LinearHashTable, a=chris
Filename
src/library/BPlusTree.php
src/library/LinearHashTable.php
diff --git a/src/library/BPlusTree.php b/src/library/BPlusTree.php
index 879890d72..0526c17dc 100644
--- a/src/library/BPlusTree.php
+++ b/src/library/BPlusTree.php
@@ -91,11 +91,13 @@ class BPlusTree
      */
     const PARAMETERS_FILE = "bpt_parameters.txt";
     /**
-     *
+     * Field variable used as a cache for the file handle, file name, and
+     * time of the last archive file for a B+-tree node added to
      */
     public $add_archive_cache = [null, "", -1];
     /**
-     *
+     * Field variable used as a cache for the file handle, file name, and
+     * time of the last archive file for a B+-tree node accessed for a value
      */
     public $get_archive_cache = [null, "", -1];
     /**
@@ -116,11 +118,14 @@ class BPlusTree
      */
     public $folder;
     /**
-     *
+     * A cache for contents of nodes that have been recently added to.
+     * consists of path_to_node => node_content pairs
+     * @var array
      */
     public $insert_node_cache = [];
     /**
-     *
+     * Used to keep track if the file_handle to the archive cache has changed
+     * since the last get operation
      */
     public $instance_time;
     /**
@@ -157,8 +162,10 @@ class BPlusTree
      * @param string $folder is the folder for storing the B+-Tree files
      * @param array $format the column names, keys and types for this
      *  B+-Tree  object
-     * @param int $max_keys
-     * @param string $compressor
+     * @param int $max_keys the maximum number of keys a node is allowed to hold
+     * @param object $compressor_type
+     *  seekquarry\yioop\library\compressors\Compressor object used to
+     *  compress index node files and blob items.
      */
     public function __construct($folder, $format =
         self::DEFAULT_PARAMETERS["FORMAT"], $max_keys = self::MAX_KEYS,
@@ -228,9 +235,21 @@ class BPlusTree
         }
     }
     /**
-     * @param array $row
-     * @param bool $is_encoded_key
-     * @param int $mode
+     * Adds an entry to the BPlusTree. Entries added might not be fully
+     * on disk until flushLastPutNode is called, as this method tries to
+     * avoid disk writes.
+     *
+     * @param array $row associative array of field_name => value for each
+     *  column field this BPlusTree's signature has
+     * @param bool $is_encoded_key whether in $row the primary key field has
+     *  already been encoded (rawurlencode) or not
+     * @param int $mode says what to do in the event that an entry with the
+     *  given key already exists in the BPlusTree. Possibilities are either
+     *  PackedTableTools::APPEND_MODE or PackedTableTools::REPLACE_MODE
+     *  The former appends as a subrecord the nonkey fields of the entry
+     *  to the existing list of sub-records associated to the given key, the
+     *  latter replaces the key value pair.
+     * @return bool success (true) or failure (false)
      */
     public function put($row, $is_encoded_key = true,
         $mode = PackedTableTools::APPEND_MODE)
@@ -273,8 +292,10 @@ class BPlusTree
         return true;
     }
     /**
+     * Saves the BPlusTree $insert_node_path (if not empty) node to disk.
+     * If $insert_node_path == "" then the last node put to will be saved.
      *
-     * @param string $insert_node_path
+     * @param string $insert_node_path name of node to save to disk.
      */
     public function flushLastPutNode($insert_node_path = "")
     {
@@ -289,11 +310,26 @@ class BPlusTree
         }
     }
     /**
-     * @param array $row
-     * @param array& $node
-     * @param string $archive_filename
-     * @param bool $is_encoded_key
-     * @param int $mode
+     * Given an entry to add to a BPlusTree node, the index file of the node
+     * (for non blob or serial fields),  and the name of the archive file for
+     * node (for blob or serial fields) stores the entry to the node.
+     *
+     * @param array $row associative array of field_name => value for each
+     *  column field this BPlusTree's signature has
+     * @param bool $is_encoded_key whether in $row the primary key field has
+     *  already been encoded (rawurlencode) or not
+     * @param array& $node an in-memory copy of a node (will be modified by
+     *  this method)
+     * @param string $archive_filename the name of the archive file associated
+     *  with this file used for blob or serial fields.
+     * @param bool $is_encoded_key whether in $row the primary key field has
+     *  already been encoded (rawurlencode) or not
+     * @param int $mode says what to do in the event that an entry with the
+     *  given key already exists in the BPlusTree. Possibilities are either
+     *  PackedTableTools::APPEND_MODE or PackedTableTools::REPLACE_MODE
+     *  The former appends as a subrecord the nonkey fields of the entry
+     *  to the existing list of sub-records associated to the given key, the
+     *  latter replaces the key value pair.
      */
     public function putNode($row, &$node, $archive_filename,
         $is_encoded_key = true, $mode = PackedTableTools::APPEND_MODE)
@@ -306,8 +342,11 @@ class BPlusTree
             PackedTableTools::ADD_MEM_TABLE, $mode);
     }
     /**
+     * For a non-leaf node checks if it needs to be split, and if so splits
+     * and fixes keys up the tree to restore BPlus-Tree properties.
      *
-     * @param string $folder
+     * @param string $folder to check if needs split
+     * @return bool success (true) or failure (false)
      */
     public function updateNodePath($folder)
     {
@@ -362,7 +401,13 @@ class BPlusTree
         return $this->updateNodePath($parent_folder);
     }
     /**
+     * This method handles splitting the root node of a BPlusTree when it gets
+     * too full. Unlike for other nodes where we can add a key in the parent,
+     * for the root, we make a temporary folder and move all the root node's
+     * nodes to it, then rename this to the least node of the root, and call
+     * updateNodePath on this to restore the BPlusTree property.
      *
+     * @return bool success (true) or failure (false)
      */
     public function splitRootNode()
     {
@@ -408,14 +453,14 @@ class BPlusTree
         return $this->updateNodePath($least_node);
     }
     /**
-     *  Splits BPlusTree $node with path $node_path into two nodes each with an
-     *  equal number of keys.
+     * Splits BPlusTree leaf $node with path $node_path into two nodes each
+     * with an equal number of keys.
      *
-     *  @param string $node_path path to file used to store a BPlusTree node
-     *      Used to name files after split.
-     *  @param array BPlusTree node (associative array of pairs key =>
-     *      record for key stored according to BPlusTree signature using
-     *      PackedTableTools)
+     * @param string $node_path path to file used to store a BPlusTree node
+     *  Used to name files after split.
+     * @param array $node BPlusTree node (associative array of pairs key =>
+     *  record for key stored according to BPlusTree signature using
+     *  PackedTableTools)
      */
     public function splitRecordsInLeaf($node_path, $node)
     {
@@ -500,7 +545,22 @@ class BPlusTree
         return $rows;
     }
     /**
+     * Returns records associated with $key in $key_node and the file
+     * associated with $archive_filename (for blob and serial column values)
+     * if it exists.
      *
+     * @param string $key the key to look up
+     * @param array $key_node the BPlusTree index node in which to look up the
+     *  key
+     * @param string $archive_filename file name of archive data (blob and
+     *  serial columns) associated with $key_node
+     * @param bool $is_encoded_key whether the key is rawurlencoded or not
+     * @param bool $decode whether to decode the records or leave them packed
+     * @param bool $look_up_blobs whether to look up blob and serial columns
+     * @param int $offset index of the first record associated with $key to
+     *  return
+     * @param int $limit maximum number of records associated with key to return
+     * @return array of value records associated with $key
      */
     public function getFromNode($key, $key_node, $archive_filename,
         $is_encoded_key = true, $decode = true,
@@ -601,7 +661,11 @@ class BPlusTree
         return $return_folder;
     }
     /**
+     * Given the path to an index node file in the BPlusTree compute the
+     * path to the corresponding archive node file.
      *
+     * @param string $node_filename of index node path
+     * @return string associated archive node path
      */
     public function archiveFilenameFromNodeFilename($node_filename)
     {
@@ -616,7 +680,14 @@ class BPlusTree
             substr($node_name, strlen(self::NODE_PREFIX));
     }
     /**
+     * Return the blob item from $archive_filename at $offset of length $len,
+     * uncompress the result
      *
+     * @param string $archive_filename path to an archive node file for this
+     *  BPlusTree
+     * @param int $offset byte offset into archive node file
+     * @param int $len length of blob item
+     * @return string uncompressed blob item from $archive_filename
      */
     public function getArchive($archive_filename, $offset, $len)
     {
@@ -646,7 +717,14 @@ class BPlusTree
         return $value;
     }
     /**
-     *
+     * Given an encoded hash_key to be used as part of the name of
+     * an archive file for a BPlusTree node and a folder where that
+     * node should be stored, return the path name for an archive file for
+     * a node
+     * @param string $current_folder to store archive file in
+     * @param string $encode_key encoded key value to be used in the file
+     *  name of a BPlusTree node archive file
+     * @return string archive file path
      */
     protected function getArchiveName($current_folder, $encode_key)
     {
@@ -655,7 +733,12 @@ class BPlusTree
         return $archive_filename;
     }
     /**
+     * Write the blob and serial columns from a row to insert into BPlusTree
+     * and then pack the rest of the row as a string
      *
+     * @param string $archive_filename name of archive file for a BPlusTree node
+     * @param array $row BPlusTree row to pack Blob columns for
+     * @return string packed row
      */
     protected function writeBlobsAndPack($archive_filename, $row)
     {
@@ -689,7 +772,9 @@ class BPlusTree
         return $out_value;
     }
     /**
-     *
+     * Add a value to an archive file $archive_filename of a B+-tree node
+     * @param string $archive_filename name of archive file
+     * @param string $value value to add
      */
     protected function addArchive($archive_filename, $value)
     {
diff --git a/src/library/LinearHashTable.php b/src/library/LinearHashTable.php
index d3ff440c8..c4d7ff8a2 100644
--- a/src/library/LinearHashTable.php
+++ b/src/library/LinearHashTable.php
@@ -37,14 +37,16 @@ use seekquarry\yioop\configs as C;
  */
 require_once __DIR__ . "/Utility.php";
 /**
- *
+ * This class implements a linear hash table for storing records that use
+ * PackedTableTools for their format
  *
  * @author Chris Pollett
  */
 class LinearHashTable
 {
     /**
-     *
+     * Name of staging partition used for rows before they are added to the
+     * main linear hash table
      */
     const ACTIVE_INDEX = "active";
     /**
@@ -62,11 +64,11 @@ class LinearHashTable
         "SAVE_PARTITION" => 0, "ACTIVE_COUNT" => 0
     ];
     /**
-     *
+     * File extension to use for hash index files for partitions
      */
     const HASH_INDEX_EXTENSION = ".hix";
     /**
-     *
+     * Maximum number of indexes to partitions to cache for reads
      */
     const INDEX_CACHE_SIZE = 100;
     /**
@@ -333,7 +335,10 @@ class LinearHashTable
         return $value;
     }
     /**
+     * Returns the path to the archive file for the partition $i
+     * (file used to store blob and serial columns)
      *
+     * @return string path to partition $i archive file
      */
     public function getPartition($i)
     {
@@ -341,7 +346,10 @@ class LinearHashTable
             $i . $this->compressor->fileExtension();
     }
     /**
+     * Returns the path to the key index file for the partition $i
+     * (hash index file, named using self::HASH_INDEX_EXTENSION)
      *
+     * @return string path to partition $i key index file
      */
     public function getKeyPartition($i)
     {
@@ -349,7 +357,10 @@ class LinearHashTable
             $i . self::HASH_INDEX_EXTENSION;
     }
     /**
+     * Returns the path to the index file for the active partition (the
+     * key value pairs not yet stored in the main LinearHashTable)
      *
+     * @return string path to active partition
      */
     public function getActiveIndex()
     {
@@ -632,7 +643,8 @@ class LinearHashTable
         return $old_path;
     }
     /**
-     *
+     * @param int $count
+     * @param int $max_items_per_file
      */
     public function bitStatistics($count, $max_items_per_file)
     {
@@ -665,7 +677,7 @@ class LinearHashTable
         $this->saveParameters();
     }
     /**
-     *
+     * Save the operating parameters of this LinearHashTable
      */
     public function saveParameters()
     {
@@ -674,7 +686,9 @@ class LinearHashTable
             LOCK_EX);
     }
     /**
-     *
+     * Returns the parameters (such as its signature, max number of
+     * documents per partition and counts) used to configure the
+     * LinearHashTable stored at $folder
      */
     public static function getParameterInfo($folder)
     {
@@ -686,7 +700,11 @@ class LinearHashTable
         }
     }
     /**
-     *
+     * @param string $key
+     * @param string $value
+     * @param bool $is_hash_key
+     * @param int $change_count
+     * @param bool $bulk_insert
      */
     protected function putIndex($key, $value, $is_hash_key = false,
         $change_count = 0, $bulk_insert = false)
@@ -736,7 +754,7 @@ class LinearHashTable
         $this->unlinkHashPath($migrate_from_path);
     }
     /**
-     *
+     * @param string $hash_path
      */
     protected function unlinkHashPath($hash_path)
     {
@@ -746,7 +764,8 @@ class LinearHashTable
         }
     }
     /**
-     *
+     * @param int $count
+     * @param int $max_items_per_file
      */
     protected function computeMigratePaths($count = -1,
         $max_items_per_file = -1)
@@ -772,7 +791,8 @@ class LinearHashTable
             $migrate_to_path_high];
     }
     /**
-     *
+     * @param string $hash_path
+     * @param int $new_count
      */
     protected function insertRecordsFromIndex($hash_path, $new_count = -1)
     {
@@ -793,7 +813,7 @@ class LinearHashTable
         }
     }
     /**
-     *
+     * @param string $value
      */
     protected function addArchive($value)
     {
@@ -822,7 +842,7 @@ class LinearHashTable
         return [$offset, $len, $save_partition];
     }
     /**
-     *
+     * @param string $key
      */
     protected function addKeyArchive($key)
     {
@@ -850,7 +870,11 @@ class LinearHashTable
         fwrite($fh, $encode, strlen($encode));
     }
     /**
-     *
+     * @param string $hash_key
+     * @param string $value
+     * @param int $count
+     * @param bool $bulk_mode
+     * @return bool
      */
     protected function addIndex($hash_key, $value, $count = -1,
         $bulk_mode = false)
@@ -907,7 +931,15 @@ class LinearHashTable
         return false;
     }
     /**
+     * Computes the path to the hash partition that would contain $hash_key
+     * as well as the contents of that partition
      *
+     * @param string $hash_key key to find partition for
+     * @param int $count number of items assumed to be stored in table, if -1
+     *  uses $this->parameters['COUNT']
+     * @return array [hash_partition_path, hash_partition_contents] if
+     *  $count is not -1, then the partition might not exist in which case
+     *  the second component might be -1
      */
     protected function getIndexInfo($hash_key, $count = -1)
     {
@@ -941,7 +973,12 @@ class LinearHashTable
         return [$hash_path, $index_data];
     }
     /**
-     *
+     * Checks if $new_count many stored items would entail splitting
+     * one of the hash partitions of this linear hash table (> 0),
+     * require no change in the number of hash partitions (0), or
+     * require two partitions to be merged (< 0)
+     * @param int $new_count new item count
+     * @return int change (<0 merge, >0 split) or no change required (0).
      */
     protected function checkSplitMerge($new_count)
     {
ViewGit