Adding documentation, =achris

Chris Pollett [2021-09-17 21:Sep:th]
Adding documentation, =achris
Filename
src/executables/QueueServer.php
src/library/IndexDocumentBundle.php
src/library/LinearHashTable.php
src/library/PartitionDocumentBundle.php
src/library/index_bundle_iterators/WordIterator.php
tests/BPlusTreeTest.php
tests/IndexDocumentBundleTest.php
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 472555beb..25dc5beae 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -605,8 +605,8 @@ class QueueServer implements CrawlConstants
         $process_lines = L\lineFilter($lines, $filters, false);
         $num_lines = count($process_lines);
         L\crawlLog("...Filtered " . $this->process_name . ".log lines ".
-            "looking for $initial process. Found $num_lines associated with ".
-            "process.");
+            "looking for $initial process. Found $num_lines lines associated ".
+            "with process.");
         // err on the side of caution in assuming process dead
         $last_process_timestamp = (!empty($lines[0])) ?
             L\logLineTimestamp($process_lines[$num_lines - 1]) : $time;
@@ -807,6 +807,7 @@ class QueueServer implements CrawlConstants
                 if ($this->isOnlyIndexer()) {
                     return;
                 }
+                //so isScheduler true here
                 $this->processRobotUrls();
                 if (C\USE_ETAG_EXPIRES) {
                     $this->processEtagExpires();
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index d5f7ea2e6..92105d3ae 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -736,8 +736,9 @@ class IndexDocumentBundle implements CrawlConstants
         }
     }
     /**
-     *
+     * Checks if a doc_id corresponds to a document or a link
      * @param string $key
+     * @return bool true if a document
      */
     public function isDoc($key)
     {
diff --git a/src/library/LinearHashTable.php b/src/library/LinearHashTable.php
index 7e793f703..80d309629 100644
--- a/src/library/LinearHashTable.php
+++ b/src/library/LinearHashTable.php
@@ -342,7 +342,13 @@ class LinearHashTable
             self::HASH_INDEX_EXTENSION;
     }
     /**
-     *
+     * Checks if a key exists in the linear hash table
+     * @param string $key key to check
+     * @param bool $compute_hash whether the key has already had the linear
+     *  hash table's hash function applied or not
+     * @param bool $check_active whether to check the active index of
+     *  buffered key values that have not yet been put into the main table
+     * @return bool whether the $key exists in the linear hash table
      */
     public function exists($key, $compute_hash = true, $check_active = true)
     {
@@ -373,6 +379,9 @@ class LinearHashTable
     }
     /**
      *
+     * @param array $row_or_rows
+     * @param bool $is_hash_key
+     * @param bool $allow_duplicates
      */
     public function put($row_or_rows, $is_hash_key = false,
         $allow_duplicates = true)
diff --git a/src/library/PartitionDocumentBundle.php b/src/library/PartitionDocumentBundle.php
index 8c7c6bf96..0a3b1200e 100644
--- a/src/library/PartitionDocumentBundle.php
+++ b/src/library/PartitionDocumentBundle.php
@@ -100,55 +100,87 @@ class PartitionDocumentBundle
      */
     const PARTITION_SIZE_THRESHOLD = 2147483648;
     /**
-     *
+     * Used to store the file handle to, the partition number, and last add time
+     * for the last time an item's blob/serial columns were added to for
+     * the PartitionDocumentBundle
+     * @var array
      */
     public $add_archive_cache = [null, "", -1];
     /**
-     *
+     * Used to store the file handle to, the partition number, and last access
+     * time for the last time an item's blob/serial columns were accessed for
+     * the PartitionDocumentBundle
+     * @var array
      */
     public $get_archive_cache = [null, "", -1];
     /**
-     *
+     * Array of column names for the columns in a PartitionDocumentBundle which
+     * are of type BLOB or SERIAL
+     * @var array
      */
     public $blob_columns;
     /**
-     *
+     * The seekquarry\yioop\library\compressors\Compressor object used to
+     * compress record files and blob items.
+     * @var object
      */
     public $compressor;
     /**
-     *
+     * Folder path where the PartitionDocumentBundle is stored
+     * @var string
      */
     public $folder;
     /**
-     *
+     * In memory cache of partitions from the PartitionDocumentBundle
+     * @var array
      */
     public $index_cache;
     /**
-     *
+     * Maximum number of items the partition cache is allowed to hold
      */
     public $index_cache_size;
     /**
-     *
+     * hi-res time the file handle for add or getting filehandle blob was
+     * created. Used to determine if the file_handle needs to be closed, because
+     * active partition changed
+     * @var int
      */
     public $instance_time;
     /**
-     *
+     * Name of primary key column for records
+     * @var string
      */
     public $key_field;
     /**
-     *
+     * Stores the constructor parameters used to create this
+     * PartitionDocumentBundle
+     * @var array
      */
     public $parameters;
     /**
-     *
+     * Array of column names for the columns in a PartitionDocumentBundle which
+     * are of type SERIAL
+     * @var array
      */
     public $serial_columns;
     /**
-     *
+     * The PackedTableTools object used to pack and unpack records in
+     * partitions
+     * @var object
      */
     public $table_tools;
     /**
+     * Used to create a new instance of a PartitionDocumentBundle
      *
+     * @param string $folder the path to the folder to store this
+     *  PartitionDocumentBundle
+     * @param array $format the column names, keys and types for this
+     *  PartitionDocumentBundle object
+     * @param int $partition_size_threshold maximum number of items to store
+     *  in a partition before making the next partition
+     * @param object $compressor_type
+     *  seekquarry\yioop\library\compressors\Compressor object used to
+     *  compress record files and blob items.
      */
     public function __construct($folder, $format =
         self::DEFAULT_PARAMETERS["FORMAT"],
@@ -351,7 +383,7 @@ class PartitionDocumentBundle
     }
     /**
      * Returns the unserialized index file for the $partition parition of
-     * this PartitionIndexBundle. If $force_load is set to true then reloads
+     * this PartitionDocumentBundle. If $force_load is set to true then reloads
      * from disk rather than use a cached value if present.
      *
      * @param int $partition which partition index to read
@@ -489,8 +521,12 @@ class PartitionDocumentBundle
         return true;
     }
     /**
+     * Saves the current save partition, adds one to the save partition number,
+     * and starts a new save partition.
      *
-     * @param int $new_save_partition
+     * @param int $new_save_partition partition number to add one to. If the
+     *  default is used, then this method will use the parameters
+     *  "SAVE_PARTITION" value.
      */
     public function advanceSavePartition($new_save_partition = 0)
     {
@@ -536,7 +572,7 @@ class PartitionDocumentBundle
         $this->saveParameters();
     }
     /**
-     *
+     * Save the operating parameters of this PartitionDocumentBundle
      */
     public function saveParameters()
     {
diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php
index 91a72366b..7ae376745 100644
--- a/src/library/index_bundle_iterators/WordIterator.php
+++ b/src/library/index_bundle_iterators/WordIterator.php
@@ -487,7 +487,7 @@ class WordIterator extends IndexBundleIterator
         if (empty($descriptions_scores)) {
             return count($positions);
         }
-        $first_score = $descriptions_scores[0]['SCORE'];
+        $first_score = $descriptions_scores[0]['SCORE'] ?? 1;
         $description_pos = $descriptions_scores[$description_index]['POS'];
         $num_scores = count($descriptions_scores);
         $raw_freq_squared = 0;
diff --git a/tests/BPlusTreeTest.php b/tests/BPlusTreeTest.php
index a837a715f..ae210cc1b 100644
--- a/tests/BPlusTreeTest.php
+++ b/tests/BPlusTreeTest.php
@@ -86,9 +86,10 @@ use seekquarry\yioop\library\UnitTest;
         return new L\BPlusTree($table_dir, $format, $max_degree);
     }
     /**
-     *
+     * Test putting items in bplustrees of odd sized nodes between 3 and 13 and
+     * then seeing if the items can be retrieved
      */
-    public function getPutTestCase()
+    public function putGetTestCase()
     {
         for ($i = 3; $i <= 13; $i += 2) {
             $bptree = $this->createTree($i);
diff --git a/tests/IndexDocumentBundleTest.php b/tests/IndexDocumentBundleTest.php
index 0e74125f2..2e67f3f0c 100644
--- a/tests/IndexDocumentBundleTest.php
+++ b/tests/IndexDocumentBundleTest.php
@@ -41,7 +41,11 @@ use seekquarry\yioop\library\UrlParser;
 use seekquarry\yioop\library\UnitTest;

 /**
- *
+ * Used to test that the IndexDocumentBundle class can properly add and
+ * retrieve documents. Check its prepareMethod correctly deduplicates
+ * documents before inverted index creation. Tests inverted index creation
+ * and adding terms to IndexDocumentBundle's BPlusTree. Check look up of
+ * documents according to term.
  */
  class IndexDocumentBundleTest extends UnitTest
 {
@@ -50,7 +54,8 @@ use seekquarry\yioop\library\UnitTest;
      */
     const TEST_DIR = __DIR__ . '/test_files/index_document_test';
     /**
-     *
+     * Holds the IndexDocumentBundle used for test purposes
+     * @var IndexDocumentBundle
      */
     public $index_archive;
     /**
@@ -74,7 +79,8 @@ use seekquarry\yioop\library\UnitTest;
         $model->db->unlinkRecursive(self::TEST_DIR);
     }
     /**
-     *
+     * Checks if the constructor of the IndexDocumentBundle correctly saves
+     * the constructor info such as the bundle description
      */
     public function saveDescriptionTestCase()
     {
@@ -109,7 +115,10 @@ use seekquarry\yioop\library\UnitTest;
         }
     }
     /**
-     *
+     * Tests the prepareIndexMap method which is used to deduplicate pages
+     * before an inverted index of a partition is made. Tests adding
+     * pages with the same doc_id to make sure they get grouped together.
+     * Grouping also affects how documents are scored so tests this as well.
      */
     public function prepareIndexTestCase()
     {
@@ -177,7 +186,10 @@ use seekquarry\yioop\library\UnitTest;
         }
     }
     /**
-     *
+     * Tests the process of adding documents to the IndexDocumentBundle, then
+     * building an inverted index from this.  To check, after the above is
+     * done, performs lookups of terms known to have posting lists
+     * and then checks the properties of the returned posting lists.
      */
     public function buildInvertedIndexPartitionTestCase()
     {
@@ -248,7 +260,11 @@ use seekquarry\yioop\library\UnitTest;
             "Test Position List Decode");
     }
     /**
-     *
+     * Tests the complete process of going from documents, dedup,
+     * building an inverted index and adding the result to the
+     * IndexDocumentBundle's inverted index.  To do this, after the above is
+     * done, performs lookups of terms known to be in the indexed documents
+     * and checks the properties of the returned posting lists.
      */
     public function addPartitionPostingsDictionaryTestCase()
     {
ViewGit