Modify checkProcessRunning, a=chris

Chris Pollett [2021-09-15]
Modify checkProcessRunning, a=chris
Filename
src/executables/QueueServer.php
src/library/BPlusTree.php
src/library/IndexDocumentBundle.php
src/library/PartitionDocumentBundle.php
src/library/Utility.php
src/views/helpers/OptionsHelper.php
tests/IndexDocumentBundleTest.php
tests/LinearHashTableTest.php
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 36a6fb081..472555beb 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -601,11 +601,15 @@ class QueueServer implements CrawlConstants
             return;
         }
         $filters = ($process == self::INDEXER) ? ["Indexer"] : ["Scheduler"];
-        $process_lines = L\lineFilter($lines, $filters);
-        L\crawlLog("...Filtered " . $this->process_name . ".log lines");
+        $initial = ($process == self::INDEXER) ? "I" : "S";
+        $process_lines = L\lineFilter($lines, $filters, false);
         $num_lines = count($process_lines);
-        $last_process_timestamp = $time;
+        L\crawlLog("...Filtered " . $this->process_name . ".log lines ".
+            "looking for $initial process. Found $num_lines associated with ".
+            "process.");
         // err on the side of caution in assuming process dead
+        $last_process_timestamp = (!empty($lines[0])) ?
+            L\logLineTimestamp($process_lines[$num_lines - 1]) : $time;
         if (isset($process_lines[$num_lines - 1])) {
             $timestamp =
                 L\logLineTimestamp($process_lines[$num_lines - 1]);
@@ -1984,7 +1988,7 @@ class QueueServer implements CrawlConstants
         $etag_expires_data =
             unserialize(gzuncompress(L\webdecode(file_get_contents($file))));
         L\crawlLog("Scheduler Done uncompressing etag data.".
-            " Starting to add to btree");
+            " Starting to add to linear hash table");
         $num_entries = count($etag_expires_data);
         $i = 0;
         foreach ($etag_expires_data as $data) {
diff --git a/src/library/BPlusTree.php b/src/library/BPlusTree.php
index 4fa6ec0c8..616d96eb3 100644
--- a/src/library/BPlusTree.php
+++ b/src/library/BPlusTree.php
@@ -59,15 +59,21 @@ class BPlusTree
         "MAX_KEYS" => self::MAX_KEYS
     ];
     /**
-     *
+     * Internal nodes of BPlusTree are folders, subfolders/subfiles are
+     * named according to their least key except for the first subfolder/
+     * subfile of the node which is given the name of the LEAST_NODE_NAME
+     * constant
      */
     const LEAST_NODE_NAME = "start";
     /**
-     *
+     * Internal nodes of BPlusTree are folders. For nodes of the same
+     * height in the tree NEXT_NODE_NAME is used as the name of the file
+     * with the serialized name of the next folder of the same height in
+     * the tree.
      */
     const NEXT_NODE_NAME = "next";
     /**
-     *
+     * Name of temporary file used when splitting a BPlusTree node.
      */
     const TEMP_NODE_NAME = "tmp_node";
     /**
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index 25af4ef5b..d5f7ea2e6 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -41,44 +41,62 @@ require_once __DIR__ . '/Utility.php';
  * from these documents which allow one to search for documents containing a
  * particular word.
  *
- *
- *
  * @author Chris Pollett
  */
 class IndexDocumentBundle implements CrawlConstants
 {
     /**
-     *
+     * Name of the file, within the folder of the IndexDocumentBundle, used
+     * to store parameter/configuration information about the bundle
      */
     const ARCHIVE_INFO_FILE = "archive_info.txt";
     /**
-     *
+     * The version of this IndexDocumentBundle. The lowest format number is
+     * 3.0 as prior inverted index/document stores used IndexArchiveBundle's
      */
     const DEFAULT_VERSION = "3.0";
     /**
-     *
+     * Default values for the configuration parameters of an
+     * IndexDocumentBundle
      */
     const DEFAULT_PARAMETERS = ["DESCRIPTION" => "",
         "VERSION" => self::DEFAULT_VERSION
     ];
     /**
-     *
+     * Subfolder of IndexDocumentBundle to store the btree with
+     * term => posting list information (i.e., the inverted index)
      */
     const DICTIONARY_FOLDER = "dictionary";
     /**
-     *
+     * DocIds are made of three parts: hash of url, hash of document, hash
+     * of url hostname. Each of these hashes is DOCID_PART_LEN long
      */
     const DOCID_PART_LEN = 8;
     /**
-     *
+     * Length of DocIds used by this IndexDocumentBundle
      */
     const DOCID_LEN = 24;
     /**
-     *
+     * Partition i in an IndexDocumentBundle has a subfolder i
+     * within self::POSITIONS_DOC_MAP_FOLDER. Within this subfolder i,
+     * self::DOC_MAP_FILENAME is the name of the file used to store the
+     * document map for the partition. The document map consists of a sequence
+     * of records associated with each doc_id of a document stored in the
+     * partition. The first record is ["POS" => $num_words,
+     * "SCORE" => floatval($global_score_for_document)]. The second record is:
+     * ["POS" => $length_of_title_of_document, "SCORE" =>
+     *          floatval($num_description_scores)]
+     * Here a description score is a score for the importance of a section
+     * of a document. Subsequent records list [POS => the length of the jth
+     * section of the document, SCORE => its score].
      */
     const DOC_MAP_FILENAME = "doc_map";
     /**
-     *
+     * Folder used to store the partition data of this IndexDocumentBundle.
+     * These will consist of .txt.gz files for each partition which are used
+     * to store summaries of documents and actual documents (web pages) and
+     * .ix files which are used to store doc_id and the associated offsets to
+     * their summary and actual document within the .txt.gz file
      */
     const DOCUMENTS_FOLDER = "documents";
     /**
@@ -319,8 +337,8 @@ class IndexDocumentBundle implements CrawlConstants
         $num_postings = count($postings);
         $i = 0;
         foreach ($postings as $term => $entry) {
-            if(crawlTimeoutLog("..Still processing partition $partition. Have ".
-                "completed $i postings of $num_postings.") &&
+            if(crawlTimeoutLog("..Indexer Still processing partition ".
+                "$partition. Have completed $i postings of $num_postings.") &&
                 $taking_too_long_touch) {
                 if (file_exists($taking_too_long_touch)) {
                     touch($taking_too_long_touch, time());
diff --git a/src/library/PartitionDocumentBundle.php b/src/library/PartitionDocumentBundle.php
index 2271d3397..8c7c6bf96 100644
--- a/src/library/PartitionDocumentBundle.php
+++ b/src/library/PartitionDocumentBundle.php
@@ -290,7 +290,16 @@ class PartitionDocumentBundle
         return $out_data;
     }
     /**
+     * Retrieves a BLOB string in the file $archive_filename at byte position
+     * $offset of length $len. It uncompresses this string using
+     * $compressor->uncompress and returns the result.
      *
+     * @param string $archive_filename the filename of a partition archive
+     *  file to get a blob object from
+     * @param int $offset a byte position in that file
+     * @param int $len number of bytes from $offset to read.
+     * @return string the result of uncompressing the string at $offset of
+     *  length $len
      */
     public function getArchive($archive_filename, $offset, $len)
     {
@@ -316,11 +325,11 @@ class PartitionDocumentBundle
         return $value;
     }
     /**
-     * Returns the path of the archive file (used to store BLOB and SERIAL
+     * Returns the path to the archive file (used to store BLOB and SERIAL
      * columns) for the $i partition in this PartitionDocumentBundle
      *
      * @param int $i partition to get the archive file name for
-     * @return string path of $i partition
+     * @return string path of $i partition archive file
      */
     public function getPartition($i)
     {
@@ -328,7 +337,12 @@ class PartitionDocumentBundle
             $i . $this->compressor->fileExtension();
     }
     /**
+     * Returns the path to the index file (used to store all columns of
+     * a partition record except blob and serial columns) for the $i partition
+     * in this PartitionDocumentBundle
      *
+     * @param int $i partition to get the index file name for
+     * @return string path of $i partition index file
      */
     public function getPartitionIndex($i)
     {
@@ -336,7 +350,15 @@ class PartitionDocumentBundle
             $i . self::INDEX_EXTENSION;
     }
     /**
+     * Returns the unserialized index file for the $partition partition of
+     * this PartitionDocumentBundle. If $force_load is set to true then reloads
+     * from disk rather than use a cached value if present.
      *
+     * @param int $partition which partition index to read
+     * @param bool $force_load whether to reload the index from disk or to
+     *  use a cached value if present
+     * @return array $key => packed records pairs where records are
+     *  packed according to this PartitionDocumentBundle's signature
      */
     public function loadPartitionIndex($partition, $force_load = false)
     {
@@ -379,6 +401,7 @@ class PartitionDocumentBundle
      *
      * @param array $row_or_rows either array of record with fields given
      *      by this PartitionDocumentBundle's signature or an array of rows.
+     * @return bool success or not
      */
     public function put($row_or_rows)
     {
@@ -513,7 +536,7 @@ class PartitionDocumentBundle
         $this->saveParameters();
     }
     /**
-     *
+     *
      */
     public function saveParameters()
     {
diff --git a/src/library/Utility.php b/src/library/Utility.php
index bfd3f9eb8..14903a903 100755
--- a/src/library/Utility.php
+++ b/src/library/Utility.php
@@ -2336,17 +2336,20 @@ function tail($file_name, $num_lines)
  * @param string $lines to search
  * @param mixed $filters either string to filter lines with or an array of
  *      strings (any of which can be present to pass the filter)
+ * @param bool $case_insensitive whether search should be done case
+ *      insensitively or not.
  * @return array lines containing the string
  */
-function lineFilter($lines, $filters)
+function lineFilter($lines, $filters, $case_insensitive = true)
 {
     $out_lines = [];
+    $search_function = ($case_insensitive) ? "stripos" : "strpos";
     if (is_string($filters)) {
         $filters = [$filters];
     }
     foreach ($lines as $line) {
         foreach ($filters as $filter) {
-            if (stripos($line, $filter) !== false) {
+            if ($search_function($line, $filter) !== false) {
                 $out_lines[] = $line;
                 break;
             }
diff --git a/src/views/helpers/OptionsHelper.php b/src/views/helpers/OptionsHelper.php
index d0f8f330d..143cf91cc 100755
--- a/src/views/helpers/OptionsHelper.php
+++ b/src/views/helpers/OptionsHelper.php
@@ -132,6 +132,9 @@ class OptionsHelper extends Helper
      *      or as an unordered list.
      * @param string $class_list a string of additional CSS classes for outer-
      *      most div tag
+     * @param string $show_top if empty then the selected item value will be the
+     *      clickable link at the top of the drop down, if non-empty
+     *      then the text of $show_top will be used.
      */
     public function renderLinkDropDown($id, $options, $selected, $url_prefix,
         $as_list = false, $class_list = "", $show_top = "")
diff --git a/tests/IndexDocumentBundleTest.php b/tests/IndexDocumentBundleTest.php
index 9ea2e35af..0e74125f2 100644
--- a/tests/IndexDocumentBundleTest.php
+++ b/tests/IndexDocumentBundleTest.php
@@ -82,7 +82,8 @@ use seekquarry\yioop\library\UnitTest;
         $this->assertEqual($archive_info["DESCRIPTION"], "TestBundle");
     }
     /**
-     *
+     * Tests that after adding pages to an IndexDocumentBundle, the page
+     * and its summary can be retrieved.
      */
     public function addGetPagesTestCase()
     {
diff --git a/tests/LinearHashTableTest.php b/tests/LinearHashTableTest.php
index 97470b1b5..bc9c63e75 100644
--- a/tests/LinearHashTableTest.php
+++ b/tests/LinearHashTableTest.php
@@ -38,7 +38,10 @@ use seekquarry\yioop\models\Model;
 use seekquarry\yioop\library\UnitTest;

 /**
+ * Used to test that the LinearHashTable class properly stores key value pairs,
+ * and handles inserts, deletes, and retrievals correctly.
  *
+ * @author Chris Pollett
  */
  class LinearHashTableTest extends UnitTest
 {
@@ -81,7 +84,9 @@ use seekquarry\yioop\library\UnitTest;
         $this->table_dirs = [];
     }
     /**
-     *
+     * This tests that packed records can be successfully unpacked
+     * after being put into the linear hash table and retrieved.
+     * This tests the LinearHashTable use-case for storing ETag data
      */
     public function packUnpackFormatTestCase()
     {
@@ -133,7 +138,8 @@ use seekquarry\yioop\library\UnitTest;
         }
     }
     /**
-     *
+     * Tests whether key value pairs inserted into the linear hash table
+     * can subsequently be retrieved. This tests the hashed key case
      */
     public function insertHashKeyLookupTestCase()
     {
@@ -154,6 +160,8 @@ use seekquarry\yioop\library\UnitTest;
         }
     }
     /**
+     * Tests whether key value pairs inserted into the linear hash table
+     * can subsequently be retrieved. This tests the non-hashed key case
      */
     public function insertKeyLookupTestCase()
     {
ViewGit