Fixes a notice in checkRecursiveUrl, add crawlLog messages, a=chris

Chris Pollett [2019-07-12 04:Jul:th]
Fixes a notice in checkRecursiveUrl, add crawlLog messages, a=chris
Filename
src/controllers/components/CrawlComponent.php
src/executables/QueueServer.php
src/library/UrlParser.php
src/library/Utility.php
src/library/WebQueueBundle.php
src/models/PhraseModel.php
diff --git a/src/controllers/components/CrawlComponent.php b/src/controllers/components/CrawlComponent.php
index 15cdc19a2..0a542a496 100644
--- a/src/controllers/components/CrawlComponent.php
+++ b/src/controllers/components/CrawlComponent.php
@@ -327,7 +327,9 @@ class CrawlComponent extends Component implements CrawlConstants
         if (isset($crawl_params[self::INDEXING_PLUGINS]) &&
             is_array($crawl_params[self::INDEXING_PLUGINS])) {
             foreach ($crawl_params[self::INDEXING_PLUGINS] as $plugin) {
-                if ($plugin == "") {continue;}
+                if ($plugin == "") {
+                    continue;
+                }
                 $plugin_class = C\NS_PLUGINS . $plugin . "Plugin";
                 $plugin_obj = $parent->plugin(lcfirst($plugin));
                 if (method_exists($plugin_class, "loadConfiguration")) {
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 30de04c36..48ab8a5fc 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -2299,6 +2299,8 @@ class QueueServer implements CrawlConstants, Join
                  so group and do last
              */
             $this->web_queue->addUrlsQueue($added_pairs);
+            L\crawlLog("... Scheduler: Added " . count($added_pairs). " urls ".
+                "to queue from orginal list of $num_triples urls.");
         }
         L\crawlLog("Scheduler: time: " . L\changeInMicrotime($start_time));
         L\crawlLog("Scheduler: Done queue schedule file: $file");
diff --git a/src/library/UrlParser.php b/src/library/UrlParser.php
index 529e9137b..18f848519 100755
--- a/src/library/UrlParser.php
+++ b/src/library/UrlParser.php
@@ -745,7 +745,10 @@ class UrlParser
      */
     public static function checkRecursiveUrl($url, $repeat_threshold = 3)
     {
-        $url_parts = mb_split("/", $url);
+        if (!is_string($url)) {
+            return false;
+        }
+        $url_parts = explode("/", $url);
         $count= count($url_parts);
         $flag = 0;
         for ($i = 0; $i < $count; $i++) {
diff --git a/src/library/Utility.php b/src/library/Utility.php
index 16a3e6226..0585b9095 100755
--- a/src/library/Utility.php
+++ b/src/library/Utility.php
@@ -1537,7 +1537,7 @@ function partitionByHash($table, $field, $num_partition, $instance,
  * @param int $num_partition number of queue_servers to choose between
  * @param object $callback function or static method that might be
  *     applied to input before deciding the responsible queue_server.
- *     For example, if input was a url we might want to get the host
+ *     For example, if the input was a url we might want to get the host
  *     before deciding on the queue_server
  * @return int id of server responsible for input
  */
diff --git a/src/library/WebQueueBundle.php b/src/library/WebQueueBundle.php
index 09b05ff98..9bef3f065 100755
--- a/src/library/WebQueueBundle.php
+++ b/src/library/WebQueueBundle.php
@@ -299,7 +299,9 @@ class WebQueueBundle implements Notifier
     {
         $add_urls = [];
         $count = count($url_pairs);
-        if ( $count < 1) { return; }
+        if ( $count < 1) {
+            return;
+        }
         for ($i = 0; $i < $count; $i++) {
             $add_urls[$i][0] = & $url_pairs[$i][0];
         }
diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php
index e0d11a84c..2fa807312 100755
--- a/src/models/PhraseModel.php
+++ b/src/models/PhraseModel.php
@@ -473,6 +473,9 @@ class PhraseModel extends ParallelModel
             $format_words = null;
         }
         $description_length = self::DEFAULT_DESCRIPTION_LENGTH;
+        /* additional meta words come from indexing plugins, which might need
+           longer description lengths, say for recipes
+         */
         if (isset($this->additional_meta_words) &&
             is_array($this->additional_meta_words)) {
             foreach ($this->additional_meta_words as $meta_word => $length) {
ViewGit