Fixes a notice in checkRecursiveUrl, adds crawlLog messages, a=chris
Fixes a notice in checkRecursiveUrl, adds crawlLog messages, a=chris
diff --git a/src/controllers/components/CrawlComponent.php b/src/controllers/components/CrawlComponent.php
index 15cdc19a2..0a542a496 100644
--- a/src/controllers/components/CrawlComponent.php
+++ b/src/controllers/components/CrawlComponent.php
@@ -327,7 +327,9 @@ class CrawlComponent extends Component implements CrawlConstants
if (isset($crawl_params[self::INDEXING_PLUGINS]) &&
is_array($crawl_params[self::INDEXING_PLUGINS])) {
foreach ($crawl_params[self::INDEXING_PLUGINS] as $plugin) {
- if ($plugin == "") {continue;}
+ if ($plugin == "") {
+ continue;
+ }
$plugin_class = C\NS_PLUGINS . $plugin . "Plugin";
$plugin_obj = $parent->plugin(lcfirst($plugin));
if (method_exists($plugin_class, "loadConfiguration")) {
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 30de04c36..48ab8a5fc 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -2299,6 +2299,8 @@ class QueueServer implements CrawlConstants, Join
so group and do last
*/
$this->web_queue->addUrlsQueue($added_pairs);
+ L\crawlLog("... Scheduler: Added " . count($added_pairs). " urls ".
+ "to queue from orginal list of $num_triples urls.");
}
L\crawlLog("Scheduler: time: " . L\changeInMicrotime($start_time));
L\crawlLog("Scheduler: Done queue schedule file: $file");
diff --git a/src/library/UrlParser.php b/src/library/UrlParser.php
index 529e9137b..18f848519 100755
--- a/src/library/UrlParser.php
+++ b/src/library/UrlParser.php
@@ -745,7 +745,10 @@ class UrlParser
*/
public static function checkRecursiveUrl($url, $repeat_threshold = 3)
{
- $url_parts = mb_split("/", $url);
+ if (!is_string($url)) {
+ return false;
+ }
+ $url_parts = explode("/", $url);
$count= count($url_parts);
$flag = 0;
for ($i = 0; $i < $count; $i++) {
diff --git a/src/library/Utility.php b/src/library/Utility.php
index 16a3e6226..0585b9095 100755
--- a/src/library/Utility.php
+++ b/src/library/Utility.php
@@ -1537,7 +1537,7 @@ function partitionByHash($table, $field, $num_partition, $instance,
* @param int $num_partition number of queue_servers to choose between
* @param object $callback function or static method that might be
* applied to input before deciding the responsible queue_server.
- * For example, if input was a url we might want to get the host
+ * For example, if the input was a url we might want to get the host
* before deciding on the queue_server
* @return int id of server responsible for input
*/
diff --git a/src/library/WebQueueBundle.php b/src/library/WebQueueBundle.php
index 09b05ff98..9bef3f065 100755
--- a/src/library/WebQueueBundle.php
+++ b/src/library/WebQueueBundle.php
@@ -299,7 +299,9 @@ class WebQueueBundle implements Notifier
{
$add_urls = [];
$count = count($url_pairs);
- if ( $count < 1) { return; }
+ if ( $count < 1) {
+ return;
+ }
for ($i = 0; $i < $count; $i++) {
$add_urls[$i][0] = & $url_pairs[$i][0];
}
diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php
index e0d11a84c..2fa807312 100755
--- a/src/models/PhraseModel.php
+++ b/src/models/PhraseModel.php
@@ -473,6 +473,9 @@ class PhraseModel extends ParallelModel
$format_words = null;
}
$description_length = self::DEFAULT_DESCRIPTION_LENGTH;
+ /* additional meta words come from indexing plugins, which might need
+ longer description lengths, say for recipes
+ */
if (isset($this->additional_meta_words) &&
is_array($this->additional_meta_words)) {
foreach ($this->additional_meta_words as $meta_word => $length) {