Fix error in sendStartCrawlMessage that could be caused by use of empty as opposed to not isset, a=chris

Chris Pollett [2019-06-10]
Fix error in sendStartCrawlMessage that could be caused by use of empty as opposed to not isset, a=chris
Filename
src/controllers/CrawlController.php
src/library/UrlParser.php
src/library/Utility.php
tests/UrlParserTest.php
diff --git a/src/controllers/CrawlController.php b/src/controllers/CrawlController.php
index a48a247a6..12768637c 100644
--- a/src/controllers/CrawlController.php
+++ b/src/controllers/CrawlController.php
@@ -307,7 +307,8 @@ class CrawlController extends Controller implements CrawlConstants
             return;
         }
         foreach (["num", "i", "num_fetchers"] as $field) {
-            $$field = (empty($_REQUEST[$field])) ? -1 :
+            //don't change !isset to empty, i can be 0
+            $$field = (!isset($_REQUEST[$field])) ? -1 :
                 $this->clean($_REQUEST[$field], "int");
         }
         list($crawl_params,
diff --git a/src/library/UrlParser.php b/src/library/UrlParser.php
index fd22f9940..8a64fde7a 100755
--- a/src/library/UrlParser.php
+++ b/src/library/UrlParser.php
@@ -243,7 +243,9 @@ class UrlParser
     public static function getHost($url, $with_login_and_port = true)
     {
         $url_parts = @parse_url($url);
-        if (!isset($url_parts['scheme']) ) {return false;}
+        if (!isset($url_parts['scheme']) ) {
+            return false;
+        }
         $host_url = $url_parts['scheme'].'://';
         //handles common typo http:/yahoo.com rather than http://yahoo.com
         if (!isset($url_parts['host'])) {
@@ -261,7 +263,9 @@ class UrlParser
             isset($url_parts['user']) && isset($url_parts['pass'])) {
             $host_url .= $url_parts['user'].":".$url_parts['pass']."@";
         }
-        if (strlen($url_parts['host']) <= 0) { return false; }
+        if (strlen($url_parts['host']) <= 0) {
+            return false;
+        }
         $host_url .= $url_parts['host'];
         if ($with_login_and_port && isset($url_parts['port'])) {
             $host_url .= ":".$url_parts['port'];
diff --git a/src/library/Utility.php b/src/library/Utility.php
index 09098f432..2e5f18156 100755
--- a/src/library/Utility.php
+++ b/src/library/Utility.php
@@ -1553,7 +1553,6 @@ function calculatePartition($input, $num_partition, $callback = null)
     }
     $hash_int =  abs(unpackInt(substr(crawlHash($input, true), 0, 4))) %
         $num_partition;
-
     return $hash_int;
 }
 /**
diff --git a/tests/UrlParserTest.php b/tests/UrlParserTest.php
index 087cab356..58cebfa45 100644
--- a/tests/UrlParserTest.php
+++ b/tests/UrlParserTest.php
@@ -236,4 +236,22 @@ class UrlParserTest extends UnitTest
             $this->assertEqual($result, $test_link[1], $test_link[2]);
         }
     }
+    /**
+     * Checks that getHost returns only the scheme, host, and port of a url
+     */
+    public function getHostTestCase()
+    {
+        $test_links = [
+            ["https://somewhere.com:80/la/de/da", "https://somewhere.com:80",
+                "Host with port"],
+            ["https://10.1.10.10/?dfas=aga/", "https://10.1.10.10",
+                "IP based host with query"],
+            ["https://www.yioop.com/###@?woohoo", "https://www.yioop.com",
+                "Host with fragment"],
+        ];
+        foreach ($test_links as $test_link) {
+            $result = UrlParser::getHost($test_link[0]);
+            $this->assertEqual($result, $test_link[1], $test_link[2]);
+        }
+    }
 }
ViewGit