Fix error in sendStartCrawlMessage that could be caused by use of empty as opposed to not isset, a=chris
Fix error in sendStartCrawlMessage that could be caused by use of empty as opposed to not isset, a=chris
diff --git a/src/controllers/CrawlController.php b/src/controllers/CrawlController.php
index a48a247a6..12768637c 100644
--- a/src/controllers/CrawlController.php
+++ b/src/controllers/CrawlController.php
@@ -307,7 +307,8 @@ class CrawlController extends Controller implements CrawlConstants
return;
}
foreach (["num", "i", "num_fetchers"] as $field) {
- $$field = (empty($_REQUEST[$field])) ? -1 :
+            //don't change !isset to empty: i may be 0, and empty(0) is true
+ $$field = (!isset($_REQUEST[$field])) ? -1 :
$this->clean($_REQUEST[$field], "int");
}
list($crawl_params,
diff --git a/src/library/UrlParser.php b/src/library/UrlParser.php
index fd22f9940..8a64fde7a 100755
--- a/src/library/UrlParser.php
+++ b/src/library/UrlParser.php
@@ -243,7 +243,9 @@ class UrlParser
public static function getHost($url, $with_login_and_port = true)
{
$url_parts = @parse_url($url);
- if (!isset($url_parts['scheme']) ) {return false;}
+ if (!isset($url_parts['scheme']) ) {
+ return false;
+ }
$host_url = $url_parts['scheme'].'://';
//handles common typo http:/yahoo.com rather than http://yahoo.com
if (!isset($url_parts['host'])) {
@@ -261,7 +263,9 @@ class UrlParser
isset($url_parts['user']) && isset($url_parts['pass'])) {
$host_url .= $url_parts['user'].":".$url_parts['pass']."@";
}
- if (strlen($url_parts['host']) <= 0) { return false; }
+ if (strlen($url_parts['host']) <= 0) {
+ return false;
+ }
$host_url .= $url_parts['host'];
if ($with_login_and_port && isset($url_parts['port'])) {
$host_url .= ":".$url_parts['port'];
diff --git a/src/library/Utility.php b/src/library/Utility.php
index 09098f432..2e5f18156 100755
--- a/src/library/Utility.php
+++ b/src/library/Utility.php
@@ -1553,7 +1553,6 @@ function calculatePartition($input, $num_partition, $callback = null)
}
$hash_int = abs(unpackInt(substr(crawlHash($input, true), 0, 4))) %
$num_partition;
-
return $hash_int;
}
/**
diff --git a/tests/UrlParserTest.php b/tests/UrlParserTest.php
index 087cab356..58cebfa45 100644
--- a/tests/UrlParserTest.php
+++ b/tests/UrlParserTest.php
@@ -236,4 +236,22 @@ class UrlParserTest extends UnitTest
$this->assertEqual($result, $test_link[1], $test_link[2]);
}
}
+    /**
+     * Checks getHost returns scheme://host[:port] for port, IP, fragment urls
+     */
+    public function getHostTestCase()
+    {
+        $test_links = [ // each entry: url, expected host, failure message
+            ["https://somewhere.com:80/la/de/da", "https://somewhere.com:80",
+                "Host with port"],
+            ["https://10.1.10.10/?dfas=aga/", "https://10.1.10.10",
+                "IP based host with query"],
+            ["https://www.yioop.com/###@?woohoo", "https://www.yioop.com",
+                "Host with fragment"],
+        ];
+        foreach ($test_links as $test_link) {
+            $result = UrlParser::getHost($test_link[0]);
+            $this->assertEqual($result, $test_link[1], $test_link[2]);
+        }
+    }
}