PHP 5.4 and Windows fixes to UrlParser, a=chris
PHP 5.4 and Windows fixes to UrlParser, a=chris
diff --git a/bin/fetcher.php b/bin/fetcher.php
index b60be1fd1..92631e249 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -1128,7 +1128,6 @@ class Fetcher implements CrawlConstants
foreach($site_pages as $site) {
$response_code = $site[self::HTTP_CODE];
-
if($response_code < 200 || $response_code >= 300) {
crawlLog($site[self::URL]." response code $response_code");
$host = UrlParser::getHost($site[self::URL]);
@@ -1332,10 +1331,12 @@ class Fetcher implements CrawlConstants
}
} // end for
- $cache_page_partition = $this->web_archive->addPages(
- self::OFFSET, $stored_site_pages);
-
$num_pages = count($stored_site_pages);
+
+ if($num_pages > 0) {
+ $cache_page_partition = $this->web_archive->addPages(
+ self::OFFSET, $stored_site_pages);
+ }
for($i = 0; $i < $num_pages; $i++) {
$summarized_site_pages[$i][self::INDEX] = $num_items + $i;
diff --git a/lib/url_parser.php b/lib/url_parser.php
index f26d31e41..3d24aec27 100755
--- a/lib/url_parser.php
+++ b/lib/url_parser.php
@@ -270,6 +270,9 @@ class UrlParser
if(!isset($url_parts['path'])) {
return NULL;
}
+ // Windows hack: collapse any "\/" sequence in the path to a plain "/"
+ $url_parts['path'] = str_replace("\/", "/", $url_parts['path']);
+
$path = $url_parts['path'];
$len = strlen($url);
if($with_query_string && isset($url_parts['query'])) {
diff --git a/lib/utility.php b/lib/utility.php
index 7f1443085..88fa69de3 100755
--- a/lib/utility.php
+++ b/lib/utility.php
@@ -48,7 +48,7 @@ function charCopy($source, &$destination, $start, $length)
$endk = $length - 1;
$end = $start + $endk;
for($j = $end, $k = $endk; $j >= $start; $j--, $k--) {
- $destination[$j] = $source[$k];
+ $destination[(int)$j] = $source[(int)$k];
}
}
diff --git a/tests/it_stemmer_test.php b/tests/it_stemmer_test.php
index 994b0fd43..983a4e354 100644
--- a/tests/it_stemmer_test.php
+++ b/tests/it_stemmer_test.php
@@ -34,13 +34,13 @@
if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
/**
- * Load the Italian stemmer
+ * Load the Italian stemmer via phrase_parser (5.4 hack)
*/
-require_once BASE_DIR.'/locale/it/resources/tokenizer.php';
+require_once BASE_DIR."/lib/phrase_parser.php";
/**
* Load the run function
*/
require_once BASE_DIR.'/lib/unit_test.php';
/**
* My code for testing the Italian stemming algorithm. The inputs for the