PHP 5.4 and Windows fixes to UrlParser, a=chris

Chris Pollett [2012-09-15, 22h]
PHP 5.4 and Windows fixes to UrlParser, a=chris
Filename
bin/fetcher.php
lib/url_parser.php
lib/utility.php
tests/it_stemmer_test.php
diff --git a/bin/fetcher.php b/bin/fetcher.php
index b60be1fd1..92631e249 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -1128,7 +1128,6 @@ class Fetcher implements CrawlConstants

         foreach($site_pages as $site) {
             $response_code = $site[self::HTTP_CODE];
-
             if($response_code < 200 || $response_code >= 300) {
                 crawlLog($site[self::URL]." response code $response_code");
                 $host = UrlParser::getHost($site[self::URL]);
@@ -1332,10 +1331,12 @@ class Fetcher implements CrawlConstants
             }
         } // end for

-        $cache_page_partition = $this->web_archive->addPages(
-            self::OFFSET, $stored_site_pages);
-
         $num_pages = count($stored_site_pages);
+
+        if($num_pages > 0) {
+            $cache_page_partition = $this->web_archive->addPages(
+                self::OFFSET, $stored_site_pages);
+        }

         for($i = 0; $i < $num_pages; $i++) {
             $summarized_site_pages[$i][self::INDEX] = $num_items + $i;
diff --git a/lib/url_parser.php b/lib/url_parser.php
index f26d31e41..3d24aec27 100755
--- a/lib/url_parser.php
+++ b/lib/url_parser.php
@@ -270,6 +270,9 @@ class UrlParser
         if(!isset($url_parts['path'])) {
             return NULL;
         }
+        // windows hack
+        $url_parts['path'] = str_replace("\/", "/", $url_parts['path']);
+
         $path = $url_parts['path'];
         $len = strlen($url);
         if($with_query_string && isset($url_parts['query'])) {
diff --git a/lib/utility.php b/lib/utility.php
index 7f1443085..88fa69de3 100755
--- a/lib/utility.php
+++ b/lib/utility.php
@@ -48,7 +48,7 @@ function charCopy($source, &$destination, $start, $length)
     $endk = $length - 1;
     $end = $start + $endk;
     for($j = $end, $k = $endk; $j >= $start; $j--, $k--) {
-        $destination[$j] = $source[$k];
+        $destination[(int)$j] = $source[(int)$k];
     }
 }

diff --git a/tests/it_stemmer_test.php b/tests/it_stemmer_test.php
index 994b0fd43..983a4e354 100644
--- a/tests/it_stemmer_test.php
+++ b/tests/it_stemmer_test.php
@@ -34,13 +34,13 @@
 if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}

 /**
- *  Load the Italian stemmer
+ *  Load the Italian stemmer via phrase_parser (5.4 hack)
  */
-require_once BASE_DIR.'/locale/it/resources/tokenizer.php';
+require_once BASE_DIR."/lib/phrase_parser.php";
 /**
  *  Load the run function
  */
-require_once BASE_DIR.'/lib/unit_test.php';
+require_once BASE_DIR.'lib/unit_test.php';

 /**
  * My code for testing the Italian stemming algorithm. The inputs for the
ViewGit