Fixes bug in temp folders in multiple fetcher scenario, a=chris

Chris Pollett [2011-12-08, 06h]
Fixes bug where multiple fetchers shared one temp folder: each fetcher now passes its own "{fetcher_num}-temp" directory to FetchUrl::getPages, a=chris
Filename
bin/fetcher.php
lib/fetch_url.php
diff --git a/bin/fetcher.php b/bin/fetcher.php
index 9e9a604d5..beebfb21d 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -477,9 +477,15 @@ class Fetcher implements CrawlConstants
                 MINIMUM_FETCH_LOOP_TIME - changeInMicrotime($start_time))));
             return array();
         }
-
+
+        $prefix = "";
+        if($this->fetcher_num !== false) {
+            $prefix = $this->fetcher_num."-";
+        }
+        $tmp_dir = CRAWL_DIR."/{$prefix}temp";
         $site_pages = FetchUrl::getPages($sites, true,
-            $this->page_range_request);
+            $this->page_range_request, $tmp_dir
+            );

         list($downloaded_pages, $schedule_again_pages) =
             $this->reschedulePages($site_pages);
diff --git a/lib/fetch_url.php b/lib/fetch_url.php
index 85590633f..0333f02c1 100755
--- a/lib/fetch_url.php
+++ b/lib/fetch_url.php
@@ -65,7 +65,7 @@ class FetchUrl implements CrawlConstants
      */

     public static function getPages($sites, $timer = false,
-        $page_range_request = PAGE_RANGE_REQUEST,
+        $page_range_request = PAGE_RANGE_REQUEST, $temp_dir = NULL,
         $key=CrawlConstants::URL, $value = CrawlConstants::PAGE)
     {
         $agent_handler = curl_multi_init();
@@ -74,11 +74,18 @@ class FetchUrl implements CrawlConstants

         $start_time = microtime();

+        if($temp_dir == NULL) {
+            $temp_dir = CRAWL_DIR."/temp";
+            if(!file_exists($temp_dir)) {
+                mkdir($temp_dir);
+            }
+        }
+
         //Set-up requests
         for($i = 0; $i < count($sites); $i++) {
             if(isset($sites[$i][$key])) {
                 $sites[$i][0] = curl_init();
-                $ip_holder[$i] = fopen(CRAWL_DIR."/temp/tmp$i.txt", 'w+');
+                $ip_holder[$i] = fopen("$temp_dir/tmp$i.txt", 'w+');
                 curl_setopt($sites[$i][0], CURLOPT_USERAGENT, USER_AGENT);
                 $url = str_replace("&amp;", "&", $sites[$i][$key]);
                 curl_setopt($sites[$i][0], CURLOPT_URL, $url);
ViewGit