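Fixes a bug with temp folders in the multiple-fetcher scenario, a=chris
Each numbered fetcher now passes its own temp directory (CRAWL_DIR/<fetcher_num>-temp) to FetchUrl::getPages, which previously always wrote its tmp files under the shared CRAWL_DIR/temp.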
diff --git a/bin/fetcher.php b/bin/fetcher.php
index 9e9a604d5..beebfb21d 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -477,9 +477,15 @@ class Fetcher implements CrawlConstants
MINIMUM_FETCH_LOOP_TIME - changeInMicrotime($start_time))));
return array();
}
-
+
+ $prefix = "";
+ if($this->fetcher_num !== false) {
+ $prefix = $this->fetcher_num."-";
+ }
+ $tmp_dir = CRAWL_DIR."/{$prefix}temp";
$site_pages = FetchUrl::getPages($sites, true,
- $this->page_range_request);
+ $this->page_range_request, $tmp_dir
+ );
list($downloaded_pages, $schedule_again_pages) =
$this->reschedulePages($site_pages);
diff --git a/lib/fetch_url.php b/lib/fetch_url.php
index 85590633f..0333f02c1 100755
--- a/lib/fetch_url.php
+++ b/lib/fetch_url.php
@@ -65,7 +65,7 @@ class FetchUrl implements CrawlConstants
*/
public static function getPages($sites, $timer = false,
- $page_range_request = PAGE_RANGE_REQUEST,
+ $page_range_request = PAGE_RANGE_REQUEST, $temp_dir = NULL,
$key=CrawlConstants::URL, $value = CrawlConstants::PAGE)
{
$agent_handler = curl_multi_init();
@@ -74,11 +74,18 @@ class FetchUrl implements CrawlConstants
$start_time = microtime();
+ if($temp_dir == NULL) {
+ $temp_dir = CRAWL_DIR."/temp";
+ if(!file_exists($temp_dir)) {
+ mkdir($temp_dir);
+ }
+ }
+
//Set-up requests
for($i = 0; $i < count($sites); $i++) {
if(isset($sites[$i][$key])) {
$sites[$i][0] = curl_init();
- $ip_holder[$i] = fopen(CRAWL_DIR."/temp/tmp$i.txt", 'w+');
+ $ip_holder[$i] = fopen("$temp_dir/tmp$i.txt", 'w+');
curl_setopt($sites[$i][0], CURLOPT_USERAGENT, USER_AGENT);
$url = str_replace("&", "&", $sites[$i][$key]);
curl_setopt($sites[$i][0], CURLOPT_URL, $url);