Partial test of a new way to do things with multi curl, a=chris
Partial test of a new way to do things with multi curl, a=chris
diff --git a/lib/fetch_url.php b/lib/fetch_url.php
index 22805e7..166cefc 100755
--- a/lib/fetch_url.php
+++ b/lib/fetch_url.php
@@ -130,22 +130,19 @@ class FetchUrl implements CrawlConstants
$start = time();
//Wait for responses
+ $running=null;
do {
- $mrc = @curl_multi_exec($agent_handler, $active);
- } while (time() - $start < PAGE_TIMEOUT &&
- $mrc == CURLM_CALL_MULTI_PERFORM );
+ $mrc = curl_multi_exec($agent_handler, $running);
+ $ready=curl_multi_select($agent_handler); //block until a handle has activity or the select times out
+ if($ready > 0){
+ while($info = curl_multi_info_read($agent_handler)){
+ $status=curl_getinfo($info['handle'],CURLINFO_HTTP_CODE);
+ }
+ }
+ } while (time() - $start < PAGE_TIMEOUT && $running > 0 && $ready!=-1);
if(time() - $start > PAGE_TIMEOUT) {crawlLog(" TIMED OUT!!!");}
- while (time()-$start < PAGE_TIMEOUT && $active && $mrc == CURLM_OK) {
- if (curl_multi_select($agent_handler, 1) != -1) {
- do {
- $mrc = @curl_multi_exec($agent_handler, $active);
- } while (time()-$start < PAGE_TIMEOUT &&
- $mrc == CURLM_CALL_MULTI_PERFORM);
- }
- }
-
if($timer) {
crawlLog(" Page Request time ".(changeInMicrotime($start_time)));
diff --git a/robots.txt b/robots.txt
index c605f3a..08e6e58 100644
--- a/robots.txt
+++ b/robots.txt
@@ -1,4 +1,5 @@
-#Okay to crawl, just don't query all the indexes
-#Dumb agents that don't understand *, we hope are too slow to bother excluding
+# Okay to crawl; just don't query all the indexes
+# Dumb agents that don't understand *: we hope you
+# are too slow to bother excluding
User-agent: *
Disallow: /*?*q=