Partial test of a new way to do things with multi curl, a=chris

Chris Pollett [2012-03-29 21:55:13]
Partial test of a new way to do things with multi curl, a=chris
Filename
lib/fetch_url.php
robots.txt
diff --git a/lib/fetch_url.php b/lib/fetch_url.php
index 22805e7..166cefc 100755
--- a/lib/fetch_url.php
+++ b/lib/fetch_url.php
@@ -130,22 +130,19 @@ class FetchUrl implements CrawlConstants
         $start = time();

         //Wait for responses
+        $running=null;
         do {
-            $mrc = @curl_multi_exec($agent_handler, $active);
-        } while (time() - $start < PAGE_TIMEOUT &&
-            $mrc == CURLM_CALL_MULTI_PERFORM );
+            $mrc = curl_multi_exec($agent_handler, $running);
+            $ready=curl_multi_select($agent_handler); //this will pause the loop
+            if($ready > 0){
+                while($info = curl_multi_info_read($agent_handler)){
+                    $status=curl_getinfo($info['handle'],CURLINFO_HTTP_CODE);
+                }
+            }
+        } while (time() - $start < PAGE_TIMEOUT &&  $running > 0 && $ready!=-1);

         if(time() - $start > PAGE_TIMEOUT) {crawlLog("  TIMED OUT!!!");}

-        while (time()-$start < PAGE_TIMEOUT && $active && $mrc == CURLM_OK) {
-            if (curl_multi_select($agent_handler, 1) != -1) {
-                do {
-                     $mrc = @curl_multi_exec($agent_handler, $active);
-                } while (time()-$start < PAGE_TIMEOUT &&
-                    $mrc == CURLM_CALL_MULTI_PERFORM);
-            }
-        }
-
         if($timer) {

             crawlLog("  Page Request time ".(changeInMicrotime($start_time)));
diff --git a/robots.txt b/robots.txt
index c605f3a..08e6e58 100644
--- a/robots.txt
+++ b/robots.txt
@@ -1,4 +1,5 @@
-#Okay to crawl, just don't query all the indexes
-#Dumb agents that don't understand *, we hope are too slow to bother excluding
+# Okay to crawl, just don't query all the indexes
+# Dumb agents that don't understand *, we hope you
+# are too slow to bother excluding
 User-agent: *
 Disallow: /*?*q=
ViewGit