Fix another regression in robots.txt performance, a=chris

Chris Pollett [2012-05-02 03h]
Fix another regression in robots.txt performance, a=chris
Filename
bin/queue_server.php
lib/web_queue_bundle.php
diff --git a/bin/queue_server.php b/bin/queue_server.php
index a4305bf64..af2eac34c 100755
--- a/bin/queue_server.php
+++ b/bin/queue_server.php
@@ -1935,6 +1935,8 @@ class QueueServer implements CrawlConstants, Join
                     $robots_okay = $this->web_queue->checkRobotOkay($url);

                     if(!$robots_okay) {
+                        $delete_urls[$i] = $url;
+                        $this->web_queue->addSeenUrlFilter($url);
                         $i++;
                         continue;
                     }
diff --git a/lib/web_queue_bundle.php b/lib/web_queue_bundle.php
index 7d7312edf..ea90f45d3 100755
--- a/lib/web_queue_bundle.php
+++ b/lib/web_queue_bundle.php
@@ -640,7 +640,7 @@ class WebQueueBundle implements Notifier
     {
         // local cache of recent robot.txt stuff
         static $robot_cache = array();
-        $cache_size = 500;
+        $cache_size = 2000;
         $host = UrlParser::getHost($url);
         $path = UrlParser::getPath($url);
         $path = urldecode($path);
ViewGit