Fix another regression in robots.txt performance, a=chris
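In bin/queue_server.php, a URL that fails checkRobotOkay() is now recorded in $delete_urls and added to the seen-URL filter before it is skipped; in lib/web_queue_bundle.php, the local robots.txt cache size is raised from 500 to 2000.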
diff --git a/bin/queue_server.php b/bin/queue_server.php
index a4305bf64..af2eac34c 100755
--- a/bin/queue_server.php
+++ b/bin/queue_server.php
@@ -1935,6 +1935,8 @@ class QueueServer implements CrawlConstants, Join
$robots_okay = $this->web_queue->checkRobotOkay($url);
if(!$robots_okay) {
+ $delete_urls[$i] = $url;
+ $this->web_queue->addSeenUrlFilter($url);
$i++;
continue;
}
diff --git a/lib/web_queue_bundle.php b/lib/web_queue_bundle.php
index 7d7312edf..ea90f45d3 100755
--- a/lib/web_queue_bundle.php
+++ b/lib/web_queue_bundle.php
@@ -640,7 +640,7 @@ class WebQueueBundle implements Notifier
{
// local cache of recent robot.txt stuff
static $robot_cache = array();
- $cache_size = 500;
+ $cache_size = 2000;
$host = UrlParser::getHost($url);
$path = UrlParser::getPath($url);
$path = urldecode($path);