Tweak how the earliest available slot in the schedule is calculated to see if this results in more full schedules, a=chris
Tweak how the earliest available slot in the schedule is calculated to see if this results in more full schedules, a=chris
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 04bf85727..a4c1b7890 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -2529,7 +2529,7 @@ class QueueServer implements CrawlConstants, Join
since check robots every 24 hours as needed
*/
$sites[$next_slot] = [$url, $weight, 0];
- $current_crawl_index = $next_slot;
+ $current_crawl_index++;
$delete_urls[$i] = $url;
$fetch_size++;
$i++;
@@ -2633,8 +2633,6 @@ class QueueServer implements CrawlConstants, Join
// has crawl delay but too many already waiting
$delete_urls[$i] = $url;
//delete from queue (so no clog) but don't mark seen
- $i++;
- continue;
}
} else { // add a url no crawl delay
$next_slot = $this->getEarliestSlot($current_crawl_index,
@@ -2646,7 +2644,7 @@ class QueueServer implements CrawlConstants, Join
/* we might miss some sites by marking them
seen after only scheduling them
*/
- $current_crawl_index = $next_slot;
+ $current_crawl_index++;
$fetch_size++;
} else { //no more available slots so prepare to bail
$i = $count;
@@ -2662,7 +2660,8 @@ class QueueServer implements CrawlConstants, Join
$new_time = microtime(true);
L\crawlLog("...Scheduler: Done selecting URLS for fetch batch time ".
"so far:". L\changeInMicrotime($start_time));
- L\crawlLog("...Scheduler: Examined urls while making fetch batch: $i");
+ L\crawlLog("...Scheduler: Examined urls while making fetch batch:" .
+ ($i - 1));
$num_deletes = count($delete_urls);
$k = 0;
foreach ($delete_urls as $delete_url) {
@@ -2702,7 +2701,7 @@ class QueueServer implements CrawlConstants, Join
//write schedule to disk
$fh = fopen(C\CRAWL_DIR.
"/schedules/".
- self::schedule_name.$this->crawl_time . ".txt", "wb");
+ self::schedule_name . $this->crawl_time . ".txt", "wb");
fwrite($fh, $first_line);
$num_sites = count($sites);
$k = 0;