Try to improve calculation of visited urls/hour, a=chrisg
Try to improve calculation of visited urls/hour, a=chrisg
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 36b62c05c..d15e91a47 100644
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -2181,13 +2181,16 @@ class QueueServer implements CrawlConstants
$crawl_status['COUNT'] = $info_bundle['COUNT'] ?? 0;
$now = time();
$change_in_time = C\ONE_HOUR + 1;
+ $least_recent_hourly_pair = [0, 0]; /* default value in case the
+ while loop below never executes */
while (count($this->hourly_crawl_data) > 0 &&
$change_in_time > C\ONE_HOUR) {
$least_recent_hourly_pair = array_pop($this->hourly_crawl_data);
$change_in_time =
($now - $least_recent_hourly_pair[0]);
}
- if ($change_in_time <= C\ONE_HOUR) {
+ if ($change_in_time <= C\ONE_HOUR &&
+ $now - $least_recent_hourly_pair[0] < 2 * C\ONE_HOUR) {
$this->hourly_crawl_data[] = $least_recent_hourly_pair;
}
array_unshift($this->hourly_crawl_data,
diff --git a/src/models/CrawlModel.php b/src/models/CrawlModel.php
index fdb035095..8841c3241 100755
--- a/src/models/CrawlModel.php
+++ b/src/models/CrawlModel.php
@@ -1450,8 +1450,9 @@ EOT;
floatval(C\ONE_HOUR);
$change_in_urls = $recent[1] - $oldest[1];
+ /* a rate can't be negative, so clamp it at 0 from below */
$data[$channel]['VISITED_URLS_COUNT_PER_HOUR'] =
- ($change_in_time_hours > 0) ?
- $change_in_urls/$change_in_time_hours : 0;
+ max((($change_in_time_hours > 0) ?
+ $change_in_urls/$change_in_time_hours : 0), 0);
} else {
$data[$channel]['VISITED_URLS_COUNT_PER_HOUR'] = 0;
}