A previous tweak added a CONTENT_SIZE field to downloaded sites; this change sets that field in the case where substr is used to ensure the page does not exceed the page range request, a=chris
A previous tweak added a CONTENT_SIZE field to downloaded sites; this change sets that field in the case where substr is used to ensure the page does not exceed the page range request, a=chris
diff --git a/src/library/FetchUrl.php b/src/library/FetchUrl.php
index 9f439912e..cd796c327 100755
--- a/src/library/FetchUrl.php
+++ b/src/library/FetchUrl.php
@@ -239,7 +239,11 @@ class FetchUrl implements CrawlConstants
here
*/
if ($page_range_request > 0) {
+ $init_len = strlen($content);
$content = substr($content, 0, $page_range_request);
+ if (strlen($content) != $init_len) {
+ $sites[$i][CrawlConstants::CONTENT_SIZE] = $init_len;
+ }
}
if (isset($content) && !$minimal && !$is_gopher) {
$site = self::parseHeaderPage($content, $value);