Fixes a bug in group iterator, a=chris
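GroupIterator::groupByHashAndAggregate now reads self::HASH from element [0] of each $pre_out_pages entry and no longer initializes the $domain_vector local; the FetchUrl charset detection now reads $site[$value] instead of $site[CrawlConstants::PAGE], a=chris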
diff --git a/lib/fetch_url.php b/lib/fetch_url.php
index 94b9cbb16..bbbbbc22e 100755
--- a/lib/fetch_url.php
+++ b/lib/fetch_url.php
@@ -299,16 +299,16 @@ class FetchUrl implements CrawlConstants
}
if(!isset($site[CrawlConstants::ENCODING]) ) {
//first guess we are html and try to find charset in doc head
- $end_head = stripos($site[CrawlConstants::PAGE], "</head");
+ $end_head = stripos($site[$value], "</head");
if($end_head) {
$len_c = strlen("charset=");
- $start_charset = stripos($site[CrawlConstants::PAGE],
+ $start_charset = stripos($site[$value],
"charset=") + $len_c;
if($start_charset && $start_charset < $end_head) {
- $end_charset = stripos($site[CrawlConstants::PAGE],
+ $end_charset = stripos($site[$value],
'"', $start_charset);
if($end_charset && $end_charset < $end_head) {
- $pre_charset = substr($site[CrawlConstants::PAGE],
+ $pre_charset = substr($site[$value],
$start_charset, $end_charset - $start_charset);
$charset_parts =
preg_split("/[\s,]+/", $pre_charset);
diff --git a/lib/index_bundle_iterators/group_iterator.php b/lib/index_bundle_iterators/group_iterator.php
index 767b39ba4..68c69a719 100644
--- a/lib/index_bundle_iterators/group_iterator.php
+++ b/lib/index_bundle_iterators/group_iterator.php
@@ -314,7 +314,6 @@ class GroupIterator extends IndexBundleIterator
*/
function groupByHashAndAggregate(&$pre_out_pages)
{
- $domain_vector = array();
foreach($pre_out_pages as $hash_url => $data) {
$hash = substr($data[0]['KEY'], IndexShard::DOC_KEY_LEN,
IndexShard::DOC_KEY_LEN);
@@ -329,8 +328,8 @@ class GroupIterator extends IndexBundleIterator
$this->aggregateScores($hash_url, $pre_out_pages[$hash_url]);
- if(isset($pre_out_pages[$hash_url][self::HASH])) {
- $hash = $pre_out_pages[$hash_url][self::HASH];
+ if(isset($pre_out_pages[$hash_url][0][self::HASH])) {
+ $hash = $pre_out_pages[$hash_url][0][self::HASH];
if(isset($this->grouped_hashes[$hash])) {
unset($pre_out_pages[$hash_url]);
} else if(isset($this->current_seen_hashes[$hash])) {