Fixes a bug in group iterator, a=chris

Chris Pollett [2011-10-25, 16h]
Fixes a bug in group iterator, a=chris
Filename
lib/fetch_url.php
lib/index_bundle_iterators/group_iterator.php
diff --git a/lib/fetch_url.php b/lib/fetch_url.php
index 94b9cbb16..bbbbbc22e 100755
--- a/lib/fetch_url.php
+++ b/lib/fetch_url.php
@@ -299,16 +299,16 @@ class FetchUrl implements CrawlConstants
         }
         if(!isset($site[CrawlConstants::ENCODING]) ) {
         //first guess we are html and try to find charset in doc head
-            $end_head = stripos($site[CrawlConstants::PAGE], "</head");
+            $end_head = stripos($site[$value], "</head");
             if($end_head) {
                 $len_c = strlen("charset=");
-                $start_charset = stripos($site[CrawlConstants::PAGE],
+                $start_charset = stripos($site[$value],
                     "charset=") + $len_c;
                 if($start_charset && $start_charset < $end_head) {
-                    $end_charset = stripos($site[CrawlConstants::PAGE],
+                    $end_charset = stripos($site[$value],
                         '"', $start_charset);
                     if($end_charset && $end_charset < $end_head) {
-                        $pre_charset = substr($site[CrawlConstants::PAGE],
+                        $pre_charset = substr($site[$value],
                             $start_charset, $end_charset - $start_charset);
                         $charset_parts =
                             preg_split("/[\s,]+/", $pre_charset);
diff --git a/lib/index_bundle_iterators/group_iterator.php b/lib/index_bundle_iterators/group_iterator.php
index 767b39ba4..68c69a719 100644
--- a/lib/index_bundle_iterators/group_iterator.php
+++ b/lib/index_bundle_iterators/group_iterator.php
@@ -314,7 +314,6 @@ class GroupIterator extends IndexBundleIterator
      */
     function groupByHashAndAggregate(&$pre_out_pages)
     {
-        $domain_vector = array();
         foreach($pre_out_pages as $hash_url => $data) {
             $hash = substr($data[0]['KEY'], IndexShard::DOC_KEY_LEN,
                 IndexShard::DOC_KEY_LEN);
@@ -329,8 +328,8 @@ class GroupIterator extends IndexBundleIterator

             $this->aggregateScores($hash_url, $pre_out_pages[$hash_url]);

-            if(isset($pre_out_pages[$hash_url][self::HASH])) {
-                $hash = $pre_out_pages[$hash_url][self::HASH];
+            if(isset($pre_out_pages[$hash_url][0][self::HASH])) {
+                $hash = $pre_out_pages[$hash_url][0][self::HASH];
                 if(isset($this->grouped_hashes[$hash])) {
                     unset($pre_out_pages[$hash_url]);
                 } else if(isset($this->current_seen_hashes[$hash])) {
ViewGit