Fixes bug in retrieval of cache pages, a=chris

Chris Pollett [2011-01-28 21:??:??]
Fixes bug in retrieval of cache pages, a=chris
Filename
controllers/search_controller.php
lib/web_archive.php
models/phrase_model.php
views/search_view.php
diff --git a/controllers/search_controller.php b/controllers/search_controller.php
index 79b8cc8ee..17a8dc6dc 100755
--- a/controllers/search_controller.php
+++ b/controllers/search_controller.php
@@ -171,15 +171,7 @@ class SearchController extends Controller implements CrawlConstants
                     list(,$query_activity,) = $this->extractActivityQuery();
                     if($query_activity != "query") {$highlight = false;}
                 }
-                $summary_offset = NULL;
-                if(isset($_REQUEST['so'])) {
-                    $summary_offset = $this->clean($_REQUEST['so'], "int");
-                }
-                $generation = -1;
-                if(isset($_REQUEST['g'])) {
-                    $generation = $this->clean($_REQUEST['g'], "int");
-                }
-                $this->cacheRequest($query, $arg, $summary_offset, $generation,
+                $this->cacheRequest($query, $arg,
                     $highlight, $index_time_stamp);
             }
         }
@@ -234,20 +226,12 @@ class SearchController extends Controller implements CrawlConstants
         switch($activity)
         {
             case "related":
-            $data['QUERY'] = "related:$arg";
+                $data['QUERY'] = "related:$arg";
                 $url = $arg;
-                $summary_offset = NULL;
-                if(isset($_REQUEST['so'])) {
-                    $summary_offset = $this->clean($_REQUEST['so'], "int");
-                }
-                $generation = -1;
-                if(isset($_REQUEST['g'])) {
-                    $generation = $this->clean($_REQUEST['g'], "int");
-                }
-                if($summary_offset === NULL || $generation == -1) {
-                    list($summary_offset, $generation) =
+
+                list($summary_offset, $generation, ) =
                         $this->phraseModel->lookupSummaryOffsetGeneration($url);
-                }
+
                 $crawl_item = $this->crawlModel->getCrawlItem($summary_offset,
                     $generation);

@@ -257,7 +241,7 @@ class SearchController extends Controller implements CrawlConstants
                 $phrase_results = $this->phraseModel->getPhrasePageResults(
                     $top_query, $limit, $results_per_page, false);
                 $data['PAGING_QUERY'] = "index.php?c=search&a=related&arg=".
-                    urlencode($url)."&so=$summary_offset";
+                    urlencode($url);
             break;

             case "query":
@@ -344,8 +328,7 @@ class SearchController extends Controller implements CrawlConstants
      * @param int $crawl_time the timestamp of the crawl to look up the cached
      *      page in
      */
-    function cacheRequest($query, $url, $summary_offset = -1, $generation = -1,
-        $highlight=true, $crawl_time = 0)
+    function cacheRequest($query, $url, $highlight=true, $crawl_time = 0)
     {

         if($crawl_time == 0) {
@@ -354,10 +337,8 @@ class SearchController extends Controller implements CrawlConstants
         $this->phraseModel->index_name = $crawl_time;
         $this->crawlModel->index_name = $crawl_time;

-        if($summary_offset == -1 || $generation == -1) {
-            list($summary_offset, $generation) =
-                $this->phraseModel->lookupSummaryOffsetGeneration($url);
-        }
+        list($summary_offset, $generation, $cache_partition) =
+            $this->phraseModel->lookupSummaryOffsetGeneration($url);

         $data = array();
         if(!$crawl_item = $this->crawlModel->getCrawlItem($summary_offset,
@@ -366,13 +347,12 @@ class SearchController extends Controller implements CrawlConstants
             $this->displayView("nocache", $data);
             exit();
         }
-
         $machine = $crawl_item[self::MACHINE];
         $machine_uri = $crawl_item[self::MACHINE_URI];
         $page = $crawl_item[self::HASH];
         $offset = $crawl_item[self::OFFSET];
         $cache_item = $this->crawlModel->getCacheFile($machine,
-            $machine_uri, $generation, $offset,  $crawl_time);
+            $machine_uri, $cache_partition, $offset,  $crawl_time);
         $cache_file = $cache_item[self::PAGE];

         if(!stristr($cache_item[self::TYPE], "image")) {
@@ -423,6 +403,9 @@ class SearchController extends Controller implements CrawlConstants
         $first_child = $body->firstChild;
         $preNode = $dom->createElement('pre');
         $preNode = $body->insertBefore($preNode, $first_child);
+        $preNode->setAttributeNS("","style", "border-color: black; ".
+            "border-style:solid; border-width:3px; ".
+            "padding: 5px; background-color: white");
         $divNode = $dom->createElement('div');
         $divNode = $body->insertBefore($divNode, $preNode);
         $divNode->setAttributeNS("","style", "border-color: black; ".
diff --git a/lib/web_archive.php b/lib/web_archive.php
index bfe04bcd3..7610d1669 100755
--- a/lib/web_archive.php
+++ b/lib/web_archive.php
@@ -299,7 +299,7 @@ class WebArchive

         $open_flag = false;
         if($fh == NULL) {
-            $fh =  fopen($this->filename, "r");
+            $fh =  $this->open();
             $open_flag = true;
         }

@@ -335,7 +335,7 @@ class WebArchive
         }

         if($open_flag) {
-            fclose($fh);
+            $this->close($fh);
         }

         return $objects;
diff --git a/models/phrase_model.php b/models/phrase_model.php
index aeaa6dd4f..3a9171416 100755
--- a/models/phrase_model.php
+++ b/models/phrase_model.php
@@ -223,23 +223,27 @@ class PhraseModel extends Model
         $pages = array();
         $summary_offset = NULL;
         $num_generations = $index_archive->generation_info['ACTIVE'];
-        for($i = 0; $i <= $num_generations && $num_retrieved < 1; $i++) {
-            $index_archive->setCurrentShard($i);
-            $word_iterator =
-                new WordIterator(crawlHash("info:$url"), $index_archive);
-            while(is_array($next_docs = $word_iterator->nextDocsWithWord()) &&
-                $num_retrieved < 1) {
-                 foreach($next_docs as $doc_key => $doc_info) {
-                     $summary_offset = &
-                        $doc_info[CrawlConstants::SUMMARY_OFFSET];
-                     $num_retrieved++;
-                     if($num_retrieved >=  1) {
-                         break 3;
-                     }
+        $word_iterator =
+            new WordIterator(crawlHash("info:$url"), $index_archive);
+        if(is_array($next_docs = $word_iterator->nextDocsWithWord())) {
+             foreach($next_docs as $doc_key => $doc_info) {
+                 $summary_offset =
+                    $doc_info[CrawlConstants::SUMMARY_OFFSET];
+                 $generation = $doc_info[CrawlConstants::GENERATION];
+                 $cache_partition = $doc_info[CrawlConstants::SUMMARY][
+                    CrawlConstants::CACHE_PAGE_PARTITION];
+                 $num_retrieved++;
+                 if($num_retrieved >=  1) {
+                     break;
                  }
-            }
+             }
+             if($num_retrieved == 0) {
+                return false;
+             }
+        } else {
+            return false;
         }
-        return array($summary_offset, $i);
+        return array($summary_offset, $generation, $cache_partition);
     }

     /**
diff --git a/views/search_view.php b/views/search_view.php
index fea95a2ed..07bb60486 100755
--- a/views/search_view.php
+++ b/views/search_view.php
@@ -151,8 +151,6 @@ class SearchView extends View implements CrawlConstants
                             ?>&amp;c=search&amp;a=cache&amp;q=<?php
                             e($data['QUERY']); ?>&amp;arg=<?php
                             e(urlencode($page[self::URL]));
-                            ?>&amp;so=<?php  e($page[self::SUMMARY_OFFSET]);
-                            ?>&amp;g=<?php e($page[self::CACHE_PAGE_PARTITION]);
                             ?>&amp;its=<?php e($data['its']); ?>" >
                         <?php
                         if($page[self::TYPE] == "text/html" ||
@@ -169,9 +167,8 @@ class SearchView extends View implements CrawlConstants
                     ?>
                     <a href="?YIOOP_TOKEN=<?php e($data['YIOOP_TOKEN']);
                         ?>&amp;c=search&amp;a=related&amp;arg=<?php
-                        e(urlencode($page[self::URL])); ?>&amp;so=<?php
-                        e($page[self::SUMMARY_OFFSET]);
-                        ?>&amp;its=<?php e($data['its']); ?>" ><?php
+                        e(urlencode($page[self::URL])); ?>&amp;
+                        its=<?php e($data['its']); ?>" ><?php
                         e(tl('search_view_similar'));
                     ?></a>.
                     <?php
ViewGit