diff --git a/lib/crawl_constants.php b/lib/crawl_constants.php index 49cb8accf..829eaa9a9 100644 --- a/lib/crawl_constants.php +++ b/lib/crawl_constants.php @@ -135,9 +135,10 @@ interface CrawlConstants const RELEVANCE ='an'; const DUPLICATE ='ao'; const META_WORDS ='ap'; + const CACHE_PAGE_PARTITION = 'aq'; const NEEDS_OFFSET_FLAG = 0x7FFFFFFE; const DUPLICATE_FLAG = 0x7FFFFFFF; - const CACHE_PAGE_PARTITION = 'aq'; + } ?> diff --git a/lib/fetch_url.php b/lib/fetch_url.php index 738201f4f..37c05a632 100755 --- a/lib/fetch_url.php +++ b/lib/fetch_url.php @@ -169,7 +169,7 @@ class FetchUrl implements CrawlConstants if(isset($encoding_parts[1])) { $sites[$i][self::ENCODING] = mb_strtoupper(trim($encoding_parts[1])); - //hopefuly safe to trust encoding sent + //hopefully safe to trust encoding sent } } else { $sites[$i][self::ENCODING] = diff --git a/lib/index_bundle_iterators/group_iterator.php b/lib/index_bundle_iterators/group_iterator.php index 2d2e30bbc..30b42bd1b 100644 --- a/lib/index_bundle_iterators/group_iterator.php +++ b/lib/index_bundle_iterators/group_iterator.php @@ -90,6 +90,11 @@ class GroupIterator extends IndexBundleIterator */ var $grouped_keys; + /** + * the minimum number of pages to group from a block; + * this trumps $this->index_bundle_iterator->results_per_block + */ + const MIN_FIND_RESULTS_PER_BLOCK = 200; /** * Creates a group iterator with the given parameters. 
@@ -102,8 +107,9 @@ class GroupIterator extends IndexBundleIterator { $this->index_bundle_iterator = $index_bundle_iterator; $this->num_docs = $this->index_bundle_iterator->num_docs; - $this->results_per_block = - $this->index_bundle_iterator->results_per_block; + $this->results_per_block = max( + $this->index_bundle_iterator->results_per_block, + self::MIN_FIND_RESULTS_PER_BLOCK); $this->reset(); } @@ -120,6 +126,19 @@ class GroupIterator extends IndexBundleIterator $this->seen_docs_unfiltered = 0; } + /** + * Computes a relevancy score for a posting offset with respect to this + * iterator + * @param int $posting_offset an offset into word_docs to compute the + * relevance of + * @return float a relevancy score based on BM25F. + */ + function computeRelevance($posting_offset) + { + return $this->index_bundle_iterator->computeRelevance( + $posting_offset); + } + /** * Hook function used by currentDocsWithWord to return the current block * of docs if it is not cached @@ -128,9 +147,26 @@ class GroupIterator extends IndexBundleIterator */ function findDocsWithWord() { - $pages = - $this->index_bundle_iterator->currentDocsWithWord(); - + $pages = array(); + $count = 0; + $done = false; + do { + $new_pages = $this->index_bundle_iterator->currentDocsWithWord(); + if(!is_array($new_pages)) { + $done = true; + if(count($pages) == 0) { + $pages = -1; + } + } else { + $pages = array_merge($pages, $new_pages); + $count = count($pages); + } + if($count < $this->results_per_block && !$done) { + $this->index_bundle_iterator->advance(); + } else { + $done = true; + } + } while(!$done); $this->count_block_unfiltered = count($pages); if(!is_array($pages)) { return $pages; @@ -187,12 +223,17 @@ class GroupIterator extends IndexBundleIterator $this->getIndex(), true); $doc_array = $word_iterator->currentDocsWithWord(); if(is_array($doc_array) && count($doc_array) == 1) { + $relevance = $this->computeRelevance( + $word_iterator->current_offset); $keys = array_keys($doc_array); $key 
= $keys[0]; - if(!isset($doc_array[$key][self::DUPLICATE]) ) {; - $pre_out_pages[$hash_url][$key] = $doc_array[$key]; - $pre_out_pages[$hash_url][$key]['IS_PAGE'] = true; - $pre_out_pages[$hash_url][$key]['KEY'] = $key; + if(!isset($doc_array[$key][self::DUPLICATE]) ) { + $item = $doc_array[$key]; + $item[self::RELEVANCE] += $relevance; + $item[self::SCORE] += $relevance; + $item['IS_PAGE'] = true; + $item['KEY'] = $key; + array_unshift($pre_out_pages[$hash_url], $item); } else { /* Deduplication: @@ -288,9 +329,15 @@ class GroupIterator extends IndexBundleIterator list($key, $summary_offset) = $offset_array; $index = & $this->getIndex($key); $page = $index->getPage($summary_offset); + if($page == array()) {continue;} if(!isset($out_pages[$doc_key][self::SUMMARY])) { $out_pages[$doc_key][self::SUMMARY] = $page; } else if (isset($page[self::DESCRIPTION])) { + if(!isset($out_pages[$doc_key][ + self::SUMMARY][self::DESCRIPTION])) { + $out_pages[$doc_key][self::SUMMARY][ + self::DESCRIPTION] = ""; + } $out_pages[$doc_key][self::SUMMARY][self::DESCRIPTION].= " .. ".$page[self::DESCRIPTION]; } diff --git a/lib/index_bundle_iterators/index_bundle_iterator.php b/lib/index_bundle_iterators/index_bundle_iterator.php index 3878121d7..7c5f7e174 100644 --- a/lib/index_bundle_iterators/index_bundle_iterator.php +++ b/lib/index_bundle_iterators/index_bundle_iterator.php @@ -88,6 +88,15 @@ abstract class IndexBundleIterator implements CrawlConstants */ const RESULTS_PER_BLOCK = 100; + /** + * Computes a relevancy score for a posting offset with respect to this + * iterator + * @param int $posting_offset an offset into word_docs to compute the + * relevance of + * @return float a relevancy score based on BM25F. 
+ */ + abstract function computeRelevance($posting_offset); + /** * Returns the iterators to the first document block that it could iterate * over @@ -127,8 +136,6 @@ abstract class IndexBundleIterator implements CrawlConstants * Gets the current block of doc ids and score associated with the * this iterators word * - * @param bool $with_summaries specifies whether or not to return the - * summaries associated with the document * @return mixed doc ids and score if there are docs left, -1 otherwise */ function currentDocsWithWord() diff --git a/lib/index_bundle_iterators/intersect_iterator.php b/lib/index_bundle_iterators/intersect_iterator.php index 31f18f3a8..427f0105a 100644 --- a/lib/index_bundle_iterators/intersect_iterator.php +++ b/lib/index_bundle_iterators/intersect_iterator.php @@ -119,8 +119,9 @@ class IntersectIterator extends IndexBundleIterator */ function reset() { - foreach($this->index_bundle_iterators as $iterator) { - $iterator->reset(); + for($i = 0; $i < $this->num_iterators; $i++) { + $this->index_bundle_iterators[$i]->setResultsPerBlock(1); + $this->index_bundle_iterators[$i]->reset(); } $this->seen_docs = 0; @@ -128,6 +129,23 @@ class IntersectIterator extends IndexBundleIterator } + /** + * Computes a relevancy score for a posting offset with respect to this + * iterator + * @param int $posting_offset an offset into word_docs to compute the + * relevance of + * @return float a relevancy score based on BM25F. 
+ */ + function computeRelevance($posting_offset) + { + $relevance = 0; + for($i = 0; $i < $this->num_iterators; $i++) { + $relevance += $this->index_bundle_iterators[$i]->computeRelevance( + $posting_offset); + } + return $relevance; + } + /** * Hook function used by currentDocsWithWord to return the current block * of docs if it is not cached @@ -142,8 +160,23 @@ class IntersectIterator extends IndexBundleIterator if($status == -1) { return -1; } + //next we finish computing BM25F $docs = $this->index_bundle_iterators[0]->currentDocsWithWord(); - $this->count_block = count($docs); + + if(is_array($docs) && count($docs) == 1) { + //we get intersect docs one at a time so should be only one + $keys = array_keys($docs); + $key = $keys[0]; + for($i = 1; $i < $this->num_iterators; $i++) { + $i_docs = + $this->index_bundle_iterators[$i]->currentDocsWithWord(); + + $docs[$key][self::RELEVANCE] += $i_docs[$key][self::RELEVANCE]; + } + $docs[$key][self::SCORE] = $docs[$key][self::DOC_RANK] + + $docs[$key][self::RELEVANCE]; + } + $this->count_block = count($docs); $this->pages = $docs; return $docs; } @@ -161,7 +194,7 @@ class IntersectIterator extends IndexBundleIterator $this->index_bundle_iterators[ $i]->currentDocOffsetWithWord(); if($i == 0) { - $biggest_offset = $new_doc_offset[$i]; + $biggest_offset = $new_doc_offset[0]; } if($new_doc_offset[$i] == -1) { return -1; @@ -169,6 +202,8 @@ class IntersectIterator extends IndexBundleIterator if($new_doc_offset[$i] > $biggest_offset) { $biggest_offset = $new_doc_offset[$i]; $all_same = false; + } else if ($new_doc_offset[$i] < $biggest_offset) { + $all_same = false; } } if($all_same) { @@ -176,6 +211,7 @@ class IntersectIterator extends IndexBundleIterator } for($i = 0; $i < $this->num_iterators; $i++) { if($new_doc_offset[$i] < $biggest_offset) { + $this->index_bundle_iterators[$i]->advance($biggest_offset); } } @@ -205,7 +241,6 @@ class IntersectIterator extends IndexBundleIterator floor(($this->seen_docs * 
$total_num_docs) / $this->seen_docs_unfiltered); } - $this->index_bundle_iterators[0]->advance($doc_offset); } diff --git a/lib/index_bundle_iterators/phrase_filter_iterator.php b/lib/index_bundle_iterators/phrase_filter_iterator.php index 36e492812..8a1361472 100644 --- a/lib/index_bundle_iterators/phrase_filter_iterator.php +++ b/lib/index_bundle_iterators/phrase_filter_iterator.php @@ -141,6 +141,19 @@ class PhraseFilterIterator extends IndexBundleIterator $doc_block = $this->currentDocsWithWord(); } + /** + * Computes a relevancy score for a posting offset with respect to this + * iterator + * @param int $posting_offset an offset into word_docs to compute the + * relevance of + * @return float a relevancy score based on BM25F. + */ + function computeRelevance($posting_offset) + { + return $this->index_bundle_iterator->computeRelevance( + $posting_offset); + } + /** * Hook function used by currentDocsWithWord to return the current block * of docs if it is not cached diff --git a/lib/index_bundle_iterators/union_iterator.php b/lib/index_bundle_iterators/union_iterator.php index a5cb0b6bd..4f4f008b1 100644 --- a/lib/index_bundle_iterators/union_iterator.php +++ b/lib/index_bundle_iterators/union_iterator.php @@ -131,6 +131,23 @@ class UnionIterator extends IndexBundleIterator } + /** + * Computes a relevancy score for a posting offset with respect to this + * iterator + * @param int $posting_offset an offset into word_docs to compute the + * relevance of + * @return float a relevancy score based on BM25F. 
+ */ + function computeRelevance($posting_offset) + { + $relevance = 0; + for($i = 0; $i < $this->num_iterators; $i++) { + $relevance += $this->index_bundle_iterators[$i]->computeRelevance( + $posting_offset); + } + return $relevance; + } + /** * Hook function used by currentDocsWithWord to return the current block * of docs if it is not cached diff --git a/lib/index_bundle_iterators/word_iterator.php b/lib/index_bundle_iterators/word_iterator.php index 25e4846c7..9cf31b405 100644 --- a/lib/index_bundle_iterators/word_iterator.php +++ b/lib/index_bundle_iterators/word_iterator.php @@ -146,6 +146,21 @@ class WordIterator extends IndexBundleIterator } } + /** + * Computes a relevancy score for a posting offset with respect to this + * iterator + * @param int $posting_offset an offset into word_docs to compute the + * relevance of + * @return float a relevancy score based on BM25F. + */ + function computeRelevance($posting_offset) + { + $item = array(); + $this->index->getCurrentShard()->makeItem($item, + $this->start_offset, $posting_offset, $this->last_offset, 1); + return $item[self::RELEVANCE]; + } + /** * Returns the iterators to the first document block that it could iterate * over @@ -173,6 +188,7 @@ class WordIterator extends IndexBundleIterator $this->next_offset = $this->current_offset; //the next call also updates next offset $results = $this->index->getCurrentShard()->getPostingsSlice( + $this->start_offset, $this->next_offset, $this->last_offset, $this->results_per_block); return $results; } diff --git a/lib/index_shard.php b/lib/index_shard.php index 09b9c2f2e..2a52e33d6 100644 --- a/lib/index_shard.php +++ b/lib/index_shard.php @@ -341,89 +341,107 @@ class IndexShard extends PersistentStructure implements CrawlConstants * reference the value of $next_offset will point to the next record in * the list (if it exists) after the function is called. * + * @param int $start_offset of the current posting list for query term + * used in calculating BM25F. 
* @param int &$next_offset where to start in word docs * @param int $last_offset offset at which to stop by * @param int $len number of documents desired * @return array desired list of doc's and their info */ - function getPostingsSlice(&$next_offset, $last_offset, $len) + function getPostingsSlice($start_offset, &$next_offset, $last_offset, $len) { if(!$this->read_only_from_disk && !$this->word_docs_packed) { $this->packWordDocs(); } $num_docs_so_far = 0; - $num_doc_or_links = ($next_offset > 0) ? - ($last_offset - $next_offset) >> 2 - : 1; $results = array(); $end = min($this->word_docs_len, $last_offset); do { if($next_offset > $end) {break;} $item = array(); - $posting = $this->getWordDocsSubstring($next_offset, 4); - list($doc_index, $occurrences) = $this->unpackPosting($posting); + $doc_id = + $this->makeItem( + $item, $start_offset, $next_offset, $last_offset); + $results[$doc_id] = $item; + $num_docs_so_far ++; + $old_next_offset = $next_offset; $next_offset += self::POSTING_LEN; - $doc_depth = log(10*(($doc_index +1) + - $this->generation_offset)*NUM_FETCHERS, 10); - $item[self::DOC_RANK] = number_format(11 - - $doc_depth, PRECISION); - $doc_loc = $doc_index << 4; - $doc_info_string = $this->getDocInfoSubstring($doc_loc, 12); - $doc_id = substr($doc_info_string, 0, 8); - $item[self::SUMMARY_OFFSET] = $this->unpackInt( - substr($doc_info_string, 8, 4)); - $is_doc = false; - $skip_stats = false; - - if($item[self::SUMMARY_OFFSET] == self::DUPLICATE_FLAG || - $item[self::SUMMARY_OFFSET] == self::NEEDS_OFFSET_FLAG) { - $skip_stats = true; - $item[self::DUPLICATE] = true; - } else if(($item[self::SUMMARY_OFFSET] - & self::COMPOSITE_ID_FLAG) !== 0) { - //handles link item case - $item[self::SUMMARY_OFFSET] ^= self::COMPOSITE_ID_FLAG; - $doc_loc += 12; - $doc_info_string = $this->getDocInfoSubstring($doc_loc, 16); - $doc_id .= ":". - substr($doc_info_string, 0, 8).":". - substr($doc_info_string, 8, 8); - $average_doc_len = ($this->num_link_docs != 0) ? 
- $this->len_all_link_docs/$this->num_link_docs : 0; - $num_docs = $this->num_link_docs; - } else { - $is_doc = true; - $average_doc_len = $this->len_all_docs/$this->num_docs; - $num_docs = $this->num_docs; - } - if(!$skip_stats) { - $tmp = unpack("N",$this->getDocInfoSubstring($doc_loc + 12, 4)); - $doc_len = $tmp[1]; - $doc_ratio = ($average_doc_len > 0) ? - $doc_len/$average_doc_len : 0; - $pre_relevance = number_format( - 3 * $occurrences/ - ($occurrences + .5 + 1.5* $doc_ratio), - PRECISION); - $num_term_occurrences = $num_doc_or_links * - $num_docs/($this->num_docs + $this->num_link_docs); - $IDF = ($num_docs - $num_term_occurrences + 0.5) / - ($num_term_occurrences + 0.5); - $item[self::RELEVANCE] = $IDF * $pre_relevance; - - $item[self::SCORE] = $item[self::DOC_RANK] + - .1/ ($item[self::RELEVANCE] + .1); - } - $results[$doc_id] = $item; - $num_docs_so_far ++; - } while ($next_offset<= $last_offset && $num_docs_so_far < $len && $next_offset > $old_next_offset); return $results; } + /** + * + */ + function makeItem(&$item, $start_offset, $current_offset, $last_offset, + $occurs = 0) + { + $num_doc_or_links = ($last_offset - $start_offset) >> 2; + + $posting = $this->getWordDocsSubstring($current_offset, 4); + list($doc_index, $occurrences) = $this->unpackPosting($posting); + if($occurrences < $occurs) { + $occurrences = $occurs; + } + $doc_depth = log(10*(($doc_index +1) + + $this->generation_offset)*NUM_FETCHERS, 10); + $item[self::DOC_RANK] = number_format(11 - + $doc_depth, PRECISION); + $doc_loc = $doc_index << 4; + $doc_info_string = $this->getDocInfoSubstring($doc_loc, 12); + $doc_id = substr($doc_info_string, 0, 8); + $item[self::SUMMARY_OFFSET] = $this->unpackInt( + substr($doc_info_string, 8, 4)); + $is_doc = false; + $skip_stats = false; + + if($item[self::SUMMARY_OFFSET] == self::DUPLICATE_FLAG || + $item[self::SUMMARY_OFFSET] == self::NEEDS_OFFSET_FLAG) { + $skip_stats = true; + $item[self::DUPLICATE] = true; + } else 
if(($item[self::SUMMARY_OFFSET] + & self::COMPOSITE_ID_FLAG) !== 0) { + //handles link item case + $item[self::SUMMARY_OFFSET] ^= self::COMPOSITE_ID_FLAG; + $doc_loc += 12; + $doc_info_string = $this->getDocInfoSubstring($doc_loc, 16); + $doc_id .= ":". + substr($doc_info_string, 0, 8).":". + substr($doc_info_string, 8, 8); + $average_doc_len = ($this->num_link_docs != 0) ? + $this->len_all_link_docs/$this->num_link_docs : 0; + $num_docs = $this->num_link_docs; + } else { + $is_doc = true; + $average_doc_len = $this->len_all_docs/$this->num_docs; + $num_docs = $this->num_docs; + } + + if(!$skip_stats) { + $doc_len = $this->unpackInt( + $this->getDocInfoSubstring($doc_loc + 12, 4)); + $doc_ratio = ($average_doc_len > 0) ? + $doc_len/$average_doc_len : 0; + $pre_relevance = number_format( + 3 * $occurrences/ + ($occurrences + .5 + 1.5* $doc_ratio), + PRECISION); + $num_term_occurrences = $num_doc_or_links * + $num_docs/($this->num_docs + $this->num_link_docs); + $IDF = ($num_docs - $num_term_occurrences + 0.5) / + ($num_term_occurrences + 0.5); + $item[self::RELEVANCE] = .05 * $IDF * $pre_relevance; + + $item[self::SCORE] = $item[self::DOC_RANK] + + + $item[self::RELEVANCE]; + } + return $doc_id; + + } + /** * */ @@ -495,7 +513,8 @@ class IndexShard extends PersistentStructure implements CrawlConstants if(isset($this->words[$word_id])) { list($first_offset, $last_offset, $num_docs_or_links) = $this->getWordInfo($word_id, true); - $results = $this->getPostingsSlice($first_offset, $last_offset, $len); + $results = $this->getPostingsSlice($first_offset, + $first_offset, $last_offset, $len); } return $results; } diff --git a/models/model.php b/models/model.php index b95c49925..a0a72e7c8 100755 --- a/models/model.php +++ b/models/model.php @@ -107,7 +107,6 @@ class Model implements CrawlConstants { if(isset($results['PAGES'])) { $pages = $results['PAGES']; - $num_pages = count($pages); } else { $output['TOTAL_ROWS'] = 0; @@ -116,7 +115,9 @@ class Model implements 
CrawlConstants } for($i = 0; $i < $num_pages; $i++) { $page = $pages[$i]; - + if(!isset($page[self::TITLE])) { + $page[self::TITLE] = ""; + } $page[self::TITLE] = strip_tags($page[self::TITLE]); if(strlen($page[self::TITLE]) == 0 ) { @@ -134,7 +135,7 @@ class Model implements CrawlConstants substr(strip_tags($page[self::DESCRIPTION]), 0, $end_title). $ellipsis; //still no text revert to url - if(strlen($page[self::TITLE]) == 0) { + if(strlen($page[self::TITLE]) == 0 && isset($page[self::URL])) { $page[self::TITLE] = $page[self::URL]; } } diff --git a/models/phrase_model.php b/models/phrase_model.php index 5acd07ded..d404604c6 100755 --- a/models/phrase_model.php +++ b/models/phrase_model.php @@ -71,7 +71,11 @@ class PhraseModel extends Model */ var $index_name; - + /** + * Number of pages to cache in one go in memcache + * Size chosen based on 1MB max object size for memcache + */ + const NUM_CACHE_PAGES = 80; /** * {@inheritdoc} */ @@ -416,8 +420,10 @@ class PhraseModel extends Model $pages = array(); $generation = 0; - $to_retrieve = ceil(($limit+$num)/100) * 100; - $start_slice = floor(($limit)/100) * 100; + $to_retrieve = ceil(($limit+$num)/self::NUM_CACHE_PAGES) * + self::NUM_CACHE_PAGES; + $start_slice = floor(($limit)/self::NUM_CACHE_PAGES) * + self::NUM_CACHE_PAGES; if(USE_MEMCACHE) { $tmp = ""; foreach($word_structs as $word_struct) { @@ -448,7 +454,7 @@ class PhraseModel extends Model $pages = array_merge($pages, $gen_pages); $generation++; } - uasort($pages, "scoreOrderCallback"); + usort($pages, "scoreOrderCallback"); if($num_retrieved < $to_retrieve) { $results['TOTAL_ROWS'] = $num_retrieved; @@ -463,10 +469,9 @@ class PhraseModel extends Model $results['PAGES'] = & $pages; $results['PAGES'] = array_slice($results['PAGES'], $start_slice); if(USE_MEMCACHE) { - $MEMCACHE->set($summary_hash, $results); } - $results['PAGES'] = array_slice($results['PAGES'], $limit -$start_slice, + $results['PAGES'] = array_slice($results['PAGES'], $limit-$start_slice, 
$num); return $results; diff --git a/views/search_view.php b/views/search_view.php index 022da1865..3e15e5bea 100755 --- a/views/search_view.php +++ b/views/search_view.php @@ -116,7 +116,8 @@ class SearchView extends View implements CrawlConstants foreach($data['PAGES'] as $page) {?> <div class='result'> <h2> - <a href="<?php if($page[self::TYPE] != "link") { + <a href="<?php if(isset($page[self::TYPE]) + && $page[self::TYPE] != "link") { e($page[self::URL]); } else { e(strip_tags($page[self::TITLE])); @@ -126,23 +127,23 @@ class SearchView extends View implements CrawlConstants e($page[self::TITLE]); ?>" /> <?php } else { echo $page[self::TITLE]; - $this->filetypeHelper->render($page[self::TYPE]); + if(isset($page[self::TYPE])) { + $this->filetypeHelper->render($page[self::TYPE]); + } } ?></a></h2> <p><?php echo $page[self::DESCRIPTION]; ?></p> - <p class="echolink" ><?php - e(substr($page[self::URL],0, 200)." "); + <p class="echolink" ><?php if(isset($page[self::URL])){ + e(substr($page[self::URL],0, 200)." ");} e(tl('search_view_rank', number_format($page[self::DOC_RANK], 2))); $page["WEIGHT"] = (isset($page["WEIGHT"])) ? $page["WEIGHT"] : 1; e(tl('search_view_relevancy', - number_format((1.25*floatval($page[self::SCORE]) - - floatval($page[self::DOC_RANK])) - / $page["WEIGHT"] , 2) )); - e(tl('search_view_score', 1.25* $page[self::SCORE])); - if($page[self::TYPE] != "link") { + number_format($page[self::RELEVANCE], 2) )); + e(tl('search_view_score', $page[self::SCORE])); + if(isset($page[self::TYPE]) && $page[self::TYPE] != "link") { ?> <a href="?YIOOP_TOKEN=<?php e($data['YIOOP_TOKEN']); ?>&c=search&a=cache&q=<?php