Trying to improve snippet text take 2, a=chris

Chris Pollett [2012-06-24 05h]
Trying to improve snippet text take 2, a=chris
Filename
models/model.php
models/parallel_model.php
diff --git a/models/model.php b/models/model.php
index 711a241a3..eae682e7b 100755
--- a/models/model.php
+++ b/models/model.php
@@ -230,19 +230,29 @@ class Model implements CrawlConstants

         $ellipsis = "";
         $words = array_unique($words);
-        $words = array_filter($words);
+        $start_words = array_filter($words);
         $snippet_string = "";
         $snippet_hash = array();
         $text_sources = explode(" .. ", $text);
         foreach($text_sources as $text_source) {
             $len = mb_strlen($text_source);
             $offset = 0;
+            $words = $start_words;
             if(strlen($text_source) < MIN_SNIPPET_LENGTH) {
                 if(!isset($snippet_hash[$text_source])) {
-                    $snippet_string .= $ellipsis. $text_source;
-                    $ellipsis = " ... ";
-                    $snippet_hash[$text_source] = true;
-                    if(strlen($snippet_string) >= $description_length) break;
+                    $found = false;
+                    foreach($words as $word) {
+                        if(mb_stristr($text_source, $word) !== false) {
+                            $found = true;
+                            break;
+                        }
+                    }
+                    if($found) {
+                        $snippet_string .= $ellipsis. $text_source;
+                        $ellipsis = " ... ";
+                        $snippet_hash[$text_source] = true;
+                        if(strlen($snippet_string)>= $description_length) break;
+                    }
                 }
                 continue;
             }
@@ -287,6 +297,7 @@ class Model implements CrawlConstants
                     if(strlen($snippet_string) >= $description_length) break 3;
                 }
                 $words = array_values($word_locations);
+                if($words == array()) break;
                 $offset = $new_offset + 1;
             } while($offset < $len);
         }
diff --git a/models/parallel_model.php b/models/parallel_model.php
index 5891434e4..ed41e7940 100755
--- a/models/parallel_model.php
+++ b/models/parallel_model.php
@@ -180,6 +180,7 @@ class ParallelModel extends Model implements CrawlConstants
                 $description_hash = array();
                 $result = unserialize(webdecode($elt[self::PAGE]));
                 if(!is_array($result)) continue;
+                $ellipsis = "";
                 foreach($result as $lookup => $summary) {
                     if(isset($summaries[$lookup])) {
                         if(isset($summary[self::DESCRIPTION])) {
@@ -188,8 +189,9 @@ class ParallelModel extends Model implements CrawlConstants
                                 $summaries[$lookup][self::DESCRIPTION] = "";
                             }
                             if(!isset($description_hash[$description])){
-                                $summaries[$lookup][self::DESCRIPTION] = " .. ".
-                                     $description;
+                                $summaries[$lookup][self::DESCRIPTION] =
+                                    $ellipsis . $description;
+                                $ellipsis = " .. ";
                                 $description_hash[$description] = true;
                             }
                         }
@@ -236,6 +238,7 @@ class ParallelModel extends Model implements CrawlConstants
                     $index_archive->getPage($summary_offset, $generation);
             } else {
                 $summary = array();
+                $ellipsis = "";
                 $description_hash = array();
                 foreach($lookup_info as $lookup_item) {
                     if(count($lookup_item) == 2) {
@@ -272,7 +275,8 @@ class ParallelModel extends Model implements CrawlConstants
                         }
                         if(!isset($description_hash[$description])){
                             $summary[self::DESCRIPTION] .=
-                                " .. ".$description;
+                                $ellipsis . $description;
+                            $ellipsis = " .. ";
                             $description_hash[$description] = true;
                         }
                         $copy = true;
ViewGit