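Trying to improve snippet text, take 2, a=chris
Short text sources are now added to a snippet only if they contain one of the words, the word list is reset for each text source, scanning of a source stops once no words remain, and the " .. " separator is only placed before descriptions after the first.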
diff --git a/models/model.php b/models/model.php
index 711a241a3..eae682e7b 100755
--- a/models/model.php
+++ b/models/model.php
@@ -230,19 +230,29 @@ class Model implements CrawlConstants
$ellipsis = "";
$words = array_unique($words);
- $words = array_filter($words);
+ $start_words = array_filter($words);
$snippet_string = "";
$snippet_hash = array();
$text_sources = explode(" .. ", $text);
foreach($text_sources as $text_source) {
$len = mb_strlen($text_source);
$offset = 0;
+ $words = $start_words;
if(strlen($text_source) < MIN_SNIPPET_LENGTH) {
if(!isset($snippet_hash[$text_source])) {
- $snippet_string .= $ellipsis. $text_source;
- $ellipsis = " ... ";
- $snippet_hash[$text_source] = true;
- if(strlen($snippet_string) >= $description_length) break;
+ $found = false;
+ foreach($words as $word) {
+ if(mb_stristr($text_source, $word) !== false) {
+ $found = true;
+ break;
+ }
+ }
+ if($found) {
+ $snippet_string .= $ellipsis. $text_source;
+ $ellipsis = " ... ";
+ $snippet_hash[$text_source] = true;
+ if(strlen($snippet_string)>= $description_length) break;
+ }
}
continue;
}
@@ -287,6 +297,7 @@ class Model implements CrawlConstants
if(strlen($snippet_string) >= $description_length) break 3;
}
$words = array_values($word_locations);
+ if($words == array()) break;
$offset = $new_offset + 1;
} while($offset < $len);
}
diff --git a/models/parallel_model.php b/models/parallel_model.php
index 5891434e4..ed41e7940 100755
--- a/models/parallel_model.php
+++ b/models/parallel_model.php
@@ -180,6 +180,7 @@ class ParallelModel extends Model implements CrawlConstants
$description_hash = array();
$result = unserialize(webdecode($elt[self::PAGE]));
if(!is_array($result)) continue;
+ $ellipsis = "";
foreach($result as $lookup => $summary) {
if(isset($summaries[$lookup])) {
if(isset($summary[self::DESCRIPTION])) {
@@ -188,8 +189,9 @@ class ParallelModel extends Model implements CrawlConstants
$summaries[$lookup][self::DESCRIPTION] = "";
}
if(!isset($description_hash[$description])){
- $summaries[$lookup][self::DESCRIPTION] = " .. ".
- $description;
+ $summaries[$lookup][self::DESCRIPTION] =
+ $ellipsis . $description;
+ $ellipsis = " .. ";
$description_hash[$description] = true;
}
}
@@ -236,6 +238,7 @@ class ParallelModel extends Model implements CrawlConstants
$index_archive->getPage($summary_offset, $generation);
} else {
$summary = array();
+ $ellipsis = "";
$description_hash = array();
foreach($lookup_info as $lookup_item) {
if(count($lookup_item) == 2) {
@@ -272,7 +275,8 @@ class ParallelModel extends Model implements CrawlConstants
}
if(!isset($description_hash[$description])){
$summary[self::DESCRIPTION] .=
- " .. ".$description;
+ $ellipsis . $description;
+ $ellipsis = " .. ";
$description_hash[$description] = true;
}
$copy = true;