take 4, a=chris

Chris Pollett [2019-02-16 22h]
take 4, a=chris
Filename
src/models/Model.php
diff --git a/src/models/Model.php b/src/models/Model.php
index 4e84d8343..041c62dda 100755
--- a/src/models/Model.php
+++ b/src/models/Model.php
@@ -294,7 +294,7 @@ class Model implements CrawlConstants
         $left3 = $left - 3;
         $right = self::SNIPPET_LENGTH_RIGHT;
         //note we don't want ' to count as the cause of a word boundary
-        $start_regex2 = "/(?=[^\'].{1,$left})\b.{0,$left}(?:(?:";
+        $start_regex2 = "/\b(\w{3}.{0,$left3})?(?:(?:";
         $end_regex = "/ui";
         $end_regex2 = ").{0,$right}\b)+/ui";
         if (mb_strlen($text) < $description_length) {
@@ -317,18 +317,16 @@ class Model implements CrawlConstants
             }
         }
         $snippet_string = "";
-        $snippet_hash = [];
-        $text_sources = explode(".. ", $text);
+        $text_sources = array_filter(array_unique(explode(".. ", $text)));
         foreach ($text_sources as $text_source) {
             $len = mb_strlen($text_source);
             $offset = 0;
             if ($len < self::MIN_SNIPPET_LENGTH) {
-                if (!isset($snippet_hash[$text_source])) {
-                    if (preg_match($start_regex . $word_regex.
-                        $end_regex, $text_source, $match)) {
+                if (preg_match($start_regex . $word_regex.
+                    $end_regex, $text_source, $match)) {
+                    if (stristr($snippet_string, $text_source) === false) {
                         $snippet_string .= $ellipsis. $text_source;
                         $ellipsis = " ... ";
-                        $snippet_hash[$text_source] = true;
                         if (mb_strlen($snippet_string) >= $description_length) {
                             break;
                         }
@@ -340,6 +338,7 @@ class Model implements CrawlConstants
             preg_match_all($start_regex2 . $word_regex . $end_regex2,
                 $text_source, $matches);
             if (isset($matches[0])) {
+                $seen_match = [];
                 foreach ($matches[0] as $match) {
                     if ($match >= $description_length) {
                         $match = mb_substr($match, 0, $description_length);
@@ -348,11 +347,13 @@ class Model implements CrawlConstants
                             $match = mb_substr($match, 0, $rpos);
                         }
                     }
-                    $snippet_string .= $ellipsis. trim($match, ".");
-                    $ellipsis = " ... ";
-                    $snippet_hash[$text_source] = true;
-                    if (mb_strlen($snippet_string) >= $description_length) {
-                        break;
+                    $match = trim($match, ".");
+                    if (stristr($snippet_string, $match) === false) {
+                        $snippet_string .= $ellipsis. $match;
+                        $ellipsis = " ... ";
+                        if (mb_strlen($snippet_string) >= $description_length) {
+                            break;
+                        }
                     }
                 }
             }
ViewGit