Improvements to the speed with which computeSafeScore is calculated, add more timing info to Indexer, a=chris

Chris Pollett [2022-08-09 21:Aug:th]
Improvements to the speed with which computeSafeScore is calculated, add more timing info to Indexer, a=chris
Filename
src/controllers/SearchController.php
src/controllers/components/CrawlComponent.php
src/executables/ArcTool.php
src/library/FeedDocumentBundle.php
src/library/IndexArchiveBundle.php
src/library/IndexDocumentBundle.php
src/library/PhraseParser.php
tests/PhraseParserTest.php
diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php
index f7dedf975..1080a47a7 100755
--- a/src/controllers/SearchController.php
+++ b/src/controllers/SearchController.php
@@ -2206,9 +2206,8 @@ EOD;
             }
             $word_lists = PhraseParser::extractPhrasesInLists(
                 $phrase_string, $crawl_item[self::LANG]);
-            $len = strlen($phrase_string);
-            if (PhraseParser::computeSafeSearchScore($word_lists['WORD_LIST'],
-                $len, $crawl_item[self::URL]) < 0.012) {
+            if (PhraseParser::computeSafeSearchScore($phrase_string,
+                $crawl_item[self::URL]) < PhraseParser::SAFE_PHRASE_THRESHOLD) {
                 $meta_ids[] = "safe:true";
                 $safe = true;
             } else {
diff --git a/src/controllers/components/CrawlComponent.php b/src/controllers/components/CrawlComponent.php
index 96a190e71..50e72248a 100644
--- a/src/controllers/components/CrawlComponent.php
+++ b/src/controllers/components/CrawlComponent.php
@@ -2039,9 +2039,8 @@ class CrawlComponent extends Component implements CrawlConstants
                 }
                 $word_lists = PhraseParser::extractPhrasesInLists(
                     $phrase_string, $lang);
-                $len = strlen($phrase_string);
-                if (PhraseParser::computeSafeSearchScore(
-                    $word_lists['WORD_LIST'], $len) < 0.012) {
+                if (PhraseParser::computeSafeSearchScore($phrase_string) <
+                    PhraseParser::SAFE_PHRASE_THRESHOLD) {
                     $meta_ids[] = "safe:true";
                     $safe = true;
                 } else {
diff --git a/src/executables/ArcTool.php b/src/executables/ArcTool.php
index 8beeafb39..22b2c58a2 100755
--- a/src/executables/ArcTool.php
+++ b/src/executables/ArcTool.php
@@ -1185,6 +1185,8 @@ EOD;
         $next_partition = $start_generation;
         $continue = false;
         $dictionary_log = C\LOG_DIR . "/0-DictionaryUpdater.log";
+        $fp = fopen($dictionary_log, "w");
+        fclose($fp);
         while ($next_partition < $save_partition) {
             if ($old_next_partition != $next_partition) {
                 $old_next_partition = $next_partition;
diff --git a/src/library/FeedDocumentBundle.php b/src/library/FeedDocumentBundle.php
index ddad7a6c4..b6bbd4c3a 100644
--- a/src/library/FeedDocumentBundle.php
+++ b/src/library/FeedDocumentBundle.php
@@ -266,8 +266,8 @@ class FeedDocumentBundle extends IndexDocumentBundle
                 $source_name, $item[self::HASH], $media_category);
             $len = strlen($phrase_string);
             $word_list = $word_and_qa_lists["WORD_LIST"];
-            if (PhraseParser::computeSafeSearchScore($word_list, $len,
-                $item[self::URL]) < 0.012) {
+            if (PhraseParser::computeSafeSearchScore($phrase_string,
+                $item[self::URL]) < PhraseParser::SAFE_PHRASE_THRESHOLD) {
                 $meta_ids[] = "safe:true";
                 $meta_ids[] = "safe:all";
             } else {
diff --git a/src/library/IndexArchiveBundle.php b/src/library/IndexArchiveBundle.php
index 0e370bf58..36c8d5f10 100644
--- a/src/library/IndexArchiveBundle.php
+++ b/src/library/IndexArchiveBundle.php
@@ -556,10 +556,9 @@ class IndexArchiveBundle implements CrawlConstants
                     $word_and_qa_lists = PhraseParser::extractPhrasesInLists(
                         $phrase_string, $lang);
                     $word_lists = $word_and_qa_lists['WORD_LIST'];
-                    $len = strlen($phrase_string);
                     if (isset($this->programming_language_extension[$lang]) ||
-                        PhraseParser::computeSafeSearchScore($word_lists, $len,
-                            $site_url) < 0.012) {
+                        PhraseParser::computeSafeSearchScore($phrase_string,
+                            $site_url) < PhraseParser::SAFE_PHRASE_THRESHOLD) {
                         $meta_ids[] = "safe:all";
                         $meta_ids[] = "safe:true";
                         $is_safe = true;
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index 97790453e..6ec4168bf 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -607,8 +607,8 @@ class IndexDocumentBundle implements CrawlConstants
         $this->positions = "";
         crawlLog("Indexer Preparing Index Map...");
         $index_map = $this->prepareIndexMap($partition);
-        crawlLog("Number of documents in mapped partition:" .
-            count($index_map));
+        crawlLog("Done Prepare Index Map. Number of documents in mapped ".
+            "partition:" . count($index_map));
         $cnt = 0;
         $non_aux_doc_cnt = 0;
         $link_cnt = 0;
@@ -616,11 +616,21 @@ class IndexDocumentBundle implements CrawlConstants
         $doc_field = self::DOC_ID;
         $score_field = self::SCORE;
         $aux_docs_field = self::AUX_DOCS;
+        $get_summaries_time = 0;
+        $aux_get_summaries_time = 0;
+        $safe_score_time = 0;
+        $safe_meta_score_time = 0;
+        $invert_pages_time = 0;
+        $invert_metas_time = 0;
+        $invert_links_time = 0;
+        $this->extract_phrase_time = 0;
         foreach ($index_map as $hash_url => $url_info) {
             $site = [];
             $non_aux_doc_cnt++;
             if (!empty($url_info[$doc_field])) {
+                $start_get_summaries = microtime(true);
                 $site = $this->getSummary($url_info[$doc_field], $partition);
+                $get_summaries_time += changeInMicrotime($start_get_summaries);
                 if (empty($site) || !is_array($site)) {
                     continue;
                 }
@@ -635,23 +645,28 @@ class IndexDocumentBundle implements CrawlConstants
             $metas_only = ($url_info[$aux_docs_field] == 'metas_only');
             $aux_description = "";
             $tmp_description = $site[self::DESCRIPTION] ?? "";
-            $len = strlen($tmp_description);
             if (isset($site[self::TYPE]) && $site[self::TYPE] == "link") {
                 $site_url = $site[self::TITLE];
             } else {
                 $site_url = str_replace('|', "%7C", $site[self::URL] ?? "");
             }
             if ($metas_only) {
-                if (PhraseParser::computeSafeSearchScore($tmp_description, $len,
-                        $site_url) < 0.012) {
+                $start_safe_meta_time = microtime(true);
+                if (PhraseParser::computeSafeSearchScore($tmp_description,
+                        $site_url) < PhraseParser::SAFE_PHRASE_THRESHOLD) {
                     $site[self::IS_SAFE] = true;
                     $url_info[self::IS_SAFE] = true;
                 } else {
                     $site[self::IS_SAFE] = false;
                     $url_info[self::IS_SAFE] = false;
                 }
+                $safe_meta_score_time +=
+                    changeInMicrotime($start_safe_meta_time);
                 $site[self::JUST_METAS] = true;
+                $start_invert_metas = microtime(true);
                 $site_url = $this->invertOneSite($site, $url_info, $link_cnt);
+                $invert_metas_time +=
+                    changeInMicrotime($start_invert_metas);
                 continue;
             }
             /*
@@ -662,7 +677,10 @@ class IndexDocumentBundle implements CrawlConstants
             $aux_sites = [];
             foreach ($pre_aux_docs as $pre_aux_doc) {
                 $aux_doc = decode255($pre_aux_doc);
+                $start_get_summaries = microtime(true);
                 $aux_site = $this->getSummary($aux_doc, $partition);
+                $aux_get_summaries_time +=
+                    changeInMicrotime($start_get_summaries);
                 if (empty($aux_site) || !is_array($aux_site)) {
                     $aux_site = []; // make sure empty
                     continue;
@@ -687,20 +705,27 @@ class IndexDocumentBundle implements CrawlConstants
             }
             $site[self::DESCRIPTION] ??= "";
             $site[self::DESCRIPTION] .= $aux_description;
+            $start_safe_time = microtime(true);
             if (PhraseParser::computeSafeSearchScore($site[self::DESCRIPTION],
-                $len, $site_url) < 0.012) {
+                $site_url) < PhraseParser::SAFE_PHRASE_THRESHOLD) {
                 $site[self::IS_SAFE] = true;
                 $url_info[self::IS_SAFE] = true;
             } else {
                 $site[self::IS_SAFE] = false;
                 $url_info[self::IS_SAFE] = false;
             }
+            $safe_score_time +=
+                changeInMicrotime($start_safe_time);
             $cnt++;
+            $start_invert_page = microtime(true);
             $site_url = $this->invertOneSite($site, $url_info, $link_cnt);
+            $invert_pages_time += changeInMicrotime($start_invert_page);
             foreach ($aux_sites as $aux_site) {
                 $cnt++;
+                $start_invert_links = microtime(true);
                 $site_url = $this->invertOneSite($aux_site, $url_info,
                     $link_cnt);
+                $invert_links_time += changeInMicrotime($start_invert_links);
             }
             $memory_usage = memory_get_usage();
             $link_to = (isset($site[self::TYPE]) &&
@@ -736,13 +761,29 @@ class IndexDocumentBundle implements CrawlConstants
             ];
             return $statistics;
         }
+        $start_save_times = microtime(true);
         $doc_map_tools->save($doc_map_filename, $this->doc_map);
         ksort($this->postings);
         $postings_tools->save($postings_filename, $this->postings);
         $last_entries_tools->save($last_entries_filename, $this->last_entries);
         file_put_contents($positions_filename, $this->positions);
+        $final_save_time = changeInMicrotime($start_save_times);
+        $time_string = makeTimestamp();
         crawlLog("  Indexer build inverted index time ".
-            changeInMicrotime($start_time));
+            changeInMicrotime($start_time) .
+            "\n$time_string  ..Component times:" .
+            "\n$time_string  ....Get page summaries time: $get_summaries_time" .
+            "\n$time_string  ....Get link summaries time: " .
+                $aux_get_summaries_time .
+            "\n$time_string  ....Compute Safe Page time: $safe_score_time" .
+            "\n$time_string  ....Compute Safe Meta time: $safe_meta_score_time".
+            "\n$time_string  ....Invert pages time: $invert_pages_time" .
+            "\n$time_string  ....Invert meta pages time: $invert_metas_time" .
+            "\n$time_string  ....Invert links time: $invert_links_time" .
+            "\n$time_string  ....Final file saves time: $final_save_time" .
+            "\n$time_string  ----" .
+            "\n$time_string  ....Of Invert times, time in " .
+                "extractPhrasesInLists:". $this->extract_phrase_time);
         return true;
     }
     /**
@@ -840,6 +881,11 @@ class IndexDocumentBundle implements CrawlConstants
             }
             $word_and_qa_lists = PhraseParser::extractPhrasesInLists(
                 $phrase_string, $lang);
+            if (!isset($this->extract_phrase_time)) {
+                $this->extract_phrase_time = 0;
+            }
+            $this->extract_phrase_time +=
+                $word_and_qa_lists['TIMES']['TOTAL_TIME'] ?? 0;
             $word_lists = $word_and_qa_lists['WORD_LIST'];
             if (!empty($word_lists["r6t"][2])) {
                 if ($path_keywords_end_pos < 255) {
diff --git a/src/library/PhraseParser.php b/src/library/PhraseParser.php
index 39fadde13..b9bb938e5 100755
--- a/src/library/PhraseParser.php
+++ b/src/library/PhraseParser.php
@@ -83,6 +83,10 @@ class PhraseParser
      * Indicates the control word for programming languages
      */
     const REGEX_INITIAL_POSITION = 1;
+    /**
+     * Threshold to use for a string to be considered "safe" (not X-rated)
+     */
+    const SAFE_PHRASE_THRESHOLD = 0.03;
     /**
      * Converts a summary of a web page into a string of space separated words
      *
@@ -202,7 +206,8 @@ class PhraseParser
         $start_time = microtime(true);
         $phrase_list = ['TIMES' => [ 'CANONICALIZE' => 0,
             'TERM_POSITIONS_SENTENCE_TAGGING' => 0,
-            'QUESTION_ANSWER_EXTRACT' => 0]];
+            'QUESTION_ANSWER_EXTRACT' => 0,
+            'TOTAL_TIME' => 0]];
         if (!isset(self::$programming_language_map[$lang])) {
             self::canonicalizePunctuatedTerms($string, $lang);
             self::hyphenateEntities($string, $lang);
@@ -236,6 +241,7 @@ class PhraseParser
             }
         }
         $phrase_list['WORD_LIST'] = $phrase_and_sentences["TERM_POSITIONS"];
+        $phrase_list['TIMES']['TOTAL_TIME'] = changeInMicrotime($start_time);
         return $phrase_list;
     }
     /**
@@ -1447,42 +1453,35 @@ class PhraseParser
      * Scores documents according to the lack or nonlack of sexually explicit
      * terms. Tries to work for several languages. Very crude classifier.
      *
-     * @param array|string $word_lists if array, word => pos_list tuples
-     *  if string, then string will be converted to such an array first
-     * @param int $len length of text being examined in characters
+     * @param string $phrase to check for X-ratedness
      * @param string $url optional url the phrase came from, used to check
      *  against known porn sites
-     * @return int $score of how explicit document is
+     * @return float $score of how explicit the phrase is, between 0 and 1
      */
-    public static function computeSafeSearchScore($word_lists, $len, $url = "")
+    public static function computeSafeSearchScore($phrase, $url = "")
     {
-        static $unsafe_phrase = "
-XXX sex slut nymphomaniac MILF lolita lesbian sadomasochism
-bondage fisting erotic vagina Tribadism penis facial hermaphrodite
-transsexual tranny bestiality snuff boob fondle tit
-blowjob lap cock dick hardcore pr0n fuck pussy penetration ass
-cunt bisexual prostitution screw ass masturbation clitoris clit suck whore
-bitch cuckold porn femdom exhibitionism
-bellaco cachar chingar shimar chinquechar chichar clavar coger culear hundir
-joder mámalo singar cojon carajo caray bicho concha chucha chocha
-chuchamadre coño panocha almeja culo fundillo fundío puta puto teta
-connorito cul pute putain sexe pénis vulve foutre baiser sein nicher nichons
-puta sapatão foder ferro punheta vadia buceta bucetinha bunda caralho
-mentula cunnus verpa sōpiō pipinna
-cōleī cunnilingus futuō copulate cēveō crīsō
-scortor meretrīx futatrix minchia coglione cornuto culo inocchio frocio puttana
-vaffanculo fok hoer kut lul やりまん 打っ掛け
- 二形 ふたなりゴックン ゴックン
-ショタコン 全裸 受け 裏本 пизда́ хуй еба́ть
-блядь елда́ гондо́н хер манда́ му́ди мудя
-пидора́с залу́па жо́па за́дница буфер
-雞巴 鷄巴 雞雞 鷄鷄 阴茎 陰莖 胯下物
-屌 吊 小鳥 龟头 龜頭 屄 鸡白 雞白 傻屄 老二
-那话儿 那話兒 屄 鸡白 雞白 阴道 陰道
-阴户 陰戶 大姨妈 淫蟲 老嫖 妓女 臭婊子 卖豆腐
-賣豆腐 咪咪 大豆腐 爆乳 肏操
-炒饭 炒飯 cặc lồn kaltak orospu siktir sıçmak amcık";
-        static $unsafe_terms = [];
+        static $pre_unsafe_regex = "XXX|sex|slut|nymphomaniac|MILF|lolita|" .
+            "lesbian|sadomasochism|bondage|fisting|erotic|vagina|Tribadism|" .
+            "penis|facial|hermaphrodite|transsexual|tranny|bestiality|snuff|" .
+            "boob|fondle|tit|blowjob|lap|cock|dick|hardcore|pr0n|fuck|pussy|" .
+            "penetration|ass|cunt|bisexual|prostitution|screw|ass|melon|" .
+            "masturbation|clitoris|clit|suck|whore|bitch|cuckold|porn|" .
+            "femdom|exhibitionism|bellaco|cachar|chingar|shimar|chinquechar|" .
+            "chichar|clavar|coger|culear|hundir|joder|mámalo|singar|cojon|" .
+            "carajo|caray|bicho|concha|chucha|chocha|chuchamadre|coño|" .
+            "panocha|almeja|culo|fundillo|fundío|puta|puto|teta|connorito|" .
+            "cul|pute|putain|sexe|pénis|vulve|foutre|baiser|sein|nicher|" .
+            "nichons|puta|sapatão|foder|ferro|punheta|vadia|buceta|bucetinha|" .
+            "bunda|caralho|mentula|cunnus|verpa|sōpiō|pipinna|cōleī|" .
+            "cunnilingus|futuō|copulate|cēveō|crīsō|scortor|meretrīx|" .
+            "futatrix|minchia|coglione|cornuto|culo|inocchio|frocio|puttana|" .
+            "vaffanculo|fok|hoer|kut|lul|やりまん|打っ掛け|二形|ふたなりゴックン|" .
+            "ゴックン|ショタコン|全裸|受け|裏本|пизда́|хуй|еба́ть|блядь|елда́|гондо́н|" .
+            "хер|манда́|му́ди|мудя|пидора́с|залу́па|жо́па|за́дница|буфер|雞巴|鷄巴|" .
+            "雞雞|鷄鷄|阴茎|陰莖|胯下物|屌|吊|小鳥|龟头|龜頭|屄|鸡白|雞白|傻屄|老二|" .
+            "那话儿|那話兒|屄|鸡白|雞白|阴道|陰道|阴户|陰戶|大姨妈|淫蟲|老嫖|妓女|" .
+            "臭婊子|卖豆腐|賣豆腐|咪咪|大豆腐|爆乳|肏操|炒饭|炒飯|cặc|lồn|kaltak|" .
+            "orospu|siktir|sıçmak|amcık";
         /* took keywords from top level domains from some of theporndude list
          */
         static $unsafe_url_regex = "/porn|xvideos|livejasmin|".
@@ -1517,33 +1516,29 @@ vaffanculo fok hoer kut lul やりまん 打っ掛け
         if (!empty($url) && preg_match($unsafe_url_regex, $url)) {
             return 1;
         }
-        if (is_string($word_lists)) {
-            $lang = guessLocaleFromString($word_lists);
-            $word_and_qa_lists = PhraseParser::extractPhrasesInLists(
-                $word_lists, $lang);
-            $word_lists = $word_and_qa_lists['WORD_LIST'];
-        }
-        if (count($word_lists) == 0) {
+        if (empty($phrase)) {
             return 0;
+        } else if (!is_string($phrase)) { // wrong type is X-rated!
+            return 1;
         }
-        if ($unsafe_terms == []) {
-            $triplet_list = PhraseParser::extractPhrasesInLists($unsafe_phrase,
-                 "en-US");
-            $unsafe_lists = $triplet_list['WORD_LIST'];
-            $unsafe_terms = array_keys($unsafe_lists);
-        }
-        $num_unsafe_terms = 0;
-        $unsafe_count = 0;
-        $words = array_keys($word_lists);
-        $unsafe_found = array_intersect($words, $unsafe_terms);
-        foreach ($unsafe_found as $term) {
-            $count = count($word_lists[$term]);
-            if ($count > 0 ) {
-                $unsafe_count += $count;
-                $num_unsafe_terms++;
-            }
+        $term_boundaries = preg_match_all("/\b/", $phrase);
+        $len = max(mb_strlen($phrase), 1);
+        /*
+           8 characters is greater than the average word length for most
+           languages. So if the number of term boundaries is < the length
+           of the string/8, likely we have a language which doesn't use
+           word boundaries like Chinese. In this case, we will assume
+           around 3 character per word (maybe higher for Chinese, low for
+           Japanese or Korean?)
+         */
+        if ($term_boundaries < ceil($len/8)) { //maybe text
+            $term_boundaries = ceil($len/3);
+            $unsafe_regex = "/$pre_unsafe_regex/ui";
+        } else {
+            $unsafe_regex = "/\b$pre_unsafe_regex\b/ui";
         }
-        $score = $num_unsafe_terms * $unsafe_count/($len + 1);
+        $match_count = preg_match_all($unsafe_regex, $phrase);
+        $score = $match_count/$term_boundaries;
         return $score;
     }
     /**
diff --git a/tests/PhraseParserTest.php b/tests/PhraseParserTest.php
index 98eddfca2..2dbf7ae4f 100644
--- a/tests/PhraseParserTest.php
+++ b/tests/PhraseParserTest.php
@@ -160,25 +160,18 @@ small table the the the the the the the the the the the their there there
 this those three to to to trap uncle uncle wagon walls was was was was was
 were where which which whirlwinds who who wife with
 EOD;
-        $extracted_data = PhraseParser::extractPhrasesInLists($phrase_string,
-            "en-US");
-        $word_lists = $extracted_data['WORD_LIST'];
-        $len = strlen($phrase_string);
-        $score = PhraseParser::computeSafeSearchScore($word_lists, $len);
-        $this->assertTrue(($score < 0.012), "Easy Safe Test 1");
-
+        $score = PhraseParser::computeSafeSearchScore($phrase_string);
+        $this->assertTrue(($score < PhraseParser::SAFE_PHRASE_THRESHOLD),
+            "Easy Safe Test 1");
         $phrase_string = <<< EOD
 a afraid all and anon baby big boobs but cock crave dicking does
 for from grown has how in is isnt knot lolita matts monster pussies ready
 she she shew slut teens their thom them thought they're tight to to to total
 up use whether
 EOD;
-        $extracted_data = PhraseParser::extractPhrasesInLists($phrase_string,
-            "en-US");
-        $word_lists = $extracted_data['WORD_LIST'];
-        $len = strlen($phrase_string);
-        $score = PhraseParser::computeSafeSearchScore($word_lists, $len);
-        $this->assertTrue(($score > 0.012), "Easy Unsafe Test 1");
+        $score = PhraseParser::computeSafeSearchScore($phrase_string);
+        $this->assertTrue(($score > PhraseParser::SAFE_PHRASE_THRESHOLD),
+            "Easy Unsafe Test 1");
         $phrase_string = <<< EOD
 a a a a a adventure after all alotta amazing and and and and and
 and and and and and around as ball ball big body boobies bounce boy
@@ -192,12 +185,9 @@ those those those tit titties titty to to to togo today tramp truly
 us was we we we what what when what wild with with with workout wrap yes
 you
 EOD;
-        $word_lists = PhraseParser::extractPhrasesInLists($phrase_string,
-            "en-US");
-        $word_lists = $extracted_data['WORD_LIST'];
-        $len = strlen($phrase_string);
-        $score = PhraseParser::computeSafeSearchScore($word_lists, $len);
-        $this->assertTrue(($score > 0.012), "Harder Unsafe Test 1");
+        $score = PhraseParser::computeSafeSearchScore($phrase_string);
+        $this->assertTrue(($score > PhraseParser::SAFE_PHRASE_THRESHOLD),
+            "Harder Unsafe Test 1");
         $phrase_string = <<< EOD
 amino hog known a a a a an and and
 and and are are as as asymmetry be biology both but can cases cells
@@ -210,12 +200,9 @@ over parents process reproduce reproduce result sex sex sexual
 sexual small specialist specialized specific such that that the the the
 the their to to traits traits transport two types variety while young
 EOD;
-        $extracted_data = PhraseParser::extractPhrasesInLists($phrase_string,
-            "en-US");
-        $word_lists = $extracted_data['WORD_LIST'];
-        $len = strlen($phrase_string);
-        $score = PhraseParser::computeSafeSearchScore($word_lists, $len);
-        $this->assertTrue(($score < 0.012), "Harder Safe Test 1");
+        $score = PhraseParser::computeSafeSearchScore($phrase_string);
+        $this->assertTrue(($score < PhraseParser::SAFE_PHRASE_THRESHOLD),
+            "Harder Safe Test 1");
         $phrase_string = <<< EOD
 a a active adverb an an and are as as as attribute be
 between by caught characterized daft describe describe desire desire deft
@@ -224,12 +211,9 @@ lesbian may moist verb object of of or or or others secondary refer relay
 romantic same sex sexual trim the the the them to to to to to used
 used who who wide women ward
 EOD;
-        $extracted_data = PhraseParser::extractPhrasesInLists($phrase_string,
-            "en-US");
-        $word_lists = $extracted_data['WORD_LIST'];
-        $len = strlen($phrase_string);
-        $score = PhraseParser::computeSafeSearchScore($word_lists, $len);
-        $this->assertTrue(($score < 0.012), "Harder Safe Test 2");
+        $score = PhraseParser::computeSafeSearchScore($phrase_string);
+        $this->assertTrue(($score < PhraseParser::SAFE_PHRASE_THRESHOLD),
+            "Harder Safe Test 2");
     }
     /**
      * Tests whether chargrams are computed correctly from various strings
ViewGit