Tweak documentation; modify how the lexicon is read in, a=chris

Chris Pollett [2018-05-10 19h]
Tweak documentation; modify how the lexicon is read in, a=chris
Filename
src/examples/StockBot.php
src/examples/WeatherBot.php
src/locale/en_US/resources/Tokenizer.php
src/locale/hi/resources/Tokenizer.php
src/models/BotModel.php
tests/HiTokenizerTest.php
diff --git a/src/examples/StockBot.php b/src/examples/StockBot.php
index f84c23228..03f015b38 100644
--- a/src/examples/StockBot.php
+++ b/src/examples/StockBot.php
@@ -175,7 +175,8 @@ class StockBot
     /**
      * Get stock price information for a ticker symbol
      *
-     * @param string $args[0] should be the name to get stock price for
+     * @param array $args the value of $args[0] should be the name
+     *      to get the stock price for
      * @return string stock price information
      */
     function getStockPrice($args)
@@ -197,7 +198,8 @@ class StockBot
     /**
      * Get ticker symbol for a company name
      *
-     * @param array $args[0] should be the company name to get ticker symbol for
+     * @param array $args the value of $args[0] should be the company name to
+     *      get ticker symbol for
      * @return string ticker symbol
      */
     function getSymbol($args)
diff --git a/src/examples/WeatherBot.php b/src/examples/WeatherBot.php
index 5f7f94719..53f388367 100644
--- a/src/examples/WeatherBot.php
+++ b/src/examples/WeatherBot.php
@@ -163,7 +163,8 @@ class WeatherBot
     /**
      * Get weather information about a location
      *
-     * @param array $args[0] should have the location to get weather update for
+     * @param array $args the value of $args[0] should have
+     *      the location to get weather update for
      * @return string weather information
      */
     function getWeather($args)
@@ -183,7 +184,8 @@ class WeatherBot
      * Return which location is warmer, the one stored in $args[0] or the
      * one stored in $args[1]
      *
-     * @param array $args[0] should have the location to get weather update for
+     * @param array $args the values of $args[0] and $args[1] should be
+     *      the two locations to compare for warmth
      * @return string weather information
      */
     function getWarmer($args)
diff --git a/src/locale/en_US/resources/Tokenizer.php b/src/locale/en_US/resources/Tokenizer.php
index 473ce13eb..d735fe1fe 100755
--- a/src/locale/en_US/resources/Tokenizer.php
+++ b/src/locale/en_US/resources/Tokenizer.php
@@ -378,22 +378,15 @@ class Tokenizer
    public static function tagTokenizePartOfSpeech($text)
     {
         static $dictionary = [];
+        static $dictionary = [];
+        $lexicon_file = C\LOCALE_DIR . "/en-US/resources/lexicon.txt.gz";
         if (empty($dictionary)) {
-            $serial_lex_file = C\LOCALE_DIR .
-                "/en_US/resources/serial_lexicon.txt";
-            $lex_file = C\LOCALE_DIR . "/en_US/resources/lexicon.txt.gz";
-            if (file_exists($serial_lex_file) &&
-                filemtime($serial_lex_file) > filemtime($lex_file)) {
-                $dictionary = unserialize(file_get_contents($serial_lex_file));
-            } else {
-                $lines = gzfile($lex_file);
+            if (file_exists($lexicon_file)) {
+                $lines = gzfile($lexicon_file);
                 foreach ($lines as $line) {
-                    $tags = explode(' ', $line);
-                    $dictionary[strtolower(array_shift($tags))] = $tags;
+                    $tags = preg_split('/(\s+|\,)/u', trim($line));
+                    $dictionary[array_shift($tags)] = array_filter($tags);
                 }
-                $dictionary_string = serialize($dictionary);
-                file_put_contents($serial_lex_file, $dictionary_string);
-                chmod($serial_lex_file, 0777);
             }
         }
         preg_match_all("/[\w\d]+/", $text, $matches);
diff --git a/src/locale/hi/resources/Tokenizer.php b/src/locale/hi/resources/Tokenizer.php
index 47db3ecf2..c4a53107b 100755
--- a/src/locale/hi/resources/Tokenizer.php
+++ b/src/locale/hi/resources/Tokenizer.php
@@ -141,21 +141,14 @@ class Tokenizer
     public static function tagTokenizePartOfSpeech($text)
     {
         static $dictionary = [];
+        $lexicon_file = C\LOCALE_DIR . "/hi/resources/lexicon.txt.gz";
         if (empty($dictionary)) {
-            $serial_lex_file = C\LOCALE_DIR .
-                "/hi/resources/serial_lexicon.txt";
-            if (file_exists($serial_lex_file)) {
-                $dictionary = unserialize(file_get_contents($serial_lex_file));
-            } else {
-                $lines = gzfile(C\LOCALE_DIR . "/hi/resources/lexicon.txt.gz");
+            if (file_exists($lexicon_file)) {
+                $lines = gzfile($lexicon_file);
                 foreach ($lines as $line) {
                     $tags = preg_split('/(\s+|\,)/u', trim($line));
-                    $dictionary[base64_encode(array_shift($tags))] =
-                        array_filter($tags);
+                    $dictionary[array_shift($tags)] = array_filter($tags);
                 }
-                $dictionary_string = serialize($dictionary);
-                file_put_contents($serial_lex_file, $dictionary_string);
-                chmod($serial_lex_file, 0777);
             }
         }
         $tokens = preg_split("/\s+/u", $text);
@@ -168,8 +161,8 @@ class Tokenizer
             $token = trim($token);
             $current = ["token" => $token, "tag" => "UNKNOWN"];
             $term = $current["token"];
-            if (!empty($dictionary[base64_encode($token)])) {
-                $tag_list = $dictionary[base64_encode($token)];
+            if (!empty($dictionary[$token])) {
+                $tag_list = $dictionary[$token];
                 $current['tag'] = $tag_list[0];
             }
             if (is_numeric($token)) {
diff --git a/src/models/BotModel.php b/src/models/BotModel.php
index 0ef0cd62a..21c5ac255 100644
--- a/src/models/BotModel.php
+++ b/src/models/BotModel.php
@@ -93,7 +93,7 @@ class BotModel extends Model
     /**
      * Get pattern the its id
      *
-     * @param int $expression_id to use to look up an expression
+     * @param int $pattern_id to use to look up an expression
      * @return array expression corresponding to the expressionid.
      */
     public function getPatternById($pattern_id)
@@ -134,7 +134,17 @@ class BotModel extends Model
         $this->db->execute($sql, $params);
     }
     /**
-     * {@inheritDoc}
+     * Controls which tables, and the names of those tables,
+     * underlie the given model and should be used in a getRows call.
+     * This defaults to the single table whose name is whatever is before
+     * Model in the name of the model. For example, by default on FooModel
+     * this method would return "FOO". If a different behavior is needed,
+     * this can be overridden in subclasses of Model.
+     *
+     * @param mixed $args any additional arguments which should be used to
+     *     determine these tables
+     * @return string a comma separated list of tables suitable for a SQL
+     *     query
      */
     public function fromCallback($args = null)
     {
diff --git a/tests/HiTokenizerTest.php b/tests/HiTokenizerTest.php
index a46e5380a..493169e05 100644
--- a/tests/HiTokenizerTest.php
+++ b/tests/HiTokenizerTest.php
@@ -97,12 +97,12 @@ class HiTokenizerTest extends UnitTest
         }
     }
     /**
-     *
+     * Tests that the phrase tagger can correctly assign parts of speech to
+     * the Hindi translation of "Mahatma Gandhi's birth was on October 2"
      */
     public function partsOfSpeechTestCase()
     {
         $tokenizer = $this->test_objects['FILE1'];
         echo $tokenizer::tagPartsOfSpeechPhrase("महामा गाँधी का जम 2 अक्टूबर को हुआ");
-
     }
 }
ViewGit