Tweak documentation, modify how the lexicon is read in, a=chris
Tweak documentation, modify how the lexicon is read in, a=chris
diff --git a/src/examples/StockBot.php b/src/examples/StockBot.php
index f84c23228..03f015b38 100644
--- a/src/examples/StockBot.php
+++ b/src/examples/StockBot.php
@@ -175,7 +175,8 @@ class StockBot
/**
* Get stock price information for a ticker symbol
*
- * @param string $args[0] should be the name to get stock price for
+ * @param array $args the value of $args[0] should be the name
+ * to get stock price for
* @return string stock price information
*/
function getStockPrice($args)
@@ -197,7 +198,8 @@ class StockBot
/**
* Get ticker symbol for a company name
*
- * @param array $args[0] should be the company name to get ticker symbol for
+ * @param array $args the value of $args[0] should be the company name to
+ * get ticker symbol for
* @return string ticker symbol
*/
function getSymbol($args)
diff --git a/src/examples/WeatherBot.php b/src/examples/WeatherBot.php
index 5f7f94719..53f388367 100644
--- a/src/examples/WeatherBot.php
+++ b/src/examples/WeatherBot.php
@@ -163,7 +163,8 @@ class WeatherBot
/**
* Get weather information about a location
*
- * @param array $args[0] should have the location to get weather update for
+ * @param array $args the value of $args[0] should have
+ * the location to get weather update for
* @return string weather information
*/
function getWeather($args)
@@ -183,7 +184,8 @@ class WeatherBot
* Return which location is warmer, the one stored in $args[0] or the
* one stored in $args[1]
*
- * @param array $args[0] should have the location to get weather update for
+ * @param array $args the values of $args[0] and $args[1] should be
+ * the two locations whose weather is to be compared
* @return string weather information
*/
function getWarmer($args)
diff --git a/src/locale/en_US/resources/Tokenizer.php b/src/locale/en_US/resources/Tokenizer.php
index 473ce13eb..d735fe1fe 100755
--- a/src/locale/en_US/resources/Tokenizer.php
+++ b/src/locale/en_US/resources/Tokenizer.php
@@ -378,22 +378,15 @@ class Tokenizer
public static function tagTokenizePartOfSpeech($text)
{
static $dictionary = [];
+ static $dictionary = [];
+ $lexicon_file = C\LOCALE_DIR . "/en-US/resources/lexicon.txt.gz";
if (empty($dictionary)) {
- $serial_lex_file = C\LOCALE_DIR .
- "/en_US/resources/serial_lexicon.txt";
- $lex_file = C\LOCALE_DIR . "/en_US/resources/lexicon.txt.gz";
- if (file_exists($serial_lex_file) &&
- filemtime($serial_lex_file) > filemtime($lex_file)) {
- $dictionary = unserialize(file_get_contents($serial_lex_file));
- } else {
- $lines = gzfile($lex_file);
+ if (file_exists($lexicon_file)) {
+ $lines = gzfile($lexicon_file);
foreach ($lines as $line) {
- $tags = explode(' ', $line);
- $dictionary[strtolower(array_shift($tags))] = $tags;
+ $tags = preg_split('/(\s+|\,)/u', trim($line));
+ $dictionary[array_shift($tags)] = array_filter($tags);
}
- $dictionary_string = serialize($dictionary);
- file_put_contents($serial_lex_file, $dictionary_string);
- chmod($serial_lex_file, 0777);
}
}
preg_match_all("/[\w\d]+/", $text, $matches);
diff --git a/src/locale/hi/resources/Tokenizer.php b/src/locale/hi/resources/Tokenizer.php
index 47db3ecf2..c4a53107b 100755
--- a/src/locale/hi/resources/Tokenizer.php
+++ b/src/locale/hi/resources/Tokenizer.php
@@ -141,21 +141,14 @@ class Tokenizer
public static function tagTokenizePartOfSpeech($text)
{
static $dictionary = [];
+ $lexicon_file = C\LOCALE_DIR . "/hi/resources/lexicon.txt.gz";
if (empty($dictionary)) {
- $serial_lex_file = C\LOCALE_DIR .
- "/hi/resources/serial_lexicon.txt";
- if (file_exists($serial_lex_file)) {
- $dictionary = unserialize(file_get_contents($serial_lex_file));
- } else {
- $lines = gzfile(C\LOCALE_DIR . "/hi/resources/lexicon.txt.gz");
+ if (file_exists($lexicon_file)) {
+ $lines = gzfile($lexicon_file);
foreach ($lines as $line) {
$tags = preg_split('/(\s+|\,)/u', trim($line));
- $dictionary[base64_encode(array_shift($tags))] =
- array_filter($tags);
+ $dictionary[array_shift($tags)] = array_filter($tags);
}
- $dictionary_string = serialize($dictionary);
- file_put_contents($serial_lex_file, $dictionary_string);
- chmod($serial_lex_file, 0777);
}
}
$tokens = preg_split("/\s+/u", $text);
@@ -168,8 +161,8 @@ class Tokenizer
$token = trim($token);
$current = ["token" => $token, "tag" => "UNKNOWN"];
$term = $current["token"];
- if (!empty($dictionary[base64_encode($token)])) {
- $tag_list = $dictionary[base64_encode($token)];
+ if (!empty($dictionary[$token])) {
+ $tag_list = $dictionary[$token];
$current['tag'] = $tag_list[0];
}
if (is_numeric($token)) {
diff --git a/src/models/BotModel.php b/src/models/BotModel.php
index 0ef0cd62a..21c5ac255 100644
--- a/src/models/BotModel.php
+++ b/src/models/BotModel.php
@@ -93,7 +93,7 @@ class BotModel extends Model
/**
* Get pattern the its id
*
- * @param int $expression_id to use to look up an expression
+ * @param int $pattern_id to use to look up an expression
* @return array expression corresponding to the expressionid.
*/
public function getPatternById($pattern_id)
@@ -134,7 +134,17 @@ class BotModel extends Model
$this->db->execute($sql, $params);
}
/**
- * {@inheritDoc}
+ * Controls which tables and the names of tables
+ * underlie the given model and should be used in a getRows call
+ * This defaults to the single table whose name is whatever is before
+ * Model in the name of the model. For example, by default on FooModel
+ * this method would return "FOO". If a different behavior is desired,
+ * this can be overridden in subclasses of Model
+ *
+ * @param mixed $args any additional arguments which should be used to
+ * determine these tables
+ * @return string a comma separated list of tables suitable for a SQL
+ * query
*/
public function fromCallback($args = null)
{
diff --git a/tests/HiTokenizerTest.php b/tests/HiTokenizerTest.php
index a46e5380a..493169e05 100644
--- a/tests/HiTokenizerTest.php
+++ b/tests/HiTokenizerTest.php
@@ -97,12 +97,12 @@ class HiTokenizerTest extends UnitTest
}
}
/**
- *
+ * Tests that the phrase tagger can assign parts of speech to the Hindi
+ * translation of the sentence "Mahatma Gandhi was born on October 2"
*/
public function partsOfSpeechTestCase()
{
$tokenizer = $this->test_objects['FILE1'];
echo $tokenizer::tagPartsOfSpeechPhrase("महामा गाँधी का जम 2 अक्टूबर को हुआ");
-
}
}