Last commit for tests/ZhTokenizerTest.php: 2addb500315b7393a90fe66431d7832b1e7386c7

Adjust copyrights years

Chris Pollett [2024-01-03 21:Jan:rd]
Adjust copyrights years
<?php
/**
 * SeekQuarry/Yioop --
 * Open Source Pure PHP Search Engine, Crawler, and Indexer
 *
 * Copyright (C) 2009 - 2022  Chris Pollett chris@pollett.org
 *
 * LICENSE:
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * END LICENSE
 *
 * @author Chris Pollett chris@pollett.org
 * @license https://www.gnu.org/licenses/ GPL3
 * @link https://www.seekquarry.com/
 * @copyright 2009 - 2022
 * @filesource
 */
namespace seekquarry\yioop\tests;

use seekquarry\yioop\configs as C;
use seekquarry\yioop\library as L;
use seekquarry\yioop\locale\zh_CN\resources\Tokenizer;
use seekquarry\yioop\library\UnitTest;

/**
 * Unit tests for the Chinese (zh-CN) language resources: named entity
 * tagging, part of speech tagging, and the mapping of traditional
 * characters to simplified ones. Word segmentation is not covered here as
 * it is already tested in
 * @see seekquarry\yioop\tests\PhraseParserTest
 */
class ZhTokenizerTest extends UnitTest
{
    /**
     * No shared fixture is prepared before a test; each test case below
     * constructs whatever tagger it needs itself
     */
    public function setUp()
    {
    }
    /**
     * No clean up is required after a test case has run
     */
    public function tearDown()
    {
    }
    /**
     * Checks that the zh-CN named entity tagger, when run on an unsegmented
     * Chinese sentence, outputs the expected entity_tag pairs
     */
    public function namedEntityTestCase()
    {
        $tagger = new L\NamedEntityContextTagger('zh-CN');
        $sentence = "孙向宏喜欢去洛杉矶旅游";
        $wanted = "孙向宏_nr 洛杉矶_ns";
        $this->assertEqual($tagger->tag($sentence), $wanted,
            "Named Entities Correctly Found in Chinese Source String");
    }
    /**
     * Checks that the zh-CN part of speech tagger, when run on a sentence
     * whose terms are already separated by spaces, outputs the expected
     * term_tag sequence
     */
    public function partOfSpeechTestCase()
    {
        $tagger = new L\PartOfSpeechContextTagger('zh-CN');
        $sentence = "印度 总统 是 印度 国家元首 和 " .
            "武装部队 总司令 有 该国 第一 公民 之 称";
        $wanted = "印度_NR 总统_NN 是_VC 印度_NR 国家元首_NN ".
            "和_CC 武装部队_NN 总司令_NN 有_VE 该国_NN 第一_VV 公民_NN 之_DEG 称_NN";
        $this->assertEqual($tagger->tag($sentence), $wanted,
            "Parts of Speech Correctly Tagged in Chinese Source String");
    }
    /**
     * Checks that Tokenizer::normalize rewrites a sentence written with
     * traditional characters into its simplified character form
     */
    public function traditionalSimplifiedTestCase()
    {
        $input = "那是一個黑暗而暴風雨的夜晚。";
        $wanted = "那是一个黑暗而暴风雨的夜晚。";
        $normalized = Tokenizer::normalize($input);
        $this->assertEqual($normalized, $wanted,
            "Traditional characters correctly mapped to simplied ones");
    }
}
ViewGit