Adjust copyrights years
<?php
/**
* SeekQuarry/Yioop --
* Open Source Pure PHP Search Engine, Crawler, and Indexer
*
* Copyright (C) 2009 - 2021 Chris Pollett chris@pollett.org
*
* LICENSE:
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* END LICENSE
*
* This file contains unit tests of the WordIterator class
*
* @author Chris Pollett chris@pollett.org
* @license https://www.gnu.org/licenses/ GPL3
* @link https://www.seekquarry.com/
* @copyright 2009 - 2021
* @filesource
*/
namespace seekquarry\yioop\tests;
use seekquarry\yioop\configs as C;
use seekquarry\yioop\library as L;
use seekquarry\yioop\models\Model;
use seekquarry\yioop\library\CrawlConstants as CC;
use seekquarry\yioop\library\IndexDocumentBundle;
use seekquarry\yioop\library\IndexManager;
use seekquarry\yioop\library\UnitTest;
use seekquarry\yioop\library\index_bundle_iterators\WordIterator;
/**
 * Used to test the WordIterator class. Each test builds a small
 * IndexDocumentBundle on disk under TEST_DIR and then constructs a
 * WordIterator over that bundle.
 *
 * @author Chris Pollett
 */
class WordIteratorTest extends UnitTest
{
    /**
     * Name of the folder (within TEST_DIR) that holds the test index bundle
     */
    const TEST_BUNDLE = "IndexData200000000";
    /**
     * Folder in which the test index bundle is created. It is created by
     * setUp() if missing and deleted recursively by tearDown()
     */
    const TEST_DIR = __DIR__ . '/test_files/word_iterator_test';
    /**
     * The index bundle created by setUp() that test cases add documents to.
     * Declared explicitly so it is not created as a dynamic property
     * (dynamic properties are deprecated as of PHP 8.2).
     * @var IndexDocumentBundle
     */
    public $index_archive;
    /**
     * Creates the test folder if it does not already exist and instantiates
     * a fresh IndexDocumentBundle inside it for the test case to populate.
     */
    public function setUp()
    {
        $test_bundle = self::TEST_BUNDLE;
        if (!file_exists(self::TEST_DIR)) {
            mkdir(self::TEST_DIR);
        }
        // NOTE(review): the trailing arguments (false, "TestBundle", 3, 5)
        // are passed through as-is; confirm their meaning against the
        // IndexDocumentBundle constructor.
        $this->index_archive = new IndexDocumentBundle(self::TEST_DIR .
            "/$test_bundle", false, "TestBundle", 3, 5);
    }
    /**
     * Deletes the test folder and everything that was created inside it
     */
    public function tearDown()
    {
        $model = new Model();
        $model->db->unlinkRecursive(self::TEST_DIR);
    }
    /**
     * Adds ten small documents to the test bundle, updates the bundle's
     * dictionary, saves the bundle, and then constructs a WordIterator for
     * the canonical form of the term "be" over the saved bundle.
     *
     * TODO: no assertions are made on the iterator yet; this currently only
     * checks that the bundle and the iterator can be built.
     */
    public function createTestCase()
    {
        IndexManager::clearCache();
        $index_archive = $this->index_archive;
        $keys = [];
        // initialise explicitly rather than relying on $docs[] auto-creation
        $docs = [];
        for ($i = 0; $i < 10; $i++) {
            $keys[$i] = $this->docidFromIntKeys($i, $i, $i);
            $docs[] = [
                CC::DOC_ID => $keys[$i],
                CC::SUMMARY =>
                    [
                        CC::DESCRIPTION => "to$i be or$i not$i to$i be...",
                        CC::HASH => str_pad("$i", 8, "0", STR_PAD_LEFT),
                        CC::TITLE => "Some$i Shakespeare$i Play$i",
                        CC::URL => "https://www.somewhere$i.com/"
                    ],
                CC::PAGE => "Page $i",
            ];
        }
        $num_docs = count($docs);
        $index_archive->addPages($docs, $num_docs);
        $index_archive->updateDictionary();
        $index_archive->forceSave();
        $word_iterator = new WordIterator(L\canonicalTerm("be"),
            self::TEST_DIR . "/". self::TEST_BUNDLE, true, null, 1);
    }
    /**
     * Makes a test doc id from an integer by left padding its decimal
     * representation with "0" to a length of 24 characters. Values whose
     * representation is already 24 characters or longer are returned
     * unpadded.
     *
     * @param int $i value to convert
     * @return string the zero-padded string
     */
    protected function docidFromInt($i)
    {
        return str_pad("$i", 24, "0", STR_PAD_LEFT);
    }
    /**
     * Makes a test doc id out of three integer components. The result is
     * the first value zero-padded to 8 characters, then a single character
     * "d" or "l", then the second value zero-padded to 7 characters, then
     * the third value zero-padded to 8 characters (24 characters in total
     * when each component fits in its field).
     *
     * @param int $i_hash_url value used for the first 8 character field
     * @param int $j_hash_page value used for the 7 character field
     * @param int $k_hash_host value used for the last 8 character field
     * @param bool $is_doc whether the marker character should be "d"
     *      (true) or "l" (false)
     * @return string the assembled doc id
     */
    protected function docidFromIntKeys($i_hash_url, $j_hash_page,
        $k_hash_host, $is_doc = true)
    {
        $doc_or_link = ($is_doc) ? "d" : "l";
        return str_pad("$i_hash_url", 8, "0", STR_PAD_LEFT) . $doc_or_link .
            str_pad("$j_hash_page", 7, "0", STR_PAD_LEFT) .
            str_pad("$k_hash_host", 8, "0", STR_PAD_LEFT);
    }
}