First pass at modifying Yioop to again use Logarithmic Merge Trees for its dictionary structures
First pass at modifying Yioop to again use Logarithmic Merge Trees for its dictionary structures
Folder structure of IndexDocumentBundles also modified and now supports overflow folder (which
could be on a different hard drive). ArcTool has been updated to support migration to new
indexes
<?php
/**
* SeekQuarry/Yioop --
* Open Source Pure PHP Search Engine, Crawler, and Indexer
*
* Copyright (C) 2009 - 2022 Chris Pollett chris@pollett.org
*
* LICENSE:
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* END LICENSE
*
* This file contains unit tests of the BTree class used to keep track of
* etags during a crawl
*
* @author Chris Pollett chris@pollett.org
* @license https://www.gnu.org/licenses/ GPL3
* @link https://www.seekquarry.com/
* @copyright 2009 - 2022
* @filesource
*/
namespace seekquarry\yioop\tests;
use seekquarry\yioop\configs as C;
use seekquarry\yioop\library as L;
use seekquarry\yioop\models\Model;
use seekquarry\yioop\library\BPlusTree;
use seekquarry\yioop\library\PackedTableTools;
use seekquarry\yioop\library\UnitTest;
/**
* Yioop B+-tree Unit Class
*
* Used to test insert, lookup, and deletion of key-value pairs on the B+-Tree.
* @author Chris Pollett
*/
class BPlusTreeTest extends UnitTest
{
/**
* Test directory to hold btree used for these unit tests
*/
const TEST_DIR = '/test_files/b_plus_tree_test';
/**
* Minimum degree is set to 2 and the number of key-value pairs is set to 25
*/
public function setUp()
{
$this->table_dirs = [];
}
/**
* Delete the B+-Tree files created during the test
*/
public function tearDown()
{
$model = new Model();
foreach ($this->table_dirs as $table_dir) {
$model->db->unlinkRecursive($table_dir);
}
$this->table_dirs = [];
}
/**
* Used to create a single hash table in the folder
* TEST_DIR . $max_items_per_file which allows at most
* $max_items_per_file to be stored in a bucket
*
* @param int $max_items_per_file number of items allowed to be stored in a
* bucket
*/
public function createTree($max_degree, $format =
["PRIMARY KEY" => ["KEY", -1], "VALUE" => "BLOB"])
{
$table_dir = __DIR__ . self::TEST_DIR . $max_degree;
$this->table_dirs[] = $table_dir;
return new L\BPlusTree($table_dir, $format, $max_degree,
C\NS_COMPRESSORS . "NonCompressor",
C\NS_COMPRESSORS . "NonCompressor");
}
/**
* Test putting items in bplustrees of odd sized nodes between 3 and 13 and
* then seeing if the items can be retrieved
*/
public function putGetTextTestCase()
{
$format = ["PRIMARY KEY" => ["KEY", -1], "VALUE" => "TEXT"];
for ($i = 3; $i <= 13; $i += 2) {
$bptree = $this->createTree($i, $format);
for ($j = 0; $j < ($i * 40); $j++) {
for($k = 0; $k < 5; $k++) {
$bptree->put(["KEY" => str_pad("$j",4,"0", STR_PAD_LEFT),
"VALUE" => "row{$j}_{$k}"],
PackedTableTools::APPEND_MODE);
}
}
$bptree->flushLastPutNode();
for ($j = 0; $j < ($i * 40); $j++) {
$rows = $bptree->get(str_pad("$j",4,"0", STR_PAD_LEFT));
for($k = 0; $k < 5; $k++) {
$this->assertEqual("row{$j}_{$k}", $rows[$k]["VALUE"],
"{$j}th insert into tree of size $i was retrieved okay".
" {$k}th case");
}
}
}
}
/**
* Test putting items in bplustrees of odd sized nodes between 3 and 13 and
* then seeing if the items can be retrieved
*/
public function putGetBlobTestCase()
{
for ($i = 3; $i <= 13; $i += 2) {
$bptree = $this->createTree($i);
for ($j = 0; $j < ($i * 40); $j++) {
for($k = 0; $k < 5; $k++) {
$bptree->put(["KEY" => str_pad("$j",4,"0", STR_PAD_LEFT),
"VALUE" => "row{$j}_{$k}"],
PackedTableTools::APPEND_MODE);
}
}
$bptree->flushLastPutNode();
for ($j = 0; $j < ($i * 40); $j++) {
$rows = $bptree->get(str_pad("$j",4,"0",
STR_PAD_LEFT));
for($k = 0; $k < 5; $k++) {
$this->assertEqual("row{$j}_{$k}", $rows[$k]["VALUE"],
"{$j}th insert into tree of size $i was retrieved okay".
" {$k}th case");
}
}
}
}
}