Last commit for tests/BPlusTreeTest.php: afd6930f42e31d81a53d42061b5fd758f56c62de

First pass at modifying Yioop to again use Logarithmic Merge Trees for its dictionary structures

Chris Pollett [2024-01-15 02:Jan:th]
First pass at modifying Yioop to again use Logarithmic Merge Trees for its dictionary structures
Folder structure of IndexDocumentBundles also modified and now supports overflow folder (which
could be on a different hard drive). ArcTool has been updated to support migration to new
indexes
<?php
/**
 * SeekQuarry/Yioop --
 * Open Source Pure PHP Search Engine, Crawler, and Indexer
 *
 * Copyright (C) 2009 - 2022  Chris Pollett chris@pollett.org
 *
 * LICENSE:
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * END LICENSE
 *
 * This file contains unit tests of the BTree class used to keep track of
 * etags during a crawl
 *
 * @author Chris Pollett chris@pollett.org
 * @license https://www.gnu.org/licenses/ GPL3
 * @link https://www.seekquarry.com/
 * @copyright 2009 - 2022
 * @filesource
 */
namespace seekquarry\yioop\tests;

use seekquarry\yioop\configs as C;
use seekquarry\yioop\library as L;
use seekquarry\yioop\models\Model;
use seekquarry\yioop\library\BPlusTree;
use seekquarry\yioop\library\PackedTableTools;
use seekquarry\yioop\library\UnitTest;

/**
 * Yioop B+-tree Unit Class
 *
 * Used to test insert, lookup, and deletion of key-value pairs on the B+-Tree.
 * @author Chris Pollett
 */
 class BPlusTreeTest extends UnitTest
{
    /**
     * Test directory to hold btree used for these unit tests
     */
    const TEST_DIR = '/test_files/b_plus_tree_test';
    /**
     * Minimum degree is set to 2 and the number of key-value pairs is set to 25
     */
    public function setUp()
    {
        $this->table_dirs = [];
    }
    /**
     * Delete the B+-Tree files created during the test
     */
    public function tearDown()
    {
        $model = new Model();
        foreach ($this->table_dirs as $table_dir) {
            $model->db->unlinkRecursive($table_dir);
        }
        $this->table_dirs = [];
    }
    /**
     * Used to create a single hash table in the folder
     * TEST_DIR . $max_items_per_file which allows at most
     * $max_items_per_file to be stored in a bucket
     *
     * @param int $max_items_per_file number of items allowed to be stored in a
     *  bucket
     */
    public function createTree($max_degree, $format =
        ["PRIMARY KEY" => ["KEY", -1], "VALUE" => "BLOB"])
    {
        $table_dir = __DIR__ . self::TEST_DIR . $max_degree;
        $this->table_dirs[] = $table_dir;
        return new L\BPlusTree($table_dir, $format, $max_degree,
            C\NS_COMPRESSORS . "NonCompressor",
            C\NS_COMPRESSORS . "NonCompressor");
    }
    /**
     * Test putting items in bplustrees of odd sized nodes between 3 and 13 and
     * then seeing if the items can be retrieved
     */
    public function putGetTextTestCase()
    {
        $format = ["PRIMARY KEY" => ["KEY", -1], "VALUE" => "TEXT"];
        for ($i = 3; $i <= 13; $i += 2) {
            $bptree = $this->createTree($i, $format);
            for ($j = 0; $j < ($i * 40); $j++) {
                for($k = 0; $k < 5; $k++) {
                    $bptree->put(["KEY" => str_pad("$j",4,"0", STR_PAD_LEFT),
                        "VALUE" => "row{$j}_{$k}"],
                        PackedTableTools::APPEND_MODE);
                }
            }
            $bptree->flushLastPutNode();
            for ($j = 0; $j < ($i * 40); $j++) {
                $rows = $bptree->get(str_pad("$j",4,"0", STR_PAD_LEFT));
                for($k = 0; $k < 5; $k++) {
                    $this->assertEqual("row{$j}_{$k}", $rows[$k]["VALUE"],
                        "{$j}th insert into tree of size $i was retrieved okay".
                        " {$k}th case");
                }
            }
        }
    }
    /**
     * Test putting items in bplustrees of odd sized nodes between 3 and 13 and
     * then seeing if the items can be retrieved
     */
    public function putGetBlobTestCase()
    {
        for ($i = 3; $i <= 13; $i += 2) {
            $bptree = $this->createTree($i);
            for ($j = 0; $j < ($i * 40); $j++) {
                for($k = 0; $k < 5; $k++) {
                    $bptree->put(["KEY" => str_pad("$j",4,"0", STR_PAD_LEFT),
                        "VALUE" => "row{$j}_{$k}"],
                        PackedTableTools::APPEND_MODE);
                }
            }
            $bptree->flushLastPutNode();
            for ($j = 0; $j < ($i * 40); $j++) {
                $rows = $bptree->get(str_pad("$j",4,"0",
                    STR_PAD_LEFT));
                for($k = 0; $k < 5; $k++) {
                    $this->assertEqual("row{$j}_{$k}", $rows[$k]["VALUE"],
                        "{$j}th insert into tree of size $i was retrieved okay".
                        " {$k}th case");
                }
            }
        }
    }
}
ViewGit