<?php /** * SeekQuarry/Yioop -- * Open Source Pure PHP Search Engine, Crawler, and Indexer * * Copyright (C) 2009 - 2024 Chris Pollett chris@pollett.org * * LICENSE: * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <https://www.gnu.org/licenses/>. * * END LICENSE * * This file contains unit tests of the LSMTree class used to keep track of * * @author Chris Pollett chris@pollett.org * @license https://www.gnu.org/licenses/ GPL3 * @link https://www.seekquarry.com/ * @copyright 2009 - 2024 * @filesource */ namespace seekquarry\yioop\tests; use seekquarry\yioop\configs as C; use seekquarry\yioop\library as L; use seekquarry\yioop\models\Model; use seekquarry\yioop\library\LSMTree; use seekquarry\yioop\library\PackedTableTools; use seekquarry\yioop\library\UnitTest; /** * Yioop LSMTree Unit Class * * Used to test insert, lookup, and deletion of key-value pairs in a * Log-structured Merge Tree. * @author Chris Pollett */ class LSMTreeTest extends UnitTest { /** * Test directory to hold LSMTree used for these unit tests */ const TEST_DIR = '/test_files/lsm_tree_test'; /** * Folder names to use for test LSMTree * @param string */ public $table_dirs; /** * Minimum degree is set to 2 and the number of key-value pairs is set to 25 */ public function setUp() { $this->table_dirs = []; } /** * Delete the LSMTree files created during the test */ public function tearDown() { $model = new Model(); foreach ($this->table_dirs as $table_dir) { $model->db->unlinkRecursive($table_dir, true); } $this->table_dirs = []; } /** * Used to create a single hash table in the folder * TEST_DIR . $max_items_per_file which allows at most * $max_items_per_file to be stored in a bucket * * @param int $max_items_per_file number of items allowed to be stored in a * bucket */ public function createTree($format, $max_file_size, $block_factor, $suffix = "") { $key_size = $format["PRIMARY KEY"][1]; $table_dir = __DIR__ . self::TEST_DIR . "_{$key_size}_{$max_file_size}_{$block_factor}_$suffix"; $this->table_dirs[] = $table_dir; return new LSMTree($table_dir, $format, $max_file_size, $block_factor); } /** * Test putting items in lsm-tree and * then seeing if the items can be retrieved */ public function simplePutGetTierTestCase() { for ($max_file_size = 20; $max_file_size < 60; $max_file_size += 30) { for ($block_factor = 1; $block_factor < 3; $block_factor++) { for ($key_len = 16; $key_len > 5; $key_len -= 8) { for ($cols = 0; $cols < 3; $cols++) { $format = ["PRIMARY KEY" => ["KEY", $key_len], "VALUE" => "TEXT"]; for ($j = 0; $j < $cols; $j++) { $format["COL$j"] = "INT"; } $lsm_tree = $this->createTree($format, $max_file_size, $block_factor, "{$key_len}_$cols"); for ($i = 0; $i < 50; $i++) { $key = str_pad("key$i", $key_len, "0", STR_PAD_LEFT); $entry = ["KEY" => $key, "VALUE" => "value$i"]; for ($j = 0; $j < $cols; $j++) { $entry["COL$j"] = $i; } $lsm_tree->put($entry); } $lsm_tree->flush(); for ($i = 0; $i < 50; $i++) { $key = str_pad("key$i", $key_len, "0", STR_PAD_LEFT); $values = $lsm_tree->get($key); $this->assertEqual("value$i", $values[0]["VALUE"], "{$i}th insert key length $key_len, pad ". "columns $j into single LSTM Tier ". "{$values[0]["VALUE"]} should be retrieved ". "as value$i"); } } } } } } /** * Checks that the correct number of block folders are created when * inserting several items into am LSMTree with a small block factor * and max file size */ public function blockFactorTestCase() { $max_file_size = 20; $block_factor = 2; $key_len = 16; $cols = 2; $format = ["PRIMARY KEY" => ["KEY", $key_len], "VALUE" => "TEXT"]; $lsm_tree = $this->createTree($format, $max_file_size, $block_factor, "{$key_len}"); for ($i = 0; $i < 15; $i++) { $key = str_pad("key$i", $key_len, "0", STR_PAD_LEFT); $entry = ["KEY" => $key, "VALUE" => "value$i"]; $lsm_tree->put($entry); } $this->assertTrue(file_exists($lsm_tree->folder . "/Tier0/A/F00000000000key12") && !file_exists($lsm_tree->folder . "/Tier0/A/F00000000000key14"), "Correct number of block folders created"); } /** * Checks that the correct number of data files are created when * inserting several items into am LSMTree with a small max file size */ public function maxFileSizeTestCase() { $max_file_size = 50; $block_factor = 10000; $key_len = 16; $cols = 2; $format = ["PRIMARY KEY" => ["KEY", $key_len], "VALUE" => "TEXT"]; $lsm_tree = $this->createTree($format, $max_file_size, $block_factor, "{$key_len}"); for ($i = 0; $i < 15; $i++) { $key = str_pad("key$i", $key_len, "0", STR_PAD_LEFT); $entry = ["KEY" => $key, "VALUE" => "value$i"]; $lsm_tree->put($entry); } $block_folder = $lsm_tree->folder . "/Tier0/A/F000000000000key0"; $this->assertTrue(file_exists("$block_folder/D00000000000key13") && !file_exists("$block_folder/D00000000000key14"), "Correct number of data files created"); } /** * Tests that values for identical keys are correctly merged when merge * two slots within a tier. */ public function mergeRecordsTierTestCase() { $max_file_size = 50; $block_factor = 2; $key_len = 16; $cols = 2; $format = ["PRIMARY KEY" => ["KEY", $key_len], "VALUE" => "TEXT"]; for ($j = 0; $j < $cols; $j++) { $format["COL$j"] = "INT"; } $lsm_tree = $this->createTree($format, $max_file_size, $block_factor, "{$key_len}_$cols"); foreach ([["A", 0, 10], ["B", 5, 15]] as $slot_info) { $lsm_tree->selectPutSlot($slot_info[0]); for ($i = $slot_info[1]; $i < $slot_info[2]; $i++) { $key = str_pad("key$i", $key_len, "0", STR_PAD_LEFT); $entry = ["KEY" => $key, "VALUE" => "value{$slot_info[0]}$i"]; for ($j = 0; $j < $cols; $j++) { $entry["COL$j"] = $i; } $lsm_tree->put($entry); } $lsm_tree->flush(); } $lsm_tree->mergeTiers(); $key = str_pad("key10", $key_len, "0", STR_PAD_LEFT); $values = $lsm_tree->get($key); for ($i = 0; $i < 15; $i++) { $key = str_pad("key$i", $key_len, "0", STR_PAD_LEFT); $values = $lsm_tree->get($key); if ($i < 10) { $this->assertEqual("valueA$i", $values[0]["VALUE"], "Able to retrieve Slot A value $i"); } if ($i > 4 && $i < 10) { $this->assertEqual("valueB$i", $values[1]["VALUE"], "Able to retrieve Slot B value $i"); } if ($i >= 10) { $this->assertEqual("valueB$i", $values[0]["VALUE"], "Able to retrieve Slot B value $i"); } } } /** * Tests that tiers are correctly created and merged when many items are * inserted into a tree with a small block factor and max file size. * Test correctness by seeing if each value can be retrieved with its key. */ public function mergeTiersTestCase() { $max_file_size = 50; $block_factor = 2; $key_len = 16; $cols = 3; $format = ["PRIMARY KEY" => ["KEY", $key_len], "VALUE" => "TEXT"]; for ($j = 0; $j < $cols; $j++) { $format["COL$j"] = "INT"; } $lsm_tree = $this->createTree($format, $max_file_size, $block_factor, "{$key_len}_$cols"); $slot = "B"; for ($i = 0; $i < 250; $i++) { if ($i % 5 == 0) { $slot = ($slot == "B") ? "A" : "B"; $lsm_tree->flush(); $lsm_tree->selectPutSlot($slot); } if ($i > 0 && $i % 10 == 0) { $lsm_tree->mergeTiers(); } $k = 10 * floor($i / 10); $k += ($slot == "A") ? 2 * ($i % 10) : 2 * (($i % 10) - 5) + 1; $key = str_pad("key$k", $key_len, "0", STR_PAD_LEFT); $entry = ["KEY" => $key, "VALUE" => "value$k"]; for ($j = 0; $j < $cols; $j++) { $entry["COL$j"] = $k; } $lsm_tree->put($entry); } $lsm_tree->flush(); $lsm_tree->mergeTiers(); $this->assertEqual($lsm_tree->getMaxTier(), 5, "Final maxTiers is correct"); $tiers = [false, true, false, false, true, true]; for ($i = 0; $i <= 5; $i++) { $this->assertEqual($lsm_tree->occupiedTier($i), $tiers[$i], "{$i} th tier correctly occupied"); } for ($i = 0; $i < 250; $i++) { $key = str_pad("key$i", $key_len, "0", STR_PAD_LEFT); $values = $lsm_tree->get($key); $this->assertEqual("value$i", $values[0]["VALUE"], "Able to retrieve {$i}th value"); } } }