Last commit for tests/UtilityTest.php: 2addb500315b7393a90fe66431d7832b1e7386c7

Adjust copyrights years

Chris Pollett [2024-01-03 21:Jan:rd]
Adjust copyrights years
<?php
/**
 * SeekQuarry/Yioop --
 * Open Source Pure PHP Search Engine, Crawler, and Indexer
 *
 * Copyright (C) 2009 - 2022  Chris Pollett chris@pollett.org
 *
 * LICENSE:
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * END LICENSE
 *
 * @author Chris Pollett chris@pollett.org
 * @license https://www.gnu.org/licenses/ GPL3
 * @link https://www.seekquarry.com/
 * @copyright 2009 - 2022
 * @filesource
 */
namespace seekquarry\yioop\tests;

use seekquarry\yioop\library as L;
use seekquarry\yioop\library\UnitTest;

/**
 * Used to test the various methods in utility, in particular, those
 * related to posting lists and time.
 *
 * @author Chris Pollett
 */
class UtilityTest extends UnitTest
{
    /**
     * No set up being done for the time being
     */
    public function setUp()
    {
    }
    /**
     * No tear down being done for the time being
     */
    public function tearDown()
    {
    }
    /**
     * Supplies the long increasing list of integers that the Rice code,
     * position list, and posting test cases all use as sample data. Kept in
     * one place so the three test cases cannot drift apart. The method name
     * deliberately does not end in TestCase as it is a helper, not a test.
     *
     * @return array increasing sequence of positive integers
     */
    private function samplePositionList()
    {
        return [90, 101, 570, 581, 737, 950, 1100, 1119, 1127,
            1147, 1175, 1185, 1930, 1969, 2020, 2040, 2068, 2083, 2090, 2102,
            2126, 2170, 2182, 2191, 2217, 2228, 2250, 2260, 2370, 2392, 2403,
            2447, 2456, 2467, 2476, 2486, 2503, 2508, 2610, 2628, 2629, 2641,
            2674, 2693, 2710, 2753, 2761, 2770, 2847, 2885, 2899, 2920, 2934,
            3000, 3019, 3039, 3058, 3070, 3133, 3168, 3227, 3240, 3249, 3266,
            3277, 3296, 3309, 3327, 3348, 3366, 3368, 3375, 3424, 3456, 3458,
            3463, 3478, 3487, 3511, 3513, 3523, 3557, 3614, 3828, 3880, 3896,
            3910, 3999, 4039, 4056, 4165, 4226, 4248, 4269, 4308, 4324, 4338,
            4444, 4484, 4560, 4577, 4597, 4622, 4695, 4710, 4801, 4824, 4859,
            4876, 4981, 5071, 5109, 5131, 5199, 5232, 5270, 5287, 5317, 5330,
            5373, 5409, 5426, 5490, 5500, 5501, 5533, 5544, 5722, 5765, 5799,
            5821, 5854, 5938, 5967, 6004, 6036, 6195, 6262, 6319, 6337, 6345,
            6346, 6391, 6430, 6452, 6460, 6514, 6580, 6736, 6758, 6794, 6820,
            6976];
    }
    /**
     * Determines if the checkTimeInterval method can correctly determine
     * if a time of day is between the times of day of two timestamps.
     * Each call passes a start time of day as an "HH:MM" string, a duration
     * in seconds (or -1 for no duration), and the timestamp to test. The
     * expected result is -1 when the timestamp is outside the interval and
     * otherwise the timestamp at which the interval ends.
     *
     * NOTE(review): 1592172350 is 22:05:50 UTC, whereas the messages below
     * call it 3:05pm, so this test presumably relies on the default
     * timezone being seven hours behind UTC -- confirm.
     */
    public function checkTimeIntervalTestCase()
    {
        $three_oh_five = 1592172350;
        $one_hour = 3600;
        $this->assertEqual(-1, L\checkTimeInterval("14:00", -1, $three_oh_five),
            "(a) no sleep duration (-1) does not contain 3:05pm");
        $this->assertEqual(-1, L\checkTimeInterval("16:00", -1, $three_oh_five),
            "(b) no sleep duration (-1) does not contain 3:05pm");
        $this->assertEqual(-1, L\checkTimeInterval("14:00", $one_hour,
            $three_oh_five), "2pm +1hr does not contain 3:05pm");
        $this->assertEqual(1592175600, L\checkTimeInterval("14:00",
            2 * $one_hour, $three_oh_five),
            "2pm +2hr interval contains 3:05pm and ends at 4pm");
    }
    /**
     * Used to check encoding and decoding using unary coding. The numbers
     * 1 to 20 followed by 20 down to 1 are appended to a single string and
     * then read back in the same order. Finally, a string of sixteen 1 bits
     * is decoded sixteen times, each decode being expected to yield 1.
     */
    public function unaryCodeTestCase()
    {
        $start = 0;
        $current_string = "";
        for ($i = 1; $i <= 20; $i++) {
            $current_string = L\appendUnary($i, $current_string, $start);
        }
        for ($j = 20; $j >= 1; $j--) {
            $current_string = L\appendUnary($j, $current_string, $start);
        }
        // rewind to the beginning of the bit string before reading it back
        $start = 0;
        for ($i = 1; $i <= 20; $i++) {
            $decoded = L\decodeUnary($current_string, $start);
            $this->assertEqual($i, $decoded, "(a) Decode Encode $i");
        }
        for ($j = 20; $j >= 1; $j--) {
            $decoded = L\decodeUnary($current_string, $start);
            $this->assertEqual($j, $decoded, "(b) Decode Encode $j");
        }
        $start = 0;
        for ($i = 0; $i <= 15; $i++) {
            $decoded = L\decodeUnary("\xFF\xFF", $start);
            $this->assertEqual(1, $decoded, "$i th encoded 1 decodes to 1");
        }
    }
    /**
     * Used to check that integers written with appendBits can be read back
     * with decodeBits when the number of bits each integer occupies is
     * supplied to the decoder
     */
    public function encodeDecodeBitsCodeTestCase()
    {
        $to_encodes = [1, 257, 4, 9, 65535, 93];
        // $bit_lens[$i] is the bit length handed to decodeBits for
        // $to_encodes[$i]
        $bit_lens = [1, 9, 3, 4, 16, 7];
        $start = 0;
        $encoded = "";
        foreach ($to_encodes as $to_encode) {
            $encoded = L\appendBits($to_encode, $encoded, $start);
        }
        $start = 0;
        foreach ($bit_lens as $i => $bit_len) {
            $decode = L\decodeBits($encoded, $start, $bit_len);
            $this->assertEqual($to_encodes[$i], $decode, "Encode ".
                $to_encodes[$i] ." decodes as $decode");
        }
    }
    /**
     * Used to check Encoding decoding gamma codes
     */
    public function encodeDecodeGammaTestCase()
    {
        $to_encodes = [1, 257, 4, 9, 65535, 93];
        $start = 0;
        $encoded = "";
        foreach ($to_encodes as $to_encode) {
            $encoded = L\appendGamma($to_encode, $encoded, $start);
        }
        $start = 0;
        $num_encoded = count($to_encodes);
        $decodes = L\decodeGammaList($encoded, $start, $num_encoded);
        for ($i = 0; $i < $num_encoded; $i++) {
            $this->assertEqual($to_encodes[$i], $decodes[$i], "Encode ".
                "{$to_encodes[$i]} decodes as {$decodes[$i]}");
        }
    }
    /**
     * Check that encoding and decoding integers using the vByte scheme works
     */
    public function encodeDecodeVByteTestCase()
    {
        for ($i = 0; $i < 1000000; $i += 500) {
            $enc = L\vByteEncode($i);
            $start = 0;
            $decode = L\vByteDecode($enc, $start);
            $this->assertEqual($i, $decode,
                "Encoding and decoding $i give $i");
        }
    }
    /**
     * Used to check encoding and decoding of an increasing sequence using
     * Rice codes. The modulus passed to the encoder is derived from the
     * average gap between consecutive entries of the sample list.
     */
    public function encodeDecodeRiceTestCase()
    {
        $position_list = $this->samplePositionList();
        $num_positions = count($position_list);
        $average_gap = ($position_list[$num_positions - 1] -
            $position_list[0])/$num_positions;
        // never use a modulus smaller than 2
        $modulus = max(ceil(log($average_gap + 1, 2)), 2);
        $start = 0;
        $encoded = L\appendRiceSequence($position_list, $modulus, "",
            $start, 0);
        $start = 0;
        $decodes = L\decodeRiceSequence($encoded, $start, $num_positions, 0);
        for ($i = 0; $i < $num_positions; $i++) {
            $this->assertEqual($position_list[$i], $decodes[$i], "Encode ".
                "{$position_list[$i]} decodes as {$decodes[$i]}");
        }
    }
    /**
     * Used to check that a position list run through encodePositionList
     * and then decodePositionList comes back unchanged
     */
    public function encodeDecodePositionListTestCase()
    {
        $position_list = $this->samplePositionList();
        $num_positions = count($position_list);
        $encoded = L\encodePositionList($position_list);
        $decodes = L\decodePositionList($encoded, $num_positions);
        for ($i = 0; $i < $num_positions; $i++) {
            $this->assertEqual($position_list[$i], $decodes[$i], "Encode ".
                "{$position_list[$i]} decodes as {$decodes[$i]}");
        }
    }
    /**
     * Used to check Encoding decoding using Modified9 coding
     */
    public function modified9TestCase()
    {
        $encode_list = [151466751, 11746, 11746];
        $encoded = L\encodeModified9($encode_list);
        $offset = 0;
        $decode_list = L\decodeModified9($encoded, $offset);
        $this->assertEqual($encode_list, $decode_list,
            "Encoding and decoding an array with Modified9 gives same result");
    }
    /**
     * Used to check if posting lists can be properly encoded/decoded.
     * For several (doc index, position list) pairs, the pair is packed
     * with packPosting and unpacked with unpackPosting; entry 0 of the
     * result is compared against the doc index and entry 1 against the
     * position list.
     */
    public function packUnpackPostingTestCase()
    {
        $posting_list = $this->samplePositionList();
        $packed = L\packPosting(10, $posting_list);
        $offset = 0;
        $out_doc_list = L\unpackPosting($packed, $offset, true);
        $this->assertEqual($out_doc_list[0], 10,
            "Doc index from unpack of long packed posting equal");
        $this->assertEqual($out_doc_list[1], $posting_list,
            "Unpack of long packed posting equal");
        $offset = 0;
        $posting_list = [254, 12000, 24000];
        $packed = L\packPosting(33689, $posting_list);
        $out_doc_list = L\unpackPosting($packed, $offset, true);
        $this->assertEqual($out_doc_list[0], 33689,
            "Doc index from unpack of first word has delta[0] case");
        $this->assertEqual($out_doc_list[1], $posting_list,
            "Unpack of delta[0] case");
        $offset = 0;
        $posting_list = [511, 12000, 24000];
        $packed = L\packPosting(33689, $posting_list);
        $out_doc_list = L\unpackPosting($packed, $offset, true);
        $this->assertEqual($out_doc_list[0], 33689,
            "Doc index from unpack of first word has delta[0] case 2");
        $this->assertEqual($out_doc_list[1], $posting_list,
            "Unpack of delta[0] case 2");
        $posting_list = [6000, 12000, 24000];
        $packed = L\packPosting(100000, $posting_list);
        $offset = 0;
        $out_doc_list = L\unpackPosting($packed, $offset, true);
        $this->assertEqual($out_doc_list[0], 100000,
            "Bigger Doc index from unpack of long packed posting equal");
        $this->assertEqual($out_doc_list[1], $posting_list,
            "Bigger Delta unpack of posting equal");
        $posting_list = [1, 4, 7, 174];
        $packed = L\packPosting(0, $posting_list);
        $offset = 0;
        $out_doc_list = L\unpackPosting($packed, $offset, true);
        $this->assertEqual($out_doc_list[0], 0,
            "Doc index from unpack of doc index 0 case");
        $this->assertEqual($out_doc_list[1], $posting_list,
            "Unpack of doc index 0 case");
    }
    /**
     * Checks webencode/webdecode to see inverses. Checks base64Hash/
     * unbase64Hash to see inverses. Checks encode255/decode255 to see
     * inverses on a string of high-valued bytes.
     */
    public function webencodeWebdecodeTestCase()
    {
        $expected = "=+~-@hi ya everyone!!@~+-=";
        $encode_decoded = L\webdecode(L\webencode($expected));
        $this->assertEqual($expected, $encode_decoded,
            "Webencode/Webdecode works correctly");
        $encode_decoded = L\unbase64Hash(L\base64Hash($expected));
        $this->assertEqual($expected, $encode_decoded,
            "base64Hash/unbase64Hash works correctly");
        $expected = "\xFE\xFD\xFF\xFE\xFD";
        $encode_decoded = L\decode255(L\encode255($expected));
        $this->assertEqual($expected, $encode_decoded,
            "encode255/decode255 works correctly");
    }
}
ViewGit