Modifies index so postings are stored with partitions rather than as blobs which are part of the B+ tree, a=chris

Chris Pollett [2022-08-08 19:Aug:th]
Modifies index so postings are stored with partitions rather than as blobs which are part of the B+ tree, a=chris
Filename
src/controllers/components/CrawlComponent.php
src/controllers/components/SystemComponent.php
src/executables/ArcTool.php
src/library/BPlusTree.php
src/library/IndexDocumentBundle.php
src/library/PackedTableTools.php
src/library/PartitionDocumentBundle.php
src/library/index_bundle_iterators/WordIterator.php
src/library/processors/ImageProcessor.php
src/models/PhraseModel.php
tests/BPlusTreeTest.php
tests/IndexDocumentBundleTest.php
tests/IndexManagerTest.php
diff --git a/src/controllers/components/CrawlComponent.php b/src/controllers/components/CrawlComponent.php
index 157a03006..96a190e71 100644
--- a/src/controllers/components/CrawlComponent.php
+++ b/src/controllers/components/CrawlComponent.php
@@ -2287,7 +2287,7 @@ class CrawlComponent extends Component implements CrawlConstants
             $data["URL_ACTION"] = -1;
         }
         if ($data["URL"] != "") {
-            $data["URL"] = UrlParser::canonicalLink($data["URL"], "");
+            $data["URL"] = UrlParser::canonicalLink($data["URL"], "", false);
             if ($data["URL_ACTION"] == C\SEARCH_FILTER_GROUP_ITEM) {
                 $data["URL"] = UrlParser::getHost($data["URL"]);
             }
diff --git a/src/controllers/components/SystemComponent.php b/src/controllers/components/SystemComponent.php
index 26935e562..e4d7e8e29 100755
--- a/src/controllers/components/SystemComponent.php
+++ b/src/controllers/components/SystemComponent.php
@@ -1375,7 +1375,7 @@ EOD;
                 $comma = ",<br />";
             }
         }
-        if (!defined('PHP_VERSION_ID') || PHP_VERSION_ID < 70400) {
+        if (!defined('PHP_VERSION_ID') || PHP_VERSION_ID < 80800) {
             $missing_required .= $comma . tl("system_component_php_version");
             $comma = ", ";
         }
diff --git a/src/executables/ArcTool.php b/src/executables/ArcTool.php
index 8beeafb39..ca9897095 100755
--- a/src/executables/ArcTool.php
+++ b/src/executables/ArcTool.php
@@ -1185,6 +1185,9 @@ EOD;
         $next_partition = $start_generation;
         $continue = false;
         $dictionary_log = C\LOG_DIR . "/0-DictionaryUpdater.log";
+        if (file_exists($dictionary_log)) {
+            file_put_contents($dictionary_log, "");
+        }
         while ($next_partition < $save_partition) {
             if ($old_next_partition != $next_partition) {
                 $old_next_partition = $next_partition;
diff --git a/src/library/BPlusTree.php b/src/library/BPlusTree.php
index 0b99d4d96..19ea997e3 100644
--- a/src/library/BPlusTree.php
+++ b/src/library/BPlusTree.php
@@ -304,8 +304,8 @@ class BPlusTree
                 $table_tools->load($insert_node_path, $mode) ?? [];
         }
         $insert_node = $this->insert_node_cache[$insert_node_path];
-        $archive_filename = $this->archiveFilenameFromNodeFilename(
-            $insert_node_path);
+        $archive_filename = (empty($this->blob_columns)) ? "" :
+            $this->archiveFilenameFromNodeFilename($insert_node_path);
         $this->putNode($row, $insert_node, $archive_filename, $is_encoded_key,
             $mode);
         if (count($insert_node) > $this->parameters["MAX_KEYS"]) {
@@ -509,8 +509,8 @@ class BPlusTree
         $node_prefix = self::NODE_PREFIX;
         $tmp_filename = "$parent_folder/$temp_node_name";
         $tmp_archive_filename = "$parent_folder/$archive_prefix$temp_node_name";
-        $archive_filename = $this->archiveFilenameFromNodeFilename(
-            $node_path);
+        $archive_filename = (empty($this->blob_columns)) ? "" :
+            $this->archiveFilenameFromNodeFilename($node_path);
         $num_keys = count($node);
         $half_num = ceil($num_keys/2);
         $keys = array_keys($node);
@@ -545,7 +545,9 @@ class BPlusTree
         $this->add_archive_cache = [null, "", -1];
         $this->get_archive_cache = [null, "", -1];
         rename($tmp_filename, $node_path);
-        rename($tmp_archive_filename, $archive_filename);
+        if (!empty($this->blob_columns)) {
+            rename($tmp_archive_filename, $archive_filename);
+        }
     }
     /**
      * Returns the record associated with a $key as stored in the BPlusTree.
@@ -573,11 +575,11 @@ class BPlusTree
     {
         $table_tools = $this->table_tools;
         $key_node_filename = $this->find($key, $is_encoded_key);
-        $archive_filename = $this->archiveFilenameFromNodeFilename(
-            $key_node_filename);
         if (!$key_node_filename) {
             return null;
         }
+        $archive_filename = (empty($this->blob_columns)) ? "" :
+            $this->archiveFilenameFromNodeFilename($key_node_filename);
         if ($use_string_node) {
             $key_node = $table_tools->load($key_node_filename,
                 $table_tools::AS_STRING_MODE, true);
@@ -629,7 +631,8 @@ class BPlusTree
             return $values;
         }
         if (!($values = $table_tools->unpack($values, $offset, $limit))) {
-            crawlLog("Unpack BPlusTree error!!! Key was:$key ..");
+            crawlLog("Unpack BPlusTree error!!! (Key,offset,limit) was: ".
+                "($key, $offset, $limit) ..");
             $value_message = (is_string($values)) ? toHexString($values) :
                 serialize($values);
             crawlLog(".. value was:" . $value_message);
@@ -639,27 +642,28 @@ class BPlusTree
         if (!$look_up_blobs) {
             return $values;
         }
-        if (!empty($this->blob_columns)) {
-            $num_blob_columns = count($this->blob_columns);
-            for ($k = 0; $k < $num_unpacked; $k++) {
-                $offset = intval($values[$k][$this->blob_columns[0]]);
+        $num_blob_columns = count($this->blob_columns);
+        for ($k = 0; $k < $num_unpacked; $k++) {
+            if ($num_blob_columns > 0) {
+                $blob_offset = intval($values[$k][$this->blob_columns[0]]);
                 for ($i = 0; $i < $num_blob_columns; $i++) {
                     $column_name = $this->blob_columns[$i];
                     $len = ($i + 1 < $num_blob_columns) ?
                         intval($values[$k][$this->blob_columns[$i + 1]]) :
                         $values[$k]["LAST_BLOB_LEN"];
                     $values[$k][$column_name] = ($len == 0) ? "" :
-                        $this->getArchive($archive_filename, $offset, $len);
-                    $offset += $len;
+                        $this->getArchive($archive_filename, $blob_offset,
+                        $len);
+                    $blob_offset += $len;
                 }
                 unset($values[$k]["LAST_BLOB_LEN"]);
                 foreach ($this->serial_columns as $field_name) {
                     $values[$k][$field_name] = unserialize(
                         $values[$k][$field_name]);
                 }
-                $values[$k][$this->key_field] = ($is_encoded_key) ?
-                    $key : rawurldecode($encode_key);
             }
+            $values[$k][$this->key_field] = ($is_encoded_key) ?
+                $key : rawurldecode($encode_key);
         }
         return $values;
     }
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index c5d280b8d..27739bd8a 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -54,7 +54,7 @@ class IndexDocumentBundle implements CrawlConstants
      * The version of this IndexDocumentBundle. The lowest format number is
      * 3.0 as prior inverted index/document stores used IndexArchiveBundle's
      */
-    const DEFAULT_VERSION = "3.1";
+    const DEFAULT_VERSION = "3.2";
     /**
      * Default values for the configuration parameters of an
      * IndexDocumentBundle
@@ -131,6 +131,16 @@ class IndexDocumentBundle implements CrawlConstants
      * term.
      */
     const POSTINGS_FILENAME = "postings";
+    /**
+     * Temporary name for postings from a POSTINGS_FILENAME file while
+     * they are being compressed.
+     */
+    const TEMP_POSTINGS_FILENAME = "temp_postings";
+    /**
+     * How many bytes of postings to buffer in memory before writing them
+     * to the postings file in addPartitionPostingsDictionary
+     */
+    const POSTINGS_BUFFER_SIZE = 1000000;
     /**
      * Name of the folder used to hold position lists and document maps. Within
      * this folder there is a subfolder for each partition which contains a
@@ -313,11 +323,20 @@ class IndexDocumentBundle implements CrawlConstants
         if (!$read_only_archive) {
             $this->documents->initCountIfNotExists("VISITED_URLS_COUNT");
         }
-        $this->dictionary = new BPlusTree($this->dir_name . "/" .
-            self::DICTIONARY_FOLDER, ["PRIMARY KEY" => ["TERM", 16],
-            "PARTITION" => "INT", "NUM_DOCS" => "INT",
-            "NUM_OCCURRENCES" => "INT", "POSTINGS" => "BLOB"], $max_keys,
-            $record_compressor, $bplus_blob_compressor);
+        if ($this->archive_info['VERSION'] < "3.2") {
+            $this->dictionary = new BPlusTree($this->dir_name . "/" .
+                self::DICTIONARY_FOLDER, ["PRIMARY KEY" => ["TERM", 16],
+                "PARTITION" => "INT", "NUM_DOCS" => "INT",
+                "NUM_OCCURRENCES" => "INT", "POSTINGS" => "BLOB"], $max_keys,
+                $record_compressor, $bplus_blob_compressor);
+        } else {
+            $this->dictionary = new BPlusTree($this->dir_name . "/" .
+                self::DICTIONARY_FOLDER, ["PRIMARY KEY" => ["TERM", 16],
+                "PARTITION" => "INT", "NUM_DOCS" => "INT",
+                "NUM_OCCURRENCES" => "INT", "POSTINGS_OFFSET" => "INT",
+                "POSTINGS_LEN" => "INT"], $max_keys,
+                $record_compressor, $bplus_blob_compressor);
+        }
     }
     /**
      * Add the array of $pages to the documents PartitionDocumentBundle
@@ -428,11 +447,32 @@ class IndexDocumentBundle implements CrawlConstants
         }
         crawlLog("Start Adding Partition Posting Info to Dictionary");
         $start_time = microtime(true);
-        $this->postings = $postings_tools->load($postings_filename);
+        $markers = $postings_tools->getEntryMarkers($postings_filename);
+        $hash_postings_name = crawlHash($postings_filename);
+        $postings_string = $postings_tools->table_cache[$hash_postings_name];
+        $temp_postings_filename = $base_folder . "/" .
+            self::TEMP_POSTINGS_FILENAME;
+        rename($postings_filename, $temp_postings_filename);
+        unset($postings_tools->table_cache[$hash_postings_name]);
+        $posting_files_len = strlen($postings_string);
+        //add a marker for the end of the file as a string
+        $key_len = $this->postings_tools->key_len;
         $this->last_entries = $last_entries_tools->load($last_entries_filename);
-        $num_postings = count($this->postings);
-        $i = 0;
-        foreach ($this->postings as $term => $entry) {
+        $num_postings = count($markers) + 1;
+        $last_marker = 0;
+        $out_postings = "";
+        $postings_offset = 0;
+        $fh = fopen($postings_filename, "w");
+        for ($i = 0; $i < $num_postings; $i++) {
+            $cur_marker = $markers[$i] ?? null;
+            $diff = ($cur_marker === null) ? $cur_marker :
+                $cur_marker - $last_marker;
+            $pre_row = substr($postings_string, $last_marker, $diff);
+            $last_marker = $cur_marker + 1;
+            $term = substr($pre_row, 0, $key_len);
+            $row = decode255(substr($pre_row, $key_len));
+            $postings_len = strlen($row);
+            $out_postings .= $row;
             if(crawlTimeoutLog("..Indexer Still processing partition ".
                 "$partition. Have completed $i postings of $num_postings.") &&
                 $taking_too_long_touch) {
@@ -441,7 +481,7 @@ class IndexDocumentBundle implements CrawlConstants
                 }
             }
             $start = 0;
-            $num_docs_term = vByteDecode($entry, $start);
+            $num_docs_term = vByteDecode($row, $start);
             $num_occurrences_term = 0;
             $last_entry = $last_entries_tools->find($this->last_entries, $term);
             if (!empty($last_entry)) {
@@ -452,18 +492,22 @@ class IndexDocumentBundle implements CrawlConstants
             $dictionary->put(["TERM" => $term, "PARTITION" => $partition,
                 "NUM_DOCS" => $num_docs_term,
                 "NUM_OCCURRENCES"  => $num_occurrences_term,
-                "POSTINGS" => $entry]);
-            $i++;
+                "POSTINGS_OFFSET" => $postings_offset,
+                "POSTINGS_LEN" => $postings_len]);
+            $postings_offset += $postings_len;
+            if (strlen($out_postings) > self::POSTINGS_BUFFER_SIZE) {
+                fwrite($fh, $out_postings);
+                $out_postings = "";
+            }
         }
         $dictionary->flushLastPutNode();
+        fwrite($fh, $out_postings);
+        fclose($fh);
+        unlink($temp_postings_filename);
         crawlLog("...Finished Adding Partition Posting Info to " .
             "Dictionary: " . changeInMicrotime($start_time));
         if (!C\nsdefined("KEEP_PARTITION_CALCULATIONS") ||
             !C\KEEP_PARTITION_CALCULATIONS) {
-            crawlLog("Deleting partition posting calculations..");
-            if (file_exists($postings_filename)) {
-                unlink($postings_filename);
-            }
             if (file_exists($last_entries_filename)) {
                 unlink($last_entries_filename);
             }
@@ -692,6 +736,7 @@ class IndexDocumentBundle implements CrawlConstants
             return $statistics;
         }
         $doc_map_tools->save($doc_map_filename, $this->doc_map);
+        ksort($this->postings);
         $postings_tools->save($postings_filename, $this->postings);
         $last_entries_tools->save($last_entries_filename, $this->last_entries);
         file_put_contents($positions_filename, $this->positions);
@@ -1264,6 +1309,34 @@ class IndexDocumentBundle implements CrawlConstants
         $result['TOTAL_OCCURRENCES'] = $occurrence_count;
         return $result;
     }
+    /**
+     * Reads $len bytes starting at byte $offset from the postings file of
+     * $partition. Bytes are returned as stored, no decoding is done here.
+     *
+     * @param int $partition partition to retrieve postings from
+     * @param int $offset byte offset into the partition's postings file
+     * @param int $len length in bytes of the posting list to retrieve
+     * @return string posting list data (vbyte encoded number of postings
+     *  followed by the posting data), or "" if it could not be read
+     */
+    public function getPostingsString($partition, $offset, $len)
+    {
+        /* static locals are shared by every instance of the class, so
+           handles are cached by full path, not just partition number */
+        static $file_handles = [];
+        $postings_filename = $this->getPartitionBaseFolder($partition) .
+            "/" . IndexDocumentBundle::POSTINGS_FILENAME;
+        if (empty($file_handles[$postings_filename])) {
+            $file_handles[$postings_filename] =
+                fopen($postings_filename, "r");
+        }
+        $fh = $file_handles[$postings_filename];
+        if ($len <= 0 || !$fh || fseek($fh, $offset) != 0) {
+            return "";
+        }
+        $out = fread($fh, $len);
+        return ($out === false) ? "" : $out;
+    }
     /**
      * Given the postings as a string for a partition for a term unpacks them
      * into an array of postings, doing de-delta of doc_map_indices and
@@ -1286,6 +1359,7 @@ class IndexDocumentBundle implements CrawlConstants
         if (empty($postings_string)) {
             return [];
         }
+        $items = [];
         $sum_frequencies = 0;
         $doc_map_index = 0;
         $positions_offset = 0;
@@ -1302,6 +1376,10 @@ class IndexDocumentBundle implements CrawlConstants
             $sum_frequencies += $pre_item["FREQUENCY"];
             $current_pos += $unpack_len_map[$int_info];
             $items[] = $item;
+            if ($current_pos >= strlen($postings_string)) {
+                crawlLog("Posting decode error");
+                break; //sanity check break
+            }
         }
         return [$items, $sum_frequencies];
     }
diff --git a/src/library/PackedTableTools.php b/src/library/PackedTableTools.php
index 232e3c07b..7c8757dfd 100644
--- a/src/library/PackedTableTools.php
+++ b/src/library/PackedTableTools.php
@@ -373,6 +373,7 @@ class PackedTableTools
         if (!empty($this->table_entry_markers[$hash_name])) {
             return $this->table_entry_markers[$hash_name];
         }
+        $this->table_entry_markers[$hash_name] = [];
         $table_string = $this->load($table_name, self::AS_STRING_MODE, true);
         $delim = "\xFF";
         $this->table_entry_markers[$hash_name] = [];
@@ -381,7 +382,7 @@ class PackedTableTools
             $this->table_entry_markers[$hash_name] =
                 array_column($matches[0], 1);
         }
-        return $matches[0];
+        return $this->table_entry_markers[$hash_name];
     }
     /**
      * Returns the $index'th entry out of a string packed according to the
diff --git a/src/library/PartitionDocumentBundle.php b/src/library/PartitionDocumentBundle.php
index 8b7a72eeb..598d950c4 100644
--- a/src/library/PartitionDocumentBundle.php
+++ b/src/library/PartitionDocumentBundle.php
@@ -273,7 +273,7 @@ class PartitionDocumentBundle
      * @param int $partition to look for record in
      * @param array $fields names of fields in this PartitionDocumentBundle
      *      to return
-     * @return array unpacked record on success, otherwise false
+     * @return array|false unpacked record on success, otherwise false
      */
     public function get($key, $partition, $fields = [])
     {
diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php
index 85b5d1265..da7a54ef6 100644
--- a/src/library/index_bundle_iterators/WordIterator.php
+++ b/src/library/index_bundle_iterators/WordIterator.php
@@ -927,16 +927,29 @@ class WordIterator extends IndexBundleIterator
             return [];
         }
         $generation_info = $this->dictionary_info[$generation];
-        if (is_array($generation_info['POSTINGS'])) {
+        if (!empty($generation_info['POSTINGS']) &&
+            is_array($generation_info['POSTINGS'])) {
             return $generation_info['POSTINGS']; //already loaded
         }
         $index = IndexManager::getIndex($this->index_name);
-        if (empty($generation_info['LAST_BLOB_LEN'])) {
-            $postings_entry = "";
+        if ($this->index_version < "3.2") {
+            if (empty($generation_info['LAST_BLOB_LEN'])) {
+                $postings_entry = "";
+            } else {
+                $postings_entry = $index->dictionary->getArchive(
+                    $this->archive_file, $generation_info['POSTINGS'],
+                    $generation_info['LAST_BLOB_LEN']);
+                unset($this->dictionary_info[$generation]['LAST_BLOB_LEN']);
+            }
         } else {
-            $postings_entry = $index->dictionary->getArchive(
-                $this->archive_file, $generation_info['POSTINGS'],
-                $generation_info['LAST_BLOB_LEN']);
+            if (empty($generation_info['POSTINGS_OFFSET']) ||
+                empty($generation_info['POSTINGS_LEN'])) {
+                $postings_entry = "";
+            } else {
+                $postings_entry = $index->getPostingsString($generation,
+                    $generation_info['POSTINGS_OFFSET'],
+                    $generation_info['POSTINGS_LEN']);
+            }
         }
         if (empty($postings_entry)) {
             $postings = [];
@@ -944,7 +957,6 @@ class WordIterator extends IndexBundleIterator
             list($postings,) = $index->unpackPostings($postings_entry);
         }
         $this->dictionary_info[$generation]['POSTINGS'] = $postings;
-        unset($this->dictionary_info[$generation]['LAST_BLOB_LEN']);
         return $postings;
     }
     /**
@@ -979,8 +991,8 @@ class WordIterator extends IndexBundleIterator
             $partition_info = $this->dictionary_info[$this->generation_pointer];
             $this->current_generation = $partition_info['PARTITION'];
             $postings = $this->getGenerationPostings($this->generation_pointer);
-            $this->current_doc_offset =
-                $postings[$this->current_offset]['DOC_MAP_INDEX'];
+            $this->current_doc_offset = ($postings) ?
+                $postings[$this->current_offset]['DOC_MAP_INDEX'] : -1;
         }
         return [$this->current_generation, $this->current_doc_offset];
     }
diff --git a/src/library/processors/ImageProcessor.php b/src/library/processors/ImageProcessor.php
index 47220acb6..e3bf43d28 100755
--- a/src/library/processors/ImageProcessor.php
+++ b/src/library/processors/ImageProcessor.php
@@ -143,7 +143,7 @@ class ImageProcessor extends PageProcessor
     public static function createThumb($image, $width = C\THUMB_DIM,
         $height = C\THUMB_DIM)
     {
-        if (empty($image)) {
+        if (empty($image) || ($width == 0 && $height == 0)) {
             return "";
         }
         $size_x = imagesx($image);
diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php
index 316f6f264..4d3f381e9 100755
--- a/src/models/PhraseModel.php
+++ b/src/models/PhraseModel.php
@@ -354,7 +354,7 @@ class PhraseModel extends ParallelModel
                         L\guessLocaleFromString($disjunct_phrases[0]));
                     $map_cnt = 0;
                     foreach ($query_map as $map_url) {
-                        $map_parts = explode("#", $map_url);
+                        $map_parts = explode("###", $map_url);
                         $map_result[self::URL] = $map_parts[0];
                         $map_result[self::PINNED] = true;
                         $map_result[self::SCORE] = 0;
diff --git a/tests/BPlusTreeTest.php b/tests/BPlusTreeTest.php
index d8e00a959..0b5c15e19 100644
--- a/tests/BPlusTreeTest.php
+++ b/tests/BPlusTreeTest.php
@@ -91,7 +91,34 @@ use seekquarry\yioop\library\UnitTest;
      * Test putting items in bplustrees of odd sized nodes between 3 and 13 and
      * then seeing if the items can be retrieved
      */
-    public function putGetTestCase()
+    public function putGetTextTestCase()
+    {
+        $format = ["PRIMARY KEY" => ["KEY", -1], "VALUE" => "TEXT"];
+        foreach (range(3, 13, 2) as $node_size) { // odd sizes 3 through 13
+            $tree = $this->createTree($node_size, $format);
+            for ($n = 0; $n < $node_size * 40; $n++) {
+                $key = str_pad("$n", 4, "0", STR_PAD_LEFT);
+                for ($r = 0; $r < 5; $r++) { // five rows appended per key
+                    $tree->put(["KEY" => $key, "VALUE" => "row{$n}_{$r}"],
+                        PackedTableTools::APPEND_MODE);
+                }
+            }
+            $tree->flushLastPutNode();
+            for ($n = 0; $n < $node_size * 40; $n++) {
+                $rows = $tree->get(str_pad("$n", 4, "0", STR_PAD_LEFT));
+                for ($r = 0; $r < 5; $r++) {
+                    $this->assertEqual("row{$n}_{$r}", $rows[$r]["VALUE"],
+                        "{$n}th insert into tree of size $node_size was " .
+                        "retrieved okay {$r}th case");
+                }
+            }
+        }
+    }
+    /**
+     * Test putting items in bplustrees of odd sized nodes between 3 and 13 and
+     * then seeing if the items can be retrieved
+     */
+    public function putGetBlobTestCase()
     {
         for ($i = 3; $i <= 13; $i += 2) {
             $bptree = $this->createTree($i);
diff --git a/tests/IndexDocumentBundleTest.php b/tests/IndexDocumentBundleTest.php
index d06fa31ad..515145368 100644
--- a/tests/IndexDocumentBundleTest.php
+++ b/tests/IndexDocumentBundleTest.php
@@ -315,7 +315,15 @@ use seekquarry\yioop\library\UnitTest;
         $this->assertEqual($sum + count($active_postings), $num_docs,
             "Term 'be' occurs in correct number of documents");
         for ($i = 0; $i < 2; $i++) {
-            $postings = $posting_tools->unpack($term_row[$i]['POSTINGS']);
+            $partition = $term_row[$i]['PARTITION'];
+            $partition_folder = $this->index_archive->getPartitionBaseFolder(
+                $partition);
+            $postings_filename = $partition_folder . "/" .
+                IndexDocumentBundle::POSTINGS_FILENAME;
+            $postings_string = file_get_contents($postings_filename,
+                false, null, $term_row[$i]['POSTINGS_OFFSET'],
+                $term_row[$i]['POSTINGS_LEN']);
+            $postings = $posting_tools->unpack($postings_string);
             $base_folder = $this->index_archive->getPartitionBaseFolder(
                 $term_row[$i]['PARTITION']);
             $positions_filename = $base_folder . "/" .
diff --git a/tests/IndexManagerTest.php b/tests/IndexManagerTest.php
index a91886e50..daea12d8a 100644
--- a/tests/IndexManagerTest.php
+++ b/tests/IndexManagerTest.php
@@ -111,8 +111,8 @@ use seekquarry\yioop\library\UnitTest;
             self::OLD_BUNDLE);
         $version_new = IndexManager::getVersion(self::TEST_DIR . "/".
             self::NEW_BUNDLE);
-        $this->assertEqual($version_old, 1, "Version 1 index detected");
-        $this->assertEqual($version_new, 3.1, "Version 3.1 index detected");
+        $this->assertEqual($version_old, "1", "Version 1 index detected");
+        $this->assertEqual($version_new, "3.2", "Version 3.2 index detected");
     }
     /**
      * Tests if IndexManager can return the dictionary information about a
@@ -124,19 +124,23 @@ use seekquarry\yioop\library\UnitTest;
         $index_archive = $this->index_archive;
         $dictionary = $index_archive->dictionary;
         $keys = [];
+        $a = "";
         for ($i = 0; $i < 10; $i++) {
             $keys[$i] = $this->docidFromIntKeys($i, $i, $i);
             $docs[] = [
                 CC::DOC_ID => $keys[$i],
                 CC::SUMMARY =>
                     [
-                        CC::DESCRIPTION => "to$i be or$i not$i to$i be...",
+                        CC::DESCRIPTION => "$a to$i be or$i not$i to$i be...",
                         CC::HASH => str_pad("$i", 8, "0", STR_PAD_LEFT),
                         CC::TITLE => "Some$i Shakespeare$i Play$i",
                         CC::URL => "https://www.somewhere$i.com/"
                     ],
                 CC::PAGE => "Page $i",
             ];
+            if ($i > 3) {
+                $a = "aha be ";
+            }
         }
         $num_docs = count($docs);
         $index_archive->addPages($docs, $num_docs);
@@ -150,10 +154,9 @@ use seekquarry\yioop\library\UnitTest;
             "Active partition postings has been returned as array.");
         $index = IndexManager::getIndex(self::TEST_DIR . "/".
             self::NEW_BUNDLE);
-        $postings = $index->dictionary->getArchive(
-            $be_info['ARCHIVE_FILE'],
-            $be_info['ROWS'][0]['POSTINGS'],
-            $be_info['ROWS'][0]['LAST_BLOB_LEN']);
+        $postings = $index->getPostingsString(0,
+            $be_info['ROWS'][0]['POSTINGS_OFFSET'],
+            $be_info['ROWS'][0]['POSTINGS_LEN']);
         $posting_rows = $index->postings_tools->unpack($postings);
         $this->assertEqual(count($posting_rows), 4,
             "Able to look up postings for Partition 0 of 'be' Word Info.");
ViewGit