Fixes a getPostingsString bug where the string needed to be decode255'd; removes a lot of the code for serving results for older index formats

Chris Pollett [2024-01-24 05:Jan:th]
Fixes a getPostingsString bug where the string needed to be decode255'd; removes a lot of the code for serving results for older index formats
Filename
src/executables/ArcTool.php
src/library/FeedDocumentBundle.php
src/library/IndexDocumentBundle.php
src/library/LSMTree.php
src/library/PackedTableTools.php
src/library/index_bundle_iterators/DocIterator.php
src/library/index_bundle_iterators/GroupIterator.php
src/library/index_bundle_iterators/WordIterator.php
src/models/ParallelModel.php
src/models/PhraseModel.php
diff --git a/src/executables/ArcTool.php b/src/executables/ArcTool.php
index 183b375a8..7e298c674 100755
--- a/src/executables/ArcTool.php
+++ b/src/executables/ArcTool.php
@@ -433,11 +433,13 @@ class ArcTool extends DictionaryUpdater implements CrawlConstants
             if ($start_record < 0 || $record['PARTITION'] >= $start_record) {
                 echo "RECORD: $i\n";
                 echo "PARTITION: {$record['PARTITION']}\n";
-                echo "NUMBER OF DOCS: {$record['NUM_DOCS']}\n\n";
+                echo "NUMBER OF DOCS: {$record['NUM_DOCS']}\n";
                 $postings_offset = (empty($record['POSTINGS_OFFSET'])) ?
                     -1: $record['POSTINGS_OFFSET'];
+                echo "POSTINGS_OFFSET: $postings_offset\n";
                 $postings_len = (empty($record['POSTINGS_LEN']))?
                     -1 : $record['POSTINGS_LEN'];
+                echo "POSTINGS_LEN: $postings_len\n\n";
                 $is_postings_array = isset($record['POSTINGS']) &&
                     is_array($record['POSTINGS']);
                 if ($postings_offset == -1 && !$is_postings_array) {
@@ -747,7 +749,7 @@ class ArcTool extends DictionaryUpdater implements CrawlConstants
         return $name;
     }
     /**
-     * Outputs tot the terminal if the bloom filter $filter_path contains
+     * Outputs to the terminal if the bloom filter $filter_path contains
      * the string $item
      * @param string $filter_path name of bloom filter file to check if
      *  contains item
@@ -847,7 +849,6 @@ class ArcTool extends DictionaryUpdater implements CrawlConstants
         $description = ($alternate_description) ? $alternate_description :
             "Description: " . $info['DESCRIPTION'];
         echo "$description\n";
-        var_dump($info);
         if (!$only_crawl_params) {
             $num_partitions = $info['SAVE_PARTITION'] + 1;
             echo "Number of partitions: $num_partitions \n";
diff --git a/src/library/FeedDocumentBundle.php b/src/library/FeedDocumentBundle.php
index 6b74faf45..a75030026 100644
--- a/src/library/FeedDocumentBundle.php
+++ b/src/library/FeedDocumentBundle.php
@@ -288,8 +288,8 @@ class FeedDocumentBundle extends IndexDocumentBundle
             $this->addScoresDocMap($doc_id, $num_words,
                 intval($item[self::PUBDATE]), 0, $title_length, $title_length,
                 [], []);
-            $this->addTermPostingLists(0, $num_words,
-                $word_list, $meta_ids, $this->doc_map_counter);
+            $this->addTermPostingLists(0, $word_list, $meta_ids,
+                $this->doc_map_counter);
             $this->doc_map_counter++;
             $this->updateTrendingTermCounts($term_counts, $phrase_string,
                 $word_list, $media_category, $source_name, $lang,
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index ed65f7a05..60259c7cc 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -184,16 +184,14 @@ class IndexDocumentBundle implements CrawlConstants
      */
     public $next_partition_to_add;
     /**
-     * IndexDictionary for all shards in the IndexArchiveBundle
-     * This contains entries of the form (word, num_shards with word,
-     * posting list info 0th shard containing the word,
-     * posting list info 1st shard containing the word, ...)
-     * @var object
+     * Reference to the LSMTree used to store term => array of partition
+     * posting list info
+     * @var LSMTree
      */
     public $dictionary;
     /**
      * PartitionDocumentBundle for web page documents
-     * @var object
+     * @var PartitionDocumentBundle
      */
     public $documents;
     /**
@@ -965,8 +963,8 @@ class IndexDocumentBundle implements CrawlConstants
             $url_info[self::SCORE], $host_keywords_end_pos, $title_end_pos,
             $path_keywords_end_pos, $description_scores,
             $user_ranks, $terms_filter);
-        $this->addTermPostingLists(0, $num_words,
-            $word_lists, $meta_ids, $this->doc_map_counter);
+        $this->addTermPostingLists(0, $word_lists, $meta_ids,
+            $this->doc_map_counter);
         $this->doc_map_counter++;
         $interim_elapse = changeInMicrotime($interim_time);
         if ($interim_elapse > 5) {
@@ -1150,8 +1148,6 @@ class IndexDocumentBundle implements CrawlConstants
      * @param int $position_offset number of header bytes that might be used
      *  before including any position data in the file that positions will
      *  eventually be stored.
-     * @param int $doc_length length of document in terms for the document
-     *  for which we are adding posting data.
      * @param array $word_lists term => positions within current document of
      *  that term for the document whose posting data we are adding
      * @param array $meta_ids meta terms associated with the document we are
@@ -1160,9 +1156,10 @@ class IndexDocumentBundle implements CrawlConstants
      *  we are adding. I.e., 5 would mean there were 5 earlier documents whose
      *  postings we have already added.
      */
-    public function addTermPostingLists($position_offset, $doc_length,
-        $word_lists, $meta_ids, $doc_map_index)
+    public function addTermPostingLists($position_offset, $word_lists,
+        $meta_ids, $doc_map_index)
     {
+        static $my_counter = 0;
         $postings_tools = $this->postings_tools;
         $last_entries_tools = $this->last_entries_tools;
         foreach ($meta_ids as $meta_id) {
@@ -1171,9 +1168,8 @@ class IndexDocumentBundle implements CrawlConstants
         foreach ($word_lists as $word => $position_list) {
             $term_id = canonicalTerm($word);
             $meta_prefix = substr($word, 0, 5);
-            $site_meta = ($meta_prefix == "site:" || $meta_prefix == "info:");
-            $occurrences = $site_meta ? $doc_length : count($position_list);
-            if (!$site_meta && $occurrences > 0) {
+            $occurrences = count($position_list);
+            if ($occurrences > 0) {
                 $encoded_position_list = encodePositionList($position_list);
                 $offset = $position_offset + strlen($this->positions);
                 $len = strlen($encoded_position_list);
@@ -1192,7 +1188,7 @@ class IndexDocumentBundle implements CrawlConstants
                     array_values($last_entry_row[0]);
             }
             $diff_doc_map_index = $doc_map_index - $last_index;
-            $diff_offset = (!$site_meta && $occurrences > 0) ?
+            $diff_offset = ($occurrences > 0) ?
                 $offset - $last_offset : 0;
             $entry = $postings_tools->pack([
                 "DOC_MAP_INDEX" => $diff_doc_map_index,
@@ -1561,7 +1557,7 @@ class IndexDocumentBundle implements CrawlConstants
             $file_handles[$partition] = $fh;
         }
         if ($fh && fseek($fh, $offset) == 0 && $len > 0) {
-            $out = fread($fh, $len);
+            $out = decode255(fread($fh, $len) ?? "");
             return $out;
         }
         return "";
@@ -1619,13 +1615,16 @@ class IndexDocumentBundle implements CrawlConstants
                 $current_pos);
             if ($pre_item["FREQUENCY"] > C\MAX_DESCRIPTION_LEN) {
                 crawlLog("Posting decode error! Frequency too large");
+                crawlLog(".. Decode Format was: " . $unpack_map[$int_info]);
                 crawlLog("..Number to decode items: " . $num_items);
                 crawlLog("..Number decoded: " . $i);
                 crawlLog("..Length posting string: " .
                     strlen($postings_string));
                 crawlLog("..Current position: " . $current_pos);
                 crawlLog("..Large Frequency Observed: ".
-                    $pre_item["FREQUENCY"] . " ". C\MAX_DESCRIPTION_LEN);
+                    $pre_item["FREQUENCY"] .
+                    " more than max description length:".
+                    C\MAX_DESCRIPTION_LEN);
                 return [$items, $sum_frequencies]; // sanity check 3
             }
             $item = $pre_item;
diff --git a/src/library/LSMTree.php b/src/library/LSMTree.php
index e9c93e163..32c69e747 100644
--- a/src/library/LSMTree.php
+++ b/src/library/LSMTree.php
@@ -129,7 +129,7 @@ class LSMTree
         $this->block_factor = $block_factor;
     }
     /**
-     *
+     * @return string
      */
     public function getTierFolder($tier)
     {
@@ -137,14 +137,17 @@ class LSMTree
             sprintf("%'.04d", $tier);
     }
     /**
-     *
+     * @return bool
      */
     public function occupiedTier($tier)
     {
         return file_exists($this->getTierFolder($tier) . "/A");
     }
     /**
-     *
+     * Returns the highest occupied tier of the LSMTree
+     * @param bool $recompute whether to return the cached value if it exists
+     *  (false) or recompute it by examining the file system (true)
+     * @return int the maximum tier
      */
     public function getMaxTier($recompute = false)
     {
@@ -167,7 +170,8 @@ class LSMTree
         return self::$max_tier = $max_tier;
     }
     /**
-     *
+     * Within a tier select which of the two slots (A or B) to write
+     * entry data to
      */
     public function selectPutSlot($letter)
     {
diff --git a/src/library/PackedTableTools.php b/src/library/PackedTableTools.php
index 48fe14ff9..950d1580c 100644
--- a/src/library/PackedTableTools.php
+++ b/src/library/PackedTableTools.php
@@ -528,6 +528,9 @@ class PackedTableTools
                             $packed_data .= chr($magnitude + $positive);
                             $cur_int_char = ($cur_int_char == -1) ? 0 :
                                 $cur_int_char;
+                            $cur_int_add = 0; /*(0 << $shift) deliberately
+                                set so as not to use an old value by accident
+                              */
                         } else {
                             if ($magnitude < 32768) {
                                 $packed_int = pack("n", $magnitude);
@@ -690,10 +693,13 @@ class PackedTableTools
                         $current_pos += $len;
                         break;
                     case "INT":
-                        if ($ints_used >= $num_int_columns ||
-                            empty($int_info)) {
+                        if ($ints_used >= $num_int_columns) {
                             return null;
                         }
+                        if (empty($int_info)) {
+                            $item[$field_name] = 0;
+                            break;
+                        }
                         $int_code = (ord($int_info[$current_int_pos]) &
                             (3 << $shift)) >> $shift;
                         if (!isset($table_row[$current_pos])) {
diff --git a/src/library/index_bundle_iterators/DocIterator.php b/src/library/index_bundle_iterators/DocIterator.php
index b4ca52210..2fb59e8af 100755
--- a/src/library/index_bundle_iterators/DocIterator.php
+++ b/src/library/index_bundle_iterators/DocIterator.php
@@ -76,13 +76,6 @@ class DocIterator extends IndexBundleIterator
      * @var int
      */
     public $key_index;
-    /**
-     * The index version affects how the iterator cycles through documents
-     * There was a big change in index format between version 3 and prior
-     * formats
-     * @var int
-     */
-    public $index_version;
     /**
      * The next byte offset of a doc in the IndexShard
      * @var int
@@ -152,7 +145,6 @@ class DocIterator extends IndexBundleIterator
         $this->index_name =  $index_name;
         $this->direction = $direction;
         $this->ranking_factors = $ranking_factors;
-        $this->index_version = IndexManager::getVersion($index_name);
         $index = IndexManager::getIndex($index_name, $direction);
         if (empty($index)) {
             $this->num_docs = 0;
@@ -163,15 +155,8 @@ class DocIterator extends IndexBundleIterator
             return;
         }
         $info = $index->getArchiveInfo($index->dir_name);
-        if ($this->index_version < 3) {
-            $this->num_docs = $info['COUNT'];
-            $this->num_generations =
-                (isset($index->generation_info['ACTIVE'])) ?
-                $index->generation_info['ACTIVE'] + 1 : 0;
-        } else {
-            $this->num_docs = ($info['COUNT'] ?? 0) + ($info['ACTIVE'] ?? 0);
-            $this->num_generations = $info['SAVE_PARTITION'] + 1;
-        }
+        $this->num_docs = ($info['COUNT'] ?? 0) + ($info['ACTIVE'] ?? 0);
+        $this->num_generations = $info['SAVE_PARTITION'] + 1;
         $this->results_per_block = $results_per_block;
         $this->current_block_fresh = false;
         $this->reset();
@@ -207,36 +192,25 @@ class DocIterator extends IndexBundleIterator
         if($this->num_generations <= 0) {
             return;
         }
-        if ($this->index_version < 3 && isset($this->shard_lens[$generation])) {
-            $this->last_offset = $this->shard_lens[$generation];
-        } else {
-            $index = IndexManager::getIndex($this->index_name);
-            if ($this->index_version < 3) {
-                $index->setCurrentShard($generation, true);
-                $shard = $index->getCurrentShard();
-                $this->last_offset = $shard->docids_len;
-                $this->shard_lens[$generation] = $shard->docids_len;
-            } else {
-                if ($generation != $this->doc_map_generation) {
-                    $base_folder = $index->getPartitionBaseFolder(
-                        $this->current_generation);
-                    $doc_map_filename = $base_folder . "/" .
-                        IndexDocumentBundle::DOC_MAP_FILENAME;
-                    $doc_map_tools = $index->doc_map_tools;
-                    $this->doc_map = $doc_map_tools->load($doc_map_filename)
-                        ?? [];
-                    $doc_keys = array_keys($this->doc_map);
-                    $key_index = [];
-                    foreach ($this->doc_map as $key => $entry) {
-                        if (!$index->isType($key, "link")) {
-                            $key_index[] = $key;
-                        }
-                    }
-                    $this->key_index = $key_index;
-                    $this->last_offset = count($key_index) - 1;
-                    $this->doc_map_generation = $generation;
+        $index = IndexManager::getIndex($this->index_name);
+        if ($generation != $this->doc_map_generation) {
+            $base_folder = $index->getPartitionBaseFolder(
+                $this->current_generation);
+            $doc_map_filename = $base_folder . "/" .
+                IndexDocumentBundle::DOC_MAP_FILENAME;
+            $doc_map_tools = $index->doc_map_tools;
+            $this->doc_map = $doc_map_tools->load($doc_map_filename)
+                ?? [];
+            $doc_keys = array_keys($this->doc_map);
+            $key_index = [];
+            foreach ($this->doc_map as $key => $entry) {
+                if (!$index->isType($key, "link")) {
+                    $key_index[] = $key;
                 }
             }
+            $this->key_index = $key_index;
+            $this->last_offset = count($key_index) - 1;
+            $this->doc_map_generation = $generation;
         }
     }
     /**
@@ -262,20 +236,10 @@ class DocIterator extends IndexBundleIterator
         $pre_results = [];
         $this->next_offset = $this->current_offset;
         $index = IndexManager::getIndex($this->index_name);
-        if ($this->index_version < 3) {
-            $index->setCurrentShard($this->current_generation, true);
-            //the next call also updates next offset
-            $shard = $index->getCurrentShard();
-            $num_docs_or_links = ($this->index_version < 3) ?
-                $shard->num_docs + $shard->num_link_docs : 0;
-            $doc_offset_key_len = IndexShard::DOC_KEY_LEN;
-        }
         $this->getGenerationInfo($this->current_generation);
-        if ($this->index_version >= 3) {
-            $doc_map_tools = $index->doc_map_tools;
-            $doc_keys = $this->key_index;
-            $doc_map = $this->doc_map;
-        }
+        $doc_map_tools = $index->doc_map_tools;
+        $doc_keys = $this->key_index;
+        $doc_map = $this->doc_map;
         $pre_results = [];
         $num_docs_so_far = 0;
         $termsfilter_len = IndexDocumentBundle::TERMSFILTER_LEN;
@@ -284,47 +248,32 @@ class DocIterator extends IndexBundleIterator
                 || (!$is_ascending && $this->next_offset < 0)) {
                 break;
             }
-            if ($this->index_version < 3) {
-                $posting = L\packPosting($this->next_offset >> 4, [1]);
-                list($doc_id, $num_keys, $item) =
-                    $shard->makeItem($posting, $num_docs_or_links,
-                        $this->direction);
-            } else {
-                $doc_id = $doc_keys[$this->next_offset];
-                $map_entry = $doc_map[$doc_id];
-                // skip term filter if present
-                $map_entry = ($map_entry >= ($termsfilter_len + 1) &&
-                    $map_entry[0] == 't') ?
-                    substr($map_entry, $termsfilter_len + 1) :
-                    $map_entry;
-                $doc_info = $doc_map_tools->unpack($map_entry);
-                $item = [self::GENERATION => $this->current_generation];
-                $item[self::DOC_RANK] = $this->computeDocRank($doc_id,
-                    $this->next_offset, $this->current_generation,
-                    $this->num_generations, $this->last_offset,
-                    $this->last_offset, $this->last_offset,
-                    $this->ranking_factors, $is_ascending);
-                list($item[self::DOC_LEN], ) =
-                    array_values(array_shift($doc_info));
-                $item[self::SCORE] = $item[self::DOC_RANK];
-                list(, $num_description_scores) =
-                    array_values(array_shift($doc_info));
-                $item[self::DESCRIPTION_SCORES] = array_slice($doc_info, 0,
-                    $num_description_scores);
-                $item[self::USER_RANKS] = array_slice($doc_info,
-                    $num_description_scores);
-                $item[self::INDEX_VERSION] = $this->index_version;
-                $item[self::IS_DOC] = true;
-            }
+            $doc_id = $doc_keys[$this->next_offset];
+            $map_entry = $doc_map[$doc_id];
+            // skip term filter if present
+            $map_entry = ($map_entry >= ($termsfilter_len + 1) &&
+                $map_entry[0] == 't') ?
+                substr($map_entry, $termsfilter_len + 1) :
+                $map_entry;
+            $doc_info = $doc_map_tools->unpack($map_entry);
+            $item = [self::GENERATION => $this->current_generation];
+            $item[self::DOC_RANK] = $this->computeDocRank($doc_id,
+                $this->next_offset, $this->current_generation,
+                $this->num_generations, $this->last_offset,
+                $this->last_offset, $this->last_offset,
+                $this->ranking_factors, $is_ascending);
+            list($item[self::DOC_LEN], ) =
+                array_values(array_shift($doc_info));
+            $item[self::SCORE] = $item[self::DOC_RANK];
+            list(, $num_description_scores) =
+                array_values(array_shift($doc_info));
+            $item[self::DESCRIPTION_SCORES] = array_slice($doc_info, 0,
+                $num_description_scores);
+            $item[self::USER_RANKS] = array_slice($doc_info,
+                $num_description_scores);
+            $item[self::IS_DOC] = true;
             if ($is_ascending) {
-                if ($this->index_version < 3) {
-                    if ($num_keys % 2 == 0) {
-                        $num_keys++;
-                    }
-                    $this->next_offset += ($num_keys + 1) * $doc_offset_key_len;
-                } else {
-                    $this->next_offset++;
-                }
+                $this->next_offset++;
             } else {
                 $this->next_offset = $this->getPreviousDocOffset(
                     $this->next_offset);
@@ -361,10 +310,7 @@ class DocIterator extends IndexBundleIterator
      */
     public function getPreviousDocOffset($doc_offset)
     {
-        $doc_item_len = ($this->index_version < 3) ?
-            4 * IndexShard::DOC_KEY_LEN : 1;
-        // this is not correct, only works if no additions doc keys
-        return $doc_offset - $doc_item_len;
+        return $doc_offset - 1;
     }
     /**
      * Updates the seen_docs count during an advance() call
@@ -373,8 +319,7 @@ class DocIterator extends IndexBundleIterator
     {
         if ($this->current_block_fresh != true) {
             $is_ascending = ($this->direction == self::ASCENDING);
-            $doc_item_len = ($this->index_version < 3) ?
-                4 * IndexShard::DOC_KEY_LEN : 1;
+            $doc_item_len = 1;
             $pre_num_docs = ($is_ascending) ?
                 ($this->last_offset - $this->next_offset) / $doc_item_len :
                 $this->next_offset/$doc_item_len;
@@ -437,8 +382,7 @@ class DocIterator extends IndexBundleIterator
                     $this->next_offset = $this->current_offset;
                 }
             }
-            $this->seen_docs = $this->current_offset /
-                (($this->index_version < 3) ? 4 * IndexShard::DOC_KEY_LEN : 1);
+            $this->seen_docs = $this->current_offset;
         }
     }
     /**
diff --git a/src/library/index_bundle_iterators/GroupIterator.php b/src/library/index_bundle_iterators/GroupIterator.php
index 06d32b0fe..83581e3d0 100644
--- a/src/library/index_bundle_iterators/GroupIterator.php
+++ b/src/library/index_bundle_iterators/GroupIterator.php
@@ -343,24 +343,11 @@ class GroupIterator extends IndexBundleIterator
                 if (isset($doc_info[self::GENERATION])) {
                     $machine_id = (isset($doc_info[self::MACHINE_ID])) ?
                         $doc_info[self::MACHINE_ID] :$this->current_machine;
-                    if (!empty($doc_info[self::INDEX_VERSION])) {
-                        $out_pages[$hash_url][self::SUMMARY_OFFSET][] =
-                            [$machine_id, $doc_info[self::KEY],
-                                $doc_info[self::CRAWL_TIME],
-                                $doc_info[self::GENERATION],
-                                "PDB"];
-                    } else if (is_int($doc_info[self::SUMMARY_OFFSET])) {
-                        $out_pages[$hash_url][self::SUMMARY_OFFSET][] =
-                            [$machine_id, $doc_info[self::KEY],
-                                $doc_info[self::CRAWL_TIME],
-                                $doc_info[self::GENERATION],
-                                $doc_info[self::SUMMARY_OFFSET]];
-                    } else if (is_array($doc_info[self::SUMMARY_OFFSET])) {
-                        $out_pages[$hash_url][self::SUMMARY_OFFSET] =
-                            array_merge(
-                                $out_pages[$hash_url][self::SUMMARY_OFFSET],
-                                $doc_info[self::SUMMARY_OFFSET]);
-                    }
+                    $out_pages[$hash_url][self::SUMMARY_OFFSET][] =
+                        [$machine_id, $doc_info[self::KEY],
+                            $doc_info[self::CRAWL_TIME],
+                            $doc_info[self::GENERATION],
+                            "PDB"];
                 }
             }
             if ($add_lookup) {
diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php
index 11d6bbd22..a1dcb9bd0 100644
--- a/src/library/index_bundle_iterators/WordIterator.php
+++ b/src/library/index_bundle_iterators/WordIterator.php
@@ -106,13 +106,6 @@ class WordIterator extends IndexBundleIterator
      * @var string
      */
     public $index_name;
-    /**
-     * The index version affects how the iterator cycles through documents
-     * There was a big change in index format between version 3 and prior
-     * formats
-     * @var int
-     */
-    public $index_version;
     /**
      * Whether word key corresponds to a meta word
      * @var string
@@ -280,14 +273,9 @@ class WordIterator extends IndexBundleIterator
             $info = ($this->direction == self::ASCENDING) ?
                 $this->dictionary_info[0] : $this->dictionary_info[
                 $this->num_generations - 1];
-            if ($this->index_version < 3) {
-                list($this->current_generation, $this->start_offset,
-                    $this->last_offset, ) = $info;
-            } else {
-                $this->current_generation = $info['PARTITION'];
-                $this->start_offset = 0;
-                $this->last_offset = $info['NUM_DOCS'] - 1;
-            }
+            $this->current_generation = $info['PARTITION'];
+            $this->start_offset = 0;
+            $this->last_offset = $info['NUM_DOCS'] - 1;
         } else {
             $this->start_offset = 0;
             $this->last_offset = -1;
@@ -320,7 +308,6 @@ class WordIterator extends IndexBundleIterator
         if (!empty($this->term_info_computed)) {
             return;
         }
-        $this->index_version = IndexManager::getVersion($index_name);
         $word_info = IndexManager::getWordInfo($index_name, $word_key, -1, -1,
             C\NUM_DISTINCT_GENERATIONS, true);
         $this->total_num_docs = $word_info['TOTAL_NUM_DOCS'] ?? 0;
@@ -414,16 +401,6 @@ class WordIterator extends IndexBundleIterator
     public function getPostingsSliceResults()
     {
         $this->next_offset = $this->current_offset;
-        if ($this->index_version < 3) {
-            $index = IndexManager::getIndex($this->index_name);
-            $index->setCurrentShard($this->current_generation, true);
-            //the next call also updates next offset
-            $shard = $index->getCurrentShard(true);
-            $pre_results = $shard->getPostingsSlice($this->start_offset,
-                $this->next_offset, $this->last_offset,
-                $this->results_per_block, $this->direction);
-            return $pre_results;
-        }
         if ($this->direction == self::ASCENDING) {
             if ($this->current_offset < $this->start_offset) {
                 $this->current_offset = $this->start_offset;
@@ -699,7 +676,6 @@ class WordIterator extends IndexBundleIterator
                 $posting[self::RELEVANCE];
             $posting[self::USER_RANKS] = array_slice($doc_info,
                 $num_description_scores);
-            $posting[self::INDEX_VERSION] = $this->index_version;
             $key_postings[$doc_key] = $posting;
         }
         if (!empty($fh)) {
@@ -809,41 +785,20 @@ class WordIterator extends IndexBundleIterator
      */
     public function advanceSeenDocs()
     {
-        $version = $this->index_version;
         if ($this->current_block_fresh != true) {
             if ($this->direction == self::ASCENDING) {
-                $remaining_postings = ($version < 3) ?
-                    IndexShard::numDocsOrLinks(
-                    $this->next_offset, $this->last_offset) :
-                    $this->last_offset - $this->next_offset;
+                $remaining_postings = $this->last_offset - $this->next_offset;
                 $num_docs = min($this->results_per_block, $remaining_postings);
                 $delta_sign = 1;
             } else {
-                if ($version < 3) {
-                    $total_guess = IndexShard::numDocsOrLinks(
-                        $this->start_offset, $this->next_offset);
-                    $num_docs = $total_guess % $this->results_per_block;
-                    if ($num_docs == 0) {
-                        $num_docs = $this->results_per_block;
-                    } else {
-                        $num_docs = IndexShard::numDocsOrLinks(
-                            $this->start_offset, $this->last_offset) %
-                            $this->results_per_block;
-                        if ($num_docs == 0) {
-                            $num_docs = $this->results_per_block;
-                        }
-                    }
-                } else {
-                    $remaining_postings = $this->next_offset -
-                        $this->start_offset + 1;
-                    $num_docs = min($this->results_per_block,
-                        $remaining_postings);
-                }
+                $remaining_postings = $this->next_offset -
+                    $this->start_offset + 1;
+                $num_docs = min($this->results_per_block,
+                    $remaining_postings);
                 $delta_sign = -1;
             }
-            $posting_len = ($version < 3) ? IndexShard::POSTING_LEN : 1;
             $this->next_offset = $this->current_offset;
-            $this->next_offset += $delta_sign * $posting_len * $num_docs;
+            $this->next_offset += $delta_sign * $num_docs;
             if ($num_docs <= 0) {
                 return;
             }
@@ -881,22 +836,9 @@ class WordIterator extends IndexBundleIterator
             $this->advanceGeneration($gen_doc_offset[0]);
             $this->next_offset = $this->current_offset;
         }
-        if ($this->index_version < 3) {
-            $index = IndexManager::getIndex($this->index_name);
-            $index->setCurrentShard($this->current_generation, true);
-            $shard = $index->getCurrentShard();
-        }
         if ($this->current_generation == $gen_doc_offset[0]) {
-            if ($this->index_version < 3) {
-                $end_offset = ($is_ascending) ? $this->last_offset :
-                    $this->start_offset;
-                $offset_pair = $shard->nextPostingOffsetDocOffset(
-                    $this->next_offset, $end_offset, $gen_doc_offset[1],
-                    $this->direction);
-            } else {
-                $offset_pair = $this->nextDocIndexOffsetPair(
-                    $gen_doc_offset[1]);
-            }
+            $offset_pair = $this->nextDocIndexOffsetPair(
+                $gen_doc_offset[1]);
             if ($offset_pair === false) {
                 $this->advanceGeneration();
                 $this->next_offset = $this->current_offset;
@@ -906,13 +848,10 @@ class WordIterator extends IndexBundleIterator
                 $this->next_offset = $this->current_offset;
             }
         }
-        $posting_len = ($this->index_version < 3) ? IndexShard::POSTING_LEN : 1;
         if ($is_ascending) {
-            $this->seen_docs = ($this->current_offset - $this->start_offset) /
-                $posting_len;
+            $this->seen_docs = ($this->current_offset - $this->start_offset);
         } else {
-            $this->seen_docs = ($this->last_offset - $this->current_offset) /
-                $posting_len;
+            $this->seen_docs = ($this->last_offset - $this->current_offset);
         }
         $this->current_block_fresh = false;
     }
@@ -1029,17 +968,11 @@ class WordIterator extends IndexBundleIterator
                 $this->generation_pointer < $this->num_generations :
                 $this->generation_pointer >= 0;
             if ($gen_check) {
-                if ($this->index_version < 3) {
-                    list($this->current_generation, $this->start_offset,
-                        $this->last_offset, )
-                        = $this->dictionary_info[$this->generation_pointer];
-                } else {
-                    $partition_info =
-                        $this->dictionary_info[$this->generation_pointer];
-                    $this->current_generation = $partition_info['PARTITION'];
-                    $this->start_offset = 0;
-                    $this->last_offset = ($partition_info['NUM_DOCS'] ?? 1) - 1;
-                }
+                $partition_info =
+                    $this->dictionary_info[$this->generation_pointer];
+                $this->current_generation = $partition_info['PARTITION'];
+                $this->start_offset = 0;
+                $this->last_offset = ($partition_info['NUM_DOCS'] ?? 1) - 1;
                 $this->current_offset = ($is_ascending) ? $this->start_offset:
                     $this->last_offset;
             }
@@ -1061,8 +994,7 @@ class WordIterator extends IndexBundleIterator
     public function getGenerationPostings($generation)
     {
         static $test_time = 0;
-        if ($this->index_version < 3 ||
-            empty($this->dictionary_info[$generation])) {
+        if (empty($this->dictionary_info[$generation])) {
             return [];
         }
         $generation_info = $this->dictionary_info[$generation];
@@ -1071,24 +1003,13 @@ class WordIterator extends IndexBundleIterator
             return $generation_info['POSTINGS']; //already loaded
         }
         $index = IndexManager::getIndex($this->index_name);
-        if ($this->index_version < "3.2") {
-            if (empty($generation_info['LAST_BLOB_LEN'])) {
-                $postings_entry = "";
-            } else {
-                $postings_entry = $index->dictionary->getArchive(
-                    $this->archive_file, $generation_info['POSTINGS'],
-                    $generation_info['LAST_BLOB_LEN']);
-                unset($this->dictionary_info[$generation]['LAST_BLOB_LEN']);
-            }
+        if (empty($generation_info['POSTINGS_OFFSET']) ||
+            empty($generation_info['POSTINGS_LEN'])) {
+            $postings_entry = "";
         } else {
-            if (empty($generation_info['POSTINGS_OFFSET']) ||
-                empty($generation_info['POSTINGS_LEN'])) {
-                $postings_entry = "";
-            } else {
-                $postings_entry = $index->getPostingsString($generation,
-                    $generation_info['POSTINGS_OFFSET'],
-                    $generation_info['POSTINGS_LEN']);
-            }
+            $postings_entry = $index->getPostingsString($generation,
+                $generation_info['POSTINGS_OFFSET'],
+                $generation_info['POSTINGS_LEN']);
         }
         $postings = [];
         if (!empty($postings_entry)) {
@@ -1118,15 +1039,8 @@ class WordIterator extends IndexBundleIterator
             $this->generation_pointer >= $this->num_generations) :
             ($this->current_offset < $this->start_offset||
             $this->generation_pointer < -1);
-        if ($offset_check) {
-            return -1;
-        }
-        if ($this->index_version < 3) {
-            $index = IndexManager::getIndex($this->index_name);
-            $index->setCurrentShard($this->current_generation, true);
-            $this->current_doc_offset = $index->getCurrentShard(
-                )->docOffsetFromPostingOffset($this->current_offset);
-        } else if (empty($this->dictionary_info[$this->generation_pointer])){
+        if ($offset_check ||
+            empty($this->dictionary_info[$this->generation_pointer])) {
             return -1;
         } else {
             $partition_info = $this->dictionary_info[$this->generation_pointer];
diff --git a/src/models/ParallelModel.php b/src/models/ParallelModel.php
index 5066c23eb..4067f56c8 100755
--- a/src/models/ParallelModel.php
+++ b/src/models/ParallelModel.php
@@ -286,18 +286,13 @@ class ParallelModel extends Model
                 } else {
                     return false;
                 }
-                if (IndexManager::getVersion($index_name) < 3) {
-                    $summary =
-                        $index_archive->getPage($summary_offset, $generation);
-                } else {
-                    $summary =
-                        $index_archive->getSummary($summary_offset,
+                $summary =
+                    $index_archive->getSummary($summary_offset,
+                    $generation);
+                if ($return_cached_page) {
+                    $summary[self::PAGE] =
+                        $index_archive->getCachePage($summary_offset,
                         $generation);
-                    if ($return_cached_page) {
-                        $summary[self::PAGE] =
-                            $index_archive->getCachePage($summary_offset,
-                            $generation);
-                    }
                 }
             } else {
                 $test_time = microtime(true);
@@ -426,21 +421,12 @@ class ParallelModel extends Model
             $index_name = $this->index_name;
         }
         $index_archive = IndexManager::getIndex($index_name);
-        $index_version = IndexManager::getVersion($index_name);
-        $make_term_id = ($index_version < 3) ? C\NS_LIB . "crawlHashWord" :
-            C\NS_LIB . "canonicalTerm";
+        $make_term_id = C\NS_LIB . "canonicalTerm";
         if (!$index_archive) {
             return false;
         }
         $num_retrieved = 0;
         $summary_offset = null;
-        if ($index_version < 3 &&
-            !isset($index_archive->generation_info['ACTIVE'])) {
-            return false;
-        }
-        if ($index_version < 3) {
-            $num_generations = $index_archive->generation_info['ACTIVE'];
-        }
         $add_info = (strncmp($url_or_key, "info:", 5) == 0) ? "" :
             "info:";
         $hash_key = ($is_key) ? $make_term_id($url_or_key, true) :
@@ -452,7 +438,7 @@ class ParallelModel extends Model
         if (!isset($info[0][4]) && empty($info['ROWS'][0])) {
             return false;
         }
-        $term_id = ($index_version < 3) ? $info[0][4] : $hash_key;
+        $term_id = $hash_key;
         if (!empty($info['ROWS'][0])) {
             $generation = $info['ROWS'][0]['PARTITION'];
         }
@@ -462,8 +448,7 @@ class ParallelModel extends Model
             if (!$doc_info) {
                 return false;
             }
-            $summary_offset = ($index_version < 3) ?
-                $doc_info[self::SUMMARY_OFFSET] : $doc_info[self::KEY];
+            $summary_offset = $doc_info[self::KEY];
             $generation = $doc_info[self::GENERATION];
         } else {
             return false;
diff --git a/src/models/PhraseModel.php b/src/models/PhraseModel.php
index afdce9287..d7c00cb2c 100755
--- a/src/models/PhraseModel.php
+++ b/src/models/PhraseModel.php
@@ -749,7 +749,6 @@ class PhraseModel extends ParallelModel
             $quote_state = ($quote_state) ? false : true;
         }
         //stemmed, if have stemmer
-        $index_version = IndexManager::getVersion($index_name);
         $add_metas = $found_metas;
         $words = array_merge($base_words, $add_metas);
         if (count($words) == 0 && count($disallow_phrases) > 0) {
@@ -783,8 +782,7 @@ class PhraseModel extends ParallelModel
                 $this->query_info['QUERY'] .= ")<br>";
             }
         }
-        $make_term_id = ($index_version < 3) ? C\NS_LIB . "crawlHashWord" :
-            C\NS_LIB . "canonicalTerm";
+        $make_term_id = C\NS_LIB . "canonicalTerm";
         if (isset($words) && count($words) == 1 &&
             count($disallow_phrases) < 1 && !strpos($words[0], " ")) {
             $phrase_string = $words[0];
@@ -825,9 +823,6 @@ class PhraseModel extends ParallelModel
                         "<br>";
                 }
                 $disallow_keys[] = $make_term_id($disallow_stem[0]);
-                if ($index_version == 0) {
-                    $disallow_keys[] = L\crawlHash($word);
-                }
             }
             if ($word_keys !== null) {
                 $word_struct = ["KEYS" => $word_keys,
@@ -1852,9 +1847,7 @@ class PhraseModel extends ParallelModel
                 $index_name = $word_structs[0]["INDEX_NAME"];
             }
             //we assume all indexes in use of the same version
-            $index_version = IndexManager::getVersion($index_name);
-            $make_term_id = ($index_version < 3) ? C\NS_LIB . "crawlHashWord" :
-                C\NS_LIB . "canonicalTerm";
+            $make_term_id = C\NS_LIB . "canonicalTerm";
             $doc_iterate_hashes = [substr($make_term_id("site:any"), 0, 9),
                 substr(L\crawlHash("site:any"), 0, 9),
                 substr($make_term_id("site:doc"), 0, 9),
@@ -1924,8 +1917,7 @@ class PhraseModel extends ParallelModel
                         $min_group_override = true;
                     } else {
                         $distinct_key = $distinct_word_keys[$i];
-                        $distinct_key_id = ($index_version < 3) ?
-                            L\unbase64Hash($distinct_key) : $distinct_key;
+                        $distinct_key_id = $distinct_key;
                         $direction = self::ASCENDING;
                         $actual_index_name = $index_name;
                         if (($index_name[0] == "-")) {
ViewGit