Remove support for old formats in getWordInfo

Chris Pollett [2024-01-17 23h]
Remove support for old formats in getWordInfo
Filename
src/executables/ArcTool.php
src/library/IndexManager.php
diff --git a/src/executables/ArcTool.php b/src/executables/ArcTool.php
index ab72c1c01..0ee914edc 100755
--- a/src/executables/ArcTool.php
+++ b/src/executables/ArcTool.php
@@ -422,28 +422,22 @@ class ArcTool extends DictionaryUpdater implements CrawlConstants
             exit();
         }
         $found = true;
-        echo "B+-tree node file name: ". $info['ARCHIVE_FILE'] . "\n";
         echo "\nBundle Dictionary Entries for '$word':\n";
         echo "====================================\n";
         $i = 0;
-        $archive_file = $info['ARCHIVE_FILE'];
-        $is_old_index = $index->archive_info['VERSION'] < "3.2";
+        if ($index->archive_info['VERSION'] < "4.0") {
+            echo "Cannot compute results for non-migrated indexes.\n";
+            echo "Please use ArcTool's migrate command to upgrade index\n";
+        }
         foreach ($info['ROWS'] as $record) {
             if ($start_record < 0 || $record['PARTITION'] >= $start_record) {
                 echo "RECORD: $i\n";
                 echo "PARTITION: {$record['PARTITION']}\n";
                 echo "NUMBER OF DOCS: {$record['NUM_DOCS']}\n\n";
-                if ($is_old_index) {
-                    $postings_offset = (empty($record['POSTINGS'])) ?
-                        -1: $record['POSTINGS'];
-                    $postings_len = (empty($record['LAST_BLOB_LEN'])) ?
-                        -1 : $record['LAST_BLOB_LEN'];
-                } else {
-                    $postings_offset = (empty($record['POSTINGS_OFFSET'])) ?
-                        -1: $record['POSTINGS_OFFSET'];
-                    $postings_len = (empty($record['POSTINGS_LEN']))?
-                        -1 : $record['POSTINGS_LEN'];
-                }
+                $postings_offset = (empty($record['POSTINGS_OFFSET'])) ?
+                    -1: $record['POSTINGS_OFFSET'];
+                $postings_len = (empty($record['POSTINGS_LEN']))?
+                    -1 : $record['POSTINGS_LEN'];
                 $is_postings_array = isset($record['POSTINGS']) &&
                     is_array($record['POSTINGS']);
                 if ($postings_offset == -1 && !$is_postings_array) {
@@ -454,15 +448,9 @@ class ArcTool extends DictionaryUpdater implements CrawlConstants
                     echo "By default list details of all postings\n";
                     var_dump($record['POSTINGS']);
                 } else if ($details) {
-                    if ($is_old_index) {
-                        $postings_entry = $index->dictionary->getArchive(
-                            $archive_file, $postings_offset,
-                            $postings_len);
-                    } else {
-                        $postings_entry = $index->getPostingsString(
-                            $record['PARTITION'], $postings_offset,
-                            $postings_len);
-                    }
+                    $postings_entry = $index->getPostingsString(
+                        $record['PARTITION'], $postings_offset,
+                        $postings_len);
                     $postings = $index->postings_tools->unpack($postings_entry);
                     $index->deDeltaPostingsSumFrequencies($postings);
                     var_dump($postings);
diff --git a/src/library/IndexManager.php b/src/library/IndexManager.php
index 03091fca5..56a23aecb 100644
--- a/src/library/IndexManager.php
+++ b/src/library/IndexManager.php
@@ -287,64 +287,11 @@ class IndexManager implements CrawlConstants
         }
         $index = self::getIndex($index_name);
         $pre_info = [];
-        if (!empty($index) && method_exists($index, "getWordInfo")) {
-            $start_generation = ($start_generation < 0) ? 0 : $start_generation;
-            $info_cache[$lookup_hash] = $index->getWordInfo($term_id,
-                $threshold, $start_generation, $num_distinct_generations,
-                $with_remaining_total);
-            if (count($info_cache) >= self::INFO_CACHE_SIZE) {
-                array_shift($info_cache);
-            }
-            return $info_cache[$lookup_hash];
-        } else if (!empty($index->dictionary)) {
-            $pre_info =
-                $index->dictionary->getWordInfo($term_id, true, $threshold,
-                $start_generation, $num_distinct_generations, true);
-        }
-        $last_desired_generation = $start_generation +
-            $num_distinct_generations;
-        if (isset($index->generation_info['ACTIVE'])) {
-            $active_generation = $index->generation_info['ACTIVE'];
-            if ((empty($index->generation_info['LAST_DICTIONARY_SHARD']) ||
-                $index->generation_info['LAST_DICTIONARY_SHARD'] <
-                $active_generation) && ($active_generation <
-                $last_desired_generation || $last_desired_generation < 0)) {
-                $active_shard_file = $index->dir_name .
-                    "/posting_doc_shards/index" . $active_generation;
-                if (file_exists($active_shard_file)) {
-                    if (!empty($index->non_merged_shard) &&
-                        !empty($index->non_merged_generation) &&
-                        $index->non_merged_generation == $active_generation) {
-                        $active_shard = $index->non_merged_shard;
-                    } else {
-                        $active_shard = new IndexShard($active_shard_file, 0,
-                            C\NUM_DOCS_PER_PARTITION, true);
-                        $index->non_merged_shard = $active_shard;
-                        $index->non_merged_generation = $active_generation;
-                    }
-                    $active_info = $active_shard->getWordInfo($term_id, true);
-                    if (is_array($active_info)) {
-                        if (empty($pre_info)) {
-                            $pre_info[0] = 0;
-                            $pre_info[1] = [];
-                        }
-                        $pre_info[1][] = [$active_generation,
-                            $active_info[0], $active_info[1], $active_info[2],
-                            $active_info[3]];
-                        $pre_info[0] += $active_info[2];
-                    }
-                }
-            }
-        }
-        if (!empty($pre_info[1])) {
-            list($total, $info) = $pre_info;
-        } else {
-            $total = 0;
-            $info = [];
-        }
-        $info_cache[$lookup_hash] = ($with_remaining_total) ?
-            [$total, $info] : $info;
-        if (count($info_cache) > self::INFO_CACHE_SIZE) {
+        $start_generation = ($start_generation < 0) ? 0 : $start_generation;
+        $info_cache[$lookup_hash] = $index->getWordInfo($term_id,
+            $threshold, $start_generation, $num_distinct_generations,
+            $with_remaining_total);
+        if (count($info_cache) >= self::INFO_CACHE_SIZE) {
             array_shift($info_cache);
         }
         return $info_cache[$lookup_hash];
ViewGit