Remove support for old formats in getWordInfo
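Drop the legacy code paths for looking up word info. ArcTool no longer
reads postings from B+-tree archive files and instead prints a notice
that indexes older than version 4.0 must be upgraded with its migrate
command; IndexManager now always delegates to the index's getWordInfo.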
diff --git a/src/executables/ArcTool.php b/src/executables/ArcTool.php
index ab72c1c01..0ee914edc 100755
--- a/src/executables/ArcTool.php
+++ b/src/executables/ArcTool.php
@@ -422,28 +422,22 @@ class ArcTool extends DictionaryUpdater implements CrawlConstants
exit();
}
$found = true;
- echo "B+-tree node file name: ". $info['ARCHIVE_FILE'] . "\n";
echo "\nBundle Dictionary Entries for '$word':\n";
echo "====================================\n";
$i = 0;
- $archive_file = $info['ARCHIVE_FILE'];
- $is_old_index = $index->archive_info['VERSION'] < "3.2";
+ if ($index->archive_info['VERSION'] < "4.0") {
+ echo "Cannot compute results for non-migrated indexes.\n";
+ echo "Please use ArcTool's migrate command to upgrade index\n";
+ }
foreach ($info['ROWS'] as $record) {
if ($start_record < 0 || $record['PARTITION'] >= $start_record) {
echo "RECORD: $i\n";
echo "PARTITION: {$record['PARTITION']}\n";
echo "NUMBER OF DOCS: {$record['NUM_DOCS']}\n\n";
- if ($is_old_index) {
- $postings_offset = (empty($record['POSTINGS'])) ?
- -1: $record['POSTINGS'];
- $postings_len = (empty($record['LAST_BLOB_LEN'])) ?
- -1 : $record['LAST_BLOB_LEN'];
- } else {
- $postings_offset = (empty($record['POSTINGS_OFFSET'])) ?
- -1: $record['POSTINGS_OFFSET'];
- $postings_len = (empty($record['POSTINGS_LEN']))?
- -1 : $record['POSTINGS_LEN'];
- }
+ $postings_offset = (empty($record['POSTINGS_OFFSET'])) ?
+ -1: $record['POSTINGS_OFFSET'];
+ $postings_len = (empty($record['POSTINGS_LEN']))?
+ -1 : $record['POSTINGS_LEN'];
$is_postings_array = isset($record['POSTINGS']) &&
is_array($record['POSTINGS']);
if ($postings_offset == -1 && !$is_postings_array) {
@@ -454,15 +448,9 @@ class ArcTool extends DictionaryUpdater implements CrawlConstants
echo "By default list details of all postings\n";
var_dump($record['POSTINGS']);
} else if ($details) {
- if ($is_old_index) {
- $postings_entry = $index->dictionary->getArchive(
- $archive_file, $postings_offset,
- $postings_len);
- } else {
- $postings_entry = $index->getPostingsString(
- $record['PARTITION'], $postings_offset,
- $postings_len);
- }
+ $postings_entry = $index->getPostingsString(
+ $record['PARTITION'], $postings_offset,
+ $postings_len);
$postings = $index->postings_tools->unpack($postings_entry);
$index->deDeltaPostingsSumFrequencies($postings);
var_dump($postings);
diff --git a/src/library/IndexManager.php b/src/library/IndexManager.php
index 03091fca5..56a23aecb 100644
--- a/src/library/IndexManager.php
+++ b/src/library/IndexManager.php
@@ -287,64 +287,11 @@ class IndexManager implements CrawlConstants
}
$index = self::getIndex($index_name);
$pre_info = [];
- if (!empty($index) && method_exists($index, "getWordInfo")) {
- $start_generation = ($start_generation < 0) ? 0 : $start_generation;
- $info_cache[$lookup_hash] = $index->getWordInfo($term_id,
- $threshold, $start_generation, $num_distinct_generations,
- $with_remaining_total);
- if (count($info_cache) >= self::INFO_CACHE_SIZE) {
- array_shift($info_cache);
- }
- return $info_cache[$lookup_hash];
- } else if (!empty($index->dictionary)) {
- $pre_info =
- $index->dictionary->getWordInfo($term_id, true, $threshold,
- $start_generation, $num_distinct_generations, true);
- }
- $last_desired_generation = $start_generation +
- $num_distinct_generations;
- if (isset($index->generation_info['ACTIVE'])) {
- $active_generation = $index->generation_info['ACTIVE'];
- if ((empty($index->generation_info['LAST_DICTIONARY_SHARD']) ||
- $index->generation_info['LAST_DICTIONARY_SHARD'] <
- $active_generation) && ($active_generation <
- $last_desired_generation || $last_desired_generation < 0)) {
- $active_shard_file = $index->dir_name .
- "/posting_doc_shards/index" . $active_generation;
- if (file_exists($active_shard_file)) {
- if (!empty($index->non_merged_shard) &&
- !empty($index->non_merged_generation) &&
- $index->non_merged_generation == $active_generation) {
- $active_shard = $index->non_merged_shard;
- } else {
- $active_shard = new IndexShard($active_shard_file, 0,
- C\NUM_DOCS_PER_PARTITION, true);
- $index->non_merged_shard = $active_shard;
- $index->non_merged_generation = $active_generation;
- }
- $active_info = $active_shard->getWordInfo($term_id, true);
- if (is_array($active_info)) {
- if (empty($pre_info)) {
- $pre_info[0] = 0;
- $pre_info[1] = [];
- }
- $pre_info[1][] = [$active_generation,
- $active_info[0], $active_info[1], $active_info[2],
- $active_info[3]];
- $pre_info[0] += $active_info[2];
- }
- }
- }
- }
- if (!empty($pre_info[1])) {
- list($total, $info) = $pre_info;
- } else {
- $total = 0;
- $info = [];
- }
- $info_cache[$lookup_hash] = ($with_remaining_total) ?
- [$total, $info] : $info;
- if (count($info_cache) > self::INFO_CACHE_SIZE) {
+ $start_generation = ($start_generation < 0) ? 0 : $start_generation;
+ $info_cache[$lookup_hash] = $index->getWordInfo($term_id,
+ $threshold, $start_generation, $num_distinct_generations,
+ $with_remaining_total);
+ if (count($info_cache) >= self::INFO_CACHE_SIZE) {
array_shift($info_cache);
}
return $info_cache[$lookup_hash];