<?php
/**
 * SeekQuarry/Yioop --
 * Open Source Pure PHP Search Engine, Crawler, and Indexer
 *
 * Copyright (C) 2009 - 2021  Chris Pollett chris@pollett.org
 *
 * LICENSE:
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * END LICENSE
 *
 * @author Chris Pollett chris@pollett.org
 * @license https://www.gnu.org/licenses/ GPL3
 * @link https://www.seekquarry.com/
 * @copyright 2009 - 2021
 * @filesource
 */
namespace seekquarry\yioop\library\index_bundle_iterators;

use seekquarry\yioop\configs as C;
use seekquarry\yioop\library as L;
use seekquarry\yioop\library\IndexShard;
use seekquarry\yioop\library\IndexDocumentBundle;
use seekquarry\yioop\library\IndexManager;
use seekquarry\yioop\library\PartitionDocumentBundle;

/**
 * Used to iterate through the documents associated with a word in
 * an IndexArchiveBundle. It also makes it easy to get the summaries
 * of these documents.
 *
 * A description of how words and the documents containing them are stored
 * is given in the documentation of IndexArchiveBundle.
 *
 * Two index layouts are handled, selected by $index_version:
 * versions less than 3 read postings out of IndexShard's using byte
 * offsets; versions 3 and above read per-partition posting arrays where
 * offsets are array indices (0 .. NUM_DOCS - 1).
 *
 * @author Chris Pollett
 * @see IndexArchiveBundle
 */
class WordIterator extends IndexBundleIterator
{
    /**
     * Multiplier used when computing the DOC_RANK of a posting whose stored
     * score is not a timestamp (see getDocKeyPositionsScoringInfo)
     */
    const DOC_RANK_WEIGHT = 50;
    /**
     * Host Key position + 1 (first char says doc, inlink or external link)
     */
    const HOST_KEY_POS = 17;
    /**
     * Length of a doc key part
     */
    const KEY_LEN = 8;
    /**
     * Word key above in our modified base 64 encoding
     * @var string
     */
    public $base64_word_key;
    /**
     * The current value of the doc_offset of current posting if known
     * @var int
     */
    public $current_doc_offset;
    /**
     * Numeric number of current shard
     * @var int
     */
    public $current_generation;
    /**
     * The current byte offset in the IndexShard (if older index)
     * @var int
     */
    public $current_offset;
    /**
     * An array of shard generation and posting list offsets, lengths, and
     * numbers of documents
     * @var array
     */
    public $dictionary_info;
    /**
     * Keeps track of whether the word_iterator list is empty because the
     * word does not appear in the index shard
     * @var int
     */
    public $empty;
    /**
     * Model responsible for keeping track of edited and deleted search results
     * @var SearchfiltersModel
     */
    public $filter;
    /**
     * Index into dictionary_info corresponding to the current shard
     * @var int
     */
    public $generation_pointer;
    /**
     * The timestamp of the index is associated with this iterator
     * @var string
     */
    public $index_name;
    /**
     * Version of the index as reported by IndexManager::getVersion. Values
     * less than 3 select the IndexShard code paths in this class
     * @var int
     */
    public $index_version;
    /**
     * Whether word key corresponds to a meta word
     * @var string
     */
    public $is_meta;
    /**
     * Last Offset of word occurrence in the IndexShard
     * @var int
     */
    public $last_offset;
    /**
     * The next byte offset in the IndexShard
     * @var int
     */
    public $next_offset;
    /**
     * Used to keep track of whether getWordInfo might still get more
     * data on the search terms as advance generations
     * @var bool
     */
    public $no_more_generations;
    /**
     * The total number of shards that have data for this word
     * @var int
     */
    public $num_generations;
    /**
     * First shard generation that word info was obtained for
     * @var int
     */
    public $start_generation;
    /**
     * Starting Offset of word occurrence in the IndexShard
     * @var int
     */
    public $start_offset;
    /**
     * hash of word or phrase that the iterator iterates over
     * @var string
     */
    public $word_key;
    /**
     * Creates a word iterator with the given parameters.
     *
     * @param string $word_key hash of word or phrase to iterate docs of
     * @param string $index_name time_stamp of the index to use
     * @param bool $raw if false, $word_key is treated as being in our
     *  modified base64 encoding and is decoded before use; if true it is
     *  used as given
     * @param SearchfiltersModel $filter Model responsible for keeping track
     *  of edited and deleted search results
     * @param int $results_per_block the maximum number of results that can
     *  be returned by a findDocsWithWord call
     * @param int $direction when results are access from $index_name in
     *  which order they should be presented. self::ASCENDING is from first
     *  added to last added, self::DESCENDING is from last added to first
     *  added. Note: this value is not saved permanently. So you
     *  could in theory open two read only versions of the same bundle but
     *  reading the results in different directions
     */
    public function __construct($word_key, $index_name, $raw = false,
        $filter = null, $results_per_block =
        IndexBundleIterator::RESULTS_PER_BLOCK, $direction = self::ASCENDING)
    {
        if ($raw == false) {
            //get rid of our modified base64 encoding
            $word_key = L\unbase64Hash($word_key);
        }
        $this->is_meta = (strpos(substr($word_key, 9), ":") !== false);
        $this->direction = $direction;
        $this->filter = $filter;
        $this->word_key = $word_key;
        $this->base64_word_key = L\base64Hash($word_key);
        $this->index_name = $index_name;
        $this->termInfoIteratorFields($index_name, $word_key);
        $this->current_doc_offset = null;
        $this->results_per_block = $results_per_block;
        $this->current_block_fresh = false;
        $this->start_generation = ($direction == self::ASCENDING) ? 0 :
            "ACTIVE";
        if (!$this->empty) {
            // dictionary info was just fetched above, so skip refetching it
            $this->reset(true);
        }
    }
    /**
     * Returns CrawlConstants::ASCENDING or CrawlConstants::DESCENDING
     * depending on the direction in which this iterator traverses the
     * underlying index archive bundle.
     *
     * @return int direction traversing underlying archive bundle
     */
    public function getDirection()
    {
        return $this->direction;
    }
    /**
     * Resets the iterator to the first document block that it could iterate
     * over
     *
     * @param bool $skip_recompute_field if true, the dictionary information
     *  currently held by the iterator is reused; otherwise it is fetched
     *  again with termInfoIteratorFields before the offsets are reset
     */
    public function reset($skip_recompute_field = false)
    {
        if (!$this->empty && !$skip_recompute_field) {
            $this->termInfoIteratorFields($this->index_name,
                $this->word_key);
        }
        /* emptiness is tested again as the recompute above may have
           changed it
         */
        if (!$this->empty) {
            $info = ($this->direction == self::ASCENDING) ?
                $this->dictionary_info[0] :
                $this->dictionary_info[$this->num_generations - 1];
            if ($this->index_version < 3) {
                list($this->current_generation, $this->start_offset,
                    $this->last_offset, ) = $info;
            } else {
                $this->current_generation = $info['PARTITION'];
                $this->start_offset = 0;
                $this->last_offset = $info['NUM_DOCS'] - 1;
            }
        } else {
            //we shouldn't be called when empty - but to be safe
            $this->start_offset = 0;
            $this->last_offset = -1;
            $this->num_generations = -1;
        }
        if ($this->direction == self::ASCENDING) {
            $this->current_offset = $this->start_offset;
            $this->generation_pointer = 0;
        } else {
            $this->current_offset = $this->last_offset;
            /*  reset pointer to the number of gens, which in reverse is the
                first one we want
             */
            $this->generation_pointer = $this->num_generations - 1;
        }
        $this->count_block = 0;
        $this->seen_docs = 0;
        $this->current_doc_offset = null;
    }
    /**
     * Looks up the index version and the dictionary information for a word
     * key using IndexManager and stores the results in this iterator's
     * fields (counts, $dictionary_info, $num_generations, $empty and
     * $no_more_generations).
     *
     * @param string $index_name time_stamp of the index to look up
     * @param string $word_key hash of word or phrase to get info for
     */
    protected function termInfoIteratorFields($index_name, $word_key)
    {
        $this->index_version = IndexManager::getVersion($index_name);
        $word_info = IndexManager::getWordInfo($index_name, $word_key,
            -1, -1, C\NUM_DISTINCT_GENERATIONS, true);
        if ($this->index_version < 3) {
            list($this->num_docs, $this->dictionary_info) = $word_info;
        } else {
            $this->total_num_docs = $word_info['TOTAL_NUM_DOCS'] ?? 0;
            $this->total_num_docs_and_links =
                $word_info['TOTAL_NUM_LINKS_AND_DOCS'] ?? 0;
            $this->max_items_per_partition =
                $word_info['MAX_ITEMS_PER_PARTITION'] ??
                PartitionDocumentBundle::MAX_ITEMS_PER_FILE;
            $this->total_number_of_partitions =
                $word_info['TOTAL_NUMBER_OF_PARTITIONS'] ?? 0;
            $this->num_docs = $word_info['TOTAL_COUNT'] ?? 0;
            $this->num_occurrences = $word_info['TOTAL_OCCURRENCES'] ?? 0;
            $this->dictionary_info = $word_info['ROWS'] ?? [];
            /* NOTE(review): key spelling kept exactly as found; confirm it
               matches what IndexManager::getWordInfo returns
             */
            $this->threshold_exceeded = $word_info['THESHOLD_EXCEEDED'] ??
                false;
            $this->archive_file = $word_info['ARCHIVE_FILE'] ?? "";
        }
        if (empty($this->dictionary_info)) {
            $this->empty = true;
            $this->num_generations = 0;
        } else {
            if ($this->index_version < 3) {
                // order by key, then reindex so pointers run 0..n-1
                ksort($this->dictionary_info);
                $this->dictionary_info =
                    array_values($this->dictionary_info);
            }
            $this->num_generations = count($this->dictionary_info);
            $this->empty = ($this->num_generations == 0);
        }
        $this->no_more_generations =
            ($this->num_generations < C\NUM_DISTINCT_GENERATIONS);
    }
    /**
     * Hook function used by currentDocsWithWord to return the current block
     * of docs if it is not cached
     *
     * @return mixed doc ids and score if there are docs left, -1 otherwise
     */
    public function findDocsWithWord()
    {
        if ($this->empty) {
            return -1;
        }
        $ascending = ($this->direction == self::ASCENDING);
        if ($ascending) {
            if (($this->generation_pointer >= $this->num_generations) ||
                ($this->generation_pointer == $this->num_generations - 1 &&
                $this->current_offset > $this->last_offset)) {
                return -1;
            }
        } else {
            if (($this->generation_pointer < 0) ||
                ($this->generation_pointer == 0 &&
                $this->current_offset < $this->start_offset)) {
                return -1;
            }
        }
        $pre_results = $this->getPostingsSliceResults();
        $results = [];
        $doc_key_len = self::KEY_LEN;
        foreach ($pre_results as $keys => $data) {
            $host_key = substr($keys, self::HOST_KEY_POS, self::KEY_LEN);
            if (!empty($this->filter) &&
                $this->filter->isFiltered($host_key)) {
                continue;
            }
            // inlinks is the domain of the inlink
            $key_parts = str_split($keys, $doc_key_len);
            $data[self::KEY] = $keys;
            if (isset($key_parts[2])) {
                list(, $data[self::HASH], $data[self::INLINKS]) =
                    $key_parts;
            } else {
                // keys without three parts are skipped
                continue;
            }
            $data[self::CRAWL_TIME] = $this->index_name;
            $results[$keys] = $data;
        }
        $this->count_block = count($results);
        if ($this->generation_pointer == $this->num_generations - 1
            && empty($pre_results)) {
            $results = -1;
        }
        $this->pages = $results;
        return $results;
    }
    /**
     * Gets the next block of at most $results_per_block postings for the
     * current generation, starting from $current_offset, and sets
     * $next_offset to where the following block would begin.
     *
     * For index versions less than 3 the slice is read from the current
     * IndexShard. Otherwise it is cut out of the partition's posting array
     * and augmented using getDocKeyPositionsScoringInfo.
     *
     * @return array postings for the block, empty if nothing is left in the
     *  current generation
     */
    public function getPostingsSliceResults()
    {
        $this->next_offset = $this->current_offset;
        $index = IndexManager::getIndex($this->index_name);
        if ($this->index_version < 3) {
            $index->setCurrentShard($this->current_generation, true);
            //the next call also updates next offset
            $shard = $index->getCurrentShard(true);
            $pre_results = $shard->getPostingsSlice($this->start_offset,
                $this->next_offset, $this->last_offset,
                $this->results_per_block, $this->direction);
            return $pre_results;
        }
        if ($this->direction == self::ASCENDING) {
            if ($this->current_offset < $this->start_offset) {
                $this->current_offset = $this->start_offset;
                $this->next_offset = $this->current_offset;
            }
            if ($this->next_offset > $this->last_offset) {
                return [];
            }
            $start_slice = $this->next_offset;
            $num_slice = min($this->results_per_block,
                $this->last_offset - $this->next_offset + 1);
            $this->next_offset += $num_slice;
        } else {
            if ($this->current_offset > $this->last_offset) {
                $this->current_offset = $this->last_offset;
                $this->next_offset = $this->current_offset;
            }
            if ($this->next_offset < $this->start_offset) {
                return [];
            }
            /* never take more than what remains down to start_offset,
               otherwise $start_slice below could become negative and
               array_slice would then count from the end of the array
             */
            $num_slice = min($this->results_per_block,
                $this->next_offset - $this->start_offset + 1);
            $this->next_offset -= $num_slice;
            $start_slice = $this->next_offset + 1;
        }
        $postings = $this->getGenerationPostings($this->generation_pointer);
        $postings = array_slice($postings, $start_slice, $num_slice);
        $key_postings = $this->getDocKeyPositionsScoringInfo($postings,
            $this->current_generation);
        return $key_postings;
    }
    /**
     * Given postings from a partition, looks up for each its document key,
     * position list, document length and description scores, and computes
     * DOC_RANK, RELEVANCE and SCORE values for it.
     *
     * @param array $postings postings to augment; each is read for its
     *  DOC_INDEX, FREQUENCY, POSITIONS_OFFSET and POSITIONS_LEN fields
     * @param int $partition partition the postings came from
     * @return array augmented postings keyed by document key
     */
    public function getDocKeyPositionsScoringInfo($postings, $partition)
    {
        $key_postings = [];
        $index = IndexManager::getIndex($this->index_name);
        $base_folder = $index->getPartitionBaseFolder($partition);
        $doc_key_len = IndexDocumentBundle::DOCID_PART_LEN;
        $two_key_len = 2 * $doc_key_len;
        $doc_map_filename = $base_folder . "/" .
            IndexDocumentBundle::DOC_MAP_FILENAME;
        $doc_map_tools = $index->doc_map_tools;
        $doc_map = $doc_map_tools->load($doc_map_filename);
        $doc_keys = array_keys($doc_map);
        $positions_filename = $base_folder . "/" .
            IndexDocumentBundle::POSITIONS_FILENAME;
        // on failure $fh is false and postings get empty position lists
        $fh = fopen($positions_filename, "r");
        $number_of_partitions = $this->total_number_of_partitions;
        // max() guards avoid log(0) and division by zero on missing counts
        $log_num_partitions = log(max(1, $number_of_partitions), 2) + 1;
        $num_doc_keys = count($doc_keys);
        $is_ascending = ($this->direction == self::ASCENDING);
        $total_sum_scores = $num_doc_keys * ($num_doc_keys + 1);
        $num_seen_partitions = ($is_ascending) ? $partition + 1 :
            max(1, $number_of_partitions - $partition);
        $occurences_per_doc = max($this->num_occurrences, 1) /
            max($this->total_num_docs, 1);
        $time = time();
        foreach ($postings as $posting) {
            $posting[self::GENERATION] = $partition;
            if ($fh !== false && $posting['POSITIONS_LEN'] > 0) {
                fseek($fh, $posting['POSITIONS_OFFSET']);
                $encoded_positions = fread($fh, $posting['POSITIONS_LEN']);
                $posting[self::POSITION_LIST] = L\decodePositionList(
                    $encoded_positions, $posting['FREQUENCY']);
            } else {
                $posting[self::POSITION_LIST] = [];
            }
            $doc_key = $doc_keys[$posting['DOC_INDEX']];
            if ($doc_key[$two_key_len] == 'd') {
                $posting[self::IS_DOC] = true;
            }
            $doc_info = $doc_map_tools->unpack($doc_map[$doc_key]);
            $posting[self::KEY] = $doc_key;
            // first doc map entry holds the doc length and stored score
            list($posting[self::DOC_LEN], $original_score) =
                array_values(array_shift($doc_info));
            /* a stored score between half the current time and the
               current time is treated as a timestamp
             */
            $is_timestamp_score = ($original_score <= $time &&
                $original_score > ($time >> 1));
            if ($is_timestamp_score) {
                $posting[self::SCORE] = 0.5 * log($time /
                    (max(1, $time - $original_score)), 2);
                $posting[self::DOC_RANK] = $posting[self::SCORE];
            } else {
                /* both directions are derived from the stored score; the
                   descending case uses its complement
                 */
                $posting[self::SCORE] = ($is_ascending) ?
                    $original_score / $total_sum_scores :
                    ($total_sum_scores - $original_score) /
                    $total_sum_scores;
                $posting[self::DOC_RANK] = self::DOC_RANK_WEIGHT *
                    $log_num_partitions * $posting[self::SCORE] /
                    $num_seen_partitions;
            }
            // second entry holds title length and number of description
            // scores which follow it
            list($posting['TITLE_LENGTH'], $num_description_scores) =
                array_values(array_shift($doc_info));
            $posting[self::DESCRIPTION_SCORES] = array_slice($doc_info, 0,
                $num_description_scores);
            if ($posting['FREQUENCY'] > 0) {
                $frequency = $this->frequencyNormalization(
                    $posting[self::DOC_LEN], $posting[self::POSITION_LIST],
                    $posting[self::DESCRIPTION_SCORES]);
                $posting[self::RELEVANCE] =
                    (log(1 + $occurences_per_doc, 2) + $frequency *
                    log(1 + 1 / $occurences_per_doc, 2)) /
                    ($frequency + 1);
            } else {
                $posting[self::RELEVANCE] = 1;
            }
            $posting[self::SCORE] = $posting[self::DOC_RANK] +
                $posting[self::RELEVANCE];
            // whatever remains after the description scores
            $posting[self::USER_RANKS] = array_slice($doc_info,
                $num_description_scores);
            $posting[self::INDEX_VERSION] = $this->index_version;
            $key_postings[$doc_key] = $posting;
        }
        if ($fh !== false) {
            fclose($fh);
        }
        return $key_postings;
    }
    /**
     * Computes a weighted term frequency for a document. Each position of
     * the term contributes the SCORE of the description entry located for
     * it by walking $descriptions_scores in order of POS; the sum is
     * scaled by 2 * log_2(1 + MAX_DESCRIPTION_LEN/(8 * $num_words)) and
     * divided by the larger of 1 and the first description's SCORE.
     *
     * @param int $num_words length of the document (values below 1 are
     *  treated as 1)
     * @param array $positions positions at which the term occurs
     * @param array $descriptions_scores arrays each with a POS and a SCORE
     *  field
     * @return number weighted frequency, or just the number of positions
     *  if $descriptions_scores is empty
     */
    public function frequencyNormalization($num_words, $positions,
        $descriptions_scores)
    {
        if (empty($descriptions_scores)) {
            return count($positions);
        }
        $num_words = max($num_words, 1);
        $normalization_factor =
            log(1 + C\MAX_DESCRIPTION_LEN / (8 * $num_words), 2);
        $first_score = $descriptions_scores[0]['SCORE'] ?? 1;
        $description_index = 0;
        $description_pos = $descriptions_scores[$description_index]['POS'];
        $num_scores = count($descriptions_scores);
        $weighted_frequency = 0;
        foreach ($positions as $position) {
            while ($description_pos < $position &&
                $description_index < $num_scores) {
                $description_pos =
                    $descriptions_scores[$description_index]['POS'];
                $description_index++;
            }
            $weight = $descriptions_scores[
                max($description_index - 1, 0)]['SCORE'];
            $weighted_frequency += $weight;
        }
        $frequency = 2 * $weighted_frequency * $normalization_factor /
            max($first_score, 1);
        return $frequency;
    }
    /**
     * Updates the seen_docs count during an advance() call
     */
    public function advanceSeenDocs()
    {
        $version = $this->index_version;
        if ($this->current_block_fresh != true) {
            if ($this->direction == self::ASCENDING) {
                $remaining_postings = ($version < 3) ?
                    IndexShard::numDocsOrLinks(
                    $this->next_offset, $this->last_offset) :
                    $this->last_offset - $this->next_offset;
                $num_docs = min($this->results_per_block,
                    $remaining_postings);
                $delta_sign = 1;
            } else {
                if ($version < 3) {
                    $total_guess = IndexShard::numDocsOrLinks(
                        $this->start_offset, $this->next_offset);
                    $num_docs = $total_guess % $this->results_per_block;
                    if ($num_docs == 0) {
                        $num_docs = $this->results_per_block;
                    } else {
                        $num_docs = IndexShard::numDocsOrLinks(
                            $this->start_offset, $this->last_offset) %
                            $this->results_per_block;
                        if ($num_docs == 0) {
                            $num_docs = $this->results_per_block;
                        }
                    }
                } else {
                    $remaining_postings = $this->next_offset -
                        $this->start_offset + 1;
                    $num_docs = min($this->results_per_block,
                        $remaining_postings);
                }
                $delta_sign = -1;
            }
            // older indexes move in byte offsets, newer in array indices
            $posting_len = ($version < 3) ? IndexShard::POSTING_LEN : 1;
            $this->next_offset = $this->current_offset;
            $this->next_offset += $delta_sign * $posting_len * $num_docs;
            if ($num_docs <= 0) {
                return;
            }
        } else {
            $num_docs = $this->count_block;
        }
        $this->current_block_fresh = false;
        $this->seen_docs += $num_docs;
    }
    /**
     * Forwards the iterator one group of docs
     * @param array $gen_doc_offset a generation, doc_offset pair. If not null,
     *  (in the ascending search case opposite for descending), the pair
     *  must be of greater than or equal generation, and if equal the
     *  next block must all have $doc_offsets larger than or equal to
     *  this value.
     */
    public function advance($gen_doc_offset = null)
    {
        if ($gen_doc_offset == null) {
            $this->plainAdvance();
            return;
        }
        $is_ascending = ($this->direction == self::ASCENDING);
        $cur_gen_doc_offset = $this->currentGenDocOffsetWithWord();
        if ($cur_gen_doc_offset == -1 ||
            $this->genDocOffsetCmp($cur_gen_doc_offset,
            $gen_doc_offset, $this->direction) >= 0) {
            // exhausted, or already at or beyond the requested pair
            return;
        }
        $advance_check = ($is_ascending) ?
            ($this->current_generation < $gen_doc_offset[0]) :
            ($this->current_generation > $gen_doc_offset[0]);
        if ($advance_check) {
            $this->advanceGeneration($gen_doc_offset[0]);
            $this->next_offset = $this->current_offset;
        }
        if ($this->index_version < 3) {
            $index = IndexManager::getIndex($this->index_name);
            $index->setCurrentShard($this->current_generation, true);
            $shard = $index->getCurrentShard();
        }
        if ($this->current_generation == $gen_doc_offset[0]) {
            if ($this->index_version < 3) {
                $end_offset = ($is_ascending) ? $this->last_offset :
                    $this->start_offset;
                $offset_pair = $shard->nextPostingOffsetDocOffset(
                    $this->next_offset, $end_offset, $gen_doc_offset[1],
                    $this->direction);
            } else {
                $offset_pair = $this->nextDocIndexOffsetPair(
                    $gen_doc_offset[1]);
            }
            if ($offset_pair === false) {
                // nothing suitable left in this generation
                $this->advanceGeneration();
                $this->next_offset = $this->current_offset;
            } else {
                list($this->current_offset, $this->current_doc_offset) =
                    $offset_pair;
                $this->next_offset = $this->current_offset;
            }
        }
        $posting_len = ($this->index_version < 3) ?
            IndexShard::POSTING_LEN : 1;
        if ($is_ascending) {
            $this->seen_docs = ($this->current_offset -
                $this->start_offset) / $posting_len;
        } else {
            $this->seen_docs = ($this->last_offset -
                $this->current_offset) / $posting_len;
        }
    }
    /**
     * For index versions 3 and above, searches the current generation's
     * postings, starting from $next_offset (or $current_offset if that is
     * not set), for the first posting in the direction of iteration whose
     * DOC_INDEX is not before $doc_offset. Steps of doubling size are
     * taken to bracket the target, then the bracket is bisected.
     *
     * @param int $doc_offset DOC_INDEX value to advance to
     * @return mixed array [posting offset, DOC_INDEX at that offset], or
     *  false if the generation has no posting at its end offset or its
     *  end posting is still before $doc_offset
     */
    public function nextDocIndexOffsetPair($doc_offset)
    {
        $is_ascending = ($this->direction == self::ASCENDING);
        $end_offset = ($is_ascending) ? $this->last_offset :
            $this->start_offset;
        $postings = $this->getGenerationPostings($this->generation_pointer);
        if (empty($postings[$end_offset])) {
            return false;
        }
        $last_doc = $postings[$end_offset]["DOC_INDEX"];
        if (($is_ascending && $last_doc < $doc_offset) ||
            (!$is_ascending && $last_doc > $doc_offset)) {
            return false;
        }
        $next_offset = ($this->next_offset ?? $this->current_offset);
        $last_offset = $next_offset;
        $next_doc = $postings[$next_offset]["DOC_INDEX"];
        $cmp = ($is_ascending) ?
            ($next_doc < $doc_offset && $next_offset <= $end_offset) :
            ($next_doc > $doc_offset && $next_offset >= $end_offset);
        $delta = ($is_ascending) ? 1 : -1;
        while ($cmp) {
            $last_offset = $next_offset;
            $next_offset += $delta;
            $delta *= 2;
            // stepping outside the array stops the loop via the default
            $next_doc = $postings[$next_offset]["DOC_INDEX"] ?? $doc_offset;
            $cmp = ($is_ascending) ?
                ($next_doc < $doc_offset && $next_offset <= $end_offset) :
                ($next_doc > $doc_offset && $next_offset >= $end_offset);
        }
        if (($is_ascending && $next_offset > $end_offset) ||
            (!$is_ascending && $next_offset < $end_offset)) {
            $next_offset = $end_offset;
        }
        while (abs($next_offset - $last_offset) > 1) {
            $mid_offset = ($next_offset + $last_offset) >> 1;
            $mid_doc = $postings[$mid_offset]["DOC_INDEX"];
            $cmp = ($is_ascending) ? ($mid_doc < $doc_offset) :
                ($mid_doc > $doc_offset);
            if ($cmp) {
                $last_offset = $mid_offset;
            } else {
                $next_offset = $mid_offset;
                $next_doc = $mid_doc;
            }
        }
        return [$next_offset, $next_doc];
    }
    /**
     * Forwards the iterator one group of docs. This is what's called
     * by @see advance($gen_doc_offset) if $gen_doc_offset is null
     */
    public function plainAdvance()
    {
        $is_ascending = ($this->direction == self::ASCENDING);
        $this->advanceSeenDocs();
        $this->current_doc_offset = null;
        $update_check = ($is_ascending) ?
            ($this->current_offset < $this->next_offset) :
            ($this->current_offset > $this->next_offset);
        if ($update_check) {
            $this->current_offset = $this->next_offset;
            $update_check = ($is_ascending) ?
                ($this->current_offset > $this->last_offset) :
                ($this->current_offset < $this->start_offset);
            if ($update_check) {
                // walked off the end of this generation
                $this->advanceGeneration();
                $this->next_offset = $this->current_offset;
            }
        } else {
            // no progress possible within this generation
            $this->advanceGeneration();
            $this->next_offset = $this->current_offset;
        }
    }
    /**
     * Switches which index shard is being used to return occurrences of
     * the word to the next shard containing the word
     *
     * @param int $generation generation to advance beyond
     */
    public function advanceGeneration($generation = null)
    {
        if ($generation === null) {
            $generation = $this->current_generation;
        }
        $is_ascending = ($this->direction == self::ASCENDING);
        do {
            $gen_check = ($is_ascending) ?
                ($this->generation_pointer < $this->num_generations) :
                ($this->generation_pointer >= 0);
            if ($gen_check) {
                if ($is_ascending) {
                    $this->generation_pointer++;
                } else {
                    $this->generation_pointer--;
                }
            }
            $gen_check = ($is_ascending) ?
                $this->generation_pointer < $this->num_generations :
                $this->generation_pointer >= 0;
            if ($gen_check) {
                if ($this->index_version < 3) {
                    list($this->current_generation, $this->start_offset,
                        $this->last_offset, ) =
                        $this->dictionary_info[$this->generation_pointer];
                } else {
                    $partition_info =
                        $this->dictionary_info[$this->generation_pointer];
                    $this->current_generation =
                        $partition_info['PARTITION'];
                    $this->start_offset = 0;
                    $this->last_offset =
                        ($partition_info['NUM_DOCS'] ?? 1) - 1;
                }
                $this->current_offset = ($is_ascending) ?
                    $this->start_offset : $this->last_offset;
            }
            if (!$this->no_more_generations) {
                $gen_check = ($is_ascending) ?
                    ($this->current_generation < $generation &&
                    $this->generation_pointer >= $this->num_generations) :
                    ($this->current_generation > $generation &&
                    $this->generation_pointer <= 0);
                if ($gen_check) {
                    // ran out of known generations, ask for more
                    $index_info = IndexManager::getWordInfo(
                        $this->index_name, $this->word_key, 0,
                        $this->num_generations,
                        C\NUM_DISTINCT_GENERATIONS, true);
                    if ($this->index_version < 3) {
                        list($estimated_remaining_total, $info) =
                            $index_info;
                    } else {
                        $estimated_remaining_total =
                            $index_info['TOTAL_COUNT'];
                        $info = $index_info["ROWS"];
                    }
                    if (count($info) > 0) {
                        $this->num_docs = $this->seen_docs +
                            $estimated_remaining_total;
                        ksort($info);
                        $this->dictionary_info = array_merge(
                            $this->dictionary_info, array_values($info));
                        $this->num_generations =
                            count($this->dictionary_info);
                        $this->no_more_generations =
                            count($info) < C\NUM_DISTINCT_GENERATIONS;
                        //will increment back to where were next loop
                        if ($is_ascending) {
                            $this->generation_pointer--;
                        } else {
                            $this->generation_pointer++;
                        }
                    }
                }
            }
            $gen_check = ($is_ascending) ?
                ($this->current_generation < $generation &&
                $this->generation_pointer < $this->num_generations) :
                ($this->current_generation > $generation &&
                $this->generation_pointer >= 0);
        } while ($gen_check);
    }
    /**
     * Returns the posting array for an entry of $dictionary_info (index
     * versions 3 and above only). The first time an entry is requested its
     * postings are read from the dictionary archive, unpacked, passed to
     * deDeltaPostingsSumFrequencies, and then stored back into
     * $dictionary_info so later calls return them directly.
     *
     * @param int $generation index into $dictionary_info (callers in this
     *  class pass $generation_pointer)
     * @return array postings of the entry, empty array for index versions
     *  less than 3 or if there is no such entry
     */
    public function getGenerationPostings($generation)
    {
        if ($this->index_version < 3 ||
            empty($this->dictionary_info[$generation])) {
            return [];
        }
        $generation_info = $this->dictionary_info[$generation];
        if (is_array($generation_info['POSTINGS'])) {
            return $generation_info['POSTINGS']; //already loaded
        }
        $index = IndexManager::getIndex($this->index_name);
        $postings_entry = $index->dictionary->getArchive(
            $this->archive_file, $generation_info['POSTINGS'],
            $generation_info['LAST_BLOB_LEN']);
        $postings = $index->postings_tools->unpack($postings_entry);
        $index->deDeltaPostingsSumFrequencies($postings);
        $this->dictionary_info[$generation]['POSTINGS'] = $postings;
        unset($this->dictionary_info[$generation]['LAST_BLOB_LEN']);
        return $postings;
    }
    /**
     * Gets the doc_offset and generation for the next document that
     * would be return by this iterator
     *
     * @return mixed an array with the desired document offset
     *  and generation; -1 on fail
     */
    public function currentGenDocOffsetWithWord()
    {
        if ($this->current_doc_offset !== null) {
            return [$this->current_generation, $this->current_doc_offset];
        }
        $is_ascending = ($this->direction == self::ASCENDING);
        /* NOTE(review): the descending test uses < -1 rather than < 0;
           reset() sets the pointer to -2 for an empty iterator. Confirm a
           pointer of -1 is meant to fall through to the checks below.
         */
        $offset_check = ($is_ascending) ?
            ($this->current_offset > $this->last_offset ||
            $this->generation_pointer >= $this->num_generations) :
            ($this->current_offset < $this->start_offset ||
            $this->generation_pointer < -1);
        if ($offset_check) {
            return -1;
        }
        if ($this->index_version < 3) {
            $index = IndexManager::getIndex($this->index_name);
            $index->setCurrentShard($this->current_generation, true);
            $this->current_doc_offset = $index->getCurrentShard(
                )->docOffsetFromPostingOffset($this->current_offset);
        } else if (empty($this->dictionary_info[$this->generation_pointer])) {
            return -1;
        } else {
            $partition_info =
                $this->dictionary_info[$this->generation_pointer];
            $this->current_generation = $partition_info['PARTITION'];
            $postings = $this->getGenerationPostings(
                $this->generation_pointer);
            $this->current_doc_offset =
                $postings[$this->current_offset]['DOC_INDEX'];
        }
        return [$this->current_generation, $this->current_doc_offset];
    }
}