Moving some code of group_iterator into word_iterator, a=chris
Moving the code that fills in the KEY, HASH and INLINKS fields from each doc key out of group_iterator and into word_iterator, a=chris
diff --git a/lib/index_bundle_iterators/group_iterator.php b/lib/index_bundle_iterators/group_iterator.php
index bf8d9555e..c4630f984 100644
--- a/lib/index_bundle_iterators/group_iterator.php
+++ b/lib/index_bundle_iterators/group_iterator.php
@@ -287,13 +287,7 @@ class GroupIterator extends IndexBundleIterator
foreach($pages as $doc_key => $doc_info) {
if(!is_array($doc_info) || $doc_info[self::SUMMARY_OFFSET] ==
self::NEEDS_OFFSET_FLAG) { continue;}
- $doc_info['KEY'] = $doc_key;
$hash_url = substr($doc_key, 0, IndexShard::DOC_KEY_LEN);
- $doc_info[self::HASH] = substr($doc_key,
- IndexShard::DOC_KEY_LEN, IndexShard::DOC_KEY_LEN);
- // inlinks is the domain of the inlink
- $doc_info[self::INLINKS] = substr($doc_key,
- 2 * IndexShard::DOC_KEY_LEN, IndexShard::DOC_KEY_LEN);
// initial aggregate domain score vector for given domain
if($doc_info[self::IS_DOC]) {
if(!isset($pre_out_pages[$hash_url])) {
@@ -453,13 +447,6 @@ class GroupIterator extends IndexBundleIterator
}
$need_docs = array_diff_key($need_docs, $this->grouped_keys);
foreach($pages as $doc_key => $doc_info) {
- $doc_info['KEY'] = $doc_key;
- $hash_url = substr($doc_key, 0, IndexShard::DOC_KEY_LEN);
- $doc_info[self::HASH] = substr($doc_key,
- IndexShard::DOC_KEY_LEN, IndexShard::DOC_KEY_LEN);
- // inlinks is the domain of the inlink
- $doc_info[self::INLINKS] = substr($doc_key,
- 2 * IndexShard::DOC_KEY_LEN, IndexShard::DOC_KEY_LEN);
$new_pages[$doc_key] = $doc_info;
if($doc_info[self::IS_DOC]) {
if(isset($need_docs[$hash_url])) {
diff --git a/lib/index_bundle_iterators/word_iterator.php b/lib/index_bundle_iterators/word_iterator.php
index d1fff9a29..cd08330c2 100644
--- a/lib/index_bundle_iterators/word_iterator.php
+++ b/lib/index_bundle_iterators/word_iterator.php
@@ -133,7 +133,7 @@ class WordIterator extends IndexBundleIterator
/** Length of a doc key*/
const KEY_LEN = 8;
-
+ static $start_time = 0;
/**
* Creates a word iterator with the given parameters.
*
@@ -249,16 +249,23 @@ class WordIterator extends IndexBundleIterator
//the next call also updates next offset
$shard = $this->index->getCurrentShard();
- $results = $shard->getPostingsSlice(
+ $pre_results = $shard->getPostingsSlice(
$this->start_offset,
$this->next_offset, $this->last_offset, $this->results_per_block);
- if($this->filter != NULL) {
- foreach($results as $keys => $data) {
- $host_key = substr($keys, self::HOST_KEY_POS, self::KEY_LEN);
- if(in_array($host_key, $this->filter) ) {
- unset($results[$keys]);
- }
+ $filter = ($this->filter == NULL) ? array() : $this->filter;
+ foreach($pre_results as $keys => $data) {
+ $host_key = substr($keys, self::HOST_KEY_POS, self::KEY_LEN);
+ if(in_array($host_key, $filter) ) {
+ continue;
}
+ $data['KEY'] = $keys;
+ $hash_url = substr($keys, 0, IndexShard::DOC_KEY_LEN);
+ $data[self::HASH] = substr($keys,
+ IndexShard::DOC_KEY_LEN, IndexShard::DOC_KEY_LEN);
+ // inlinks is the domain of the inlink
+ $data[self::INLINKS] = substr($keys,
+ 2 * IndexShard::DOC_KEY_LEN, IndexShard::DOC_KEY_LEN);
+ $results[$keys] = $data;
}
$this->count_block = count($results);
$this->pages = $results;