Tweaks and simplifications to Word and IntersectIterator, a=chris
Tweaks and simplifications to Word and IntersectIterator, a=chris
diff --git a/src/library/IndexShard.php b/src/library/IndexShard.php
index 405b027f8..eca590ee5 100644
--- a/src/library/IndexShard.php
+++ b/src/library/IndexShard.php
@@ -688,7 +688,7 @@ class IndexShard extends PersistentStructure implements CrawlConstants
$next = $posting_end + 1;
$num_docs_or_links =
self::numDocsOrLinks($start_offset, $last_offset,
- $total_posting_len/$num_postings_so_far);
+ $total_posting_len / $num_postings_so_far);
list($doc_id, , $item) =
$this->makeItem($posting, $num_docs_or_links);
$results[$doc_id] = $item;
diff --git a/src/library/index_bundle_iterators/IndexBundleIterator.php b/src/library/index_bundle_iterators/IndexBundleIterator.php
index bfb445b40..185ad5ec4 100644
--- a/src/library/index_bundle_iterators/IndexBundleIterator.php
+++ b/src/library/index_bundle_iterators/IndexBundleIterator.php
@@ -151,19 +151,20 @@ abstract class IndexBundleIterator implements CrawlConstants
*/
public function genDocOffsetCmp($gen_doc1, $gen_doc2)
{
- //less generation
+ //less generation or greater
if ($gen_doc1[0] < $gen_doc2[0]) {
return -1;
+ } else if ($gen_doc1[0] > $gen_doc2[0]) {
+ return 1;
}
- //equal generation
- if ($gen_doc1[0] == $gen_doc2[0]) {
- if ($gen_doc1[1] == $gen_doc2[1]) {
- return 0; //equal offset
- } else if ($gen_doc1[1] < $gen_doc2[1]) {
- return -1; // less offset
- }
+ //less offset or greater
+ if ($gen_doc1[1] < $gen_doc2[1]) {
+ return -1;
+ } else if ($gen_doc1[1] > $gen_doc2[1]) {
+ return 1;
}
- return 1;
+ //equal
+ return 0;
}
/**
* Gets the current block of doc ids and score associated with the
diff --git a/src/library/index_bundle_iterators/IntersectIterator.php b/src/library/index_bundle_iterators/IntersectIterator.php
index a39dcf1a0..31a5e3736 100644
--- a/src/library/index_bundle_iterators/IntersectIterator.php
+++ b/src/library/index_bundle_iterators/IntersectIterator.php
@@ -107,7 +107,7 @@ class IntersectIterator extends IndexBundleIterator
* Number of seconds before timeout and stop
* syncGenDocOffsetsAmongstIterators if slow
*/
- const SYNC_TIMEOUT = 4;
+ const SYNC_TIMEOUT = 3;
/**
* Creates an intersect iterator with the given parameters.
*
@@ -400,13 +400,13 @@ class IntersectIterator extends IndexBundleIterator
public function syncGenDocOffsetsAmongstIterators()
{
if ($this->sync_timer_on) {
- $timer_on = true;
if ($this->sync_time === 0) {
$this->sync_time = time();
}
$time_out = self::SYNC_TIMEOUT + $this->sync_time;
} else {
- $timer_on = false;
+ //timer off: use a deadline time() can never pass (sync_time may be 0)
+ $time_out = PHP_INT_MAX;
}
if (($biggest_gen_offset = $this->index_bundle_iterators[
0]->currentGenDocOffsetWithWord()) == -1) {
@@ -415,16 +415,12 @@ class IntersectIterator extends IndexBundleIterator
$gen_doc_offset[0] = $biggest_gen_offset;
$all_same = true;
for ($i = 1; $i < $this->num_iterators; $i++) {
- $cur_gen_doc_offset =
- $this->index_bundle_iterators[
- $i]->currentGenDocOffsetWithWord();
- $gen_doc_offset[$i] = $cur_gen_doc_offset;
- if ($timer_on && time() > $time_out) {
- return -1;
- }
- if ($cur_gen_doc_offset == -1) {
+ if ((($cur_gen_doc_offset = $this->index_bundle_iterators[
+ $i]->currentGenDocOffsetWithWord()) == -1) ||
+ time() > $time_out) {
return -1;
}
+ $gen_doc_offset[$i] = $cur_gen_doc_offset;
$gen_doc_cmp = $this->genDocOffsetCmp($cur_gen_doc_offset,
$biggest_gen_offset);
if ($gen_doc_cmp > 0) {
@@ -440,20 +436,18 @@ class IntersectIterator extends IndexBundleIterator
$last_changed = -1;
$i = 0;
while($i != $last_changed) {
- if ($timer_on && time() > $time_out) {
+ if (time() > $time_out) {
return -1;
}
- if ($last_changed == -1) $last_changed = 0;
if ($this->genDocOffsetCmp($gen_doc_offset[$i],
$biggest_gen_offset) < 0) {
$iterator = $this->index_bundle_iterators[$i];
$iterator->advance($biggest_gen_offset);
- $cur_gen_doc_offset =
- $iterator->currentGenDocOffsetWithWord();
- $gen_doc_offset[$i] = $cur_gen_doc_offset;
- if ($cur_gen_doc_offset == -1) {
+ if( ($cur_gen_doc_offset =
+ $iterator->currentGenDocOffsetWithWord()) == -1) {
return -1;
}
+ $gen_doc_offset[$i] = $cur_gen_doc_offset;
if ($this->genDocOffsetCmp($cur_gen_doc_offset,
$biggest_gen_offset) > 0) {
$last_changed = $i;
@@ -463,6 +457,7 @@ class IntersectIterator extends IndexBundleIterator
$i++;
if ($i == $this->num_iterators) {
$i = 0;
+ $last_changed = max($last_changed, 0);
}
}
return 1;
diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php
index a169ef11a..42c6d6052 100644
--- a/src/library/index_bundle_iterators/WordIterator.php
+++ b/src/library/index_bundle_iterators/WordIterator.php
@@ -409,14 +409,57 @@ class WordIterator extends IndexBundleIterator
*/
public function advance($gen_doc_offset = null)
{
- if ($gen_doc_offset != null) { //only advance if $gen_doc_offset bigger
- $cur_gen_doc_offset = $this->currentGenDocOffsetWithWord();
- if ($cur_gen_doc_offset == -1 ||
- $this->genDocOffsetCmp($cur_gen_doc_offset,
- $gen_doc_offset) >= 0) {
- return;
+ if ($gen_doc_offset == null) {
+ $this->plainAdvance();
+ return;
+ }
+ $cur_gen_doc_offset = $this->currentGenDocOffsetWithWord();
+ if ($cur_gen_doc_offset == -1 ||
+ $this->genDocOffsetCmp($cur_gen_doc_offset,
+ $gen_doc_offset) >= 0) {
+ return;
+ }
+ $this->plainAdvance();
+ if ($this->current_generation < $gen_doc_offset[0]) {
+ $this->advanceGeneration($gen_doc_offset[0]);
+ $this->next_offset = $this->current_offset;
+ }
+ $using_feeds = $this->using_feeds && $this->use_feeds;
+ if ($using_feeds) {
+ $shard = IndexManager::getIndex("feed");
+ $last = $this->feed_end;
+ } else {
+ $index = IndexManager::getIndex($this->index_name);
+ $index->setCurrentShard($this->current_generation, true);
+ $shard = $index->getCurrentShard();
+ $last = $this->last_offset;
+ }
+ if ($this->current_generation == $gen_doc_offset[0]) {
+ $offset_pair = $shard->nextPostingOffsetDocOffset(
+ $this->next_offset, $last, $gen_doc_offset[1]);
+ if ($offset_pair === false) {
+ $this->advanceGeneration();
+ $this->next_offset = $this->current_offset;
+ } else {
+ list($this->current_offset, $this->current_doc_offset) =
+ $offset_pair;
}
}
+ if ($this->current_generation == -1) {
+ $this->seen_docs = ($this->current_offset - $this->feed_start) /
+ IndexShard::POSTING_LEN;
+ } else {
+ $this->seen_docs = ($using_feeds) ? $this->feed_count : 0;
+ $this->seen_docs += ($this->current_offset - $this->start_offset) /
+ IndexShard::POSTING_LEN;
+ }
+ }
+ /**
+ * Forwards the iterator one group of docs. advance($gen_doc_offset) calls
+ * this alone if $gen_doc_offset is null, and before seeking otherwise
+ */
+ public function plainAdvance()
+ {
$this->advanceSeenDocs();
$this->current_doc_offset = null;
if ($this->current_offset < $this->next_offset) {
@@ -432,45 +475,6 @@ class WordIterator extends IndexBundleIterator
$this->advanceGeneration();
$this->next_offset = $this->current_offset;
}
- if ($gen_doc_offset !== null) {
- if ($this->current_generation < $gen_doc_offset[0]) {
- $this->advanceGeneration($gen_doc_offset[0]);
- $this->next_offset = $this->current_offset;
- }
- $using_feeds = $this->using_feeds && $this->use_feeds;
- if ($using_feeds) {
- $shard = IndexManager::getIndex("feed");
- $last = $this->feed_end;
- } else {
- $index = IndexManager::getIndex($this->index_name);
- $index->setCurrentShard($this->current_generation, true);
- $shard = $index->getCurrentShard();
- $last = $this->last_offset;
- }
-
- if ($this->current_generation == $gen_doc_offset[0]) {
- $offset_pair =
- $shard->nextPostingOffsetDocOffset($this->next_offset,
- $last, $gen_doc_offset[1]);
- if ($offset_pair === false) {
- $this->advanceGeneration();
- $this->next_offset = $this->current_offset;
- } else {
- list($this->current_offset,
- $this->current_doc_offset) = $offset_pair;
- }
- }
- if ($this->current_generation == -1) {
- $this->seen_docs =
- ($this->current_offset - $this->feed_start)/
- IndexShard::POSTING_LEN;
- } else {
- $this->seen_docs = ($using_feeds) ? $this->feed_count : 0;
- $this->seen_docs +=
- ($this->current_offset - $this->start_offset)/
- IndexShard::POSTING_LEN;
- }
- }
}
/**
* Switches which index shard is being used to return occurrences of