Tweaks and simplifications to Word and IntersectIterator, a=chris

Chris Pollett [2019-07-14 01h]
Tweaks and simplifications to Word and IntersectIterator, a=chris
Filename
src/library/IndexShard.php
src/library/index_bundle_iterators/IndexBundleIterator.php
src/library/index_bundle_iterators/IntersectIterator.php
src/library/index_bundle_iterators/WordIterator.php
diff --git a/src/library/IndexShard.php b/src/library/IndexShard.php
index 405b027f8..eca590ee5 100644
--- a/src/library/IndexShard.php
+++ b/src/library/IndexShard.php
@@ -688,7 +688,7 @@ class IndexShard extends PersistentStructure implements CrawlConstants
             $next = $posting_end + 1;
             $num_docs_or_links =
                 self::numDocsOrLinks($start_offset, $last_offset,
-                    $total_posting_len/$num_postings_so_far);
+                    $total_posting_len / $num_postings_so_far);
             list($doc_id, , $item) =
                 $this->makeItem($posting, $num_docs_or_links);
             $results[$doc_id] = $item;
diff --git a/src/library/index_bundle_iterators/IndexBundleIterator.php b/src/library/index_bundle_iterators/IndexBundleIterator.php
index bfb445b40..185ad5ec4 100644
--- a/src/library/index_bundle_iterators/IndexBundleIterator.php
+++ b/src/library/index_bundle_iterators/IndexBundleIterator.php
@@ -151,19 +151,20 @@ abstract class IndexBundleIterator implements CrawlConstants
      */
      public function genDocOffsetCmp($gen_doc1, $gen_doc2)
      {
-        //less generation
+        //compare generations first: a smaller generation orders earlier
         if ($gen_doc1[0] < $gen_doc2[0]) {
             return -1;
+        } else if ($gen_doc1[0] > $gen_doc2[0]) {
+            return 1;
         }
-        //equal generation
-        if ($gen_doc1[0] == $gen_doc2[0]) {
-            if ($gen_doc1[1] == $gen_doc2[1]) {
-                return 0; //equal offset
-            } else if ($gen_doc1[1] < $gen_doc2[1]) {
-                return -1; // less offset
-            }
+        //generations are equal here, so order by offset within generation
+        if ($gen_doc1[1] < $gen_doc2[1]) {
+            return -1;
+        } else if ($gen_doc1[1] > $gen_doc2[1]) {
+            return 1;
         }
-        return 1;
+        //equal
+        return 0;
      }
     /**
      * Gets the current block of doc ids and score associated with the
diff --git a/src/library/index_bundle_iterators/IntersectIterator.php b/src/library/index_bundle_iterators/IntersectIterator.php
index a39dcf1a0..31a5e3736 100644
--- a/src/library/index_bundle_iterators/IntersectIterator.php
+++ b/src/library/index_bundle_iterators/IntersectIterator.php
@@ -107,7 +107,7 @@ class IntersectIterator extends IndexBundleIterator
      * Number of seconds before timeout and stop
      * syncGenDocOffsetsAmongstIterators if slow
      */
-    const SYNC_TIMEOUT = 4;
+    const SYNC_TIMEOUT = 3;
     /**
      * Creates an intersect iterator with the given parameters.
      *
@@ -400,13 +400,13 @@ class IntersectIterator extends IndexBundleIterator
     public function syncGenDocOffsetsAmongstIterators()
     {
         if ($this->sync_timer_on) {
-            $timer_on = true;
             if ($this->sync_time === 0) {
                 $this->sync_time  = time();
             }
             $time_out = self::SYNC_TIMEOUT + $this->sync_time;
         } else {
-            $timer_on = false;
+            //meant as no timer; NOTE(review): already expired if sync_time is 0
+            $time_out = 2 * (self::SYNC_TIMEOUT + $this->sync_time);
         }
         if (($biggest_gen_offset = $this->index_bundle_iterators[
              0]->currentGenDocOffsetWithWord()) == -1) {
@@ -415,16 +415,12 @@ class IntersectIterator extends IndexBundleIterator
         $gen_doc_offset[0] = $biggest_gen_offset;
         $all_same = true;
         for ($i = 1; $i < $this->num_iterators; $i++) {
-            $cur_gen_doc_offset =
-                $this->index_bundle_iterators[
-                    $i]->currentGenDocOffsetWithWord();
-            $gen_doc_offset[$i] = $cur_gen_doc_offset;
-            if ($timer_on && time() > $time_out) {
-                return -1;
-            }
-            if ($cur_gen_doc_offset == -1) {
+            if ((($cur_gen_doc_offset = $this->index_bundle_iterators[
+                $i]->currentGenDocOffsetWithWord()) == -1) ||
+                time() > $time_out) {
                 return -1;
             }
+            $gen_doc_offset[$i] = $cur_gen_doc_offset;
             $gen_doc_cmp = $this->genDocOffsetCmp($cur_gen_doc_offset,
                 $biggest_gen_offset);
             if ($gen_doc_cmp > 0) {
@@ -440,20 +436,18 @@ class IntersectIterator extends IndexBundleIterator
         $last_changed = -1;
         $i = 0;
         while($i != $last_changed) {
-            if ($timer_on && time() > $time_out) {
+            if (time() > $time_out) {
                 return -1;
             }
-            if ($last_changed == -1) $last_changed = 0;
             if ($this->genDocOffsetCmp($gen_doc_offset[$i],
                 $biggest_gen_offset) < 0) {
                 $iterator = $this->index_bundle_iterators[$i];
                 $iterator->advance($biggest_gen_offset);
-                $cur_gen_doc_offset =
-                    $iterator->currentGenDocOffsetWithWord();
-                $gen_doc_offset[$i] = $cur_gen_doc_offset;
-                if ($cur_gen_doc_offset == -1) {
+                if( ($cur_gen_doc_offset =
+                    $iterator->currentGenDocOffsetWithWord()) == -1) {
                     return -1;
                 }
+                $gen_doc_offset[$i] = $cur_gen_doc_offset;
                 if ($this->genDocOffsetCmp($cur_gen_doc_offset,
                     $biggest_gen_offset) > 0) {
                     $last_changed = $i;
@@ -463,6 +457,7 @@ class IntersectIterator extends IndexBundleIterator
             $i++;
             if ($i == $this->num_iterators) {
                 $i = 0;
+                $last_changed = max($last_changed, 0);
             }
         }
         return 1;
diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php
index a169ef11a..42c6d6052 100644
--- a/src/library/index_bundle_iterators/WordIterator.php
+++ b/src/library/index_bundle_iterators/WordIterator.php
@@ -409,14 +409,57 @@ class WordIterator extends IndexBundleIterator
      */
     public function advance($gen_doc_offset = null)
     {
-        if ($gen_doc_offset != null) { //only advance if $gen_doc_offset bigger
-            $cur_gen_doc_offset = $this->currentGenDocOffsetWithWord();
-            if ($cur_gen_doc_offset == -1 ||
-                $this->genDocOffsetCmp($cur_gen_doc_offset,
-                $gen_doc_offset) >= 0) {
-                return;
+        if ($gen_doc_offset == null) {
+            $this->plainAdvance();
+            return;
+        }
+        $cur_gen_doc_offset = $this->currentGenDocOffsetWithWord();
+        if ($cur_gen_doc_offset == -1 ||
+            $this->genDocOffsetCmp($cur_gen_doc_offset,
+            $gen_doc_offset) >= 0) {
+            return;
+        }
+        $this->plainAdvance();
+        if ($this->current_generation < $gen_doc_offset[0]) {
+            $this->advanceGeneration($gen_doc_offset[0]);
+            $this->next_offset = $this->current_offset;
+        }
+        $using_feeds = $this->using_feeds && $this->use_feeds;
+        if ($using_feeds) {
+            $shard = IndexManager::getIndex("feed");
+            $last = $this->feed_end;
+        } else {
+            $index = IndexManager::getIndex($this->index_name);
+            $index->setCurrentShard($this->current_generation, true);
+            $shard = $index->getCurrentShard();
+            $last = $this->last_offset;
+        }
+        if ($this->current_generation == $gen_doc_offset[0]) {
+            $offset_pair = $shard->nextPostingOffsetDocOffset(
+                $this->next_offset, $last, $gen_doc_offset[1]);
+            if ($offset_pair === false) {
+                $this->advanceGeneration();
+                $this->next_offset = $this->current_offset;
+            } else {
+                list($this->current_offset, $this->current_doc_offset) =
+                    $offset_pair;
             }
         }
+        if ($this->current_generation == -1) {
+            $this->seen_docs = ($this->current_offset - $this->feed_start) /
+                IndexShard::POSTING_LEN;
+        } else {
+            $this->seen_docs = ($using_feeds) ? $this->feed_count : 0;
+            $this->seen_docs += ($this->current_offset - $this->start_offset) /
+                IndexShard::POSTING_LEN;
+        }
+    }
+    /**
+     * Forwards the iterator one group of docs. advance($gen_doc_offset) calls
+     * this when $gen_doc_offset is null and also before seeking to an offset
+     */
+    public function plainAdvance()
+    {
         $this->advanceSeenDocs();
         $this->current_doc_offset = null;
         if ($this->current_offset < $this->next_offset) {
@@ -432,45 +475,6 @@ class WordIterator extends IndexBundleIterator
             $this->advanceGeneration();
             $this->next_offset = $this->current_offset;
         }
-        if ($gen_doc_offset !== null) {
-            if ($this->current_generation < $gen_doc_offset[0]) {
-                $this->advanceGeneration($gen_doc_offset[0]);
-                $this->next_offset = $this->current_offset;
-            }
-            $using_feeds = $this->using_feeds && $this->use_feeds;
-            if ($using_feeds) {
-                $shard = IndexManager::getIndex("feed");
-                $last = $this->feed_end;
-            } else {
-                $index = IndexManager::getIndex($this->index_name);
-                $index->setCurrentShard($this->current_generation, true);
-                $shard = $index->getCurrentShard();
-                $last = $this->last_offset;
-            }
-
-            if ($this->current_generation == $gen_doc_offset[0]) {
-                $offset_pair =
-                    $shard->nextPostingOffsetDocOffset($this->next_offset,
-                            $last, $gen_doc_offset[1]);
-                if ($offset_pair === false) {
-                    $this->advanceGeneration();
-                    $this->next_offset = $this->current_offset;
-                } else {
-                   list($this->current_offset,
-                        $this->current_doc_offset) = $offset_pair;
-                }
-            }
-            if ($this->current_generation == -1) {
-                $this->seen_docs =
-                    ($this->current_offset - $this->feed_start)/
-                        IndexShard::POSTING_LEN;
-            } else {
-                $this->seen_docs = ($using_feeds) ? $this->feed_count : 0;
-                $this->seen_docs +=
-                    ($this->current_offset - $this->start_offset)/
-                        IndexShard::POSTING_LEN;
-            }
-        }
     }
     /**
      * Switches which index shard is being used to return occurrences of
ViewGit