Improve decode error check for postings, a=chris

Chris Pollett [2022-08-20, 21h]
Improve decode error check for postings, a=chris
Filename
src/library/IndexDocumentBundle.php
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index e20249913..c601c0e3f 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -1415,9 +1415,15 @@ class IndexDocumentBundle implements CrawlConstants
         $sum_frequencies = 0;
         $doc_map_index = 0;
         $positions_offset = 0;
+        $len_posting_strings = strlen($postings_string);
         for ($i = 0; $i < $num_items; $i++) {
             $int_info = ord($postings_string[$current_pos]);
             $current_pos++;
+            $len_unpack_info = $unpack_len_map[$int_info];
+            if ($current_pos + $len_unpack_info >= $len_posting_strings) {
+                crawlLog("Posting decode error");
+                break; //sanity check break
+            }
             $pre_item = unpack($unpack_map[$int_info], $postings_string,
                 $current_pos);
             $item = $pre_item;
@@ -1426,12 +1432,8 @@ class IndexDocumentBundle implements CrawlConstants
             $doc_map_index += $pre_item["DOC_MAP_INDEX"];
             $positions_offset += $pre_item["POSITIONS_OFFSET"];
             $sum_frequencies += $pre_item["FREQUENCY"];
-            $current_pos += $unpack_len_map[$int_info];
+            $current_pos += $len_unpack_info;
             $items[] = $item;
-            if ($current_pos >= strlen($postings_string)) {
-                crawlLog("Posting decode error");
-                break; //sanity check break
-            }
         }
         return [$items, $sum_frequencies];
     }
ViewGit