add some index data integrity checks, a=chris

Chris Pollett [2011-08-28 15h]
add some index data integrity checks, a=chris
Filename
bin/queue_server.php
diff --git a/bin/queue_server.php b/bin/queue_server.php
index c0d20e48f..d4bf84b8f 100755
--- a/bin/queue_server.php
+++ b/bin/queue_server.php
@@ -976,10 +976,17 @@ class QueueServer implements CrawlConstants
         $pos = 0;
         while($pos < $len_urls) {
             $len_site = unpackInt(substr($seen_urls_string, $pos ,4));
+            if($len_site > MAX_URL_LENGTH) {
+                crawlLog("To index data $file appears corrupted");
+                break; // data corrupted, so bail
+            }
             $pos += 4;
             $site_string = substr($seen_urls_string, $pos, $len_site);
             $pos += strlen($site_string);
-            $sites[self::SEEN_URLS][] = unserialize(gzuncompress($site_string));
+            $tmp_site = unserialize(gzuncompress($site_string));
+            if($tmp_site != "") {
+                $sites[self::SEEN_URLS][] = & $tmp_site;
+            }
         }

         $sites[self::INVERTED_INDEX] = IndexShard::load("fetcher_shard",
ViewGit