modify substitution code in case of large media wiki pages, a=chris

Chris Pollett [2013-07-17 18h]
modify substitution code in case of large media wiki pages, a=chris
Filename
lib/archive_bundle_iterators/mediawiki_bundle_iterator.php
diff --git a/lib/archive_bundle_iterators/mediawiki_bundle_iterator.php b/lib/archive_bundle_iterators/mediawiki_bundle_iterator.php
index 3ae52695e..5f4a8f273 100644
--- a/lib/archive_bundle_iterators/mediawiki_bundle_iterator.php
+++ b/lib/archive_bundle_iterators/mediawiki_bundle_iterator.php
@@ -379,7 +379,16 @@ class MediaWikiArchiveBundleIterator extends TextArchiveBundleIterator
         list($pre_page, $references) = $this->makeReferences($pre_page);
         $pre_page = preg_replace_callback('/(\A|\n){\|(.*?)\n\|}/s',
             "makeTableCallback", $pre_page);
-        $pre_page = preg_replace($this->matches, $this->replaces,$pre_page);
+        if(strlen($pre_page) < PAGE_RANGE_REQUEST) {
+            $pre_page = preg_replace($this->matches, $this->replaces,$pre_page);
+        } else {
+            $num_matches = count($this->matches);
+            for($i = 0; $i < $num_matches; $i++) {
+                crawlTimeoutLog("..Doing wiki substitutions..");
+                $pre_page = preg_replace($this->matches[$i],
+                    $this->replaces[$i], $pre_page);
+            }
+        }
         $pre_page = preg_replace("/{{Other uses}}/i",
                 "<div class='indent'>\"$1\". (<a href='".
                 $site[self::URL]. "_(disambiguation)'>$pre_url</a>)</div>",
ViewGit