modify substitution code in case of large media wiki pages, a=chris
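For pages whose length is at least PAGE_RANGE_REQUEST, apply the wiki substitution patterns one at a time, calling crawlTimeoutLog before each, instead of in a single preg_replace call; smaller pages still use the single call, a=chris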
diff --git a/lib/archive_bundle_iterators/mediawiki_bundle_iterator.php b/lib/archive_bundle_iterators/mediawiki_bundle_iterator.php
index 3ae52695e..5f4a8f273 100644
--- a/lib/archive_bundle_iterators/mediawiki_bundle_iterator.php
+++ b/lib/archive_bundle_iterators/mediawiki_bundle_iterator.php
@@ -379,7 +379,16 @@ class MediaWikiArchiveBundleIterator extends TextArchiveBundleIterator
list($pre_page, $references) = $this->makeReferences($pre_page);
$pre_page = preg_replace_callback('/(\A|\n){\|(.*?)\n\|}/s',
"makeTableCallback", $pre_page);
- $pre_page = preg_replace($this->matches, $this->replaces,$pre_page);
+ if(strlen($pre_page) < PAGE_RANGE_REQUEST) {
+ $pre_page = preg_replace($this->matches, $this->replaces,$pre_page);
+ } else {
+ $num_matches = count($this->matches);
+ for($i = 0; $i < $num_matches; $i++) {
+ crawlTimeoutLog("..Doing wiki substitutions..");
+ $pre_page = preg_replace($this->matches[$i],
+ $this->replaces[$i], $pre_page);
+ }
+ }
$pre_page = preg_replace("/{{Other uses}}/i",
"<div class='indent'>\"$1\". (<a href='".
$site[self::URL]. "_(disambiguation)'>$pre_url</a>)</div>",