Changed some assignments to be by reference to try to make queue_server and fetcher more memory efficient, a=cpollett

Chris Pollett [2010-09-05]
Changed some assignments to be by reference to try to make queue_server and fetcher more memory efficient, a=cpollett
Filename
bin/fetcher.php
lib/index_archive_bundle.php
diff --git a/bin/fetcher.php b/bin/fetcher.php
index e1bc72a2d..81b3cfa9b 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -36,7 +36,7 @@ define("BASE_DIR", substr(
     dirname(realpath($_SERVER['PHP_SELF'])), 0,
     -strlen("/bin")));

-ini_set("memory_limit","600M"); //so have enough memory to crawl big pages
+ini_set("memory_limit","700M"); //so have enough memory to crawl big pages

 /** Load in global configuration settings */
 require_once BASE_DIR.'/configs/config.php';
@@ -1095,7 +1095,7 @@ class Fetcher implements CrawlConstants
         }
         $this->found_duplicates = array();

-        $this->found_sites[self::INVERTED_INDEX] = $words;
+        $this->found_sites[self::INVERTED_INDEX] = & $words;

         crawlLog("  Build mini inverted index time ".
             (changeInMicrotime($start_time)));
diff --git a/lib/index_archive_bundle.php b/lib/index_archive_bundle.php
index f048e4128..4573faa62 100644
--- a/lib/index_archive_bundle.php
+++ b/lib/index_archive_bundle.php
@@ -279,10 +279,10 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants
     /**
      * Adds the provided mini inverted index data to the IndexArchiveBundle
      *
-     * @param array $index_data a mini inverted index of word_key=>doc data
+     * @param array &$index_data a mini inverted index of word_key=>doc data
      *      to add to this IndexArchiveBundle
      */
-    public function addIndexData($index_data)
+    public function addIndexData(&$index_data)
     {

         $out_data = array();
@@ -301,7 +301,7 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants

             $partition = WebArchiveBundle::selectPartition(
                  $word_key, $this->num_partitions_index);
-            $out_data[$partition][$word_key] = $docs_info;
+            $out_data[$partition][$word_key] = & $docs_info;

         }
         $this->diagnostics['SELECT_TIME'] += changeInMicrotime($start_time);
ViewGit