Updating config parameters for Version 0.4, a=cpollett

Chris Pollett [2010-09-04 21:??:??]
Updating config parameters for Version 0.4, a=cpollett
Filename
bin/fetcher.php
bin/queue_server.php
configs/config.php
index.php
diff --git a/bin/fetcher.php b/bin/fetcher.php
index 59a209e65..e1bc72a2d 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -36,7 +36,7 @@ define("BASE_DIR", substr(
     dirname(realpath($_SERVER['PHP_SELF'])), 0,
     -strlen("/bin")));

-ini_set("memory_limit","550M"); //so have enough memory to crawl big pages
+ini_set("memory_limit","600M"); //so have enough memory to crawl big pages

 /** Load in global configuration settings */
 require_once BASE_DIR.'/configs/config.php';
diff --git a/bin/queue_server.php b/bin/queue_server.php
index b47b06dc4..06882c111 100755
--- a/bin/queue_server.php
+++ b/bin/queue_server.php
@@ -36,7 +36,7 @@ define("BASE_DIR", substr(
     dirname(realpath($_SERVER['PHP_SELF'])), 0,
     -strlen("/bin")));

-ini_set("memory_limit","900M"); //so have enough memory to crawl big pages
+ini_set("memory_limit","950M"); //so have enough memory to crawl big pages

 /** Load in global configuration settings */
 require_once BASE_DIR.'/configs/config.php';
diff --git a/configs/config.php b/configs/config.php
index 6337fe76c..84b798c4f 100755
--- a/configs/config.php
+++ b/configs/config.php
@@ -183,7 +183,7 @@ define('COMMON_WORD_THRESHOLD', 1000);
 define('MAX_LINKS_PER_PAGE', 50);

 /**  maximum number of words from links to consider on any given page */
-define('MAX_LINKS_WORD_TEXT', 200);
+define('MAX_LINKS_WORD_TEXT', 100);

 /** request this many bytes out of a page */
 define('PAGE_RANGE_REQUEST', 50000);
@@ -258,7 +258,7 @@ $PAGE_PROCESSORS = array(   "text/html" => "HtmlProcessor",
  * How many non robot urls the fetcher successfully downloads before
  * between times data sent back to queue server
  */
-define ('SEEN_URLS_BEFORE_UPDATE_SCHEDULER', 500);
+define ('SEEN_URLS_BEFORE_UPDATE_SCHEDULER', 400);

 /** maximum number of urls to schedule to a given fetcher in one go */
 define ('MAX_FETCH_SIZE', 5000);
diff --git a/index.php b/index.php
index 42cd72b8d..bf0120a1d 100755
--- a/index.php
+++ b/index.php
@@ -42,7 +42,7 @@ define("BASE_DIR", substr($_SERVER['SCRIPT_FILENAME'], 0,-strlen("index.php")));
  * Load the configuration file
  */
 require_once(BASE_DIR.'configs/config.php');
-ini_set("memory_limit","200M");
+ini_set("memory_limit","450M");
 header("X-FRAME-OPTIONS: DENY"); //prevent click jacking
 session_name(SESSION_NAME);
 session_start();
ViewGit