Bumping memory requirements of fetcher, a=chris
Bumping memory requirements of fetcher, a=chris
diff --git a/bin/fetcher.php b/bin/fetcher.php
index b87b51f16..08c4bea72 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -42,7 +42,7 @@ define("BASE_DIR", substr(
dirname(realpath($_SERVER['PHP_SELF'])), 0,
-strlen("/bin")));
-ini_set("memory_limit","850M"); //so have enough memory to crawl big pages
+ini_set("memory_limit","1000M"); //so have enough memory to crawl big pages
/** Load in global configuration settings */
require_once BASE_DIR.'/configs/config.php';
@@ -1649,7 +1649,7 @@ class Fetcher implements CrawlConstants
global $IMAGE_TYPES;
$start_time = microtime();
-
+ crawlLog(" Start building mini inverted index ... ");
$num_seen = count($this->found_sites[self::SEEN_URLS]);
$this->num_seen_sites += $num_seen;
/*
@@ -1760,10 +1760,10 @@ class Fetcher implements CrawlConstants
$this->found_sites[self::SEEN_URLS][] = $summary;
$link_type = UrlParser::getDocumentType($url);
if(in_array($link_type, $IMAGE_TYPES)) {
+ $link_meta_ids[] = "media:image";
if(isset($safe) && !$safe) {
$link_meta_ids[] = "safe:false";
}
- $link_meta_ids[] = "media:image";
} else if(UrlParser::isVideoUrl($url)) {
$link_meta_ids[] = "media:video";
if(isset($safe) && !$safe) {
diff --git a/lib/phrase_parser.php b/lib/phrase_parser.php
index b3c00bcf8..26962f469 100755
--- a/lib/phrase_parser.php
+++ b/lib/phrase_parser.php
@@ -429,7 +429,7 @@ vaffanculo fok hoer kut lul やりまん 打っ掛け 二形 ふたなりゴッ
雞巴 鷄巴 雞雞 鷄鷄 阴茎 陰莖 胯下物
屌 吊 小鳥 龟头 龜頭 屄 鸡白 雞白 傻屄 老二 那话儿 那話兒 屄 鸡白 雞白 阴道 陰道
阴户 陰戶 大姨妈 淫蟲 老嫖 妓女 臭婊子 卖豆腐 賣豆腐 咪咪 大豆腐 爆乳 肏操
-炒饭 炒飯 cặc lồn kaltak orospu siktir sıçmak amcık ";
+炒饭 炒飯 cặc lồn kaltak orospu siktir sıçmak amcık";
static $unsafe_terms = array();
if($unsafe_terms == array()) {