More PartitionDocumentBundle (PDB) parameter tweaks, fix crawlTimeoutLog message for buildInvertedIndex, a=chris
More PartitionDocumentBundle (PDB) parameter tweaks, fix crawlTimeoutLog message for buildInvertedIndex, a=chris
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index 62d9b7f86..288c64526 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -187,9 +187,6 @@ class IndexDocumentBundle implements CrawlConstants
$num_docs_per_partition,
PartitionDocumentBundle::PARTITION_SIZE_THRESHOLD,
C\NS_COMPRESSORS . "GzipCompressor");
- if (!$read_only_archive) {
- $this->documents->index_cache_size = 1;
- }
$this->doc_map_tools = new PackedTableTools([
"PRIMARY KEY" => ["DOC_KEYS", 24], "POS" => "INT",
"SCORE" => "FLOAT"], C\NS_COMPRESSORS . "GzipCompressor");
@@ -414,6 +411,7 @@ class IndexDocumentBundle implements CrawlConstants
crawlLog("Number of documents in mapped partition:" .
count($index_map));
$cnt = 0;
+ $non_aux_doc_cnt = 0;
$num_partition = count($index_map);
$total_num_words = 0;
foreach ($index_map as $hash_url => $url_info) {
@@ -569,10 +567,13 @@ class IndexDocumentBundle implements CrawlConstants
crawlLog("..Inverting " . $link_to . $site_url .
"...took > 5s.");
}
+ $non_aux_doc_cnt++;
if (crawlTimeoutLog("..Still building inverted index. Have ".
- "processed %s of %s documents.\nLast url processed was %s.",
- $cnt, $num_partition, $link_to . $site_url) &&
- $taking_too_long_touch) {
+ "processed %s of %s documents.\n" .
+ "Total links or docs processed is %s.\n" .
+ "Last url processed was %s.",
+ $non_aux_doc_cnt, $num_partition, $cnt,
+ $link_to . $site_url) && $taking_too_long_touch) {
if (file_exists($taking_too_long_touch)) {
touch($taking_too_long_touch, time());
}
diff --git a/src/library/PartitionDocumentBundle.php b/src/library/PartitionDocumentBundle.php
index 76a0145df..df28fe1e8 100644
--- a/src/library/PartitionDocumentBundle.php
+++ b/src/library/PartitionDocumentBundle.php
@@ -66,7 +66,7 @@ class PartitionDocumentBundle
/**
*
*/
- const INDEX_CACHE_SIZE = 15;
+ const INDEX_CACHE_SIZE = 10;
/**
*
*/