Tweaking file cache sizes related to indexing
Tweaking file cache sizes related to indexing
diff --git a/src/executables/ArcTool.php b/src/executables/ArcTool.php
index 0ee914edc..2c8345806 100755
--- a/src/executables/ArcTool.php
+++ b/src/executables/ArcTool.php
@@ -1481,7 +1481,8 @@ EOD;
$old_next_partition = $next_partition;
$num_forks = min($save_partition - $next_partition,
$number_of_processes);
- echo "Num forks:$num_forks, num processes $number_of_processes\n";
+ echo "Num forks:$num_forks, ".
+ "num processes $number_of_processes\n";
for ($i = 0; $i < $num_forks; $i++) {
$process_partition = $next_partition + $i;
$process_options = sprintf($options, $i,
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index 81835fab3..d565177ae 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -158,6 +158,10 @@ class IndexDocumentBundle implements CrawlConstants
* addPartitionPostingsDictionary
*/
const POSTINGS_BUFFER_SIZE = 1000000;
+ /**
+ * Maximum number of posting slices to cache
+ */
+ const MAX_POSTING_CACHE_ITEMS = 100;
/**
* Holds property value pairs concerning the configuration of the
* current IndexDocumentBundle
@@ -1533,13 +1537,12 @@ class IndexDocumentBundle implements CrawlConstants
{
static $file_handles = [];
static $memory_limit = 0;
- $max_cache_size = 500;
if (!$memory_limit) {
$memory_limit =
- metricToInt(ini_get("memory_limit")) * C\MEMORY_FILL_FACTOR;
+ C\INDEX_FILE_MEMORY_LIMIT * C\MEMORY_FILL_FACTOR;
}
if (memory_get_usage() > $memory_limit ||
- count($file_handles) > $max_cache_size) {
+ count($file_handles) > self::MAX_POSTING_CACHE_ITEMS) {
array_shift($file_handles); /*just in case file handles causing
memory leak */
}
diff --git a/src/library/LSMTree.php b/src/library/LSMTree.php
index 8dc98f979..5e38aa08b 100644
--- a/src/library/LSMTree.php
+++ b/src/library/LSMTree.php
@@ -231,7 +231,13 @@ class LSMTree
$db->unlinkRecursive($tier_folder, true);
}
/**
+ * Compares the keys in two LSMTree entries and returns
+ * -1 if $entry_a < $entry_b, 0 if $entry_a == $entry_b,
+ * and 1 if $entry_a > $entry_b
*
+ * @param string $entry_a first LSMTree entry
+ * @param string $entry_b second LSMTree entry
+ * @return int result of comparison as described above
*/
public function compare($entry_a, $entry_b)
{
@@ -239,7 +245,15 @@ class LSMTree
return strncmp($entry_a, $entry_b, $key_len);
}
/**
+ * Combines two LSMTree entries with the same key value into a single
+ * entry. The values of an entry begin with number of items stored
+ * followed by items in the format specified by the LSMTree constructor.
+ * So the output adds the two numbers of items and concatenates the
+ * items themselves.
*
+ * @param string $entry_a first entry to combine
+ * @param string $entry_b second entry to combine
+ * @return string combined entry
*/
public function mergeEntries($entry_a, $entry_b)
{
@@ -745,7 +759,9 @@ class Tier
}
self::$cache[$name_hash] =
explode($delimiter, file_get_contents($filename));
- if (count(self::$cache[$name_hash]) >= LSMTree::RECORD_CACHE_SIZE) {
+ if (count(self::$cache[$name_hash]) >= LSMTree::RECORD_CACHE_SIZE ||
+ memory_get_usage() >
+ C\INDEX_FILE_MEMORY_LIMIT * C\MEMORY_FILL_FACTOR) {
array_shift(self::$cache);
}
return self::$cache[$name_hash];