diff --git a/bin/fetcher.php b/bin/fetcher.php
index 1342aea94..86b377874 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -39,7 +39,7 @@ define("BASE_DIR", substr(
dirname(realpath($_SERVER['PHP_SELF'])), 0,
-strlen("/bin")));
-ini_set("memory_limit","850M"); //so have enough memory to crawl big pages
+ini_set("memory_limit","600M"); //so have enough memory to crawl big pages
/** Load in global configuration settings */
require_once BASE_DIR.'/configs/config.php';
diff --git a/bin/queue_server.php b/bin/queue_server.php
index b6a8e75d6..ce782d0cd 100755
--- a/bin/queue_server.php
+++ b/bin/queue_server.php
@@ -38,7 +38,7 @@ define("BASE_DIR", substr(
dirname(realpath($_SERVER['PHP_SELF'])), 0,
-strlen("/bin")));
-ini_set("memory_limit","1200M"); //so have enough memory to crawl big pages
+ini_set("memory_limit","1000M"); //so have enough memory to crawl big pages
/** Load in global configuration settings */
require_once BASE_DIR.'/configs/config.php';
diff --git a/lib/index_archive_bundle.php b/lib/index_archive_bundle.php
index a3327fcea..fb14f7932 100644
--- a/lib/index_archive_bundle.php
+++ b/lib/index_archive_bundle.php
@@ -266,7 +266,7 @@ class IndexArchiveBundle implements CrawlConstants
in case merge tiers after adding to dictionary
*/
$this->current_shard = new IndexShard(
- $current_index_shard_file, $this->generation_info['ACTIVE'],
+ $current_index_shard_file, $this->generation_info['ACTIVE'],
$this->num_docs_per_generation, true);
$this->dictionary->addShardDictionary($this->current_shard);
}
diff --git a/lib/index_dictionary.php b/lib/index_dictionary.php
index 5af0ebd71..836fa16b0 100644
--- a/lib/index_dictionary.php
+++ b/lib/index_dictionary.php
@@ -164,7 +164,7 @@ class IndexDictionary implements CrawlConstants
* @param object $index_shard the shard to add the word to the dictionary
* with
*/
- function addShardDictionary(&$index_shard)
+ function addShardDictionary($index_shard)
{
$out_slot = "A";
if(file_exists($this->dir_name."/0/0A.dic")) {
@@ -193,11 +193,11 @@ class IndexDictionary implements CrawlConstants
$first_offset_flag = false;
}
$offset -= $first_offset;
- $out = pack("N", $offset).pack("N", $count);
+ $out = pack("N", $offset) . pack("N", $count);
$last_set = $j;
$last_out = $prefix_info;
charCopy($out, $prefix_string,
- (($i << 8) + $j)*self::PREFIX_ITEM_SIZE,
+ (($i << 8) + $j) * self::PREFIX_ITEM_SIZE,
self::PREFIX_ITEM_SIZE);
}
}
@@ -210,7 +210,7 @@ class IndexDictionary implements CrawlConstants
if($last_set >= 0) {
list($offset, $count) = $last_out;
$next_offset = $base_offset + $offset +
- $count*IndexShard::WORD_ITEM_LEN;
+ $count * IndexShard::WORD_ITEM_LEN;
fwrite($fh, $index_shard->getShardSubstring($last_offset,
$next_offset - $last_offset));
}
@@ -595,7 +595,10 @@ class IndexDictionary implements CrawlConstants
}
/**
- *
+ * Looks up the shard information (which is actually embedded in
+ * the dictionary) for an info:url query
+ * @param string $hash_info_url hash of info:url meta word
+ * @return array summary (to the extent stored in a shard) data for this url
*/
function getInfoItem($hash_info_url)
{
diff --git a/lib/index_shard.php b/lib/index_shard.php
index 2d0170235..bf6cf0c04 100644
--- a/lib/index_shard.php
+++ b/lib/index_shard.php
@@ -535,7 +535,7 @@ class IndexShard extends PersistentStructure implements
$num_docs_so_far = 0;
$results = array();
- $end = min($this->word_docs_len, $last_offset);
+ $end = min($this->file_len - $this->docids_len, $last_offset);
$num_docs_or_links =
self::numDocsOrLinks($start_offset, $last_offset);
@@ -1504,9 +1504,7 @@ class IndexShard extends PersistentStructure implements
function getDocInfoSubstring($offset, $len)
{
if($this->read_only_from_disk) {
- $base_offset = self::HEADER_LENGTH +
- $this->prefixes_len + $this->words_len + $this->word_docs_len;
-
+ $base_offset = $this->file_len - $this->docids_len;
return $this->getShardSubstring($base_offset + $offset, $len);
}
return substr($this->doc_infos, $offset, $len);