Revert some unpackword code to see if it is more stable; more getsnippet tweaks, a=chris
Revert some unpackword code to see if it is more stable; more getsnippet tweaks, a=chris
diff --git a/lib/index_shard.php b/lib/index_shard.php
index e47d9b8a1..ea52b0a70 100644
--- a/lib/index_shard.php
+++ b/lib/index_shard.php
@@ -1520,8 +1520,6 @@ class IndexShard extends PersistentStructure implements
}
$num_lists = count($this->words);
$cnt = 0;
- $base_offset = 0;
- $mega = 1000000;
foreach($this->words as $word_id => $postings_info) {
/* we are ignoring the first four bytes which contains
generation info
@@ -1537,14 +1535,7 @@ class IndexShard extends PersistentStructure implements
8));
if(!isset($tmp[2])) {continue; }
list(, $offset, $len) = $tmp;
- $diff_offset = $offset - $base_offset;
- $postings = substr($this->word_docs, $diff_offset, $len);
- if($diff_offset > $mega) {
- $num_megas = floor($diff_offset / $mega);
- $this->word_docs = substr($this->word_docs,
- $num_megas * $mega);
- $base_offset += $num_megas * $mega;
- }
+ $postings = substr($this->word_docs, $offset, $len);
$this->words[$word_id] = $postings;
}
$cnt++;
diff --git a/models/model.php b/models/model.php
index dbb41b5a5..ab54825b4 100755
--- a/models/model.php
+++ b/models/model.php
@@ -263,23 +263,23 @@ class Model implements CrawlConstants
}
continue;
}
-
- $word_locations = array();
+ $word_locations = array();
foreach($words as $word) {
$qword = "/".preg_quote($word)."/ui";
preg_match_all($qword, $text_source, $positions,
PREG_OFFSET_CAPTURE);
+
if(isset($positions[0]) && is_array($positions[0])) {
$positions = $positions[0];
foreach($positions as $position) {
- $word_locations[$position[1]] = $word;
+ $word_locations[] = $position[1];
}
}
}
$high = 0;
- ksort($word_locations);
- foreach($word_locations as $pos => $word) {
+ sort($word_locations);
+ foreach($word_locations as $pos) {
if($pos < $high) continue;
$pre_low = ($pos >= SNIPPET_LENGTH_LEFT) ?
$pos - SNIPPET_LENGTH_LEFT: 0;