diff --git a/bin/fetcher.php b/bin/fetcher.php
index d48d24b33..db42db84b 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -920,7 +920,8 @@ class Fetcher implements CrawlConstants
}
//if not UTF-8 convert before doing anything else
if(isset($site[self::ENCODING]) &&
- $site[self::ENCODING] != "UTF-8" &&
+ $site[self::ENCODING] != "UTF-8" &&
+ $site[self::ENCODING] != "" &&
($page_processor == "TextProcessor" ||
is_subclass_of($page_processor, "TextProcessor"))) {
if(!mb_check_encoding($site[self::PAGE],
diff --git a/lib/index_bundle_iterators/index_bundle_iterator.php b/lib/index_bundle_iterators/index_bundle_iterator.php
index c87091604..62e697e1d 100644
--- a/lib/index_bundle_iterators/index_bundle_iterator.php
+++ b/lib/index_bundle_iterators/index_bundle_iterator.php
@@ -245,7 +245,6 @@ abstract class IndexBundleIterator implements CrawlConstants
*/
function advanceSeenDocs()
{
-
if($this->current_block_fresh != true) {
$doc_block = $this->currentDocsWithWord();
if($doc_block == -1 || !is_array($doc_block) ) {
diff --git a/lib/phrase_parser.php b/lib/phrase_parser.php
index 00482cb0a..3119eb497 100755
--- a/lib/phrase_parser.php
+++ b/lib/phrase_parser.php
@@ -109,27 +109,27 @@ class PhraseParser
}
/**
- * Extracts all phrases (sequences of adjacent words) from $string of
- * length less than or equal to $len.
- *
- * @param string $string subject to extract phrases from
- * @param int $len longest length of phrases to consider
- * @param string $lang locale tag for stemming
- * @return array of phrases
- */
- static function extractPhrases($string,
- $len = MAX_PHRASE_LEN, $lang = NULL)
- {
- $phrases = array();
-
- for($i = 0; $i < $len; $i++) {
- $phrases =
- array_merge($phrases,
- self::extractPhrasesOfLength($string, $i, $lang));
- }
-
- return $phrases;
- }
+ * Extracts all phrases (sequences of adjacent words) from $string of
+ * length less than or equal to $len.
+ *
+ * @param string $string subject to extract phrases from
+ * @param int $len longest length of phrases to consider
+ * @param string $lang locale tag for stemming
+ * @return array of phrases
+ */
+ static function extractPhrases($string,
+ $len = MAX_PHRASE_LEN, $lang = NULL)
+ {
+ $phrases = array(); // phrases collected across all lengths tried below
+ // NOTE(review): lengths passed are 0..$len-1, not 1..$len -- confirm intent
+ for($i = 0; $i < $len; $i++) {
+ $phrases =
+ array_merge($phrases,
+ self::extractPhrasesOfLength($string, $i, $lang));
+ }
+ // returned as accumulated; no explicit de-duplication happens here
+ return $phrases;
+ }
/**
* Extracts all phrases (sequences of adjacent words) from $string of
diff --git a/models/phrase_model.php b/models/phrase_model.php
index 979cccc68..e5b34f096 100755
--- a/models/phrase_model.php
+++ b/models/phrase_model.php
@@ -517,13 +517,9 @@ class PhraseModel extends Model
*/
$query_words = explode(" ", $phrase_string); //not stemmed
- /*$base_words = //Commented by Ravi Dhillon
- array_keys(PhraseParser::extractPhrasesAndCount($phrase_string,
- MAX_PHRASE_LEN, getLocaleTag())); //stemmed, if have stemmer
- */
- $base_words = //Added by Ravi Dhillon
- PhraseParser::extractPhrases($phrase_string,MAX_PHRASE_LEN,
- getLocaleTag()); //stemmed, if have stemmer
+ $base_words =
+ PhraseParser::extractPhrases($phrase_string,MAX_PHRASE_LEN,
+ getLocaleTag()); //stemmed, if have stemmer
$words = array_merge($base_words, $found_metas);
if(QUERY_STATISTICS) {
$this->query_info['QUERY'] .= "$in3<i>Index</i>: ".
@@ -571,7 +567,6 @@ class PhraseModel extends Model
$restrict_phrases = $quoteds;
- //$hashes = array_unique($hashes); //Commented by Ravi Dhillon
if(count($hashes) > 0) {
$word_keys = array_slice($hashes, 0, MAX_QUERY_TERMS);
} else {
@@ -825,27 +820,27 @@ class PhraseModel extends Model
foreach($word_structs as $word_struct) {
if(!is_array($word_struct)) { continue;}
$word_keys = $word_struct["KEYS"];
- $distinct_word_keys = array_unique($word_keys); //Added by Ravi Dhillon
+ $distinct_word_keys = array_values(array_unique($word_keys)); // reindex 0..n-1: array_unique keeps original keys
$restrict_phrases = $word_struct["RESTRICT_PHRASES"];
$disallow_keys = $word_struct["DISALLOW_KEYS"];
$index_archive = $word_struct["INDEX_ARCHIVE"];
$weight = $word_struct["WEIGHT"];
$num_word_keys = count($word_keys);
- $total_iterators = count($distinct_word_keys); //Modified by Ravi Dhillon
+ $total_iterators = count($distinct_word_keys);
$word_iterators = array();
- $word_iterator_map = array(); //Added by Ravi Dhillon
+ $word_iterator_map = array();
if($num_word_keys < 1) {continue;}
- for($i = 0; $i < $total_iterators; $i++) { //Modified by Ravi Dhillon
- $word_iterators[$i] =
- new WordIterator($distinct_word_keys[$i], $index_archive, //Modified by Ravi Dhillon
- false, $filter);
- foreach ($word_keys as $index => $key) { //Added by Ravi Dhillon
- if($key == $distinct_word_keys[$i]){
- $word_iterator_map[$index] = $i;
- }
- }
+ for($i = 0; $i < $total_iterators; $i++) {
+ $word_iterators[$i] =
+ new WordIterator($distinct_word_keys[$i], $index_archive,
+ false, $filter);
+ foreach ($word_keys as $index => $key) {
+ if($key == $distinct_word_keys[$i]){
+ $word_iterator_map[$index] = $i;
+ }
+ }
}
$num_disallow_keys = count($disallow_keys);
if($num_disallow_keys > 0) {
@@ -862,7 +857,8 @@ class PhraseModel extends Model
if($num_word_keys == 1) {
$base_iterator = $word_iterators[0];
} else {
- $base_iterator = new IntersectIterator($word_iterators,$word_iterator_map); //Modified by Ravi Dhillon
+ $base_iterator = new IntersectIterator(
+ $word_iterators,$word_iterator_map);
}
if($restrict_phrases == NULL && $disallow_keys == array() &&
$weight == 1) {