Fix some issues with require_once in php7 setting with multiple profiles, a=chris

Chris Pollett [2015-09-03 17:Sep:rd]
Fix some issues with require_once in php7 setting with multiple profiles, a=chris
Filename
index.php
src/controllers/RegisterController.php
src/controllers/SearchController.php
src/executables/QueueServer.php
src/library/BloomFilterFile.php
src/library/IndexArchiveBundle.php
src/library/IndexManager.php
src/library/IndexShard.php
src/library/MailServer.php
src/library/PhraseParser.php
src/library/PriorityQueue.php
src/library/StringArray.php
src/library/Utility.php
src/library/WebArchive.php
src/library/WebQueueBundle.php
src/library/archive_bundle_iterators/WebArchiveBundleIterator.php
src/models/Model.php
diff --git a/index.php b/index.php
index c65cb9e00..fc9555073 100644
--- a/index.php
+++ b/index.php
@@ -43,6 +43,6 @@ function passthruYioopRequest()
         exit();
     }
     define("seekquarry\\yioop\\configs\\REDIRECTS_ON", true);
-    require_once "src/index.php";
+    require_once __DIR__."/src/index.php";
 }
 passthruYioopRequest();
diff --git a/src/controllers/RegisterController.php b/src/controllers/RegisterController.php
index afebdf2bb..118f5348e 100755
--- a/src/controllers/RegisterController.php
+++ b/src/controllers/RegisterController.php
@@ -34,6 +34,7 @@ use seekquarry\yioop\configs as C;
 use seekquarry\yioop\library as L;
 use seekquarry\yioop\library\CrawlConstants;
 use seekquarry\yioop\library\MailServer;
+use seekquarry\yioop\library\UrlParser;
 use seekquarry\yioop\models\LocaleModel;

 /**
diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php
index 50d530f7d..4c5240fc7 100755
--- a/src/controllers/SearchController.php
+++ b/src/controllers/SearchController.php
@@ -1564,6 +1564,9 @@ class SearchController extends Controller implements CrawlConstants
         }
         set_error_handler(C\NS_LIB . "yioop_error_handler");
         $body =  $dom->getElementsByTagName('body')->item(0);
+        if(!$body) {
+            return $cache_file;
+        }
         //make tags in body absolute
         $body = $this->canonicalizeLinks($body, $url);
         $first_child = $body->firstChild;
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 6f4b5f6fd..95c400572 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -2017,6 +2017,8 @@ class QueueServer implements CrawlConstants, Join
      * It is possible that a large schedule file is created if someone
      * pastes more than MAX_FETCH_SIZE many urls into the initial seed sites
      * of a crawl in the  UI.
+     *
+     * @param array& $sites array containing to crawl data
      */
     public function dumpBigScheduleToSmall(&$sites)
     {
diff --git a/src/library/BloomFilterFile.php b/src/library/BloomFilterFile.php
index 07e2bcb8a..ac9217dbd 100755
--- a/src/library/BloomFilterFile.php
+++ b/src/library/BloomFilterFile.php
@@ -33,7 +33,7 @@ namespace seekquarry\yioop\library;
 /**
  * For packInt/unpackInt
  */
-require_once "Utility.php";
+require_once __DIR__."/Utility.php";

 /**
  * Code used to manage a bloom filter in-memory and in file.
diff --git a/src/library/IndexArchiveBundle.php b/src/library/IndexArchiveBundle.php
index a526bfad3..860ab5938 100644
--- a/src/library/IndexArchiveBundle.php
+++ b/src/library/IndexArchiveBundle.php
@@ -35,7 +35,7 @@ use seekquarry\yioop\configs as C;
 /**
  * Used for crawlLog and crawlHash
  */
-require_once 'Utility.php';
+require_once __DIR__.'/Utility.php';
 /**
  * Encapsulates a set of web page summaries and an inverted word-index of terms
  * from these summaries which allow one to search for summaries containing a
diff --git a/src/library/IndexManager.php b/src/library/IndexManager.php
index 1a0d1c3cf..e8e10f33a 100644
--- a/src/library/IndexManager.php
+++ b/src/library/IndexManager.php
@@ -35,7 +35,7 @@ use seekquarry\yioop\configs as C;
 /**
  * For crawlHash
  */
-require_once "Utility.php";
+require_once __DIR__."/Utility.php";
 /**
  * Class used to manage open IndexArchiveBundle's while performing
  * a query. Ensures an easy place to obtain references to these bundles
diff --git a/src/library/IndexShard.php b/src/library/IndexShard.php
index c10dc5ba3..02bb4b32b 100644
--- a/src/library/IndexShard.php
+++ b/src/library/IndexShard.php
@@ -35,7 +35,7 @@ use seekquarry\yioop\configs as C;
 /**
  * Load charCopy
  */
-require_once "Utility.php";
+require_once __DIR__."/Utility.php";
 /**
  * Data structure used to store one generation worth of the word document
  * index (inverted index).
diff --git a/src/library/MailServer.php b/src/library/MailServer.php
index 9d619cccc..3f06003ae 100644
--- a/src/library/MailServer.php
+++ b/src/library/MailServer.php
@@ -37,7 +37,7 @@ use seekquarry\yioop\library\MediaConstants;
 /**
  * Timing functions
  */
-require_once "Utility.php";
+require_once __DIR__."/Utility.php";
 /**
  * A small class for communicating with an SMTP server. Used to avoid
  * configuration issues that might be needed with PHP's built-in mail()
diff --git a/src/library/PhraseParser.php b/src/library/PhraseParser.php
index 88d269c2d..e7c4b9c00 100755
--- a/src/library/PhraseParser.php
+++ b/src/library/PhraseParser.php
@@ -37,11 +37,11 @@ use seekquarry\yioop\library\processors\PageProcessor;
 /**
  * For crawlHash
  */
-require_once "Utility.php";
+require_once __DIR__."/Utility.php";
 /**
  * So know which part of speech tagger to use
  */
-require_once "LocaleFunctions.php";
+require_once __DIR__."/LocaleFunctions.php";
 /**
  * Library of functions used to manipulate words and phrases
  *
diff --git a/src/library/PriorityQueue.php b/src/library/PriorityQueue.php
index a6c9624f1..98921a00a 100755
--- a/src/library/PriorityQueue.php
+++ b/src/library/PriorityQueue.php
@@ -35,7 +35,7 @@ use seekquarry\yioop\configs as C;
 /**
  * Loaded for crawlLog function
  */
-require_once "Utility.php";
+require_once __DIR__."/Utility.php";
 /**
  *
  * Code used to manage a memory efficient priority queue.
diff --git a/src/library/StringArray.php b/src/library/StringArray.php
index 01f6ec2e3..ccfd367ce 100755
--- a/src/library/StringArray.php
+++ b/src/library/StringArray.php
@@ -33,7 +33,7 @@ namespace seekquarry\yioop\library;
 /**
  * Load charCopy
  */
-require_once "Utility.php";
+require_once __DIR__."/Utility.php";
 /**
  * Memory efficient implementation of persistent arrays
  *
diff --git a/src/library/Utility.php b/src/library/Utility.php
index eb62d0d7a..d2c137a3a 100755
--- a/src/library/Utility.php
+++ b/src/library/Utility.php
@@ -70,8 +70,10 @@ function yioop_error_handler($errno, $errstr, $errfile, $errline,
         if (isset($call['function'])) {
             $function .= $call['function'];
         }
-        echo "  $in_or_called $function, line {$call['line']}".
-            " in {$call['file']} \n";
+        $line = (isset($call['line'])) ? $call['line'] : "";
+        $file = (isset($call['file'])) ? $call['file'] : "";
+        echo "  $in_or_called $function, line $line".
+            " in $file \n";
         $in_or_called = "called from";
         $i++;
         if ($i >= $num_lines_of_backtrace) {break; }
diff --git a/src/library/WebArchive.php b/src/library/WebArchive.php
index efbad7157..5504d70c8 100755
--- a/src/library/WebArchive.php
+++ b/src/library/WebArchive.php
@@ -35,7 +35,7 @@ use seekquarry\yioop\configs as C;
 /**
  * Loads crawlLog functions if needed
  */
-require_once "Utility.php";
+require_once __DIR__."/Utility.php";
 /**
  *
  * Code used to manage web archive files
diff --git a/src/library/WebQueueBundle.php b/src/library/WebQueueBundle.php
index 5d7501449..5e9e44fa3 100755
--- a/src/library/WebQueueBundle.php
+++ b/src/library/WebQueueBundle.php
@@ -36,7 +36,7 @@ use seekquarry\yioop\library\compressors\NonCompressor;
 /**
  * Used for the crawlHash function
  */
-require_once 'Utility.php';
+require_once __DIR__.'/Utility.php';
 /**
  * Encapsulates the data structures needed to have a queue of to crawl urls
  *
diff --git a/src/library/archive_bundle_iterators/WebArchiveBundleIterator.php b/src/library/archive_bundle_iterators/WebArchiveBundleIterator.php
index 3c6314784..b03097ace 100644
--- a/src/library/archive_bundle_iterators/WebArchiveBundleIterator.php
+++ b/src/library/archive_bundle_iterators/WebArchiveBundleIterator.php
@@ -35,7 +35,7 @@ use seekquarry\yioop\library\CrawlConstants;
 use seekquarry\yioop\library\WebArchiveBundle;

 /** For crawlTimeoutLog */
-require_once BASE_DIR.'/library/Utility.php';
+require_once __DIR__.'/../Utility.php';
 /**
  * Class used to model iterating documents indexed in
  * an WebArchiveBundle. This would typically be for the purpose
diff --git a/src/models/Model.php b/src/models/Model.php
index 4fb906790..d501c4016 100755
--- a/src/models/Model.php
+++ b/src/models/Model.php
@@ -213,6 +213,9 @@ class Model implements CrawlConstants
      * @param string $text haystack to extract snippet from
      * @param array $words keywords used to make look in haystack
      * @param string $description_length length of the description desired
+     * @param bool $words_change getSnippets might be called many times on
+     *      the same search page with the same $words, if true then the
+     *      preprocessing of $words is avoided and cached versions are used
      * @return string a concatenation of the extracted snippets of each word
      */
     public function getSnippets($text, $words, $description_length,
ViewGit