diff --git a/bin/fetcher.php b/bin/fetcher.php index a616603db..6060c696f 100755 --- a/bin/fetcher.php +++ b/bin/fetcher.php @@ -414,6 +414,7 @@ class Fetcher implements CrawlConstants $this->disallowed_sites = array(); $this->page_rule_parser = NULL; + $this->video_sources = array(); $this->hosts_with_errors = array(); @@ -1594,12 +1595,10 @@ class Fetcher implements CrawlConstants $summarized_site_pages[$i][self::LINKS] = $site[self::DOC_INFO][self::LINKS]; } - if(isset($site[self::DOC_INFO][self::THUMB])) { $summarized_site_pages[$i][self::THUMB] = $site[self::DOC_INFO][self::THUMB]; } - if(isset($site[self::DOC_INFO][self::SUBDOCS])) { $this->processSubdocs($i, $site, $summarized_site_pages, $stored_site_pages); @@ -1918,7 +1917,7 @@ class Fetcher implements CrawlConstants "UrlParser::getHost"); if($from_sitemap) { $this->found_sites[self::TO_CRAWL][$part][] = - array($url, $old_weight * $sitemap_link_weight / + array($url, $old_weight * $sitemap_link_weight / (($i+1)*($i+1) * $square_factor), $site_hash.$i); } else if ($this->crawl_order == self::BREADTH_FIRST) { @@ -2204,7 +2203,7 @@ class Fetcher implements CrawlConstants crawlLog($info[self::SUMMARY]); } crawlLog("Trying again in 5 seconds..."); - if($i == 1) { + if($i == 1) { /* maybe server has limited memory and two high a post_max_size */ diff --git a/bin/queue_server.php b/bin/queue_server.php index 4e71a3c8d..2774c8788 100755 --- a/bin/queue_server.php +++ b/bin/queue_server.php @@ -1720,7 +1720,7 @@ class QueueServer implements CrawlConstants, Join foreach($to_crawl_sites as $triple) { $url = & $triple[0]; if(strlen($url) < 7) { // strlen("http://") - continue; + continue; } if($url[0] != 'h' && trim($url) == "localhost") { $url = "http://localhost/"; @@ -1730,7 +1730,7 @@ class QueueServer implements CrawlConstants, Join unset($triple[2]); // so triple is now a pair $host_url = UrlParser::getHost($url); if(strlen($host_url) < 7) { // strlen("http://") - continue; + continue; } $host_with_robots = $host_url."/robots.txt"; $robots_in_queue = @@ -2266,4 +2266,4 @@ if(!defined("UNIT_TEST_MODE")) { $queue_server->start(); } -?> +?> \ No newline at end of file diff --git a/configs/config.php b/configs/config.php index 596f4ae9f..7f188abe8 100644 --- a/configs/config.php +++ b/configs/config.php @@ -486,7 +486,7 @@ define ('EN_RATIO', 0.9); /** Number of total description deemed title */ define ('AD_HOC_TITLE_LENGTH', 10); -/** Used to say number of bytes in histogram bar (stats page) for file +/** Used to say number of bytes in histogram bar (stats page) for file download sizes */ define('DOWNLOAD_SIZE_INTERVAL', 5000); diff --git a/controllers/admin_controller.php b/controllers/admin_controller.php index 749b18e0a..1e8b28491 100755 --- a/controllers/admin_controller.php +++ b/controllers/admin_controller.php @@ -1574,7 +1574,6 @@ class AdminController extends Controller implements CrawlConstants if(!isset($_REQUEST['load_option'])) { $data = array_merge($data, $profile); } else { - $this->updateProfileFields($data, $profile, array('IP_LINK','CACHE_LINK', 'SIMILAR_LINK', 'IN_LINK', 'SIGNIN_LINK', 'SUBSEARCH_LINK','WORD_SUGGEST')); @@ -1601,8 +1600,8 @@ class AdminController extends Controller implements CrawlConstants $this->profileModel->updateProfile(WORK_DIRECTORY, array(), $profile); } - $data['INDEXED_FILE_TYPES'] = array(); + $data['INDEXED_FILE_TYPES'] = array(); $filetypes = array(); foreach($INDEXED_FILE_TYPES as $filetype) { $ison =false; diff --git a/controllers/fetch_controller.php b/controllers/fetch_controller.php index f36f45115..21ed1f6e3 100755 --- a/controllers/fetch_controller.php +++ b/controllers/fetch_controller.php @@ -598,4 +598,4 @@ class FetchController extends Controller implements CrawlConstants return $list; } } -?> \ No newline at end of file +?> diff --git a/index.php b/index.php index dadfe1715..c7a8e55c4 100755 --- a/index.php +++ b/index.php @@ -201,4 +201,4 @@ function checkAllowedController($controller_name) return in_array($controller_name, $available_controllers) ; } -?> +?> \ No newline at end of file diff --git a/lib/crawl_daemon.php b/lib/crawl_daemon.php index a5224e336..b2b5c538f 100644 --- a/lib/crawl_daemon.php +++ b/lib/crawl_daemon.php @@ -224,7 +224,7 @@ class CrawlDaemon implements CrawlConstants $php = "php"; if((isset($_SERVER['_']) && stristr($_SERVER['_'], 'hhvm')) || - (isset($_SERVER['SERVER_SOFTWARE']) && + (isset($_SERVER['SERVER_SOFTWARE']) && $_SERVER['SERVER_SOFTWARE'] == "HPHP")) { $php = 'hhvm -f'; } @@ -357,4 +357,4 @@ class CrawlDaemon implements CrawlConstants } } - ?> + ?> \ No newline at end of file diff --git a/lib/index_dictionary.php b/lib/index_dictionary.php index 5b3191b15..31cd2ee92 100644 --- a/lib/index_dictionary.php +++ b/lib/index_dictionary.php @@ -841,4 +841,4 @@ class IndexDictionary implements CrawlConstants } - ?> \ No newline at end of file + ?> diff --git a/lib/indexing_plugins/indexing_plugin.php b/lib/indexing_plugins/indexing_plugin.php index 9c8302440..cf1798595 100644 --- a/lib/indexing_plugins/indexing_plugin.php +++ b/lib/indexing_plugins/indexing_plugin.php @@ -157,4 +157,4 @@ abstract class IndexingPlugin static function getAdditionalMetaWords() {return array();} } -?> +?> \ No newline at end of file diff --git a/views/admin_view.php b/views/admin_view.php index e9f3687c0..80d22ace4 100755 --- a/views/admin_view.php +++ b/views/admin_view.php @@ -34,14 +34,13 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} /** - * View responsible for drawing the admin pages of the - * SeekQuarry search engine site + * View responsible for drawing the admin pages of the SeekQuarry search engine + * site. * * @author Chris Pollett * @package seek_quarry * @subpackage view */ - class AdminView extends View { /** This view is drawn on a web layout diff --git a/views/elements/pageoptions_element.php b/views/elements/pageoptions_element.php index ae5101eaf..ff5b26699 100644 --- a/views/elements/pageoptions_element.php +++ b/views/elements/pageoptions_element.php @@ -35,18 +35,15 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} /** * This element is used to render the Page Options admin activity - * This activity lets a usercontrol the amount of web pages downloaded, + * This activity lets a user control the amount of web pages downloaded, * the recrawl frequency, the file types, etc of the pages crawled * * @author Chris Pollett - * * @package seek_quarry * @subpackage element */ - class PageOptionsElement extends Element { - /** * Draws the page options element to the output buffer * @@ -402,4 +399,4 @@ class PageOptionsElement extends Element <?php } } -?> +?> \ No newline at end of file diff --git a/views/helpers/displayresults_helper.php b/views/helpers/displayresults_helper.php index 339b9b272..94bf9ed85 100644 --- a/views/helpers/displayresults_helper.php +++ b/views/helpers/displayresults_helper.php @@ -67,4 +67,4 @@ class DisplayresultsHelper extends Helper } } } -?> \ No newline at end of file +?>