diff --git a/controllers/admin_controller.php b/controllers/admin_controller.php index bf06bbd25..45a21e7a5 100755 --- a/controllers/admin_controller.php +++ b/controllers/admin_controller.php @@ -43,10 +43,6 @@ require_once BASE_DIR."/lib/page_rule_parser.php"; require_once BASE_DIR."/lib/classifiers/classifier.php"; /** Loads crawl_daemon to manage news_updater */ require_once BASE_DIR."/lib/crawl_daemon.php"; -/** get processors for different file types */ -foreach(glob(BASE_DIR."/lib/processors/*_processor.php") as $filename) { - require_once $filename; -} /** * Controller used to handle admin functionalities such as * modify login and password, CREATE, UPDATE,DELETE operations diff --git a/controllers/components/crawl_component.php b/controllers/components/crawl_component.php index 67ca557b2..5b27b2760 100644 --- a/controllers/components/crawl_component.php +++ b/controllers/components/crawl_component.php @@ -1086,18 +1086,18 @@ class CrawlComponent extends Component implements CrawlConstants tl('crawl_component_page_options_updated')."</h1>')"; } $test_processors = array( - "text/html" => "HtmlProcessor", - "text/asp" => "HtmlProcessor", - "text/xml" => "XmlProcessor", - "text/robot" => "RobotProcessor", - "application/xml" => "XmlProcessor", - "application/xhtml+xml" => "HtmlProcessor", - "application/rss+xml" => "RssProcessor", - "application/atom+xml" => "RssProcessor", - "text/rtf" => "RtfProcessor", - "text/plain" => "TextProcessor", - "text/csv" => "TextProcessor", - "text/tab-separated-values" => "TextProcessor", + "text/html" => "html", + "text/asp" => "html", + "text/xml" => "xml", + "text/robot" => "robot", + "application/xml" => "xml", + "application/xhtml+xml" => "html", + "application/rss+xml" => "rss", + "application/atom+xml" => "rss", + "text/rtf" => "rtf", + "text/plain" => "text", + "text/csv" => "text", + "text/tab-separated-values" => "text", ); $data['MIME_TYPES'] = array_keys($test_processors); $data['page_type'] = "text/html"; @@ -1132,7 +1132,11 @@ class CrawlComponent extends Component implements CrawlConstants $site[self::ENCODING] = guessEncodingHtml($_REQUEST['TESTPAGE']); } - $processor_name = $test_processors[$site[self::TYPE]]; + $prefix_name = $test_processors[$site[self::TYPE]]; + $processor_name = ucfirst($prefix_name). + "Processor"; + require_once(BASE_DIR . + "/lib/processors/{$prefix_name}_processor.php"); $plugin_processors = array(); if (isset($seed_info['indexing_plugins']['plugins'])) { foreach($seed_info['indexing_plugins']['plugins'] as $plugin) { @@ -1159,7 +1163,7 @@ class CrawlComponent extends Component implements CrawlConstants $site[self::URL]); set_error_handler("yioop_error_handler"); - if($page_processor != "RobotProcessor" && + if($processor_name != "RobotProcessor" && !isset($doc_info[self::JUST_METAS])) { $doc_info[self::LINKS] = UrlParser::pruneLinks( $doc_info[self::LINKS]); diff --git a/controllers/controller.php b/controllers/controller.php index eddb097fb..f91c0e23b 100755 --- a/controllers/controller.php +++ b/controllers/controller.php @@ -158,7 +158,7 @@ abstract class Controller } /** * Dynamic loader for Plugin objects which might live on the current - * View + * Controller * * @param string $plugin name of Plugin to return */ diff --git a/lib/processors/bmp_processor.php b/lib/processors/bmp_processor.php index f02867f21..9f0b9a17d 100644 --- a/lib/processors/bmp_processor.php +++ b/lib/processors/bmp_processor.php @@ -66,7 +66,7 @@ class BmpProcessor extends ImageProcessor */ const MAX_DIM = 1000; /** - * {@inheritdoc} + * {@inheritDoc} */ function process($page, $url) { diff --git a/lib/processors/gif_processor.php b/lib/processors/gif_processor.php index e70966104..794120941 100755 --- a/lib/processors/gif_processor.php +++ b/lib/processors/gif_processor.php @@ -50,7 +50,7 @@ require_once BASE_DIR."/lib/processors/image_processor.php"; class GifProcessor extends ImageProcessor { /** - * {@inheritdoc} + * {@inheritDoc} */ function process($page, $url) { diff --git a/lib/processors/git_xml_processor.php b/lib/processors/git_xml_processor.php index 8d6970f96..c15542c92 100755 --- a/lib/processors/git_xml_processor.php +++ b/lib/processors/git_xml_processor.php @@ -47,7 +47,7 @@ $PAGE_PROCESSORS = array_merge($PAGE_PROCESSORS, $add_types); /** * Load the base class */ -require_once BASE_DIR."/lib/processors/page_processor.php"; +require_once BASE_DIR."/lib/processors/text_processor.php"; /** * So can extract parts of the URL if need to guess lang */ diff --git a/lib/processors/java_processor.php b/lib/processors/java_processor.php index ffca06b05..213122f13 100755 --- a/lib/processors/java_processor.php +++ b/lib/processors/java_processor.php @@ -47,7 +47,7 @@ $PAGE_PROCESSORS = array_merge($PAGE_PROCESSORS, $add_types); /** * Load the base class */ -require_once BASE_DIR."/lib/processors/page_processor.php"; +require_once BASE_DIR."/lib/processors/text_processor.php"; /** * So can extract parts of the URL if need to guess lang */ diff --git a/lib/processors/jpg_processor.php b/lib/processors/jpg_processor.php index ca5f02c88..44506c8ff 100755 --- a/lib/processors/jpg_processor.php +++ b/lib/processors/jpg_processor.php @@ -53,7 +53,7 @@ require_once BASE_DIR."/lib/processors/image_processor.php"; class JpgProcessor extends ImageProcessor { /** - * {@inheritdoc} + * {@inheritDoc} */ function process($page, $url) { diff --git a/lib/processors/png_processor.php b/lib/processors/png_processor.php index 344b6cef2..ef65e5122 100755 --- a/lib/processors/png_processor.php +++ b/lib/processors/png_processor.php @@ -50,7 +50,7 @@ require_once BASE_DIR."/lib/processors/image_processor.php"; class PngProcessor extends ImageProcessor { /** - * {@inheritdoc} + * {@inheritDoc} */ function process($page, $url) { diff --git a/lib/processors/ppt_processor.php b/lib/processors/ppt_processor.php index 36db9cc94..82e8e337d 100755 --- a/lib/processors/ppt_processor.php +++ b/lib/processors/ppt_processor.php @@ -113,7 +113,7 @@ class PptProcessor extends TextProcessor } else { $state = self::PPT_IGNORING; } - break + break; case self::READ_LEN_TEXT_SEG: if($text_len_pos < 4) { $text_len += ($ascii << ($text_len_pos * 8)); diff --git a/lib/processors/python_processor.php b/lib/processors/python_processor.php index c1943d6c3..29e941860 100755 --- a/lib/processors/python_processor.php +++ b/lib/processors/python_processor.php @@ -47,7 +47,7 @@ $PAGE_PROCESSORS = array_merge($PAGE_PROCESSORS, $add_types); /** * Load the base class */ -require_once BASE_DIR."/lib/processors/page_processor.php"; +require_once BASE_DIR."/lib/processors/text_processor.php"; /** * So can extract parts of the URL if need to guess lang */ diff --git a/models/crawl_model.php b/models/crawl_model.php index d361ee52f..ed60941a4 100755 --- a/models/crawl_model.php +++ b/models/crawl_model.php @@ -65,19 +65,19 @@ class CrawlModel extends ParallelModel implements CrawlConstants */ var $suggest_url_file; /** - * {@inheritdoc} + * {@inheritDoc} */ function __construct($db_name = DB_NAME, $connect = true) { $this->suggest_url_file = WORK_DIRECTORY."/data/suggest_url.txt"; parent::__construct($db_name, $connect); } - /** {@inheritdoc} */ + /** {@inheritDoc} */ function fromCallback($args) { return "CRAWL_MIXES"; } - /** {@inheritdoc} */ + /** {@inheritDoc} */ function rowCallback($row, $with_components) { if($with_components) { diff --git a/models/cron_model.php b/models/cron_model.php index c541ffa19..e12fd6fc1 100644 --- a/models/cron_model.php +++ b/models/cron_model.php @@ -58,7 +58,7 @@ class CronModel extends Model */ var $cron_table; /** - * {@inheritdoc} + * {@inheritDoc} */ function __construct($db_name = DB_NAME, $connect = true) { diff --git a/models/datasources/mysql_manager.php b/models/datasources/mysql_manager.php index 956c51d77..38d379123 100755 --- a/models/datasources/mysql_manager.php +++ b/models/datasources/mysql_manager.php @@ -61,7 +61,7 @@ class MysqlManager extends PdoManager * @var string */ var $special_quote = "`"; - /** {@inheritdoc} */ + /** {@inheritDoc} */ function connect($db_host = DB_HOST, $db_user = DB_USER, $db_password = DB_PASSWORD, $db_name = DB_NAME) { diff --git a/models/datasources/pdo_manager.php b/models/datasources/pdo_manager.php index 2c151dd4a..856ef5e1e 100644 --- a/models/datasources/pdo_manager.php +++ b/models/datasources/pdo_manager.php @@ -67,7 +67,7 @@ class PdoManager extends DatasourceManager * @var mixed */ var $to_upper_dbms; - /** {@inheritdoc} */ + /** {@inheritDoc} */ function connect($db_host = DB_HOST, $db_user = DB_USER, $db_password = DB_PASSWORD, $db_name = DB_NAME) { @@ -86,13 +86,13 @@ class PdoManager extends DatasourceManager } return $this->pdo; } - /** {@inheritdoc} */ + /** {@inheritDoc} */ function disconnect() { unset($this->pdo); $this->pdo = NULL; } - /** {@inheritdoc} */ + /** {@inheritDoc} */ function exec($sql, $params = array()) { static $last_sql = NULL; @@ -126,12 +126,12 @@ class PdoManager extends DatasourceManager $last_sql = $sql; return $result; } - /** {@inheritdoc} */ + /** {@inheritDoc} */ function affectedRows() { return $this->num_affected; } - /** {@inheritdoc} */ + /** {@inheritDoc} */ function insertID($table = "") { if($table && $this->to_upper_dbms == "PGSQL") { @@ -140,7 +140,7 @@ class PdoManager extends DatasourceManager } return $this->pdo->lastInsertId(); } - /** {@inheritdoc} */ + /** {@inheritDoc} */ function fetchArray($result) { if(!$result) { @@ -156,7 +156,7 @@ class PdoManager extends DatasourceManager } return $out_row; } - /** {@inheritdoc} */ + /** {@inheritDoc} */ function escapeString($str) { return substr($this->pdo->quote($str), 1, -1); diff --git a/models/datasources/sqlite3_manager.php b/models/datasources/sqlite3_manager.php index c3f7cca56..3568704fe 100644 --- a/models/datasources/sqlite3_manager.php +++ b/models/datasources/sqlite3_manager.php @@ -50,7 +50,7 @@ require_once BASE_DIR."/models/datasources/pdo_manager.php"; */ class Sqlite3Manager extends PdoManager { - /** {@inheritdoc} */ + /** {@inheritDoc} */ function __construct() { parent::__construct(); diff --git a/models/group_model.php b/models/group_model.php index 08223770e..c6bf0fdfc 100644 --- a/models/group_model.php +++ b/models/group_model.php @@ -98,7 +98,7 @@ class GroupModel extends Model G.MEMBER_ACCESS $join_date"; return $select; } - /** {@inheritdoc} */ + /** {@inheritDoc} */ function fromCallback($args) { return "GROUPS G, USER_GROUP UG, USERS O"; diff --git a/models/locale_model.php b/models/locale_model.php index 8da7a53ae..12ff5e5c9 100755 --- a/models/locale_model.php +++ b/models/locale_model.php @@ -114,7 +114,7 @@ class LocaleModel extends Model * @var array */ var $any_fields = array("mode"); - /** {@inheritdoc} */ + /** {@inheritDoc} */ function selectCallback($args = NULL) { return "LOCALE_ID, LOCALE_TAG, LOCALE_NAME, WRITING_MODE"; diff --git a/models/machine_model.php b/models/machine_model.php index 20b4f1f14..56a37a1ec 100644 --- a/models/machine_model.php +++ b/models/machine_model.php @@ -48,7 +48,7 @@ require_once BASE_DIR."/lib/fetch_url.php"; class MachineModel extends Model { var $search_table_column_map = array("name" => "NAME"); - /** {@inheritdoc} */ + /** {@inheritDoc} */ function postQueryCallback($rows) { return $this->getMachineStatuses($rows); diff --git a/models/parallel_model.php b/models/parallel_model.php index b88c382a5..a22b6c6c6 100755 --- a/models/parallel_model.php +++ b/models/parallel_model.php @@ -85,7 +85,7 @@ class ParallelModel extends Model implements CrawlConstants */ const MIN_DESCRIPTION_LENGTH = 100; /** - * {@inheritdoc} + * {@inheritDoc} */ function __construct($db_name = DB_NAME, $connect = true) { diff --git a/models/phrase_model.php b/models/phrase_model.php index 7313e60fc..be089e6a8 100755 --- a/models/phrase_model.php +++ b/models/phrase_model.php @@ -96,7 +96,7 @@ class PhraseModel extends ParallelModel */ const NUM_CACHE_PAGES = 10; /** - * {@inheritdoc} + * {@inheritDoc} */ function __construct($db_name = DB_NAME, $connect = true) { diff --git a/models/profile_model.php b/models/profile_model.php index e3f3703b9..583aab3a3 100755 --- a/models/profile_model.php +++ b/models/profile_model.php @@ -76,7 +76,7 @@ class ProfileModel extends Model 'USER_SESSION', 'VISITOR', 'VERSION' ); /** - * {@inheritdoc} + * {@inheritDoc} */ function __construct($db_name = DB_NAME, $connect = true) { diff --git a/models/role_model.php b/models/role_model.php index 87e4813ac..240c9dcfc 100644 --- a/models/role_model.php +++ b/models/role_model.php @@ -50,7 +50,7 @@ class RoleModel extends Model * @var array */ var $search_table_column_map = array("name"=>"NAME"); - /** {@inheritdoc} */ + /** {@inheritDoc} */ function selectCallback($args = NULL) { return "NAME"; diff --git a/models/searchfilters_model.php b/models/searchfilters_model.php index d34281272..0dfc86a59 100644 --- a/models/searchfilters_model.php +++ b/models/searchfilters_model.php @@ -55,7 +55,7 @@ class SearchfiltersModel extends Model implements CrawlConstants */ var $dir_name; /** - * {@inheritdoc} + * {@inheritDoc} */ function __construct($db_name = DB_NAME, $connect = true) { diff --git a/models/user_model.php b/models/user_model.php index 0d1f6064c..59031f595 100755 --- a/models/user_model.php +++ b/models/user_model.php @@ -62,18 +62,17 @@ class UserModel extends Model * @var array */ var $any_fields = array("status"); - /** {@inheritdoc} */ + /** {@inheritDoc} */ function selectCallback($args = NULL) { return "USER_ID, USER_NAME, FIRST_NAME, LAST_NAME, EMAIL, STATUS"; } - /** {@inheritdoc} */ - */ + /** {@inheritDoc} */ function fromCallback($args = NULL) { return "USERS"; } - /** {@inheritdoc} */ + /** {@inheritDoc} */ function whereCallback($args = NULL) { return "USER_ID != '".PUBLIC_USER_ID."'";