diff --git a/src/configs/Config.php b/src/configs/Config.php index 49c8c3441..169718eeb 100755 --- a/src/configs/Config.php +++ b/src/configs/Config.php @@ -616,8 +616,8 @@ if (!nsdefined("MAX_VIDEO_CONVERT_SIZE")) { } /** * The maximum time limit in seconds where if a file is not converted by the - * time it will be picked up again by the slave media updater - * This value largely depends on the no of slave media updaters that we have + * time it will be picked up again by the client media updater + * This value largely depends on the no of client media updaters that we have * and also the maximum video size that would be uploaded to yioop. * This value should be kept more than the sleeping time of media updater * loop to avoid conversion of same file multiple times. diff --git a/src/controllers/JobsController.php b/src/controllers/JobsController.php index 3ab323b22..36b08b8db 100644 --- a/src/controllers/JobsController.php +++ b/src/controllers/JobsController.php @@ -37,7 +37,15 @@ use seekquarry\yioop\library\MediaConstants; use seekquarry\yioop\library\UrlParser; /** - * + * This class is used to handle requests from a MediaUpdater to a name server + * There are three main types of requests: getUpdateProperties, and + * for any job that the MediaUpdater might be running, its getTasks, and + * putTasks request. getUpdateProperties is supposed to provide configuration + * settings for the MediaUpdater. A MediaUpdater might be running several + * periodic jobs. The getTasks requests of a job is used to see if there + * is any new work available of that job type on the name server. A + * putTasks request is used to handle any computed data sent back from a + * MediaUpdater to the name server. 
* * @author Chris Pollett */ @@ -51,7 +59,7 @@ class JobsController extends Controller implements CrawlConstants, public $activities = ["getUpdateProperties"]; /** * Checks that the request seems to be coming from a legitimate - * media_updater then determines which job's activity is being + * MediaUpdater then determines which job's activity is being * requested and calls that activity for processing. * */ diff --git a/src/library/media_jobs/BulkEmailJob.php b/src/library/media_jobs/BulkEmailJob.php index 614b73d97..b53fc3702 100644 --- a/src/library/media_jobs/BulkEmailJob.php +++ b/src/library/media_jobs/BulkEmailJob.php @@ -22,7 +22,8 @@ * * END LICENSE * - * @author Chris Pollett chris@pollett.org + * @author Chris Pollett chris@pollett.org (initial MediaJob class + * and subclasses based on work of Pooja Mishra for her master's) * @license http://www.gnu.org/licenses/ GPL3 * @link http://www.seekquarry.com/ * @copyright 2009 - 2015 @@ -35,7 +36,9 @@ use seekquarry\yioop\library as L; use seekquarry\yioop\library\MailServer; /** - * + * MediaJob class for sending out emails from a Yioop instance (either in + * response to account registrations or in response to group posts and similar + * activities) */ class BulkEmailJob extends MediaJob { @@ -45,7 +48,7 @@ class BulkEmailJob extends MediaJob */ public $mail_server; /** - * + * Set up the MailServer object used to actually send mail */ public function init() { @@ -54,7 +57,10 @@ class BulkEmailJob extends MediaJob C\MAIL_SECURITY); } /** + * Bulk mail runs if the media updater is in distributed mode or if + * Yioop is configured to send mail from media updater * + * @return bool true if bulk mail task should be run. 
*/ public function checkPrerequisites() { @@ -102,13 +108,18 @@ class BulkEmailJob extends MediaJob } } /** + * Emails a list of emails provided by the name server to the media updater + * client * + * @param array $tasks contains emails which should be sent out + * @return mixed data to send back to name server (in this case the name + * of the email file that was completely sent) */ public function doTasks($tasks) { if (!isset($tasks["name"]) || !isset($tasks["data"])) { L\crawlLog("...Email Task received incomplete !"); - return; + return null; } L\crawlLog("----Email file name: {$tasks['name']}"); $emails = explode(self::MESSAGE_SEPARATOR, $tasks["data"]); @@ -128,6 +139,9 @@ class BulkEmailJob extends MediaJob * previously or not. If it was then it is skipped. * Otherwise new file is sent for sending emails and new text file * with taken prepended to the file name is generated. + * + * @param int $machine_id + * @param array $data */ public function getTasks($machine_id, $data = null) { @@ -165,6 +179,9 @@ class BulkEmailJob extends MediaJob /** * Handles request to unlock the mailing list file * and delete it. 
+ * + * @param int $machine_id id of machine which is done sending emails + * @param array $data file name to unlock */ public function putTasks($machine_id, $data = null) { diff --git a/src/library/media_jobs/MediaJob.php b/src/library/media_jobs/MediaJob.php index afde9db85..7c67688c8 100644 --- a/src/library/media_jobs/MediaJob.php +++ b/src/library/media_jobs/MediaJob.php @@ -22,7 +22,8 @@ * * END LICENSE * - * @author Chris Pollett chris@pollett.org + * @author Chris Pollett chris@pollett.org (initial MediaJob class + * and subclasses based on work of Pooja Mishra for her master's) * @license http://www.gnu.org/licenses/ GPL3 * @link http://www.seekquarry.com/ * @copyright 2009 - 2015 @@ -37,47 +38,77 @@ use seekquarry\yioop\library\FetchUrl; use seekquarry\yioop\library\MediaConstants; /** - * + * Base class for jobs to be carried out by a MediaUpdater process + * Subclasses of this class correspond to specific jobs for MediaUpdater. + * Subclasses should implement methods they use among init(), + * checkPrerequisites(), nondistributedTasks(), prepareTasks(), finishTasks(), + * getTasks(), doTasks(), and putTasks(). MediaUpdating can be configured to + * run in either distributed or nameserver only mode. In the former mode, + * prepareTasks(), finishTasks() run on the name server, getTasks() and + * putTasks() run in the name server's web app, and doTasks() run on + * any MediaUpdater clients. In the latter mode, only the method + * nondistributedTasks() is called by the MediaUpdater and by only the updater + * on the name server. 
*/ class MediaJob implements CrawlConstants, MediaConstants { /** - * + * If MediaJob was instantiated in the web app, the controller that + * instantiated it + * @var object */ - public $media_updater; + public $controller; /** - * + * If the MediaJob was instantiated in a MediaUpdater, this is a reference + * to that updater + * @var object */ - public $name_server_does_slave_tasks; + public $media_updater; /** + * Whether to run the job's client tasks on the name server in addition to + * prepareTasks and finishTasks * + * @var bool */ - public $name_server_does_slave_tasks_only; + public $name_server_does_client_tasks; /** - * + * Whether this MediaJob performs name server only tasks + * @var bool */ - public $tasks; + public $name_server_does_client_tasks_only; /** - * + * The tasks most recently received from the name server for this MediaJob + * @var array */ - public $controller; + public $tasks; /** + * Instantiates the MediaJob with a reference to the object that instantiated it * + * @param object $media_updater a reference to the media updater that + * instantiated this object (if being run in MediaUpdater) + * @param object $controller a reference to the controller that + * instantiated this object (if being run in the web app) */ public function __construct($media_updater = null, $controller = null) { $this->media_updater = $media_updater; $this->controller = $controller; $this->tasks = []; - $this->name_server_does_slave_tasks = false; - $this->name_server_does_slave_tasks_only = false; + $this->name_server_does_client_tasks = false; + $this->name_server_does_client_tasks_only = false; $this->init(); } + /** + * Overridable method in which a job can carry out any initialization + * needed before it is run + */ public function init() { } /** - * + * Method executed by MediaUpdater to perform the MediaJob. This method + * shouldn't need to be overridden. Instead, the various callbacks it calls + * (listed in the class description) should be overridden. 
*/ public function run() { @@ -93,15 +124,15 @@ class MediaJob implements CrawlConstants, MediaConstants L\crawlLog("Running Job: $job_name"); L\crawlLog("Current Machine: $current_machine"); if ($this->media_updater->media_mode == 'distributed') { - $name_server_does_slave_tasks = false; - if ($is_name_server && ! $this->name_server_does_slave_tasks_only) { + $name_server_does_client_tasks = false; + if ($is_name_server && ! $this->name_server_does_client_tasks_only){ L\crawlLog("--Preparing job $job_name tasks on Name Server"); $this->prepareTasks(); L\crawlLog("--Finishing job $job_name tasks on Name Server"); $this->finishTasks(); } if (!$is_name_server || ($is_name_server && - $this->name_server_does_slave_tasks)) { + $this->name_server_does_client_tasks)) { L\crawlLog("--Checking for $job_name tasks to do"); $this->tasks = $this->execNameServer("getTasks"); if ($this->tasks) { @@ -129,54 +160,84 @@ class MediaJob implements CrawlConstants, MediaConstants L\crawlLog("Finished job: $job_name"); } /** + * Checks if the preconditions for the current job's task have been + * met. If yes, the run() method will then invoke methods to carry them + * out. * + * @return bool whether or not the prerequisites have been met for + * the job's tasks to be performed. */ public function checkPrerequisites() { return true; } /** - * + * Tasks done by this job when run in nondistributed mode */ public function nondistributedTasks() { } /** - * + * This method is called on the name server to prepare data for + * any MediaUpdater clients. */ public function prepareTasks() { } /** - * + * This method is called on the name server to finish processing any + * data returned by MediaUpdater clients. */ public function finishTasks() { } /** + * This method is run on MediaUpdater client with data gotten from the + * name server by getTasks. 
The client is supposed to then process + * this information and if need be send the results back to the name server * + * @param array $tasks data that the MediaJob running on a client + * MediaUpdater needs to process + * @return mixed the result of carrying out that processing */ public function doTasks($tasks) { } /** + * Method called from JobsController when a MediaUpdater client contacts + * the name server's web app. This method is supposed to marshal any + * data on the name server that the requesting client should process. * + * @param int $machine_id id of client requesting data + * @param array $data any additional info about data being requested + * @return array work for the client to process */ public function getTasks($machine_id, $data = null) { } /** + * After a MediaUpdater client is done with the task given to it by the + * name server's media updater, the client contacts the name server's + * web app. The name server's web app's JobsController then calls this + * method to receive the data on the name server * + * @param int $machine_id id of client that is sending data to name server + * @param mixed $data results of computation done by client + * @return array any response information to send back to the client */ public function putTasks($machine_id, $data) { } /** - * @param string $command the ParallelModel method to invoke on the remote - * Yioop instances - * @param string $arg additional arguments to be passed to the remote - * machine - * @return array a list of outputs from each machine that was called. + * Executes a method on the name server's JobsController. + * It will typically execute either getTasks or putTasks for a specific + * MediaJob or getUpdateProperties to find out how the current MediaUpdater + * should be configured. + * + * @param string $command the method to invoke on the name server + * @param string $args additional arguments to be passed to the name + * server + * @return array data returned by the name server. 
*/ public static function execNameServer($command, $args = null) { @@ -208,7 +269,10 @@ class MediaJob implements CrawlConstants, MediaConstants return $output; } /** + * Gets the class name (less namespace and the word Job ) + * of the current MediaJob * + * @return string name of the current job */ public static function getJobName() { @@ -216,7 +280,7 @@ class MediaJob implements CrawlConstants, MediaConstants if (substr($class_name, -3) == "Job") { return substr($class_name, strrpos($class_name, "\\") + 1, -3); } - return false; + return ""; } /** * Returns a hash of the url of the current machine based on the value diff --git a/src/library/media_jobs/NewsUpdateJob.php b/src/library/media_jobs/NewsUpdateJob.php index 15d6adadd..fb3adaadd 100644 --- a/src/library/media_jobs/NewsUpdateJob.php +++ b/src/library/media_jobs/NewsUpdateJob.php @@ -22,7 +22,8 @@ * * END LICENSE * - * @author Chris Pollett chris@pollett.org + * @author Chris Pollett chris@pollett.org (initial MediaJob class + * and subclasses based on work of Pooja Mishra for her master's) * @license http://www.gnu.org/licenses/ GPL3 * @link http://www.seekquarry.com/ * @copyright 2009 - 2015 @@ -40,12 +41,14 @@ use seekquarry\yioop\library\UrlParser; use seekquarry\yioop\Models; /** - * + * A media job to download and index feeds from various search sources (RSS, + * HTML scraper, etc). Idea is that this job runs once an hour to get the + * latest news from those sources. 
*/ class NewsUpdateJob extends MediaJob { /** - * + * How long in seconds before a news item expires */ const ITEM_EXPIRES_TIME = C\ONE_WEEK; /** @@ -53,27 +56,35 @@ class NewsUpdateJob extends MediaJob */ const MAX_FEEDS_ONE_GO = 100; /** - * + * Time in current epoch when news last updated + * @var int */ public $update_time; /** - * + * Datasource object used to run db queries related to news items + * (for storing and updating them) + * @var object */ public $db; /** - * + * Initializes the last update time to far in the past, so news will get + * immediately updated. Connects to the DB to store news items, and + * makes it so the same media job runs both on name server and client + * Media Updaters */ public function init() { $this->update_time = 0; - $this->name_server_does_slave_tasks = true; - $this->name_server_does_slave_tasks_only = true; + $this->name_server_does_client_tasks = true; + $this->name_server_does_client_tasks_only = true; $db_class = C\NS_DATASOURCES . ucfirst(C\DBMS). "Manager"; $this->db = new $db_class(); $this->db->connect(); } /** + * Only update if it's been more than an hour since the last update * + * @return bool whether it's been an hour since the last update */ public function checkPrerequisites() { @@ -88,7 +99,8 @@ class NewsUpdateJob extends MediaJob return false; } /** - * + * Get the media sources from the local database and use those to run the + * same task as in the distributed setting */ public function nondistributedTasks() { @@ -122,14 +134,11 @@ class NewsUpdateJob extends MediaJob } /** * For each feed source downloads the feeds, checks which items are - * not in the database, adds them. This method does not update - * the inverted index shard. + * not in the database, adds them. 
Then calls the method to rebuild the + * inverted index shard for news * + * @param array $tasks array of news feed info (url to download, paths to + * extract etc) */ public function doTasks($tasks) { @@ -159,6 +168,11 @@ class NewsUpdateJob extends MediaJob * Handles the request to get the array of news feed sources which hash to * a particular value i.e. match with the index of requesting machine's * hashed url/name from array of available machines hash + * + * @param int $machine_id id of machine making request for news feeds + * @param array $data not used but inherited from the base MediaJob + * class as a parameter (so will always be null in this case) + * @return array of feed urls and paths to extract from them */ public function getTasks($machine_id, $data = null) { diff --git a/src/library/media_jobs/VideoConvertJob.php b/src/library/media_jobs/VideoConvertJob.php index 8bd241c8a..6634d73b4 100644 --- a/src/library/media_jobs/VideoConvertJob.php +++ b/src/library/media_jobs/VideoConvertJob.php @@ -22,7 +22,8 @@ * * END LICENSE * - * @author Chris Pollett chris@pollett.org + * @author Chris Pollett chris@pollett.org (initial MediaJob class + * and subclasses based on work of Pooja Mishra for her master's) * @license http://www.gnu.org/licenses/ GPL3 * @link http://www.seekquarry.com/ * @copyright 2009 - 2015 @@ -35,7 +36,8 @@ use seekquarry\yioop\library as L; use seekquarry\yioop\library\UrlParser; /** - * + * Media Job used to convert videos uploaded to the wiki or group feeds to + * a common format (mp4) */ class VideoConvertJob extends MediaJob { @@ -45,35 +47,39 @@ class VideoConvertJob extends MediaJob 
*/ public $video_convert_types = ["mov", "avi"]; /** - * + * Datasource used to do directory level file manipulations (delete or + * traverse) + * @var object */ public $db; /** - * + * Sets up the datasource used for the video convert directories */ public function init() { - $this->update_time = 0; $db_class = C\NS_DATASOURCES . ucfirst(C\DBMS). "Manager"; $this->db = new $db_class(); $this->db->connect(); } /** - * + * Only run the VideoConvertJob if in distributed mode */ public function checkPrerequisites() { return $this->media_updater->media_mode == 'distributed'; } /** - * + * Check for videos to convert. If found split to a common size to + * send to client media updaters. (Run on name server) */ public function prepareTasks() { $this->splitVideos(); } /** - * + * Checks if video convert task is complete for a video. If so, moves + * movie segments to a converted folder, assembles the segments into + * a single video file, and moves the result to the desired place. */ public function finishTasks() { @@ -84,7 +90,7 @@ class VideoConvertJob extends MediaJob /** * Checks name server for a video segment to convert. If there are * converts the mov or avi segment file to an mp4 file - * This function would only be called by slave media updaters. + * This function would only be called by client media updaters. */ public function doTasks($tasks) { @@ -135,7 +141,6 @@ class VideoConvertJob extends MediaJob return false; } } - /** * Generates a thumbnail from a video file assuming FFMPEG * @@ -247,7 +252,7 @@ class VideoConvertJob extends MediaJob } } /** - * Function to look through all the converted.video directories present in + * Function to look through all the converted video directories present in * media and generate the assemble video files needed for concatenating the * converted splitfiles. 
*/ diff --git a/src/models/GroupModel.php b/src/models/GroupModel.php index 6a54499ed..88e58f89a 100644 --- a/src/models/GroupModel.php +++ b/src/models/GroupModel.php @@ -1278,22 +1278,27 @@ class GroupModel extends Model implements MediaConstants $mime_type = $mime_type_parts[0]; $resource_url = $this->getGroupPageResourceUrl($group_id, $current_page_id, $resource_name); + $video_type_extensions = ['video/mp4' => "mp4", + 'video/ogg' => "ogv", + 'video/avi' => 'avi', 'video/quicktime' => 'mov', + 'video/x-flv' => 'flv', + 'video/x-ms-wmv' => 'wmv', 'video/webm' => 'webm', + 'application/ogg' => 'ogv']; if (in_array($mime_type, ['image/png', 'image/gif', 'image/jpeg', 'image/bmp', 'image/svg+xml'])) { $replace_string = "<img src='$resource_url' ". " alt='$resource_description' />"; $parsed_page = preg_replace('/'.preg_quote($match_string).'/u', $replace_string, $parsed_page); - } elseif (in_array($mime_type, ['video/mp4', 'video/ogg', - 'video/avi', 'video/quicktime', 'video/x-flv', - 'video/x-ms-wmv', 'video/webm', 'application/ogg'])) { + } elseif (in_array($mime_type, + array_keys($video_type_extensions))) { $replace_string = "<video style='width:100%' ". "controls='controls' >\n". "<source src='$resource_url' type='$mime_type'/>\n"; $multi_source_types = ["mp4", "webm", "ogg"]; - $current_extension = substr($mime_type, strlen('video/')); + $current_extension = $video_type_extensions[$mime_type]; $add_sources = []; - if (in_array($current_extension, $multi_source_types)) { + if (!in_array($current_extension, $multi_source_types)) { $add_sources = array_diff($multi_source_types, [$current_extension]); }