Adds documentation for new MediaJob classes, a=chris

Chris Pollett [2015-10-04 16:Oct:th]
Adds documentation for new MediaJob classes, a=chris
Filename
src/configs/Config.php
src/controllers/JobsController.php
src/library/media_jobs/BulkEmailJob.php
src/library/media_jobs/MediaJob.php
src/library/media_jobs/NewsUpdateJob.php
src/library/media_jobs/VideoConvertJob.php
src/models/GroupModel.php
diff --git a/src/configs/Config.php b/src/configs/Config.php
index 49c8c3441..169718eeb 100755
--- a/src/configs/Config.php
+++ b/src/configs/Config.php
@@ -616,8 +616,8 @@ if (!nsdefined("MAX_VIDEO_CONVERT_SIZE")) {
 }
 /**
  * The maximum time limit in seconds where if a file is not converted by the
- * time it will be picked up again by the slave media updater
- * This value largely depends on the no of slave media updaters that we have
+ * time it will be picked up again by the client media updater
+ * This value largely depends on the no of client media updaters that we have
  * and also the maximum video size that would be uploaded to yioop.
  * This value should be kept more than the sleeping time of media updater
  * loop to avoid conversion of same file multiple times.
diff --git a/src/controllers/JobsController.php b/src/controllers/JobsController.php
index 3ab323b22..36b08b8db 100644
--- a/src/controllers/JobsController.php
+++ b/src/controllers/JobsController.php
@@ -37,7 +37,15 @@ use seekquarry\yioop\library\MediaConstants;
 use seekquarry\yioop\library\UrlParser;

 /**
- *
+ * This class is used to handle requests from a MediaUpdater to a name server.
+ * There are three main types of requests: getUpdateProperties, and
+ * for any job that the MediaUpdater might be running, its getTasks, and
+ * putTasks request. getUpdateProperties is supposed to provide configuration
+ * settings for the MediaUpdater. A MediaUpdater might be running several
+ * periodic jobs. The getTasks request of a job is used to see if there
+ * is any new work available of that job type on the name server. A
+ * putTasks request is used to handle any computed data sent back from a
+ * MediaUpdater to the name server.
  *
  * @author Chris Pollett
  */
@@ -51,7 +59,7 @@ class JobsController extends Controller implements CrawlConstants,
     public $activities = ["getUpdateProperties"];
     /**
      * Checks that the request seems to be coming from a legitimate
-     * media_updater then determines which job's activity is being
+     * MediaUpdater then determines which job's activity is being
      * requested and calls that activity for processing.
      *
      */
diff --git a/src/library/media_jobs/BulkEmailJob.php b/src/library/media_jobs/BulkEmailJob.php
index 614b73d97..b53fc3702 100644
--- a/src/library/media_jobs/BulkEmailJob.php
+++ b/src/library/media_jobs/BulkEmailJob.php
@@ -22,7 +22,8 @@
  *
  * END LICENSE
  *
- * @author Chris Pollett chris@pollett.org
+ * @author Chris Pollett chris@pollett.org (initial MediaJob class
+ *      and subclasses based on work of Pooja Mishra for her master's)
  * @license http://www.gnu.org/licenses/ GPL3
  * @link http://www.seekquarry.com/
  * @copyright 2009 - 2015
@@ -35,7 +36,9 @@ use seekquarry\yioop\library as L;
 use seekquarry\yioop\library\MailServer;

 /**
- *
+ * MediaJob class for sending out emails from a Yioop instance (either in
+ * response to account registrations or in response to group posts and similar
+ * activities)
  */
 class BulkEmailJob extends MediaJob
 {
@@ -45,7 +48,7 @@ class BulkEmailJob extends MediaJob
      */
     public $mail_server;
     /**
-     *
+     * Set up the MailServer object used to actually send mail
      */
     public function init()
     {
@@ -54,7 +57,10 @@ class BulkEmailJob extends MediaJob
             C\MAIL_SECURITY);
     }
     /**
+     * Bulk mail runs if the media updater is in distributed mode or if
+     * Yioop is configured to send mail from media updater
      *
+     * @return bool true if bulk mail task should be run.
      */
     public function checkPrerequisites()
     {
@@ -102,13 +108,18 @@ class BulkEmailJob extends MediaJob
         }
     }
     /**
+     * Emails a list of emails provided by the name server to the media updater
+     * client
      *
+     * @param array $tasks contains emails which should be sent out
+     * @return mixed data to send back to name server (in this case the name
+     *      of the email file that was completely sent)
      */
     public function doTasks($tasks)
     {
         if (!isset($tasks["name"]) || !isset($tasks["data"])) {
             L\crawlLog("...Email Task received incomplete !");
-            return;
+            return null;
         }
         L\crawlLog("----Email file name: {$tasks['name']}");
         $emails = explode(self::MESSAGE_SEPARATOR, $tasks["data"]);
@@ -128,6 +139,9 @@ class BulkEmailJob extends MediaJob
      * previously or not. If it was then it is skipped.
      * Otherwise new file is sent for sending emails and new text file
      * with taken prepended to the file name is generated.
+     *
+     * @param int $machine_id
+     * @param array $data
      */
     public function getTasks($machine_id, $data = null)
     {
@@ -165,6 +179,9 @@ class BulkEmailJob extends MediaJob
     /**
      * Handles request to unlock the mailing list file
      * and delete it.
+     *
+     * @param int $machine_id id of machine which is done sending emails
+     * @param array $data file name to unlock
      */
     public function putTasks($machine_id, $data = null)
     {
diff --git a/src/library/media_jobs/MediaJob.php b/src/library/media_jobs/MediaJob.php
index afde9db85..7c67688c8 100644
--- a/src/library/media_jobs/MediaJob.php
+++ b/src/library/media_jobs/MediaJob.php
@@ -22,7 +22,8 @@
  *
  * END LICENSE
  *
- * @author Chris Pollett chris@pollett.org
+ * @author Chris Pollett chris@pollett.org (initial MediaJob class
+ *      and subclasses based on work of Pooja Mishra for her master's)
  * @license http://www.gnu.org/licenses/ GPL3
  * @link http://www.seekquarry.com/
  * @copyright 2009 - 2015
@@ -37,47 +38,77 @@ use seekquarry\yioop\library\FetchUrl;
 use seekquarry\yioop\library\MediaConstants;

 /**
- *
+ * Base class for jobs to be carried out by a MediaUpdater process
+ * Subclasses of this class correspond to specific jobs for MediaUpdater.
+ * Subclasses should implement methods they use among init(),
+ * checkPrerequisites(), nondistributedTasks(), prepareTasks(), finishTasks(),
+ * getTasks(), doTasks(), and putTasks(). MediaUpdating can be configured to
+ * run in either distributed or nameserver only mode. In the former mode,
+ * prepareTasks(), finishTasks() run on the name server, getTasks() and
+ * putTasks() run in the name server's web app, and doTasks() runs on
+ * any MediaUpdater clients. In the latter mode, only the method
+ * nondistributedTasks() is called by the MediaUpdater and by only the updater
+ * on the name server.
  */
 class MediaJob implements CrawlConstants, MediaConstants
 {
     /**
-     *
+     * If MediaJob was instantiated in the web app, the controller that
+     * instantiated it
+     * @var object
      */
-    public $media_updater;
+    public $controller;
     /**
-     *
+     * If the MediaJob was instantiated in a MediaUpdater, this is a reference
+     * to that updater
+     * @var object
      */
-    public $name_server_does_slave_tasks;
+    public $media_updater;
     /**
+     * Whether to run the job's client tasks on the name server in addition to
+     * prepareTasks and finishTasks
      *
+     * @var bool
      */
-    public $name_server_does_slave_tasks_only;
+    public $name_server_does_client_tasks;
     /**
-     *
+     * Whether this MediaJob performs name server only tasks
+     * @var bool
      */
-    public $tasks;
+    public $name_server_does_client_tasks_only;
     /**
-     *
+     * The most recently received from the name server tasks for this MediaJob
+     * @var array
      */
-    public $controller;
+    public $tasks;
     /**
+     * Instantiates the MediaJob with a reference to the object that created it
      *
+     * @param object $media_updater a reference to the media updater that
+     *      instantiated this object (if being run in MediaUpdater)
+     * @param object $controller a reference to the controller that
+     *      instantiated this object (if being run in the web app)
      */
     public function __construct($media_updater = null, $controller = null)
     {
         $this->media_updater = $media_updater;
         $this->controller = $controller;
         $this->tasks = [];
-        $this->name_server_does_slave_tasks = false;
-        $this->name_server_does_slave_tasks_only = false;
+        $this->name_server_does_client_tasks = false;
+        $this->name_server_does_client_tasks_only = false;
         $this->init();
     }
+    /**
+     * Overridable methods in which a job can carry out any initialization
+     * needed before it is run
+     */
     public function init()
     {
     }
     /**
-     *
+     * Method executed by MediaUpdater to perform the MediaJob. This method
+     * shouldn't need to be overridden. Instead, the various callbacks it calls
+     * (listed in the class description) should be overridden.
      */
     public function run()
     {
@@ -93,15 +124,15 @@ class MediaJob implements CrawlConstants, MediaConstants
         L\crawlLog("Running Job: $job_name");
         L\crawlLog("Current Machine: $current_machine");
         if ($this->media_updater->media_mode == 'distributed') {
-            $name_server_does_slave_tasks = false;
-            if ($is_name_server && ! $this->name_server_does_slave_tasks_only) {
+            $name_server_does_client_tasks = false;
+            if ($is_name_server && ! $this->name_server_does_client_tasks_only){
                 L\crawlLog("--Preparing job $job_name tasks on Name Server");
                 $this->prepareTasks();
                 L\crawlLog("--Finishing job $job_name tasks on Name Server");
                 $this->finishTasks();
             }
             if (!$is_name_server || ($is_name_server &&
-                $this->name_server_does_slave_tasks)) {
+                $this->name_server_does_client_tasks)) {
                 L\crawlLog("--Checking for $job_name tasks to do");
                 $this->tasks = $this->execNameServer("getTasks");
                 if ($this->tasks) {
@@ -129,54 +160,84 @@ class MediaJob implements CrawlConstants, MediaConstants
         L\crawlLog("Finished job: $job_name");
     }
     /**
+     * Checks if the preconditions for the current job's task have been
+     * met. If yes, the run() method will then invoke methods to carry them
+     * out.
      *
+     * @return bool whether or not the prerequisites have been met for
+     *      the job's tasks to be performed.
      */
     public function checkPrerequisites()
     {
         return true;
     }
     /**
-     *
+     * Tasks done by this job when run in nondistributed mode
      */
     public function nondistributedTasks()
     {
     }
     /**
-     *
+     * This method is called on the name server to prepare data for
+     * any MediaUpdater clients.
      */
     public function prepareTasks()
     {
     }
     /**
-     *
+     * This method is called on the name server to finish processing any
+     * data returned by MediaUpdater clients.
      */
     public function finishTasks()
     {
     }
     /**
+     * This method is run on a MediaUpdater client with data gotten from the
+     * name server by getTasks. The client is supposed to then process this
+     * information and, if need be, send the results back to the name server
      *
+     * @param array $tasks data that the MediaJob running on a client
+     *      MediaUpdater needs to process
+     * @return mixed the result of carrying out that processing
      */
     public function doTasks($tasks)
     {
     }
     /**
+     * Method called from JobsController when a MediaUpdater client contacts
+     * the name server's web app. This method is supposed to marshal any
+     * data on the name server that the requesting client should process.
      *
+     * @param int $machine_id id of client requesting data
+     * @param array $data any additional info about data being requested
+     * @return array work for the client to process
      */
     public function getTasks($machine_id, $data = null)
     {
     }
     /**
+     * After a MediaUpdater client is done with the task given to it by the
+     * name server's media updater, the client contacts the name server's
+     * web app. The name server's web app's JobsController then calls this
+     * method to receive the data on the name server
      *
+     * @param int $machine_id id of client that is sending data to name server
+     * @param mixed $data results of computation done by client
+     * @return array any response information to send back to the client
      */
     public function putTasks($machine_id, $data)
     {
     }
     /**
-     * @param string $command the ParallelModel method to invoke on the remote
-     *     Yioop instances
-     * @param string $arg additional arguments to be passed to the remote
-     *      machine
-     * @return array a list of outputs from each machine that was called.
+     * Executes a method on the name server's JobsController.
+     * It will typically execute either getTasks or putTasks for a specific
+     * MediaJob or getUpdateProperties to find out how the current
+     * MediaUpdater should be configured.
+     *
+     * @param string $command the method to invoke on the name server
+     * @param string $args additional arguments to be passed to the name
+     *      server
+     * @return array data returned by the name server.
      */
     public static function execNameServer($command, $args = null)
     {
@@ -208,7 +269,10 @@ class MediaJob implements CrawlConstants, MediaConstants
         return $output;
     }
     /**
+     * Gets the class name (less namespace and the word Job )
+     * of the current MediaJob
      *
+     * @return string name of the current job
      */
     public static function getJobName()
     {
@@ -216,7 +280,7 @@ class MediaJob implements CrawlConstants, MediaConstants
         if (substr($class_name, -3) == "Job") {
             return substr($class_name, strrpos($class_name, "\\") + 1, -3);
         }
-        return false;
+        return "";
     }
     /**
      * Returns a hash of the url of the current machine based on the value
diff --git a/src/library/media_jobs/NewsUpdateJob.php b/src/library/media_jobs/NewsUpdateJob.php
index 15d6adadd..fb3adaadd 100644
--- a/src/library/media_jobs/NewsUpdateJob.php
+++ b/src/library/media_jobs/NewsUpdateJob.php
@@ -22,7 +22,8 @@
  *
  * END LICENSE
  *
- * @author Chris Pollett chris@pollett.org
+ * @author Chris Pollett chris@pollett.org (initial MediaJob class
+ *      and subclasses based on work of Pooja Mishra for her master's)
  * @license http://www.gnu.org/licenses/ GPL3
  * @link http://www.seekquarry.com/
  * @copyright 2009 - 2015
@@ -40,12 +41,14 @@ use seekquarry\yioop\library\UrlParser;
 use seekquarry\yioop\Models;

 /**
- *
+ * A media job to download and index feeds from various search sources (RSS,
+ * HTML scraper, etc). Idea is that this job runs once an hour to get the
+ * latest news from those sources.
  */
 class NewsUpdateJob extends MediaJob
 {
     /**
-     *
+     * how long in seconds before a news item expires
      */
     const ITEM_EXPIRES_TIME = C\ONE_WEEK;
     /**
@@ -53,27 +56,35 @@ class NewsUpdateJob extends MediaJob
      */
     const MAX_FEEDS_ONE_GO = 100;
     /**
-     *
+     * Time in current epoch when news last updated
+     * @var int
      */
     public $update_time;
     /**
-     *
+     * Datasource object used to run db queries related to news items
+     * (for storing and updating them)
+     * @var object
      */
     public $db;
     /**
-     *
+     * Initializes the last update time to far in the past so news will get
+     * immediately updated. Sets up a connection to the DB to store news
+     * items, and makes it so the same media job runs both on name server and
+     * client Media Updaters
      */
     public function init()
     {
         $this->update_time = 0;
-        $this->name_server_does_slave_tasks = true;
-        $this->name_server_does_slave_tasks_only = true;
+        $this->name_server_does_client_tasks = true;
+        $this->name_server_does_client_tasks_only = true;
         $db_class = C\NS_DATASOURCES . ucfirst(C\DBMS). "Manager";
         $this->db = new $db_class();
         $this->db->connect();
     }
     /**
+     * Only update if it's been more than an hour since the last update
      *
+     * @return bool whether it's been an hour since the last update
      */
     public function checkPrerequisites()
     {
@@ -88,7 +99,8 @@ class NewsUpdateJob extends MediaJob
         return false;
     }
     /**
-     *
+     * Get the media sources from the local database and use those to run
+     * the same task as in the distributed setting
      */
     public function nondistributedTasks()
     {
@@ -122,14 +134,11 @@ class NewsUpdateJob extends MediaJob
     }
     /**
      * For each feed source downloads the feeds, checks which items are
-     * not in the database, adds them. This method does not update
-     * the inverted index shard.
+     * not in the database, adds them. Then calls the method to rebuild the
+     * inverted index shard for news
      *
-     * @param int $age how many seconds old records should be ignored
-     * @param string $media_mode might be one of name_server or distributed --
-     *      this controls whether the name server should be contacted to get
-     *      a list of news sources (distributed case) or whether this is the
-     *      non-distributed case and data should be gotten from local machine
+     * @param array $tasks array of news feed info (url to download, paths to
+     *  extract etc)
      */
     public function doTasks($tasks)
     {
@@ -159,6 +168,11 @@ class NewsUpdateJob extends MediaJob
      * Handles the request to get the  array of news feed sources which hash to
      * a particular value i.e. match with the index of requesting machine's
      * hashed url/name from array of available machines hash
+     *
+     * @param int $machine_id id of machine making request for news feeds
+     * @param array $data not used but inherited from the base MediaJob
+     *      class as a parameter (so will always be null in this case)
+     * @return array of feed urls and paths to extract from them
      */
     public function getTasks($machine_id, $data = null)
     {
diff --git a/src/library/media_jobs/VideoConvertJob.php b/src/library/media_jobs/VideoConvertJob.php
index 8bd241c8a..6634d73b4 100644
--- a/src/library/media_jobs/VideoConvertJob.php
+++ b/src/library/media_jobs/VideoConvertJob.php
@@ -22,7 +22,8 @@
  *
  * END LICENSE
  *
- * @author Chris Pollett chris@pollett.org
+ * @author Chris Pollett chris@pollett.org (initial MediaJob class
+ *      and subclasses based on work of Pooja Mishra for her master's)
  * @license http://www.gnu.org/licenses/ GPL3
  * @link http://www.seekquarry.com/
  * @copyright 2009 - 2015
@@ -35,7 +36,8 @@ use seekquarry\yioop\library as L;
 use seekquarry\yioop\library\UrlParser;

 /**
- *
+ * Media Job used to convert videos uploaded to the wiki or group feeds to
+ * a common format (mp4)
  */
 class VideoConvertJob extends MediaJob
 {
@@ -45,35 +47,39 @@ class VideoConvertJob extends MediaJob
      */
     public $video_convert_types = ["mov", "avi"];
     /**
-     *
+     * Datasource used to do directory level file manipulations (delete or
+     * traverse)
+     * @var object
      */
     public $db;
     /**
-     *
+     * Sets up the datasource used for the video convert directories
      */
     public function init()
     {
-        $this->update_time = 0;
         $db_class = C\NS_DATASOURCES . ucfirst(C\DBMS). "Manager";
         $this->db = new $db_class();
         $this->db->connect();
     }
     /**
-     *
+     * Only run the VideoConvertJob if in distributed mode
      */
     public function checkPrerequisites()
     {
         return $this->media_updater->media_mode == 'distributed';
     }
     /**
-     *
+     * Check for videos to convert. If found split to a common size to
+     * send to client media updaters. (Run on name server)
      */
     public function prepareTasks()
     {
         $this->splitVideos();
     }
     /**
-     *
+     * Checks if video convert task is complete for a video. If so, moves
+     * movie segments to a converted folder, assembles the segments into
+     * a single video file, and moves the result to the desired place.
      */
     public function finishTasks()
     {
@@ -84,7 +90,7 @@ class VideoConvertJob extends MediaJob
     /**
      * Checks name server for a video segment to convert. If there are
      * converts the mov or avi segment file to an mp4 file
-     * This function would only be called by slave media updaters.
+     * This function would only be called by client media updaters.
      */
     public function doTasks($tasks)
     {
@@ -135,7 +141,6 @@ class VideoConvertJob extends MediaJob
             return false;
         }
     }
-
     /**
      * Generates a thumbnail from a video file assuming FFMPEG
      *
@@ -247,7 +252,7 @@ class VideoConvertJob extends MediaJob
         }
     }
     /**
-     * Function to look through all the converted.video directories present in
+     * Function to look through all the converted video directories present in
      * media and generate the assemble video files needed for concatenating the
      * converted splitfiles.
      */
diff --git a/src/models/GroupModel.php b/src/models/GroupModel.php
index 6a54499ed..88e58f89a 100644
--- a/src/models/GroupModel.php
+++ b/src/models/GroupModel.php
@@ -1278,22 +1278,27 @@ class GroupModel extends Model implements MediaConstants
             $mime_type = $mime_type_parts[0];
             $resource_url = $this->getGroupPageResourceUrl($group_id,
                 $current_page_id, $resource_name);
+            $video_type_extensions = ['video/mp4' => "mp4",
+                'video/ogg' => "ogv",
+                'video/avi' => 'avi', 'video/quicktime' => 'mov',
+                'video/x-flv' => 'flv',
+                'video/x-ms-wmv' => 'wmv', 'video/webm' => 'webm',
+                'application/ogg' => 'ogv'];
             if (in_array($mime_type, ['image/png', 'image/gif',
                 'image/jpeg', 'image/bmp', 'image/svg+xml'])) {
                 $replace_string = "<img src='$resource_url' ".
                     " alt='$resource_description' />";
                 $parsed_page = preg_replace('/'.preg_quote($match_string).'/u',
                     $replace_string, $parsed_page);
-            } elseif (in_array($mime_type, ['video/mp4', 'video/ogg',
-                'video/avi', 'video/quicktime', 'video/x-flv',
-                'video/x-ms-wmv', 'video/webm', 'application/ogg'])) {
+            } elseif (in_array($mime_type,
+                array_keys($video_type_extensions))) {
                 $replace_string = "<video style='width:100%' ".
                     "controls='controls' >\n".
                     "<source src='$resource_url' type='$mime_type'/>\n";
                 $multi_source_types = ["mp4", "webm", "ogg"];
-                $current_extension = substr($mime_type, strlen('video/'));
+                $current_extension = $video_type_extensions[$mime_type];
                 $add_sources = [];
-                if (in_array($current_extension, $multi_source_types)) {
+                if (!in_array($current_extension, $multi_source_types)) {
                     $add_sources = array_diff($multi_source_types,
                         [$current_extension]);
                 }
ViewGit