Last commit for src/library/media_jobs/MediaJob.php: 2addb500315b7393a90fe66431d7832b1e7386c7

Adjust copyrights years

Chris Pollett [2024-01-03 21:Jan:rd]
Adjust copyrights years
<?php
/**
 * SeekQuarry/Yioop --
 * Open Source Pure PHP Search Engine, Crawler, and Indexer
 *
 * Copyright (C) 2009 - 2015  Chris Pollett chris@pollett.org
 *
 * LICENSE:
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * END LICENSE
 *
 * @author Chris Pollett chris@pollett.org
 * @license http://www.gnu.org/licenses/ GPL3
 * @link http://www.seekquarry.com/
 * @copyright 2009 - 2015
 * @filesource
 */
namespace seekquarry\yioop\library\media_jobs;

use seekquarry\yioop\configs as C;
use seekquarry\yioop\library as L;
use seekquarry\yioop\library\CrawlConstants;
use seekquarry\yioop\library\FetchUrl;
use seekquarry\yioop\library\MediaConstants;

/**
 * Base class for jobs that are run on behalf of a media updater.
 *
 * The run() method drives a job. In distributed mode the machine that is
 * the name server calls prepareTasks() and finishTasks(), while the other
 * machines (and optionally the name server as well) ask the name server for
 * tasks, execute them with doTasks(), and post the results back. In any other
 * mode only the name server does work, by calling nondistributedTasks().
 * Every hook in this class is an empty stub intended to be overridden by
 * subclasses.
 */
class MediaJob implements CrawlConstants, MediaConstants
{
    /**
     * Object passed to the constructor on whose behalf this job runs.
     * run() reads its media_mode property to decide between distributed
     * and nondistributed execution.
     * @var object
     */
    public $media_updater;
    /**
     * In distributed mode, whether the name server should also request and
     * execute tasks the way a non-name-server machine does
     * @var bool
     */
    public $name_server_does_slave_tasks;
    /**
     * In distributed mode, if true the name server skips prepareTasks() and
     * finishTasks()
     * @var bool
     */
    public $name_server_does_slave_tasks_only;
    /**
     * Tasks most recently received from the name server by run()
     * @var mixed
     */
    public $tasks;
    /**
     * Object passed to the constructor. It is only stored by this class.
     * NOTE(review): presumably the controller handling job requests on the
     * name server -- confirm against the code constructing jobs.
     * @var object
     */
    public $controller;
    /**
     * Stores the supplied media updater and controller, sets the task list
     * and name server flags to their defaults, then calls init() so that
     * subclasses can adjust these values.
     *
     * @param object $media_updater object whose media_mode property
     *      is consulted by run()
     * @param object $controller stored in the controller field
     */
    public function __construct($media_updater = null, $controller = null)
    {
        $this->media_updater = $media_updater;
        $this->controller = $controller;
        $this->tasks = [];
        $this->name_server_does_slave_tasks = false;
        $this->name_server_does_slave_tasks_only = false;
        $this->init();
    }
    /**
     * Hook called at the end of the constructor. Does nothing in this
     * class; subclasses can override it to do their own set up.
     */
    public function init()
    {
    }
    /**
     * Executes one round of this job on the current machine.
     *
     * Nothing is done if checkPrerequisites() fails. Otherwise, in
     * distributed mode, the name server prepares and finishes tasks (unless
     * name_server_does_slave_tasks_only is set), and any machine that is
     * not the name server (or the name server when
     * name_server_does_slave_tasks is set) requests tasks from the name
     * server, executes them using doTasks(), and posts non-empty results
     * back. In any other mode only the name server acts, by calling
     * nondistributedTasks().
     */
    public function run()
    {
        if (!$this->checkPrerequisites()) {
            return;
        }
        $current_machine = $this->getCurrentMachine();
        $is_name_server = ($current_machine == L\crawlHash(C\NAME_SERVER));
        if ($is_name_server) {
            $current_machine = "NAME SERVER";
        }
        $job_name = $this->getJobName();
        L\crawlLog("Running Job: $job_name");
        L\crawlLog("Current Machine: $current_machine");
        if ($this->media_updater->media_mode == 'distributed') {
            if ($is_name_server && !$this->name_server_does_slave_tasks_only) {
                L\crawlLog("--Preparing job $job_name tasks on Name Server");
                $this->prepareTasks();
                L\crawlLog("--Finishing job $job_name tasks on Name Server");
                $this->finishTasks();
            }
            if (!$is_name_server || $this->name_server_does_slave_tasks) {
                L\crawlLog("--Checking for $job_name tasks to do");
                /* forget tasks from an earlier run so that a failed request
                   to the name server does not cause them to be redone
                 */
                $this->tasks = [];
                $response = $this->execNameServer("getTasks");
                if (isset($response[0][CrawlConstants::PAGE])) {
                    /* NOTE(review): unserialize() is being applied to data
                       received over the network, this is only safe if the
                       name server is trusted
                     */
                    $this->tasks = unserialize(L\webdecode(
                        $response[0][CrawlConstants::PAGE]));
                }
                if ($this->tasks) {
                    L\crawlLog("--Executing tasks for job $job_name");
                    $results = $this->doTasks($this->tasks);
                    if ($results) {
                        L\crawlLog("--Sending task results for job $job_name".
                            " to name server");
                        $response =
                            $this->execNameServer("putTasks",
                            $results);
                        if (isset($response[0][CrawlConstants::PAGE])) {
                            $response = unserialize(L\webdecode(
                                $response[0][CrawlConstants::PAGE]));
                        }
                        /* without a PAGE entry $response is still the raw
                           output array, so anything which is not a scalar
                           is converted to text before being concatenated
                         */
                        if (!is_scalar($response)) {
                            $response = print_r($response, true);
                        }
                        L\crawlLog("--Name server response was:\n" . $response);
                    }
                } else {
                    L\crawlLog("--No tasks found for job $job_name");
                }
            }
        } else {
            if ($is_name_server) {
                L\crawlLog("Executing job: $job_name in nondistributed mode.");
                $this->nondistributedTasks();
            }
        }
        L\crawlLog("Finished job: $job_name");
    }
    /**
     * Checked at the start of run() to decide whether the job should be
     * carried out at all. This base implementation always allows the job
     * to run.
     *
     * @return bool true if run() should proceed
     */
    public function checkPrerequisites()
    {
        return true;
    }
    /**
     * Called by run() on the name server when the media updater is not in
     * distributed mode. Does nothing in this class.
     */
    public function nondistributedTasks()
    {
    }
    /**
     * Called by run() on the name server in distributed mode, before
     * finishTasks(). Does nothing in this class.
     */
    public function prepareTasks()
    {
    }
    /**
     * Called by run() on the name server in distributed mode, immediately
     * after prepareTasks(). Does nothing in this class.
     */
    public function finishTasks()
    {
    }
    /**
     * Called by run() with the tasks received from the name server. Any
     * non-empty return value is posted back to the name server using a
     * putTasks request. This base implementation does nothing and so
     * returns null.
     *
     * @param mixed $tasks unserialized task data received from the
     *      name server
     * @return mixed results to send back to the name server
     */
    public function doTasks($tasks)
    {
    }
    /**
     * Does nothing in this class.
     * NOTE(review): presumably invoked on the name server to answer the
     * getTasks request made by run() -- confirm against the jobs controller.
     *
     * @param string $machine_id identifier of the requesting machine
     * @param mixed $data additional data sent with the request
     */
    public function getTasks($machine_id, $data = null)
    {
    }
    /**
     * Does nothing in this class.
     * NOTE(review): presumably invoked on the name server to handle the
     * putTasks request made by run() -- confirm against the jobs controller.
     *
     * @param string $machine_id identifier of the machine sending results
     * @param mixed $data results computed by that machine
     */
    public function putTasks($machine_id, $data)
    {
    }
    /**
     * Posts a jobs request to the name server and returns what came back.
     *
     * The posted query consists of the controller (c=jobs), the activity,
     * the current time, a session token computed as the md5 of the time
     * concatenated with the auth key, the serialized argument (if any), the
     * name of the job (if it could be determined) and the id of the current
     * machine.
     *
     * @param string $command the activity to request on the name server
     *      (sent as the a= field of the query)
     * @param mixed $arg additional data to be passed to the name server.
     *      It is omitted from the request if it loosely equals null.
     * @return array output of FetchUrl::getPages for the request
     */
    public static function execNameServer($command, $arg = null)
    {
        $time = time();
        $session = md5($time . C\AUTH_KEY);
        $query = "c=jobs&a=$command&time=$time&session=$session";
        /* the loose comparison is kept on purpose: existing callers get
           no arg field for any empty value
         */
        if ($arg != null) {
            $arg = L\webencode(serialize($arg));
            $query .= "&arg=$arg";
        }
        $job_name = static::getJobName();
        if ($job_name) {
            $query .= "&job=$job_name";
        }
        $sites = [];
        $post_data = [];
        $current_machine = static::getCurrentMachine();
        $sites[0][CrawlConstants::URL] = C\NAME_SERVER;
        $post_data[0] = $query . "&machine_id=" .
            L\webencode($current_machine);
        L\crawlLog("Contacting Name server...".
            "The url, '?', and first 256 bytes of posted query are:\n" .
            C\NAME_SERVER . "?" . substr($post_data[0], 0, 256));
        $outputs = FetchUrl::getPages($sites, false, 0, null, self::URL,
            self::PAGE, true, $post_data);
        return $outputs;
    }
    /**
     * Derives the name of a job from the name of the class it was called
     * on: the namespace (if any) and the trailing "Job" are stripped off.
     *
     * @return mixed the job name as a string, or false if the class name
     *      does not end in "Job"
     */
    public static function getJobName()
    {
        $class_name = get_called_class();
        if (substr($class_name, -3) != "Job") {
            return false;
        }
        /* strrpos returns false for a class in the global namespace; adding
           one to that would give an offset of 1 and lose the first letter
         */
        $separator_pos = strrpos($class_name, "\\");
        $start = ($separator_pos === false) ? 0 : $separator_pos + 1;
        return substr($class_name, $start, -3);
    }
    /**
     * Returns a hash of the url of the current machine based on the value
     * saved to current_machine_info.txt by a machine statuses request. If
     * that file does not exist or cannot be read, the hash of the name
     * server url is used instead.
     *
     * @return string hash of current machine url
     */
    public static function getCurrentMachine()
    {
        $current_machine_path = C\WORK_DIRECTORY .
            "/schedules/current_machine_info.txt";
        $current_machine = false;
        if (file_exists($current_machine_path)) {
            $current_machine = file_get_contents($current_machine_path);
        }
        if ($current_machine === false) {
            $current_machine = L\crawlHash(C\NAME_SERVER);
        }
        return $current_machine;
    }
}
ViewGit