Adjust copyrights years
<?php
/**
* SeekQuarry/Yioop --
* Open Source Pure PHP Search Engine, Crawler, and Indexer
*
* Copyright (C) 2009 - 2015 Chris Pollett chris@pollett.org
*
* LICENSE:
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* END LICENSE
*
* @author Chris Pollett chris@pollett.org
* @license http://www.gnu.org/licenses/ GPL3
* @link http://www.seekquarry.com/
* @copyright 2009 - 2015
* @filesource
*/
namespace seekquarry\yioop\library\media_jobs;
use seekquarry\yioop\configs as C;
use seekquarry\yioop\library as L;
use seekquarry\yioop\library\CrawlConstants;
use seekquarry\yioop\library\FetchUrl;
use seekquarry\yioop\library\MediaConstants;
/**
 * Generic job that can be run either on a single machine or split between
 * a name server and other machines.
 *
 * The task hooks in this class (init, checkPrerequisites, prepareTasks,
 * finishTasks, doTasks, getTasks, putTasks, nondistributedTasks) have
 * empty or trivial bodies here; they are the points a concrete job class
 * overrides. The name of a job is derived from the name of its class, see
 * getJobName().
 */
class MediaJob implements CrawlConstants, MediaConstants
{
    /**
     * Object passed as the first constructor argument. run() reads its
     * media_mode field to choose between distributed and nondistributed
     * execution. (Presumably the media updater process that owns this job;
     * confirm against the code that instantiates jobs.)
     * @var object
     */
    public $media_updater;
    /**
     * In distributed mode, whether the name server should, like any other
     * machine, also request tasks from the name server and execute them
     * @var bool
     */
    public $name_server_does_slave_tasks;
    /**
     * In distributed mode, when true the name server skips the
     * prepareTasks() and finishTasks() steps in run()
     * @var bool
     */
    public $name_server_does_slave_tasks_only;
    /**
     * Tasks most recently received from the name server in run(); this is
     * the value handed to doTasks()
     * @var mixed
     */
    public $tasks;
    /**
     * Object passed as the second constructor argument. It is only stored
     * by this class. (Presumably the web controller that instantiated the
     * job when handling a getTasks/putTasks request; confirm with caller.)
     * @var object
     */
    public $controller;
    /**
     * Stores the given media updater and controller, resets the task list
     * and name server flags to their defaults, then calls init() so that a
     * subclass can do its own set up.
     *
     * @param object $media_updater object whose media_mode field run()
     *      consults
     * @param object $controller stored in $this->controller, not otherwise
     *      used by this class
     */
    public function __construct($media_updater = null, $controller = null)
    {
        $this->media_updater = $media_updater;
        $this->controller = $controller;
        $this->tasks = [];
        $this->name_server_does_slave_tasks = false;
        $this->name_server_does_slave_tasks_only = false;
        $this->init();
    }
    /**
     * Hook called at the end of the constructor. Does nothing in this
     * class; override it to initialize a job, for example, to change the
     * name_server_does_slave_tasks flags.
     */
    public function init()
    {
    }
    /**
     * Carries out one pass of this job.
     *
     * Nothing is done if checkPrerequisites() returns a falsy value.
     * Otherwise, if the media updater's media_mode is 'distributed':
     *  - on the name server, unless name_server_does_slave_tasks_only is
     *    set, prepareTasks() and then finishTasks() are called;
     *  - on any other machine, or on the name server when
     *    name_server_does_slave_tasks is set, tasks are requested from the
     *    name server, passed to doTasks(), and any truthy result is posted
     *    back to the name server.
     * For any other media_mode, nondistributedTasks() is called, but only
     * on the name server.
     */
    public function run()
    {
        if (!$this->checkPrerequisites()) {
            return;
        }
        $current_machine = $this->getCurrentMachine();
        $is_name_server = ($current_machine == L\crawlHash(C\NAME_SERVER));
        if ($is_name_server) {
            // only used to make the log message below readable
            $current_machine = "NAME SERVER";
        }
        $job_name = $this->getJobName();
        L\crawlLog("Running Job: $job_name");
        L\crawlLog("Current Machine: $current_machine");
        if ($this->media_updater->media_mode == 'distributed') {
            if ($is_name_server && !$this->name_server_does_slave_tasks_only) {
                L\crawlLog("--Preparing job $job_name tasks on Name Server");
                $this->prepareTasks();
                L\crawlLog("--Finishing job $job_name tasks on Name Server");
                $this->finishTasks();
            }
            if (!$is_name_server || $this->name_server_does_slave_tasks) {
                $this->requestAndDoTasks($job_name);
            }
        } else if ($is_name_server) {
            L\crawlLog("Executing job: $job_name in nondistributed mode.");
            $this->nondistributedTasks();
        }
        L\crawlLog("Finished job: $job_name");
    }
    /**
     * Helper for run(): asks the name server for tasks for this job,
     * executes them with doTasks(), and if that produced a truthy result
     * sends the result back to the name server, logging the reply.
     *
     * @param string $job_name name of this job, used only in log messages
     */
    private function requestAndDoTasks($job_name)
    {
        L\crawlLog("--Checking for $job_name tasks to do");
        $response = $this->execNameServer("getTasks");
        /* If the response has no page, $this->tasks keeps whatever value
           it already had.
           NOTE(review): unserialize() is applied to data received over
           the network; this is only safe if the name server is trusted.
         */
        if (isset($response[0][CrawlConstants::PAGE])) {
            $this->tasks = unserialize(L\webdecode(
                $response[0][CrawlConstants::PAGE]));
        }
        if (!$this->tasks) {
            L\crawlLog("--No tasks found for job $job_name");
            return;
        }
        L\crawlLog("--Executing tasks for job $job_name");
        $results = $this->doTasks($this->tasks);
        if (!$results) {
            return;
        }
        L\crawlLog("--Sending task results for job $job_name".
            " to name server");
        $response = $this->execNameServer("putTasks", $results);
        if (isset($response[0][CrawlConstants::PAGE])) {
            // NOTE(review): same unserialize() caveat as above
            $response = unserialize(L\webdecode(
                $response[0][CrawlConstants::PAGE]));
        }
        /* When no page came back $response is still the raw array of
           outputs, so anything that is not already a string is rendered
           with print_r before being concatenated into the log line.
         */
        if (!is_string($response)) {
            $response = print_r($response, true);
        }
        L\crawlLog("--Name server response was:\n" . $response);
    }
    /**
     * Checked at the start of run(); if the value returned is falsy the
     * job is not run. Always true in this class, override to add
     * conditions.
     *
     * @return bool whether the job should be run
     */
    public function checkPrerequisites()
    {
        return true;
    }
    /**
     * Called by run() on the name server when the media updater is not in
     * distributed mode. Does nothing in this class.
     */
    public function nondistributedTasks()
    {
    }
    /**
     * Called by run() on the name server in distributed mode, before
     * finishTasks(). Does nothing in this class.
     */
    public function prepareTasks()
    {
    }
    /**
     * Called by run() on the name server in distributed mode, directly
     * after prepareTasks(). Does nothing in this class.
     */
    public function finishTasks()
    {
    }
    /**
     * Called by run() with the tasks received from the name server. A
     * truthy return value is sent back to the name server as the argument
     * of a putTasks request. Does nothing (returns null) in this class.
     *
     * @param mixed $tasks unserialized tasks received from the name server
     * @return mixed results to send to the name server, or a falsy value
     *      if there is nothing to send
     */
    public function doTasks($tasks)
    {
    }
    /**
     * Not called from within this class and empty here. (Presumably
     * invoked on the name server to answer the getTasks request made by
     * execNameServer(); confirm against the jobs controller.)
     *
     * @param string $machine_id presumably the machine_id value posted by
     *      execNameServer() -- confirm
     * @param mixed $data presumably the decoded arg value of the request
     *      -- confirm
     */
    public function getTasks($machine_id, $data = null)
    {
    }
    /**
     * Not called from within this class and empty here. (Presumably
     * invoked on the name server to handle the putTasks request made by
     * execNameServer(); confirm against the jobs controller.)
     *
     * @param string $machine_id presumably the machine_id value posted by
     *      execNameServer() -- confirm
     * @param mixed $data presumably the decoded arg value of the request
     *      -- confirm
     */
    public function putTasks($machine_id, $data)
    {
    }
    /**
     * Posts a request for this job to the name server and returns what
     * came back.
     *
     * The posted query has c=jobs, a=$command, the current time, a session
     * value computed as md5 of the time followed by AUTH_KEY, the
     * serialized and web-encoded $arg (if any), the job name (if the
     * calling class has one), and the web-encoded id of the current
     * machine.
     *
     * @param string $command value sent as the a= field of the request,
     *      for example, getTasks or putTasks
     * @param mixed $arg additional data to send with the request. It is
     *      omitted when it compares loosely equal to null, so an empty
     *      array, 0, "" or false is not sent either
     * @return array output of FetchUrl::getPages for the single request
     *      made to the name server
     */
    public static function execNameServer($command, $arg = null)
    {
        $time = time();
        $session = md5($time . C\AUTH_KEY);
        $query = "c=jobs&a=$command&time=$time&session=$session";
        if ($arg != null) {
            $arg = L\webencode(serialize($arg));
            $query .= "&arg=$arg";
        }
        // self:: forwards the called class, so a subclass's name is used
        $job_name = self::getJobName();
        if ($job_name) {
            $query .= "&job=$job_name";
        }
        $sites = [];
        $post_data = [];
        $current_machine = self::getCurrentMachine();
        $sites[0][CrawlConstants::URL] = C\NAME_SERVER;
        $post_data[0] = $query."&machine_id=" . L\webencode($current_machine);
        L\crawlLog("Contacting Name server...".
            "The url, '?', and first 256 bytes of posted query are:\n" .
            C\NAME_SERVER . "?" . substr($post_data[0], 0, 256));
        $outputs = FetchUrl::getPages($sites, false, 0, null, self::URL,
            self::PAGE, true, $post_data);
        return $outputs;
    }
    /**
     * Computes the name of a job from the name of the class it was called
     * on: the namespace prefix, if any, and the trailing "Job" are
     * removed.
     *
     * @return mixed the job name, or false if the class name does not end
     *      in "Job"
     */
    public static function getJobName()
    {
        $class_name = get_called_class();
        if (substr($class_name, -3) != "Job") {
            return false;
        }
        /* strrpos gives false for a class in the global namespace; adding
           1 to that would silently drop the first letter of the name.
         */
        $separator_pos = strrpos($class_name, "\\");
        $start = ($separator_pos === false) ? 0 : $separator_pos + 1;
        return substr($class_name, $start, -3);
    }
    /**
     * Returns the contents of schedules/current_machine_info.txt in the
     * work directory if that file exists, otherwise the hash of the name
     * server url. (The file is presumably written when machine statuses
     * are requested and holds a hash of this machine's url; that writer
     * is not part of this class.)
     *
     * @return string identifier of the current machine
     */
    public static function getCurrentMachine()
    {
        $current_machine_path = C\WORK_DIRECTORY .
            "/schedules/current_machine_info.txt";
        if (file_exists($current_machine_path)) {
            $current_machine = file_get_contents($current_machine_path);
        } else {
            $current_machine = L\crawlHash(C\NAME_SERVER);
        }
        return $current_machine;
    }
}