Last commit for src/executables/ClassifierTrainer.php: 2addb500315b7393a90fe66431d7832b1e7386c7

Adjust copyrights years

Chris Pollett [2024-01-03 21:Jan:rd]
Adjust copyrights years
<?php
/**
 * SeekQuarry/Yioop --
 * Open Source Pure PHP Search Engine, Crawler, and Indexer
 *
 * Copyright (C) 2009 - 2023  Chris Pollett chris@pollett.org
 *
 * LICENSE:
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * END LICENSE
 *
 * @author Chris Pollett chris@pollett.org
 * @license https://www.gnu.org/licenses/ GPL3
 * @link https://www.seekquarry.com/
 * @copyright 2009 - 2023
 * @filesource
 */
namespace seekquarry\yioop\executables;

use seekquarry\yioop\configs as C;
use seekquarry\yioop\library as L;
use seekquarry\yioop\library\CrawlDaemon;
use seekquarry\yioop\library\classifiers\Classifier;

/*
   This script may only be run from the command line. Bail out if it is
   reached through any other SAPI, or if the IS_OWN_WEB_SERVER constant has
   been defined in the configs namespace.
 */
if (php_sapi_name() !== 'cli' ||
    defined("seekquarry\\yioop\\configs\\IS_OWN_WEB_SERVER")) {
    echo "BAD REQUEST"; exit();
}
/*
   We must specify that we want logging enabled. The flag is phrased
   negatively (NO_LOGGING), so it has to be false for the crawlLog calls
   made by the trainer to produce output.
 */
$_SERVER["NO_LOGGING"] = false;
/*
   For crawlLog and Yioop Constants
 */
require_once __DIR__.'/../library/Utility.php';
/*
   Without a configured profile there is nothing to train against, so tell
   the user how to set one up and quit.
 */
if (!C\PROFILE) {
    echo "Please configure the search engine instance by visiting " .
        "its web interface on localhost.\n";
    exit();
}
/*
   If possible, set the memory limit high enough to fit all of the features and
   training documents into memory.
 */
ini_set("memory_limit", C\CLASSIFIER_TRAINER_LIMIT);
/*
    We'll set up multi-byte string handling to use UTF-8
 */
mb_internal_encoding("UTF-8");
mb_regex_encoding("UTF-8");
/**
 * This class is used to finalize a classifier via the web interface.
 *
 * Because finalizing involves training a logistic regression classifier on a
 * potentially-large set of training examples, it can take much longer than
 * would be allowed by the normal web execution time limit. So instead of
 * trying to finalize a classifier directly in the controller that handles the
 * web request, the controller kicks off a daemon that simply loads the
 * classifier, finalizes it, and saves it back to disk.
 *
 * The classifier to finalize is specified by its class label, passed as the
 * second command-line argument. The following command would be used to run
 * this script directly from the command-line:
 *
 *    $ php ClassifierTrainer.php terminal LABEL
 *
 * @author Shawn Tice
 */
class ClassifierTrainer
{
    /**
     * This is the function that should be called to get the
     * ClassifierTrainer to start training a logistic regression instance for
     * a particular classifier. The class label corresponding to the
     * classifier to be finalized should be passed as the second command-line
     * argument (i.e., $argv[2]).
     *
     * The method hands $argv to CrawlDaemon::init, then, if a label was
     * supplied, initializes the log under the name LABEL-ClassifierTrainer,
     * loads the classifier for that label, finalizes it, and stores it back
     * via Classifier::setClassifier. If no label was given, or no classifier
     * could be loaded for it, no training is done. In every case
     * CrawlDaemon::stop is called last.
     */
    public function start()
    {
        global $argv;
        CrawlDaemon::init($argv, "ClassifierTrainer");
        $label = $argv[2] ?? "";
        /*
           Compare against the empty string rather than using empty():
           empty("0") is true, so a classifier whose label is "0" would
           otherwise be skipped without any indication.
         */
        if ($label !== "") {
            L\crawlLog("Initializing classifier trainer log..",
                $label . '-ClassifierTrainer', true);
            $classifier = Classifier::getClassifier($label);
            if (empty($classifier)) {
                /*
                   Leave a trace in the log instead of exiting silently so
                   that a failed finalize request can be diagnosed.
                 */
                L\crawlLog("Unable to load a classifier for label " .
                    "'$label'; nothing to train.");
            } else {
                $classifier->prepareToFinalize();
                $classifier->finalize();
                Classifier::setClassifier($classifier);
                L\crawlLog("Training complete.\n");
            }
        }
        CrawlDaemon::stop('ClassifierTrainer', $label);
    }
}
/*
   Create the trainer (kept in $classifier_trainer) and immediately start it
 */
($classifier_trainer = new ClassifierTrainer())->start();
ViewGit