Adjust copyrights years
<?php
/**
* SeekQuarry/Yioop --
* Open Source Pure PHP Search Engine, Crawler, and Indexer
*
* Copyright (C) 2009 - 2023 Chris Pollett chris@pollett.org
*
* LICENSE:
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* END LICENSE
*
* @author Chris Pollett chris@pollett.org
* @license https://www.gnu.org/licenses/ GPL3
* @link https://www.seekquarry.com/
* @copyright 2009 - 2023
* @filesource
*/
namespace seekquarry\yioop\examples;
use seekquarry\yioop\configs as C;
use seekquarry\yioop\library as L;
use seekquarry\yioop\library\CrawlConstants;
use seekquarry\yioop\controllers\SearchController;
/**
* This file contains an example script to show the different
* methods of the Yioop! search api
*/
// this example should be only run from the command-line
if (php_sapi_name() != 'cli' ||
defined("seekquarry\\yioop\\configs\\IS_OWN_WEB_SERVER")) {
echo "BAD REQUEST"; exit();
}
/** Calculate base directory of script @ignore
* If you have Yioop! in a separate folder from your web-site
* You should change BASE_DIR to the location of the Yioop! directory
*/
define("seekquarry\\yioop\\configs\\PARENT_DIR",
substr(dirname(realpath($_SERVER['PHP_SELF'])), 0,
-strlen("/src/examples")));
define("seekquarry\\yioop\\configs\\BASE_DIR", C\PARENT_DIR . "/src");
/** Load in global configuration settings; you need this*/
require_once C\BASE_DIR . '/configs/Config.php';
if (!C\PROFILE) {
echo "Please configure the search engine instance by visiting" .
"its web interface on localhost.\n";
exit();
}
/*
* We now move the search API test index over to the WORK_DIRECTORY
* if it isn't already there. In a real-world set-up a user would have
* put a crawl into the WORK_DIRECTORY and that would be used to make the
* query.
*/
$archive_timestamp = "1660526790";
$index_archive = C\BASE_DIR . "/examples/IndexData$archive_timestamp.zip";
$extract_folder = C\CRAWL_DIR . "/cache";
if (!file_exists($index_archive)) {
echo "\nSearch API test index doesn't exist, so can't run demo\n\n";
exit();
}
if (class_exists("\ZipArchive")) {
$zip = new \ZipArchive();
$zip_h = $zip->open($index_archive);
$zip->extractTo($extract_folder);
$zip->close();
} else {
exec("unzip $index_archive -d $extract_folder");
}
// Create a SearchController to do queries with
$controller = new SearchController();
/*
Now we can do queries! First do a simple search on art and print the results
*/
echo "\n\n\nAn example of a query request with the search API:\n";
echo "Total rows numbers are high because by default grouping is done.\n";
$query = "yioop i:$archive_timestamp";
/* i:$archive_timestamp is the timestamp of the index to use.
API requires that a default index be set even though the query might
specify to use a different one. The query string we pass to the
API can be anything you can type into Yioop! search box.
*/
$num_results = 10; // how many results to get back
$first_result_to_return = 0;
// what ranked results show be the first to be returned (0 = highest ranked)
$data = $controller->queryRequest($query, $num_results,
$first_result_to_return);
outputQueryData($data);
/*
next we do a related search (as our index only has two pages in it,
seekquarry and yioop) the only related pages to seekquarry.com is yioop.com
and seekquarry.com
*/
echo "\n\n\nAn example of making a related query request with the search API\n";
$url = "https://www.yioop.com/";
$num_results = 10; // how many results to get back
$first_result_to_return = 0;
$index_timestamp = $archive_timestamp;
$data = $controller->relatedRequest($url, $num_results,
$first_result_to_return, $index_timestamp);
outputQueryData($data);
/*
Finally, we give an example of requesting the cached version of
a downloaded page...
*/
echo "\n\n\nAn example of making a cached of page request".
" with the search API:\n";
$url = "https://www.seekquarry.com/";
$ui_flags = [];
$search_terms = "seekquarry"; // these words will be highlighted
$data = $controller->cacheRequest($url, $ui_flags,
$search_terms, $index_timestamp);
echo $data;
/*
We now delete the example index to clean-up our test. In real-life
you wouldn't want to delete your query index after making one query
*/
unlinkRecursive(C\CRAWL_DIR."/cache/IndexData$archive_timestamp");
// demo over, bye-bye for now!
exit();
/**
* Short function to pretty-print the data gotten back from a Yioop! query
* @param array $data what we got back from doing a query
*/
function outputQueryData($data)
{
// Now to print out info in the result
foreach ($data['PAGES'] as $page) {
echo "============\n";
echo "TITLE: ". trim($page[CrawlConstants::TITLE]). "\n";
echo "URL: ". trim($page[CrawlConstants::URL]). "\n";
echo "DESCRIPTION:".
wordwrap(trim($page[CrawlConstants::DESCRIPTION]))."\n";
echo "Rank: ".$page[CrawlConstants::DOC_RANK]."\n";
echo "Relevance: ".$page[CrawlConstants::RELEVANCE]."\n";
echo "Proximity: ".$page[CrawlConstants::PROXIMITY]."\n";
echo "Score: ".$page[CrawlConstants::SCORE]."\n";
echo "============\n\n";
}
echo "QUERY STATISTICS\n";
echo "============\n";
echo "LOW: ".$data['LIMIT']."\n";
echo "HIGH: ".min($data['TOTAL_ROWS'],
$data['LIMIT'] + $data['RESULTS_PER_PAGE'])."\n";
echo "TOTAL ROWS: ".$data['TOTAL_ROWS']."\n";
}
/**
* Recursively delete a directory
*
* @param string $dir Directory name
* @param boolean $deleteRootToo Delete specified top directory as well
*/
function unlinkRecursive($dir, $deleteRootToo = true)
{
traverseDirectory($dir, C\NS_LIB . "deleteFileOrDir", $deleteRootToo);
}
/**
* Recursively traverse a directory structure and call a callback function
*
* @param string $dir Directory name
* @param function $callback Function to call as traverse structure
* @param boolean $rootToo do op on top-level directory as well
*/
function traverseDirectory($dir, $callback, $rootToo = true)
{
if (!$dh = @opendir($dir)) {
return;
}
while (false !== ($obj = readdir($dh))) {
if ($obj == '.' || $obj == '..') {
continue;
}
if (is_dir($dir . '/' . $obj)) {
traverseDirectory($dir.'/'.$obj, $callback, true);
}
@$callback($dir . '/' . $obj);
}
closedir($dh);
if ($rootToo) {
@$callback($dir);
}
}