Last commit for src/library/processors/ImageProcessor.php: 2addb500315b7393a90fe66431d7832b1e7386c7

Adjust copyrights years

Chris Pollett [2024-01-03 21:Jan:rd]
Adjust copyrights years
<?php
/**
 * SeekQuarry/Yioop --
 * Open Source Pure PHP Search Engine, Crawler, and Indexer
 *
 * Copyright (C) 2009 - 2024  Chris Pollett
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * @author Chris Pollett
 * @license https://www.gnu.org/licenses/ GPL3
 * @link https://www.seekquarry.com/
 * @copyright 2009 - 2024
 * @filesource
 */
namespace seekquarry\yioop\library\processors;

use seekquarry\yioop\configs as C;
use seekquarry\yioop\library as L;

 * Base abstract class common to all processors used to create crawl summary
 * information from images
 * @author Chris Pollett
class ImageProcessor extends PageProcessor
     * Extract summary data from the image provided in $page together the url
     *     in $url where it was downloaded from
     * ImageProcessor class defers a proper implementation of this method to
     *     subclasses
     * @param string $page  the image represented as a character string
     * @param string $url  the url where the image was downloaded from
     * @return array summary information including a thumbnail and a
     *     description (where the description is just the url)
    public function process($page, $url)
        return null;
     * Used to save a temporary file with the data downloaded for a url
     * while carrying out image processing
     * @param string $page contains data about an image that one needs to save
     * @param string $url where $page data came from
     * @param string $file_extension to be associated with the $page data
    public function saveTempFile($page, $url, $file_extension)
        static $call_count = 0;
        $temp_dir = C\TEMP_DIR . "/";
        if (!file_exists($temp_dir)) {
        if (!file_exists($temp_dir)) {
            return null;
        $temp_file = $temp_dir . $call_count .
            L\crawlHash($url) . " . $file_extension";
        if ($call_count > 1000000000) {
            /* just fixing some value to cycle back to 0 rather than
               rely on OS wrapping.
            $call_count = 0;
        file_put_contents($temp_file, $page);
        return $temp_file;
     * Given an $image_string determines if possible its width and height
     * then assigns the values into the CrawlConstants:WIDTH,
     *  CrawlConstants:HEIGHT fields of $summary
     * @param array &$summary to write the width and height into
     * @param string $image_string  the image represented as a character string
     * @return array summary information including a thumbnail and a
     *     description (where the description is just the url)
    public function addWidthHeightSummary(&$summary, $image_string)
        $image_info = @getimagesizefromstring($image_string);
        set_error_handler(C\NS_CONFIGS . "yioop_error_handler");
        if (!empty($image_info[0]) && !empty($image_info[1])) {
            list($summary[self::WIDTH], $summary[self::HEIGHT], ) =
     * Given an image try to extract and XMP info from it.
     * @param string $image_string  the image represented as a character string
     * @return array XMP data converted from XML format to an array-like format
    public function getXmpData($image_string)
        $xmp_data = "";
        if (function_exists("simplexml_load_string") &&
            preg_match('/\<x\:xmpmeta.+\<\/x\:xmpmeta\>/s', $image_string,
            $match)) {
            $xml_no_ns = preg_replace("/\<\/\w+\:/", "</", $match[0]);
            $xml_no_ns = preg_replace("/\<\w+\:/", "<", $xml_no_ns);
            $xmp_xml = @simplexml_load_string($xml_no_ns);
            set_error_handler(C\NS_CONFIGS . "yioop_error_handler");
            $xmp_array = [];
            if (!empty($xmp_xml) ) {
                $xmp_array = json_decode(json_encode($xmp_xml), true);
            $xmp_data = print_r($xmp_array, true);
        return $xmp_data;
     * Used to create a thumbnail from an image object
     * @param object $image  image object with image
     * @param int $width = width in pixels of thumb if width is a negative
     *  value and height positive, then this dimension will be set to be
     *  proportional based on the input images width versus height
     * @param int $height = height in pixels of thumb if height is a negative
     *  value and width positive, then this dimension will be set to be
     *  proportional based on the input images width versus height
     * @return string of webp image if this string would have been non-blank
     *      empty string otherwise
    public static function createThumb($image, $width = C\THUMB_DIM,
        $height = C\THUMB_DIM)
        if (empty($image) || ($width == 0 && $height == 0)) {
            return "";
        $size_x = imagesx($image);
        $size_y = imagesy($image);
        if ($height > 0 && $width <= 0) {
            $width = intval(($size_x * $height)/$size_y);
        if ($height <= 0 && $width > 0) {
            $height = intval(($size_y * $width)/$size_x);
        $thumb = imagecreatetruecolor($width, $height);
        if (empty($thumb)) {
            return "";
        imagesavealpha($thumb, true);
        $trans_colour = imagecolorallocatealpha($thumb, 255, 255, 255, 127);
        imagefill($thumb, 0, 0, $trans_colour);
            $image, 0, 0, 0, 0, $width, $height, $size_x, $size_y);
        set_error_handler(C\NS_CONFIGS . "yioop_error_handler");
        if (empty($thumb)) {
            return "";
        $number_of_sample_points = 200;
        $is_blank = true;
        $old_color = false;
        for ($i = 0; $i < $number_of_sample_points; $i++) {
            $x = rand(0, $width - 1);
            $y = rand(0, $height - 1);
            $color = imagecolorat($thumb, $x, $y);
            if (!empty($old_color) && $old_color != $color) {
                $is_blank = false;
            $old_color = $color;
        if ($is_blank) {
            return "";
        $thumb_string = ob_get_contents();
        return $thumb_string;
     * Computes the average RGBA pixel value over an image
     * by resampling the image down to a 1x1 pixel image, then
     * extracting its rgba value as a vector
     * @param GdImage $image object to calculate average color for
     * @return array a 4-tuple with components [red, green, blue, alpha]
    public static function averageColor($image)
        $size_x = imagesx($image);
        $size_y = imagesy($image);
        $pixel_image = imagecreatetruecolor(1, 1);
        if (empty($pixel_image)) {
            return false;
        imagesavealpha($pixel_image, true);
        $trans_colour = imagecolorallocatealpha($pixel_image,
            255, 255, 255, 127);
        imagefill($pixel_image, 0, 0, $trans_colour);
            $image, 0, 0, 0, 0, 1, 1, $size_x, $size_y);
        set_error_handler(C\NS_CONFIGS . "yioop_error_handler");
        $color_int = imagecolorat($pixel_image, 0, 0);
        $color_vector = array_values(imagecolorsforindex($pixel_image,
        return $color_vector;
     * Checks if an image is Black and White (really gray scale) by
     * sampling 200 points and check that for each point the rgb values are
     * the same.
     * @param GdImage $image object to check if black white
     * @return bool true if black and white
    public static function isBlackAndWhite($image)
        $size_x = imagesx($image);
        $size_y = imagesy($image);
        $number_of_sample_points = 200;
        for ($i = 0; $i < $number_of_sample_points; $i++) {
            $x = rand(0, $size_x - 1);
            $y = rand(0, $size_y - 1);
            $color_int = imagecolorat($image, $x, $y);
            $color_vector = array_values(imagecolorsforindex($image,
            $first_color = $color_vector[0];
            for($j = 1; $j < 3; $j++) {
                if ($first_color != $color_vector[$j]) {
                    return false;
        return true;