<?php
/**
 * SeekQuarry/Yioop --
 * Open Source Pure PHP Search Engine, Crawler, and Indexer
 *
 * Copyright (C) 2009 - 2024  Chris Pollett chris@pollett.org
 *
 * LICENSE:
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * END LICENSE
 *
 * @author Chris Pollett chris@pollett.org
 * @license https://www.gnu.org/licenses/ GPL3
 * @link https://www.seekquarry.com/
 * @copyright 2009 - 2024
 * @filesource
 */
namespace seekquarry\yioop\library\processors;

use seekquarry\yioop\configs as C;
use seekquarry\yioop\library as L;

/**
 * Base class common to all processors used to create crawl summary
 * information from images. It supplies shared helpers (temporary file
 * handling, width/height and XMP extraction, thumbnailing, simple colour
 * statistics) and leaves the actual process() implementation to subclasses.
 *
 * @author Chris Pollett
 */
class ImageProcessor extends PageProcessor
{
    /**
     * Extract summary data from the image provided in $page together the url
     * in $url where it was downloaded from
     *
     * ImageProcessor class defers a proper implementation of this method to
     * subclasses; this base implementation always returns null.
     *
     * @param string $page the image represented as a character string
     * @param string $url the url where the image was downloaded from
     * @return array|null summary information including a thumbnail and a
     *      description (where the description is just the url) when
     *      implemented by a subclass; null here
     */
    public function process($page, $url)
    {
        return null;
    }
    /**
     * Used to save a temporary file with the data downloaded for a url
     * while carrying out image processing
     *
     * The file name is made from a per-process call counter, the crawl hash
     * of $url, and "." followed by $file_extension.
     *
     * @param string $page contains data about an image that one needs to save
     * @param string $url where $page data came from
     * @param string $file_extension to be associated with the $page data
     *      (given without a leading dot)
     * @return string|null path of the temporary file written, or null if the
     *      temporary directory does not exist and could not be created
     */
    public function saveTempFile($page, $url, $file_extension)
    {
        static $call_count = 0;
        $temp_dir = C\TEMP_DIR . "/";
        if (!file_exists($temp_dir)) {
            mkdir($temp_dir);
        }
        if (!file_exists($temp_dir)) {
            return null;
        }
        /* The extension must be joined with a bare "." -- interpolating it
           into " . $file_extension" would put spaces around the dot and
           produce a file name without a usable extension.
         */
        $temp_file = $temp_dir . $call_count . L\crawlHash($url) .
            "." . $file_extension;
        $call_count++;
        if ($call_count > 1000000000) {
            /* just fixing some value to cycle back to 0 rather than
               rely on OS wrapping.
             */
            $call_count = 0;
        }
        file_put_contents($temp_file, $page);
        return $temp_file;
    }
    /**
     * Given an $image_string determines if possible its width and height
     * then assigns the values into the CrawlConstants:WIDTH,
     * CrawlConstants:HEIGHT fields of $summary. If the dimensions cannot
     * be determined, $summary is left untouched.
     *
     * @param array &$summary to write the width and height into
     * @param string $image_string the image represented as a character string
     */
    public function addWidthHeightSummary(&$summary, $image_string)
    {
        /* Switch back to PHP's built-in handler while probing so that
           malformed image data only produces a suppressed warning.
         */
        set_error_handler(null);
        $image_info = @getimagesizefromstring($image_string);
        set_error_handler(C\NS_CONFIGS . "yioop_error_handler");
        if (!empty($image_info[0]) && !empty($image_info[1])) {
            $summary[self::WIDTH] = $image_info[0];
            $summary[self::HEIGHT] = $image_info[1];
        }
    }
    /**
     * Given an image try to extract any XMP info from it.
     *
     * The x:xmpmeta packet is located with a regular expression, namespace
     * prefixes are stripped from its tags, and the result is parsed with
     * SimpleXML and converted to a nested array.
     *
     * @param string $image_string the image represented as a character string
     * @return string print_r() rendering of the XMP data after conversion
     *      from XML to an array-like format; empty string if SimpleXML is
     *      unavailable or no XMP packet was found
     */
    public function getXmpData($image_string)
    {
        $xmp_data = "";
        if (function_exists("simplexml_load_string") &&
            preg_match('/\<x\:xmpmeta.+\<\/x\:xmpmeta\>/s', $image_string,
            $match)) {
            // drop namespace prefixes from closing, then opening, tags
            $xml_no_ns = preg_replace("/\<\/\w+\:/", "</", $match[0]);
            $xml_no_ns = preg_replace("/\<\w+\:/", "<", $xml_no_ns);
            set_error_handler(null);
            $xmp_xml = @simplexml_load_string($xml_no_ns);
            set_error_handler(C\NS_CONFIGS . "yioop_error_handler");
            $xmp_array = [];
            if (!empty($xmp_xml)) {
                // JSON round trip turns the SimpleXML tree into plain arrays
                $xmp_array = json_decode(json_encode($xmp_xml), true);
            }
            $xmp_data = print_r($xmp_array, true);
        }
        return $xmp_data;
    }
    /**
     * Used to create a thumbnail from an image object. Note the input
     * $image is destroyed once it has been resampled into the thumbnail.
     *
     * @param object $image image object with image
     * @param int $width = width in pixels of thumb if width is a non-positive
     *      value and height positive, then this dimension will be set to be
     *      proportional based on the input images width versus height
     * @param int $height = height in pixels of thumb if height is a
     *      non-positive value and width positive, then this dimension will
     *      be set to be proportional based on the input images width versus
     *      height
     *
     * @return string of webp image if this string would have been non-blank
     *      empty string otherwise (also returned if both dimensions are
     *      non-positive or the thumbnail could not be created)
     */
    public static function createThumb($image, $width = C\THUMB_DIM,
        $height = C\THUMB_DIM)
    {
        /* With no positive dimension there is nothing to scale against;
           passing such sizes on to imagecreatetruecolor() would fail.
         */
        if (empty($image) || ($width <= 0 && $height <= 0)) {
            return "";
        }
        $size_x = imagesx($image);
        $size_y = imagesy($image);
        /* Derive the missing dimension from the aspect ratio, never letting
           it truncate to 0 for very long or very tall images.
         */
        if ($height > 0 && $width <= 0) {
            $width = max(1, intval(($size_x * $height) / $size_y));
        }
        if ($height <= 0 && $width > 0) {
            $height = max(1, intval(($size_y * $width) / $size_x));
        }
        $thumb = imagecreatetruecolor($width, $height);
        if (empty($thumb)) {
            return "";
        }
        // start from a fully transparent canvas
        imagesavealpha($thumb, true);
        $trans_colour = imagecolorallocatealpha($thumb, 255, 255, 255, 127);
        imagefill($thumb, 0, 0, $trans_colour);
        set_error_handler(null);
        $resampled = @imagecopyresampled($thumb, $image, 0, 0, 0, 0, $width,
            $height, $size_x, $size_y);
        set_error_handler(C\NS_CONFIGS . "yioop_error_handler");
        imagedestroy($image);
        if (!$resampled) {
            // nothing was drawn, so the thumb would only be the blank canvas
            imagedestroy($thumb);
            return "";
        }
        /* Sample random pixels; the thumb counts as blank if every sampled
           pixel has the same colour. The "no previous sample" state is
           tested with !== false because opaque black is the integer 0.
         */
        $number_of_sample_points = 200;
        $is_blank = true;
        $old_color = false;
        for ($i = 0; $i < $number_of_sample_points; $i++) {
            $x = rand(0, $width - 1);
            $y = rand(0, $height - 1);
            $color = imagecolorat($thumb, $x, $y);
            if ($old_color !== false && $old_color != $color) {
                $is_blank = false;
                break;
            }
            $old_color = $color;
        }
        if ($is_blank) {
            imagedestroy($thumb);
            return "";
        }
        // imagewebp() writes to output, so capture it in a buffer
        ob_start();
        imagewebp($thumb);
        $thumb_string = ob_get_clean();
        imagedestroy($thumb);
        return $thumb_string;
    }
    /**
     * Computes the average RGBA pixel value over an image
     * by resampling the image down to a 1x1 pixel image, then
     * extracting its rgba value as a vector
     *
     * @param GdImage $image object to calculate average color for
     * @return array|bool a 4-tuple with components [red, green, blue, alpha],
     *      or false if the 1x1 work image could not be created
     */
    public static function averageColor($image)
    {
        $size_x = imagesx($image);
        $size_y = imagesy($image);
        $pixel_image = imagecreatetruecolor(1, 1);
        if (empty($pixel_image)) {
            return false;
        }
        imagesavealpha($pixel_image, true);
        $trans_colour = imagecolorallocatealpha($pixel_image, 255, 255, 255,
            127);
        imagefill($pixel_image, 0, 0, $trans_colour);
        set_error_handler(null);
        @imagecopyresampled($pixel_image, $image, 0, 0, 0, 0, 1, 1,
            $size_x, $size_y);
        set_error_handler(C\NS_CONFIGS . "yioop_error_handler");
        $color_int = imagecolorat($pixel_image, 0, 0);
        // array_values drops the red/green/blue/alpha keys, keeping order
        $color_vector = array_values(imagecolorsforindex($pixel_image,
            $color_int));
        imagedestroy($pixel_image);
        return $color_vector;
    }
    /**
     * Checks if an image is Black and White (really gray scale) by
     * sampling 200 points and check that for each point the rgb values are
     * the same.
     *
     * @param GdImage $image object to check if black white
     * @return bool true if black and white (that is, no sampled pixel had
     *      differing red, green, and blue components)
     */
    public static function isBlackAndWhite($image)
    {
        $size_x = imagesx($image);
        $size_y = imagesy($image);
        $number_of_sample_points = 200;
        for ($i = 0; $i < $number_of_sample_points; $i++) {
            $x = rand(0, $size_x - 1);
            $y = rand(0, $size_y - 1);
            $color_int = imagecolorat($image, $x, $y);
            $color_vector = array_values(imagecolorsforindex($image,
                $color_int));
            // indices 0, 1, 2 are red, green, blue; alpha (3) is ignored
            if ($color_vector[0] != $color_vector[1] ||
                $color_vector[0] != $color_vector[2]) {
                return false;
            }
        }
        return true;
    }
}