diff --git a/src/configs/Config.php b/src/configs/Config.php index e56c442ca..0a6ea48ce 100755 --- a/src/configs/Config.php +++ b/src/configs/Config.php @@ -640,13 +640,13 @@ nsconddefine('THUMB_DIM', 128); */ nsconddefine('THUMB_SIZE', 1000000); /** Characters we view as not part of words, not same as POSIX [:punct:]*/ -nsconddefine ('PUNCT', "\.|\,|\:|\;|\"|\'|\[|\/|\%|\?|-|" . +nsconddefine('PUNCT', "\.|\,|\:|\;|\"|\'|\[|\/|\%|\?|-|" . "\]|\{|\}|\(|\)|\!|\||\&|\`|" . "\’|\‘|©|®|™|℠|…|\/|\>|,|\=|。|)|:|、|" . "”|“|《|》|(|「|」|★|【|】|·|\+|\*|;". "|!|—|―|?|!|،|؛|؞|؟|٪|٬|٭"); /** Number of total description deemed title */ -nsconddefine ('AD_HOC_TITLE_LENGTH', 50); +nsconddefine('AD_HOC_TITLE_LENGTH', 50); /** Used to say number of bytes in histogram bar (stats page) for file download sizes */ @@ -695,19 +695,24 @@ nsconddefine('MIN_QUERY_CACHE_TIME', ONE_HOUR); //one hour * Default number of items to page through for users,roles, mixes, etc * on the admin screens */ -nsconddefine ('DEFAULT_ADMIN_PAGING_NUM', 50); +nsconddefine('DEFAULT_ADMIN_PAGING_NUM', 50); /** Maximum number of bytes that the file that the suggest-a-url form * send data to can be. */ -nsconddefine ('MAX_SUGGEST_URL_FILE_SIZE', 100000); +nsconddefine('MAX_SUGGEST_URL_FILE_SIZE', 100000); /** Maximum number of a user can suggest to the suggest-a-url form in one day */ -nsconddefine ('MAX_SUGGEST_URLS_ONE_DAY', 10); +nsconddefine('MAX_SUGGEST_URLS_ONE_DAY', 10); +/** Directly add suggested urls to crawl options and inject them into any + * active crawl. If false, these are stored in a file and the user has to + * click a button to add them. + */ +nsconddefine('DIRECT_ADD_SUGGEST', false); /** * Length after which to truncate names for users/groups/roles when * they are displayed (not in DB) */ -nsconddefine ('NAME_TRUNCATE_LEN', 7); +nsconddefine('NAME_TRUNCATE_LEN', 7); /** USER STATUS value used for someone who is not in a group by can browse*/ nsdefine('NOT_MEMBER_STATUS', -1); /** USER STATUS value used for a user who can log in and perform activities */ @@ -853,11 +858,11 @@ nsdefine('ADVERTISEMENT_COMPLETED_STATUS',4); * Adjustable AD RELATED defines * /** Truncate length for ad description and keywords*/ -nsdefine ('ADVERTISEMENT_TRUNCATE_LEN', 8); +nsdefine('ADVERTISEMENT_TRUNCATE_LEN', 8); /** Initial bid amount for advertisement keyword */ -nsconddefine ('AD_KEYWORD_INIT_BID',1); +nsconddefine('AD_KEYWORD_INIT_BID',1); /** advertisement date format for start date and end date*/ -nsconddefine ('AD_DATE_FORMAT','Y-m-d'); +nsconddefine('AD_DATE_FORMAT','Y-m-d'); /** advertisement logo*/ nsconddefine('AD_LOGO','resources/adv-logo.png'); diff --git a/src/controllers/RegisterController.php b/src/controllers/RegisterController.php index 118f5348e..bdd899f36 100755 --- a/src/controllers/RegisterController.php +++ b/src/controllers/RegisterController.php @@ -637,6 +637,7 @@ class RegisterController extends Controller implements CrawlConstants { $data["REFRESH"] = "suggest"; $visitor_model = $this->model("visitor"); + $crawl_model = $this->model("crawl"); $clear = false; if (C\CAPTCHA_MODE != C\IMAGE_CAPTCHA) { unset($_SESSION["captcha_text"]); @@ -797,7 +798,28 @@ class RegisterController extends Controller implements CrawlConstants break; } // Handle cases where captcha was okay - if (!$this->model("crawl")->appendSuggestSites($url)) { + if (C\DIRECT_ADD_SUGGEST) { + $machine_urls = $this->model("machine")->getQueueServerUrls(); + $status = $crawl_model->crawlStatus($machine_urls); + if (empty($status['CRAWL_TIME'])) { + $seed_info = $crawl_model->getSeedInfo(); + $seed_info['seed_sites']['url'][] = "#\n#" . + date('r')."\n#"; + $seed_info['seed_sites']['url'][] = $url; + $crawl_model->setSeedInfo($seed_info); + } else { + $timestamp = $status['CRAWL_TIME']; + $seed_info = $crawl_model->getCrawlSeedInfo( + $timestamp, $machine_urls); + $seed_info['seed_sites']['url'][] = "#\n#" . + date('r')."\n#"; + $seed_info['seed_sites']['url'][] = $url; + $crawl_model->setCrawlSeedInfo($timestamp, + $seed_info, $machine_urls); + $crawl_model->injectUrlsCurrentCrawl( + $timestamp, [$url], $machine_urls); + } + } else if (!$crawl_model->appendSuggestSites($url)) { $data['SCRIPT'] = "doMessage('<h1 class=\"red\" >". tl('register_controller_suggest_full')."</h1>');"; return $data; diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php index f08f9ccde..eb4ed3e7a 100755 --- a/src/controllers/SearchController.php +++ b/src/controllers/SearchController.php @@ -799,7 +799,7 @@ class SearchController extends Controller implements CrawlConstants $data['ELEMENT'] = "displayadvertisement"; $advertisement_model = $this->model("advertisement"); if (isset($_REQUEST['a']) && - $_REQUEST['a'] == C\AD_SAVE_CLICK) { + $_REQUEST['a'] == 'recordClick') { $advertisement_model->addClick($arg); } else { $data['RELEVANT_ADVERTISEMENT'] = diff --git a/src/controllers/components/CrawlComponent.php b/src/controllers/components/CrawlComponent.php index f4fa887b1..340ccb952 100644 --- a/src/controllers/components/CrawlComponent.php +++ b/src/controllers/components/CrawlComponent.php @@ -407,7 +407,8 @@ class CrawlComponent extends Component implements CrawlConstants } else { $seed_info = $crawl_model->getSeedInfo(); } - if (isset($_REQUEST['suggest']) && $_REQUEST['suggest'] == 'add') { + if (!C\DIRECT_ADD_SUGGEST && + isset($_REQUEST['suggest']) && $_REQUEST['suggest'] == 'add') { $suggest_urls = $crawl_model->getSuggestSites(); if (isset($_REQUEST['ts'])) { $new_urls = []; @@ -551,7 +552,7 @@ class CrawlComponent extends Component implements CrawlConstants $seed_info, $machine_urls); if ($inject_urls != [] && $crawl_model->injectUrlsCurrentCrawl( - $timestamp, $inject_urls, $machine_urls)) { + $timestamp, $inject_urls, $machine_urls)) { $add_message = "<br />". tl('crawl_component_urls_injected'); if (isset($_REQUEST['use_suggest']) && @@ -1732,4 +1733,4 @@ class CrawlComponent extends Component implements CrawlConstants "switchSourceType()"; return $data; } -} \ No newline at end of file +} diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php index 2e9137aac..1018e3e13 100755 --- a/src/executables/QueueServer.php +++ b/src/executables/QueueServer.php @@ -674,7 +674,7 @@ class QueueServer implements CrawlConstants, Join L\crawlLog("Checking for robots.txt files to process..."); $robot_dir = C\CRAWL_DIR."/schedules/". - self::robot_data_base_name.$this->crawl_time; + self::robot_data_base_name . $this->crawl_time; $this->processDataFile($robot_dir, "processRecrawlRobotArchive"); L\crawlLog("done. "); diff --git a/src/views/elements/CrawloptionsElement.php b/src/views/elements/CrawloptionsElement.php index a44d213d9..ba7467695 100644 --- a/src/views/elements/CrawloptionsElement.php +++ b/src/views/elements/CrawloptionsElement.php @@ -136,35 +136,39 @@ class CrawloptionsElement extends Element <textarea class="short-text-area" id="disallowed-sites" name="disallowed_sites" ><?=$data['disallowed_sites'] ?></textarea> <?php - if (!isset($data['ts'])) { - ?> + if (!isset($data['ts'])) { ?> <div class="top-margin"><label for="seed-sites"><b><?= - tl('crawloptions_element_seed_sites')?></b></label> - [<a href="<?=$admin_url ?>&a=manageCrawls&arg=options<?= - '&' . C\CSRF_TOKEN . '=' . $data[C\CSRF_TOKEN] - ?>&suggest=add"><?= - tl('crawloptions_element_add_suggest_urls') ?></a>] <?= - $this->view->helper("helpbutton")->render( - "Seed Sites and URL Suggestions", $data[C\CSRF_TOKEN]) ?> + tl('crawloptions_element_seed_sites')?></b></label><?php + if(!C\DIRECT_ADD_SUGGEST) { + ?>[<a href="<?=$admin_url + ?>&a=manageCrawls&arg=options<?= + '&' . C\CSRF_TOKEN . '=' . $data[C\CSRF_TOKEN] + ?>&suggest=add"><?= + tl('crawloptions_element_add_suggest_urls') ?></a>] <?= + $this->view->helper("helpbutton")->render( + "Seed Sites and URL Suggestions", $data[C\CSRF_TOKEN]) + ?><?php + } + ?> </div> <textarea class="tall-text-area" id="seed-sites" name="seed_sites" ><?= $data['seed_sites'] - ?></textarea> - <?php - } else { - ?> + ?></textarea><?php + } else { ?> <div class="top-margin"><label for="inject-sites"><b><?= - tl('crawloptions_element_inject_sites')?></b></label> - [<a href="?c=admin&a=manageCrawls&arg=options<?= - '&'.C\CSRF_TOKEN.'='.$data[C\CSRF_TOKEN].'&ts='. - $data['ts'] ?>&suggest=add"><?= - tl('crawloptions_element_add_suggest_urls') ?></a>] - </div></div> - <?php - if ($data['INJECT_SITES'] != "") { - ?> - <input type="hidden" name="use_suggest" value="true" /> + tl('crawloptions_element_inject_sites')?></b></label><?php + if(!C\DIRECT_ADD_SUGGEST) { + ?>[<a href="?c=admin&a=manageCrawls&arg=options<?= + '&'.C\CSRF_TOKEN.'='.$data[C\CSRF_TOKEN].'&ts='. + $data['ts'] ?>&suggest=add"><?= + tl('crawloptions_element_add_suggest_urls') ?></a>] + </div> <?php + if ($data['INJECT_SITES'] != "") { + ?> + <input type="hidden" name="use_suggest" value="true" /> + <?php + } } ?> <textarea class="short-text-area" id="inject-sites"