Support direct injection of suggest urls, a=chris

Chris Pollett [2015-12-25 04:Dec:th]
Support direct injection of suggest urls, a=chris
Filename
src/configs/Config.php
src/controllers/RegisterController.php
src/controllers/SearchController.php
src/controllers/components/CrawlComponent.php
src/executables/QueueServer.php
src/views/elements/CrawloptionsElement.php
diff --git a/src/configs/Config.php b/src/configs/Config.php
index e56c442ca..0a6ea48ce 100755
--- a/src/configs/Config.php
+++ b/src/configs/Config.php
@@ -640,13 +640,13 @@ nsconddefine('THUMB_DIM', 128);
  */
 nsconddefine('THUMB_SIZE', 1000000);
 /** Characters we view as not part of words, not same as POSIX [:punct:]*/
-nsconddefine ('PUNCT', "\.|\,|\:|\;|\"|\'|\[|\/|\%|\?|-|" .
+nsconddefine('PUNCT', "\.|\,|\:|\;|\"|\'|\[|\/|\%|\?|-|" .
     "\]|\{|\}|\(|\)|\!|\||\&|\`|" .
     "\’|\‘|©|®|™|℠|…|\/|\>|,|\=|。|)|:|、|" .
     "”|“|《|》|(|「|」|★|【|】|·|\+|\*|;".
         "|!|—|―|?|!|،|؛|؞|؟|٪|٬|٭");
 /** Number of total description deemed title */
-nsconddefine ('AD_HOC_TITLE_LENGTH', 50);
+nsconddefine('AD_HOC_TITLE_LENGTH', 50);
 /** Used to say number of bytes in histogram bar (stats page) for file
     download sizes
  */
@@ -695,19 +695,24 @@ nsconddefine('MIN_QUERY_CACHE_TIME', ONE_HOUR); //one hour
  * Default number of items to page through for users,roles, mixes, etc
  * on the admin screens
  */
-nsconddefine ('DEFAULT_ADMIN_PAGING_NUM', 50);
+nsconddefine('DEFAULT_ADMIN_PAGING_NUM', 50);
 /** Maximum number of bytes that the file that the suggest-a-url form
  * send data to can be.
  */
-nsconddefine ('MAX_SUGGEST_URL_FILE_SIZE', 100000);
+nsconddefine('MAX_SUGGEST_URL_FILE_SIZE', 100000);
 /** Maximum number of a user can suggest to the suggest-a-url form in one day
  */
-nsconddefine ('MAX_SUGGEST_URLS_ONE_DAY', 10);
+nsconddefine('MAX_SUGGEST_URLS_ONE_DAY', 10);
+/** Whether suggested urls are added directly to the crawl options and
+ *  injected into any active crawl. If false, they are instead stored in a
+ *  file and the user has to click a button to add them.
+ */
+nsconddefine('DIRECT_ADD_SUGGEST', false);
 /**
  * Length after which to truncate names for users/groups/roles when
  * they are displayed (not in DB)
  */
-nsconddefine ('NAME_TRUNCATE_LEN', 7);
+nsconddefine('NAME_TRUNCATE_LEN', 7);
 /** USER STATUS value used for someone who is not in a group by can browse*/
 nsdefine('NOT_MEMBER_STATUS', -1);
 /** USER STATUS value used for a user who can log in and perform activities */
@@ -853,11 +858,11 @@ nsdefine('ADVERTISEMENT_COMPLETED_STATUS',4);
  * Adjustable AD RELATED defines
  *
  /** Truncate length for ad description and keywords*/
-nsdefine ('ADVERTISEMENT_TRUNCATE_LEN', 8);
+nsdefine('ADVERTISEMENT_TRUNCATE_LEN', 8);

 /** Initial bid amount for advertisement keyword */
-nsconddefine ('AD_KEYWORD_INIT_BID',1);
+nsconddefine('AD_KEYWORD_INIT_BID',1);
 /** advertisement date format for start date and end date*/
-nsconddefine ('AD_DATE_FORMAT','Y-m-d');
+nsconddefine('AD_DATE_FORMAT','Y-m-d');
 /** advertisement logo*/
 nsconddefine('AD_LOGO','resources/adv-logo.png');
diff --git a/src/controllers/RegisterController.php b/src/controllers/RegisterController.php
index 118f5348e..bdd899f36 100755
--- a/src/controllers/RegisterController.php
+++ b/src/controllers/RegisterController.php
@@ -637,6 +637,7 @@ class RegisterController extends Controller implements CrawlConstants
     {
         $data["REFRESH"] = "suggest";
         $visitor_model = $this->model("visitor");
+        $crawl_model = $this->model("crawl");
         $clear = false;
         if (C\CAPTCHA_MODE != C\IMAGE_CAPTCHA) {
             unset($_SESSION["captcha_text"]);
@@ -797,7 +798,28 @@ class RegisterController extends Controller implements CrawlConstants
                     break;
             }
             // Handle cases where captcha was okay
-            if (!$this->model("crawl")->appendSuggestSites($url)) {
+            if (C\DIRECT_ADD_SUGGEST) {
+                $machine_urls = $this->model("machine")->getQueueServerUrls();
+                $status = $crawl_model->crawlStatus($machine_urls);
+                if (empty($status['CRAWL_TIME'])) {
+                    $seed_info = $crawl_model->getSeedInfo();
+                    $seed_info['seed_sites']['url'][] = "#\n#" .
+                        date('r')."\n#";
+                    $seed_info['seed_sites']['url'][] = $url;
+                    $crawl_model->setSeedInfo($seed_info);
+                } else {
+                    $timestamp = $status['CRAWL_TIME'];
+                    $seed_info = $crawl_model->getCrawlSeedInfo(
+                        $timestamp, $machine_urls);
+                    $seed_info['seed_sites']['url'][] = "#\n#" .
+                        date('r')."\n#";
+                    $seed_info['seed_sites']['url'][] = $url;
+                    $crawl_model->setCrawlSeedInfo($timestamp,
+                        $seed_info, $machine_urls);
+                    $crawl_model->injectUrlsCurrentCrawl(
+                        $timestamp, [$url], $machine_urls);
+                }
+            } else if (!$crawl_model->appendSuggestSites($url)) {
                 $data['SCRIPT'] = "doMessage('<h1 class=\"red\" >".
                 tl('register_controller_suggest_full')."</h1>');";
                 return $data;
diff --git a/src/controllers/SearchController.php b/src/controllers/SearchController.php
index f08f9ccde..eb4ed3e7a 100755
--- a/src/controllers/SearchController.php
+++ b/src/controllers/SearchController.php
@@ -799,7 +799,7 @@ class SearchController extends Controller implements CrawlConstants
                     $data['ELEMENT'] = "displayadvertisement";
                     $advertisement_model = $this->model("advertisement");
                     if (isset($_REQUEST['a']) &&
-                        $_REQUEST['a'] == C\AD_SAVE_CLICK) {
+                        $_REQUEST['a'] == 'recordClick') {
                         $advertisement_model->addClick($arg);
                     } else {
                         $data['RELEVANT_ADVERTISEMENT'] =
diff --git a/src/controllers/components/CrawlComponent.php b/src/controllers/components/CrawlComponent.php
index f4fa887b1..340ccb952 100644
--- a/src/controllers/components/CrawlComponent.php
+++ b/src/controllers/components/CrawlComponent.php
@@ -407,7 +407,8 @@ class CrawlComponent extends Component implements CrawlConstants
         } else {
             $seed_info = $crawl_model->getSeedInfo();
         }
-        if (isset($_REQUEST['suggest']) && $_REQUEST['suggest'] == 'add') {
+        if (!C\DIRECT_ADD_SUGGEST &&
+            isset($_REQUEST['suggest']) && $_REQUEST['suggest'] == 'add') {
             $suggest_urls = $crawl_model->getSuggestSites();
             if (isset($_REQUEST['ts'])) {
                 $new_urls = [];
@@ -551,7 +552,7 @@ class CrawlComponent extends Component implements CrawlConstants
                     $seed_info, $machine_urls);
                 if ($inject_urls != [] &&
                     $crawl_model->injectUrlsCurrentCrawl(
-                    $timestamp, $inject_urls, $machine_urls)) {
+                        $timestamp, $inject_urls, $machine_urls)) {
                     $add_message = "<br />".
                         tl('crawl_component_urls_injected');
                     if (isset($_REQUEST['use_suggest']) &&
@@ -1732,4 +1733,4 @@ class CrawlComponent extends Component implements CrawlConstants
             "switchSourceType()";
         return $data;
     }
-}
\ No newline at end of file
+}
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index 2e9137aac..1018e3e13 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -674,7 +674,7 @@ class QueueServer implements CrawlConstants, Join
         L\crawlLog("Checking for robots.txt files to process...");
         $robot_dir =
             C\CRAWL_DIR."/schedules/".
-                self::robot_data_base_name.$this->crawl_time;
+                self::robot_data_base_name . $this->crawl_time;

         $this->processDataFile($robot_dir, "processRecrawlRobotArchive");
         L\crawlLog("done. ");
diff --git a/src/views/elements/CrawloptionsElement.php b/src/views/elements/CrawloptionsElement.php
index a44d213d9..ba7467695 100644
--- a/src/views/elements/CrawloptionsElement.php
+++ b/src/views/elements/CrawloptionsElement.php
@@ -136,35 +136,39 @@ class CrawloptionsElement extends Element
         <textarea class="short-text-area" id="disallowed-sites"
             name="disallowed_sites" ><?=$data['disallowed_sites'] ?></textarea>
         <?php
-        if (!isset($data['ts'])) {
-            ?>
+        if (!isset($data['ts'])) { ?>
             <div class="top-margin"><label for="seed-sites"><b><?=
-                tl('crawloptions_element_seed_sites')?></b></label>
-                [<a href="<?=$admin_url ?>&amp;a=manageCrawls&amp;arg=options<?=
-                '&amp;' . C\CSRF_TOKEN . '=' . $data[C\CSRF_TOKEN]
-                ?>&amp;suggest=add"><?=
-                tl('crawloptions_element_add_suggest_urls') ?></a>] <?=
-                $this->view->helper("helpbutton")->render(
-                "Seed Sites and URL Suggestions", $data[C\CSRF_TOKEN]) ?>
+                tl('crawloptions_element_seed_sites')?></b></label><?php
+                if(!C\DIRECT_ADD_SUGGEST) {
+                    ?>[<a href="<?=$admin_url
+                    ?>&amp;a=manageCrawls&amp;arg=options<?=
+                    '&amp;' . C\CSRF_TOKEN . '=' . $data[C\CSRF_TOKEN]
+                    ?>&amp;suggest=add"><?=
+                    tl('crawloptions_element_add_suggest_urls') ?></a>] <?=
+                    $this->view->helper("helpbutton")->render(
+                    "Seed Sites and URL Suggestions", $data[C\CSRF_TOKEN])
+                    ?><?php
+                 }
+                 ?>
             </div>
             <textarea class="tall-text-area" id="seed-sites"
                 name="seed_sites" ><?= $data['seed_sites']
-            ?></textarea>
-        <?php
-        } else {
-            ?>
+            ?></textarea><?php
+        } else { ?>
             <div class="top-margin"><label for="inject-sites"><b><?=
-                tl('crawloptions_element_inject_sites')?></b></label>
-            [<a href="?c=admin&amp;a=manageCrawls&amp;arg=options<?=
-                '&amp;'.C\CSRF_TOKEN.'='.$data[C\CSRF_TOKEN].'&amp;ts='.
-                $data['ts'] ?>&amp;suggest=add"><?=
-            tl('crawloptions_element_add_suggest_urls') ?></a>]
-            </div></div>
-            <?php
-            if ($data['INJECT_SITES'] != "") {
-                ?>
-                <input type="hidden" name="use_suggest" value="true" />
+                tl('crawloptions_element_inject_sites')?></b></label><?php
+            if(!C\DIRECT_ADD_SUGGEST) {
+                ?>[<a href="?c=admin&amp;a=manageCrawls&amp;arg=options<?=
+                    '&amp;'.C\CSRF_TOKEN.'='.$data[C\CSRF_TOKEN].'&amp;ts='.
+                    $data['ts'] ?>&amp;suggest=add"><?=
+                tl('crawloptions_element_add_suggest_urls') ?></a>]
+                </div>
                 <?php
+                if ($data['INJECT_SITES'] != "") {
+                    ?>
+                    <input type="hidden" name="use_suggest" value="true" />
+                    <?php
+                }
             }
             ?>
             <textarea class="short-text-area" id="inject-sites"
ViewGit