Tweaks to the Recipe and Addresses plugins after rechecking that they work; slight improvement to how elements are extracted from news feeds, a=chris

Chris Pollett [2018-05-21]
Tweaks to the Recipe and Addresses plugins after rechecking that they work; slight improvement to how elements are extracted from news feeds, a=chris
Filename
src/library/CrawlDaemon.php
src/library/indexing_plugins/AddressesPlugin.php
src/library/indexing_plugins/RecipePlugin.php
src/library/media_jobs/FeedsUpdateJob.php
src/locale/ar/configure.ini
src/locale/bn/configure.ini
src/locale/de/configure.ini
src/locale/en_US/configure.ini
src/locale/es/configure.ini
src/locale/fa/configure.ini
src/locale/fr_FR/configure.ini
src/locale/he/configure.ini
src/locale/hi/configure.ini
src/locale/in_ID/configure.ini
src/locale/it/configure.ini
src/locale/ja/configure.ini
src/locale/kn/configure.ini
src/locale/ko/configure.ini
src/locale/nl/configure.ini
src/locale/pl/configure.ini
src/locale/pt/configure.ini
src/locale/ru/configure.ini
src/locale/te/configure.ini
src/locale/th/configure.ini
src/locale/tr/configure.ini
src/locale/vi_VN/configure.ini
src/locale/zh_CN/configure.ini
diff --git a/src/library/CrawlDaemon.php b/src/library/CrawlDaemon.php
index f1f2fab8d..6ac0ce9d1 100644
--- a/src/library/CrawlDaemon.php
+++ b/src/library/CrawlDaemon.php
@@ -252,7 +252,7 @@ class CrawlDaemon implements CrawlConstants
             $process_user_info = posix_getpwuid(posix_getuid());
             $process_home = $process_user_info['dir'];
             if (C\nsdefined("FORCE_HHVM") || (
-                stristr(phpversion(), "hhvm") !==false &&
+                stristr(phpversion(), "hhvm") !== false &&
                 posix_access($process_home, POSIX_W_OK))) {
                 $php = 'hhvm -f ';
                 if (C\nsdefined("HHVM_PATH") ) {
diff --git a/src/library/indexing_plugins/AddressesPlugin.php b/src/library/indexing_plugins/AddressesPlugin.php
index def10ed56..3339059b3 100644
--- a/src/library/indexing_plugins/AddressesPlugin.php
+++ b/src/library/indexing_plugins/AddressesPlugin.php
@@ -142,6 +142,7 @@ class AddressesPlugin extends IndexingPlugin implements CrawlConstants
         "TANZANIA, UNITED REPUBLIC OF" => "TZ",
         "UKRAINE" => "UA","UGANDA" => "UG",
         "UNITED STATES MINOR OUTLYING ISLANDS" => "UM",
+        "UNITED STATES OF AMERICA" => "USA",
         "UNITED STATES" => "US","URUGUAY" => "UY","UZBEKISTAN" => "UZ",
         "VATICAN CITY" => "VA","SAINT VINCENT AND THE GRENADINES" => "VC",
         "VENEZUELA, BOLIVARIAN REPUBLIC OF" => "VE",
@@ -395,7 +396,7 @@ class AddressesPlugin extends IndexingPlugin implements CrawlConstants
                     }
                 break;
                 case "maybe":
-                    if ($len_about_right){
+                    if ($len_about_right) {
                         if ($num_lines < $max_lines) {
                             $current_candidate[] = $line;
                             $num_lines++;
@@ -406,7 +407,8 @@ class AddressesPlugin extends IndexingPlugin implements CrawlConstants
                         }
                     } else {
                         $state = "dont";
-                        if ($num_lines <= $max_lines&&$num_lines >= $min_lines){
+                        if ($num_lines <= $max_lines &&
+                            $num_lines >= $min_lines) {
                             $current_candidate = $this->checkCandidate(
                                 $current_candidate);
                             if ($current_candidate) {
@@ -443,8 +445,8 @@ class AddressesPlugin extends IndexingPlugin implements CrawlConstants
         $address = false;
         $found_count = 0;
         $num_lines = count($pre_address);
-        $check_array = ["checkCountry"=>"checkCountry",
-            "checkStreet"=>"checkStreet",
+        $check_array = ["checkCountry" => "checkCountry",
+            "checkStreet" => "checkStreet",
             "checkPhoneOrEmail" => "checkPhoneOrEmail",
             "checkRegion" => "checkRegion",
             "checkZipPostalCodeWords" => "checkZipPostalCodeWords"];
@@ -563,7 +565,6 @@ class AddressesPlugin extends IndexingPlugin implements CrawlConstants
         $num_parts = count($line_parts);
         $line = mb_strtoupper(trim($line_parts[$num_parts - 1]));
         $countries = $this->countries;
-
         $country_codes = array_flip($countries);
         if (strlen($line) == 2) {
             $line = substr($line, 0, 2);
diff --git a/src/library/indexing_plugins/RecipePlugin.php b/src/library/indexing_plugins/RecipePlugin.php
index 30e84a4fe..b351be772 100644
--- a/src/library/indexing_plugins/RecipePlugin.php
+++ b/src/library/indexing_plugins/RecipePlugin.php
@@ -58,11 +58,10 @@ require_once C\BASE_DIR . "/library/LocaleFunctions.php";
  * looking at what was needed to screen scrape recipes from the
  * following sites:
  *
- * http://allrecipes.com/
- * http://www.food.com/
+ * https://allrecipes.com/
+ * http://www.geniuskitchen.com/
  * http://www.betterrecipes.com/
- * http://www.foodnetwork.com/
- * http://www.bettycrocker.com/
+ * https://www.bettycrocker.com/
  *
  *
  * @author Priya Gangaraju, Chris Pollett (re-organized, added documentation,
@@ -130,25 +129,28 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
         //detect recipes
         $recipes_per_page = $xpath->evaluate(
             /*allr, f.com, brec, fnet*/
-            "/html//header[@class='recipe']/h1|
-            /html//*[@id='recipe_title']|
-            /html//*[@itemtype='http://schema.org/Recipe']");
+            "//header[contains(@class, 'recipe-header')]/h1|" .
+            "/html//*[@id='recipe_title']|" .
+            "/html//*[@itemtype='http://schema.org/Recipe']");
         $recipe = [];
         $subdocs_description = [];
         if (is_object($recipes_per_page) && $recipes_per_page->length != 0) {
             $recipes_count = $recipes_per_page->length;
             $titles = $xpath->evaluate(
                /* allr, f.com, brec, fnet   */
-               "/html//*[@id = 'recipe_title']|
-               /html//header[@class = 'recipe']/h1|
-               /html//*[@itemprop='name']");
+               "/html//*[@id = 'recipe_title']|" .
+               "//header[contains(@class, 'recipe-header')]/h1|" .
+               "/html//*[@itemprop = 'name']");
+            if ($titles->length == 0) {
+                return $subdocs_description;
+            }
             for ($i = 0; $i < $recipes_count; $i++) {
                 $ingredients = $xpath->evaluate(
                     /*allr*, fcomm, brec, fnet*/
-                    "/html//ul[@class = 'ingredient-wrap']/li |
-                    /html//li[@data-ingredient]|
-                    /html//*[@itemprop ='ingredient']|
-                    /html//*[@itemprop='ingredients']");
+                    "/html//ul[@class = 'ingredient-wrap']/li|" .
+                    "/html//li[@data-ingredient]|" .
+                    "/html//*[@itemprop ='ingredient']|" .
+                    "/html//*[@itemprop='ingredients']");
                 $ingredients_result = "";
                 if (is_object($ingredients) && $ingredients->length != 0){
                     $lastIngredient = end($ingredients);
diff --git a/src/library/media_jobs/FeedsUpdateJob.php b/src/library/media_jobs/FeedsUpdateJob.php
index 28f77214d..0f664d1d1 100644
--- a/src/library/media_jobs/FeedsUpdateJob.php
+++ b/src/library/media_jobs/FeedsUpdateJob.php
@@ -263,10 +263,16 @@ class FeedsUpdateJob extends MediaJob
                 //strip namespaces
                 $page = preg_replace('@<(/?)(\w+\s*)\:@u', '<$1',
                     $feed[CrawlConstants::PAGE]);
+                if (empty($page)) {
+                    $page = $feed[CrawlConstants::PAGE];
+                }
             }
             if (isset($feed['IMAGE_XPATH']) && !$is_regex) {
                 $feed['IMAGE_XPATH'] = preg_replace('@/(\s*\w+\s*)\:@u', '/',
                     $feed['IMAGE_XPATH']);
+                if (empty($feed['IMAGE_XPATH'])) {
+                    $feed['IMAGE_XPATH'] = $feed['IMAGE_XPATH'];
+                }
             }
             if ($is_html) {
                 @$dom->loadHTML($page);
diff --git a/src/locale/ar/configure.ini b/src/locale/ar/configure.ini
index 6c3965dca..7afb07372 100755
--- a/src/locale/ar/configure.ini
+++ b/src/locale/ar/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/bn/configure.ini b/src/locale/bn/configure.ini
index 68a7468fb..22371535a 100755
--- a/src/locale/bn/configure.ini
+++ b/src/locale/bn/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/de/configure.ini b/src/locale/de/configure.ini
index 441748a3b..44abbd47d 100755
--- a/src/locale/de/configure.ini
+++ b/src/locale/de/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/en_US/configure.ini b/src/locale/en_US/configure.ini
index 884460e7f..4d8ecbda9 100644
--- a/src/locale/en_US/configure.ini
+++ b/src/locale/en_US/configure.ini
@@ -832,17 +832,17 @@ botstory_element_delete_operation = "Do you really want to delete this pattern?"
 botstory_element_deletepattern = "Delete"
 ;
 ; ManagecreditsElement.php
-managecredits_element_purchase_credits = "Purchase Ad Credits"
+managecredits_element_purchase_credits = "Purchase Credits"
 managecredit_element_num_credits = "Quantity"
 managecredit_element_card_number = "Credit Card Number"
 managecredit_element_cvc = "CVC"
 managecredit_element_expiration = "Expiration"
 managecredits_element_charge_warning = "Using the Purchase button charges the above card the Quantity field&#039;s amount in US dollars and agrees to the "
-managecredits_element_program_terms = "Ad Program Terms"
+managecredits_element_program_terms = "Program Terms"
 managecredits_element_purchase = "Purchase"
 managecredits_element_script_failure = "No Working Credit Card Script Found!"
 managecredits_element_balance = "Balance: %s credits"
-managecredits_element_transactions = "Ad Credit Transactions"
+managecredits_element_transactions = "Credit Transactions"
 managecredits_element_type = "Type"
 managecredits_element_amount = "Amount"
 managecredits_element_date = "Date"
@@ -866,7 +866,6 @@ manageaccount_element_username = "Username"
 manageaccount_element_firstname = "First Name"
 manageaccount_element_lastname = "Last Name"
 manageaccount_element_email = "Email"
-manageaccount_element_enable_store = "Enable Store:"
 manageaccount_element_is_bot = "Bot User:"
 manageaccount_element_bot_unique_token = "Bot Unique Token"
 manageaccount_element_bot_callback_url = "Bot Callback URL"
@@ -1441,7 +1440,7 @@ manageadvertisements_element_keyword_bid_amount = "Minimum Bid Required"
 manageadvertisements_element_expensive_word = "Expensive word"
 manageadvertisements_element_calculate_bid = "Calculate Bid"
 manageadvertisements_element_update = "Update"
-manageadvertisements_element_buy_info = "Using the Purchase button deducts the Budget field amount from your %s available ad credits."
+manageadvertisements_element_buy_info = "Using the Purchase button deducts the Budget field amount from your %s available credits."
 manageadvertisements_element_buy_credits = "Buy more ad credits"
 manageadvertisements_element_edit_ad = "Edit Ad"
 manageadvertisements_element_purchase = "Purchase"
diff --git a/src/locale/es/configure.ini b/src/locale/es/configure.ini
index 8c59fc6c9..57efd1d02 100755
--- a/src/locale/es/configure.ini
+++ b/src/locale/es/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/fa/configure.ini b/src/locale/fa/configure.ini
index 962eca19c..c73c7a95b 100755
--- a/src/locale/fa/configure.ini
+++ b/src/locale/fa/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/fr_FR/configure.ini b/src/locale/fr_FR/configure.ini
index d9e7f4133..09574c152 100755
--- a/src/locale/fr_FR/configure.ini
+++ b/src/locale/fr_FR/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/he/configure.ini b/src/locale/he/configure.ini
index cc6c7c4ea..c8fa6b01b 100755
--- a/src/locale/he/configure.ini
+++ b/src/locale/he/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/hi/configure.ini b/src/locale/hi/configure.ini
index edb69231b..5d9b1dba4 100755
--- a/src/locale/hi/configure.ini
+++ b/src/locale/hi/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/in_ID/configure.ini b/src/locale/in_ID/configure.ini
index 92876bebf..2acc2dd67 100755
--- a/src/locale/in_ID/configure.ini
+++ b/src/locale/in_ID/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/it/configure.ini b/src/locale/it/configure.ini
index bd17e7756..2004095e2 100755
--- a/src/locale/it/configure.ini
+++ b/src/locale/it/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/ja/configure.ini b/src/locale/ja/configure.ini
index e17739e95..c07c80463 100755
--- a/src/locale/ja/configure.ini
+++ b/src/locale/ja/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/kn/configure.ini b/src/locale/kn/configure.ini
index 0fba115e2..66abefcb7 100755
--- a/src/locale/kn/configure.ini
+++ b/src/locale/kn/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/ko/configure.ini b/src/locale/ko/configure.ini
index ba58e6672..1734b5a6b 100755
--- a/src/locale/ko/configure.ini
+++ b/src/locale/ko/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/nl/configure.ini b/src/locale/nl/configure.ini
index f7656da22..bf4a694da 100644
--- a/src/locale/nl/configure.ini
+++ b/src/locale/nl/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/pl/configure.ini b/src/locale/pl/configure.ini
index b9bfaf355..42144e947 100755
--- a/src/locale/pl/configure.ini
+++ b/src/locale/pl/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/pt/configure.ini b/src/locale/pt/configure.ini
index 234c4de17..a703975b6 100755
--- a/src/locale/pt/configure.ini
+++ b/src/locale/pt/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/ru/configure.ini b/src/locale/ru/configure.ini
index c42d70bf5..345a881a0 100755
--- a/src/locale/ru/configure.ini
+++ b/src/locale/ru/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/te/configure.ini b/src/locale/te/configure.ini
index 9334339f7..f7f7d2c51 100644
--- a/src/locale/te/configure.ini
+++ b/src/locale/te/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/th/configure.ini b/src/locale/th/configure.ini
index b25150cce..bb94f8fbb 100755
--- a/src/locale/th/configure.ini
+++ b/src/locale/th/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/tr/configure.ini b/src/locale/tr/configure.ini
index 322d8ac58..7488d07d0 100755
--- a/src/locale/tr/configure.ini
+++ b/src/locale/tr/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/vi_VN/configure.ini b/src/locale/vi_VN/configure.ini
index c71f56873..5e46a5366 100755
--- a/src/locale/vi_VN/configure.ini
+++ b/src/locale/vi_VN/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
diff --git a/src/locale/zh_CN/configure.ini b/src/locale/zh_CN/configure.ini
index de9e9d386..1bf771e54 100755
--- a/src/locale/zh_CN/configure.ini
+++ b/src/locale/zh_CN/configure.ini
@@ -866,7 +866,6 @@ manageaccount_element_username = ""
 manageaccount_element_firstname = ""
 manageaccount_element_lastname = ""
 manageaccount_element_email = ""
-manageaccount_element_enable_store = ""
 manageaccount_element_is_bot = ""
 manageaccount_element_bot_unique_token = ""
 manageaccount_element_bot_callback_url = ""
ViewGit