Modify search api example for case where ZipArchive doesn't exist, a=chris

Chris Pollett [2013-04-06 19:Apr:th]

Modify search api example for case where ZipArchive doesn't exist, a=chris

Filename
bin/arc_tool.php
bin/fetcher.php
bin/news_updater.php
configs/config.php
controllers/admin_controller.php
controllers/fetch_controller.php
controllers/search_controller.php
examples/search_api.php
index.php
lib/archive_bundle_iterators/database_bundle_iterator.php
lib/archive_bundle_iterators/mediawiki_bundle_iterator.php
lib/archive_bundle_iterators/text_archive_bundle_iterator.php
lib/archive_bundle_iterators/warc_archive_bundle_iterator.php
lib/crawl_daemon.php
lib/index_bundle_iterators/group_iterator.php
lib/index_bundle_iterators/network_iterator.php
lib/index_dictionary.php
lib/indexing_plugins/recipe_plugin.php
lib/page_rule_parser.php
lib/phrase_parser.php
lib/processors/rss_processor.php
lib/utility.php
locale/en-US/pages/bot.thtml
locale/fr-FR/configure.ini
models/crawl_model.php
models/locale_model.php
models/machine_model.php
models/model.php
models/phrase_model.php
models/profile_model.php
models/source_model.php
models/user_model.php
scripts/suggest.js
views/elements/configure_element.php
views/elements/pageoptions_element.php
views/machinestatus_view.php

diff --git a/bin/arc_tool.php b/bin/arc_tool.php
index 1e913649e..bbcc5a564 100755
--- a/bin/arc_tool.php
+++ b/bin/arc_tool.php
@@ -814,7 +814,7 @@ class ArcTool implements CrawlConstants
      * @param string $archive_name name or path to what was supposed to be
      *      an archive
      */
-    function badFormatMessageAndExit($archive_name,
+    function badFormatMessageAndExit($archive_name,
         $allowed_archives = "web or index")
     {
         echo <<< EOD
@@ -858,14 +858,14 @@ php arc_tool.php mergetiers bundle_name max_tier
 php arc_tool.php posting bundle_name generation offset
     or
 php arc_tool.php posting bundle_name generation offset num
-    /* returns info about the posting (num many postings) in bundle_name at
+    /* returns info about the posting (num many postings) in bundle_name at
        the given generation and offset */

 php arc_tool.php reindex bundle_name
     // reindex the word dictionary in bundle_name

 php arc_tool.php show bundle_name start num
-    /* outputs items start through num from bundle_name or name of
+    /* outputs items start through num from bundle_name or name of
        non-Yioop archive crawl folder */


@@ -876,4 +876,4 @@ EOD;

 $arc_tool =  new ArcTool();
 $arc_tool->start();
-?>
+?>
\ No newline at end of file
diff --git a/bin/fetcher.php b/bin/fetcher.php
index 04a63907e..93a309f36 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -503,7 +503,7 @@ class Fetcher implements CrawlConstants
                     $this->to_crawl = array();
                 }
             } else if ($this->crawl_type == self::ARCHIVE_CRAWL &&
-                    $this->arc_type != "WebArchiveBundle" &&
+                    $this->arc_type != "WebArchiveBundle" &&
                     $this->arc_type != "") { /* case(2) */
                 // An archive crawl with data coming from the name server.
                 crawlLog("MAIN LOOP CASE 2 -- ARCHIVE SCHEDULER (NOT RECRAWL)");
@@ -605,7 +605,7 @@ class Fetcher implements CrawlConstants
             crawlLog("Number of summarized pages ".
                 count($summarized_site_pages));

-            $force_send = (isset($info[self::END_ITERATOR]) &&
+            $force_send = (isset($info[self::END_ITERATOR]) &&
                 $info[self::END_ITERATOR]) ? true : false;
             $this->updateFoundSites($summarized_site_pages, $force_send);

@@ -905,7 +905,7 @@ class Fetcher implements CrawlConstants
                 } else {
                     $update_num = SEEN_URLS_BEFORE_UPDATE_SCHEDULER;
                     crawlLog("Fetch on crawl {$this->crawl_time} was not ".
-                        "halted properly.");
+                        "halted properly.");
                     crawlLog("  Dumping $update_num from old fetch ".
                         "to try to make a clean re-start.");
                     $count = count($this->to_crawl);
@@ -917,7 +917,7 @@ class Fetcher implements CrawlConstants
                     }
                 }
             }
-            if(general_is_a($this->arc_type."Iterator",
+            if(general_is_a($this->arc_type."Iterator",
                     "TextArchiveBundleIterator")) {
                 $result_dir = WORK_DIRECTORY . "/schedules/" .
                     $prefix.self::fetch_archive_iterator.$this->crawl_time;
@@ -1034,7 +1034,7 @@ class Fetcher implements CrawlConstants
         $this->selectCurrentServerAndUpdateIfNeeded(false);

         $chunk = false;
-        if(general_is_a($this->arc_type."Iterator",
+        if(general_is_a($this->arc_type."Iterator",
             "TextArchiveBundleIterator")) {
             $archive_iterator = $this->archive_iterator;
             $chunk = true;
@@ -1043,10 +1043,10 @@ class Fetcher implements CrawlConstants
                 TextArchiveBundleIterator::MAX_RECORD_SIZE;
             if($archive_iterator->buffer_fh && $archive_iterator->current_offset
                 < $max_offset) {
-                crawlLog("Local Iterator Offset: ".
+                crawlLog("Local Iterator Offset: ".
                     $archive_iterator->current_offset);
                 crawlLog("Local Max Offset: ". $max_offset);
-                $info[self::ARC_DATA] =
+                $info[self::ARC_DATA] =
                     $archive_iterator->nextPages(ARCHIVE_BATCH_SIZE);
                 crawlLog("Time to get archive data from local buffer ".
                     changeInMicrotime($start_time));
@@ -1097,8 +1097,8 @@ class Fetcher implements CrawlConstants
                     if($pages[self::ARC_DATA]) {
                         $archive_iterator->makeBuffer($pages[self::ARC_DATA]);
                     }
-                    if(isset($pages[self::HEADER]) &&
-                        is_array($pages[self::HEADER]) &&
+                    if(isset($pages[self::HEADER]) &&
+                        is_array($pages[self::HEADER]) &&
                         $pages[self::HEADER] != array()) {
                         $archive_iterator->header = $pages[self::HEADER];
                     }
@@ -1189,7 +1189,7 @@ class Fetcher implements CrawlConstants
             'crawl_order', self::CACHE_PAGES => 'cache_pages',
             self::INDEXED_FILE_TYPES => 'indexed_file_types',
             self::RESTRICT_SITES_BY_URL => 'restrict_sites_by_url',
-            self::ALLOWED_SITES => 'allowed_sites',
+            self::ALLOWED_SITES => 'allowed_sites',
             self::DISALLOWED_SITES => 'disallowed_sites');
         foreach($update_fields as $info_field => $field) {
             if(isset($info[$info_field])) {
@@ -1200,7 +1200,7 @@ class Fetcher implements CrawlConstants
         if(isset($info[self::PAGE_RULES]) ){
             $rule_string = implode("\n", $info[self::PAGE_RULES]);
             $rule_string = html_entity_decode($rule_string, ENT_QUOTES);
-            $this->page_rule_parser =
+            $this->page_rule_parser =
                 new PageRuleParser($rule_string);
         }
         if(isset($info[self::VIDEO_SOURCES])) {
@@ -1609,7 +1609,7 @@ class Fetcher implements CrawlConstants
      * This method attempts to cull from the doc_info struct the
      * best MAX_LINKS_PER_PAGE. Currently, this is done by first removing
      * links which of filetype or sites the crawler is forbidden from crawl.
-     * Then a crude estimate of the informaation contained in the links test:
+     * Then a crude estimate of the informaation contained in the links test:
      * strlen(gzip(text)) is used to extract the best remaining links.
      *
      * @param array &$doc_info a string with a CrawlConstants::LINKS subarray
@@ -1829,7 +1829,7 @@ class Fetcher implements CrawlConstants
             crawlLog($site_index.". $subdoc_info ".$site[self::URL]);

         } // end for
-        if($force_send || ($this->crawl_type == self::WEB_CRAWL &&
+        if($force_send || ($this->crawl_type == self::WEB_CRAWL &&
             count($this->to_crawl) <= 0 && count($this->to_crawl_again) <= 0) ||
                 (isset($this->found_sites[self::SEEN_URLS]) &&
                 count($this->found_sites[self::SEEN_URLS]) >
@@ -2174,7 +2174,7 @@ class Fetcher implements CrawlConstants
                     crawlLog("Trouble sending to the scheduler, response was:");
                     crawlLog("$info_string");
                     $info = unserialize($info_string);
-                    if(isset($info[self::STATUS]) &&
+                    if(isset($info[self::STATUS]) &&
                         $info[self::STATUS] == self::REDO_STATE) {
                         crawlLog("Server requested last item to be re-sent...");
                         if(isset($info[self::SUMMARY])) {
@@ -2422,4 +2422,4 @@ $fetcher =  new Fetcher($PAGE_PROCESSORS, NAME_SERVER,
     PAGE_RANGE_REQUEST, $INDEXED_FILE_TYPES);
 $fetcher->start();

-?>
+?>
\ No newline at end of file
diff --git a/bin/news_updater.php b/bin/news_updater.php
index bb25982df..593d28dc7 100644
--- a/bin/news_updater.php
+++ b/bin/news_updater.php
@@ -88,6 +88,20 @@ require_once BASE_DIR."/controllers/search_controller.php";
 mb_internal_encoding("UTF-8");
 mb_regex_encoding("UTF-8");

+if (function_exists('lcfirst') === false) {
+    /**
+     *  Lower cases the first letter in a string
+     *
+     *  This function is only defined if the PHP version is before 5.3
+     *  @param string $str  string to be lower cased
+     *  @return string the lower cased string
+     */
+    function lcfirst( $str )
+    {
+        return (string)(strtolower(substr($str, 0, 1)).substr($str, 1));
+    }
+}
+
 /**
  *  Separate process/command-line script which can be used to update
  *  news sources for Yioop. This is as an alternative to using the web app
@@ -111,8 +125,8 @@ class NewsUpdater implements CrawlConstants
     }

     /**
-     *  This is the function that should be called to get the newsupdater to
-     *  start to start updating. Calls init to handle the command-line
+     *  This is the function that should be called to get the newsupdater to
+     *  start to start updating. Calls init to handle the command-line
      *  arguments then enters news_updaters main loop
      */
     function start()
@@ -169,4 +183,4 @@ class NewsUpdater implements CrawlConstants
 $news_updater =  new NewsUpdater();
 $news_updater->start();

-?>
+?>
\ No newline at end of file
diff --git a/configs/config.php b/configs/config.php
index c060656c4..5ee27d755 100644
--- a/configs/config.php
+++ b/configs/config.php
@@ -79,7 +79,7 @@ if(MAINTENANCE_MODE && $_SERVER["SERVER_ADDR"] != $_SERVER["REMOTE_ADDR"]) {
 }

 if(!defined('WORK_DIRECTORY')) {
-/*+++ The next block of code is machine edited, change at
+/*+++ The next block of code is machine edited, change at
 your own risk, please use configure web page instead +++*/
 define('WORK_DIRECTORY', '');
 /*++++++*/
@@ -504,4 +504,4 @@ define ('NUM_RESULTS_PER_PAGE', 10);

 /** Number of recently crawled urls to display on admin screen */
 define ('NUM_RECENT_URLS_TO_DISPLAY', 10);
-?>
+?>
\ No newline at end of file
diff --git a/controllers/admin_controller.php b/controllers/admin_controller.php
index 2e4935c8e..9d4e429d4 100755
--- a/controllers/admin_controller.php
+++ b/controllers/admin_controller.php
@@ -336,7 +336,7 @@ class AdminController extends Controller implements CrawlConstants
             "news_process" => tl('admin_controller_news_process'),
         );
         $profile =  $this->profileModel->getProfile(WORK_DIRECTORY);
-        $data['NEWS_MODE'] = isset($profile['NEWS_MODE']) ?
+        $data['NEWS_MODE'] = isset($profile['NEWS_MODE']) ?
             $profile['NEWS_MODE']: "";
         return $data;
     }
@@ -928,7 +928,7 @@ class AdminController extends Controller implements CrawlConstants
         $crawl_params[self::DISALLOWED_SITES] =
             isset($seed_info['disallowed_sites']['url']) ?
             $seed_info['disallowed_sites']['url'] : array();
-        $crawl_params[self::PAGE_RULES] =
+        $crawl_params[self::PAGE_RULES] =
             isset($seed_info['page_rules']['rule']) ?
             $seed_info['page_rules']['rule'] : array();

@@ -1178,7 +1178,7 @@ class AdminController extends Controller implements CrawlConstants
     }
     /**
      * Cleans a string consisting of lines, typically of urls into an array of
-     * clean lines. This is used in handling data from the crawl options
+     * clean lines. This is used in handling data from the crawl options
      * text areas.
      *
      * @param string $str contains the url data
@@ -1494,13 +1494,13 @@ class AdminController extends Controller implements CrawlConstants
             foreach($copy_options as $main_option => $sub_options) {
                 foreach($sub_options as $sub_option) {
                     if(isset($seed_loaded[$main_option][$sub_option])) {
-                        $seed_info[$main_option][$sub_option] =
+                        $seed_info[$main_option][$sub_option] =
                             $seed_loaded[$main_option][$sub_option];
                     }
                 }
             }
             if(isset($seed_loaded['page_rules'])) {
-                $seed_info['page_rules'] =
+                $seed_info['page_rules'] =
                     $seed_loaded['page_rules'];
             }
             $update_flag = true;
@@ -1570,7 +1570,7 @@ class AdminController extends Controller implements CrawlConstants
         if(!isset($_REQUEST['load_option'])) {
             $data = array_merge($data, $profile);
         } else {
-
+
             $this->updateProfileFields($data, $profile,
                 array('IP_LINK','CACHE_LINK', 'SIMILAR_LINK', 'IN_LINK',
                     'SIGNIN_LINK', 'SUBSEARCH_LINK','WORD_SUGGEST'));
@@ -1652,7 +1652,7 @@ class AdminController extends Controller implements CrawlConstants
             $data['SCRIPT'] .= "\nswitchTab('testoptionstab',".
                 "'crawltimetab', 'searchtimetab');\n";
         }
-
+
         $this->crawlModel->setSeedInfo($seed_info);
         if($change == true && $data['option_type'] != 'test_options') {
             $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".
@@ -1697,7 +1697,7 @@ class AdminController extends Controller implements CrawlConstants
             $site[self::OPERATING_SYSTEM] = "unknown";
             $site[self::LANG] = 'en';
             $site[self::JUST_METAS] = false;
-            if(isset($_REQUEST['page_type']) &&
+            if(isset($_REQUEST['page_type']) &&
                 in_array($_REQUEST['page_type'], $data['MIME_TYPES'])) {
                 $site[self::TYPE] = $_REQUEST['page_type'];
             }
@@ -1729,7 +1729,7 @@ class AdminController extends Controller implements CrawlConstants
                 print_r($after_process, true), "string"), 75, "\n", true);
             $rule_string = implode("\n", $seed_info['page_rules']['rule']);
             $rule_string = html_entity_decode($rule_string, ENT_QUOTES);
-            $page_rule_parser =
+            $page_rule_parser =
                 new PageRuleParser($rule_string);
             $page_rule_parser->executeRuleTrees($site);
             $after_process = array();
@@ -2073,9 +2073,9 @@ class AdminController extends Controller implements CrawlConstants
                     $data["ELEMENT"] = "machinelogElement";
                     $filter= "";
                     if(isset($_REQUEST['f'])) {
-                        $filter =
+                        $filter =
                             $this->clean($_REQUEST['f'], "string");
-                    }
+                    }
                     $data['filter'] = $filter;
                     $data["REFRESH_LOG"] = "&time=". $data["time"];
                     $data["LOG_TYPE"] = "";
@@ -2097,7 +2097,7 @@ class AdminController extends Controller implements CrawlConstants
                         }
                         $data["LOG_FILE_DATA"] = $this->machineModel->getLog(
                             $r["name"], NULL, $filter);
-                        $data["REFRESH_LOG"] .=
+                        $data["REFRESH_LOG"] .=
                             "&arg=log&name=".$r['name'];
                     }
                     if($data["time"] >= 1200) {
@@ -2581,9 +2581,9 @@ class AdminController extends Controller implements CrawlConstants
         $data['SCRIPT'] = "";

         $data['PROFILE'] = false;
-        if(isset($_REQUEST['WORK_DIRECTORY']) || (defined('WORK_DIRECTORY') &&
+        if(isset($_REQUEST['WORK_DIRECTORY']) || (defined('WORK_DIRECTORY') &&
             defined('FIX_NAME_SERVER') && FIX_NAME_SERVER) ) {
-            if(defined('WORK_DIRECTORY') && defined('FIX_NAME_SERVER')
+            if(defined('WORK_DIRECTORY') && defined('FIX_NAME_SERVER')
                 && FIX_NAME_SERVER && !isset($_REQUEST['WORK_DIRECTORY'])) {
                 $_REQUEST['WORK_DIRECTORY'] = WORK_DIRECTORY;
                 $_REQUEST['arg'] = "directory";
@@ -2668,7 +2668,7 @@ class AdminController extends Controller implements CrawlConstants
                         $uri = UrlParser::getPath($_SERVER['REQUEST_URI']);
                         $http = (isset($_SERVER['HTTPS'])) ? "https://" :
                             "http://";
-                        $profile['NAME_SERVER'] =
+                        $profile['NAME_SERVER'] =
                             $http . $_SERVER['SERVER_NAME'] . $uri;
                         $data['NAME_SERVER'] = $profile['NAME_SERVER'];
                         $profile['AUTH_KEY'] = crawlHash(
@@ -2680,7 +2680,7 @@ class AdminController extends Controller implements CrawlConstants
                         $data['ROBOT_INSTANCE'] = $profile['ROBOT_INSTANCE'];
                         if($this->profileModel->updateProfile(
                             $data['WORK_DIRECTORY'], array(), $profile)) {
-                            if((defined('WORK_DIRECTORY') &&
+                            if((defined('WORK_DIRECTORY') &&
                                 $data['WORK_DIRECTORY'] == WORK_DIRECTORY) ||
                                 $this->profileModel->setWorkDirectoryConfigFile(
                                 $data['WORK_DIRECTORY'])) {
@@ -2743,7 +2743,7 @@ class AdminController extends Controller implements CrawlConstants
                 }
             break;
             case "profile":
-                $this->updateProfileFields($data, $profile,
+                $this->updateProfileFields($data, $profile,
                     array('USE_FILECACHE', 'USE_MEMCACHE', "WEB_ACCESS",
                         'RSS_ACCESS', 'API_ACCESS'));
                 $data['DEBUG_LEVEL'] = 0;
@@ -2873,7 +2873,7 @@ class AdminController extends Controller implements CrawlConstants
     setDisplay('advance-robot', {$data['advanced']});
     function toggleAdvance() {
         var advanced = elt('a-settings');
-        advanced.value = (advanced.value =='true')
+        advanced.value = (advanced.value =='true')
             ? 'false' : 'true';
         var value = (advanced.value == 'true') ? true : false;
         setDisplay('advance-configure', value);
@@ -2894,7 +2894,7 @@ EOD;
         $data['SCRIPT'] .=
             "elt('locale').onchange = ".
             "function () { elt('configureProfileForm').submit();};\n";
-
+
         return $data;
     }

@@ -2931,4 +2931,4 @@ EOD;
         }
     }
 }
-?>
+?>
\ No newline at end of file
diff --git a/controllers/fetch_controller.php b/controllers/fetch_controller.php
index 2bc8a8003..af45da888 100755
--- a/controllers/fetch_controller.php
+++ b/controllers/fetch_controller.php
@@ -238,7 +238,7 @@ class FetchController extends Controller implements CrawlConstants
             }
             $pages = false;
             if($archive_iterator && !$archive_iterator->end_of_iterator) {
-                if(general_is_a($archive_iterator,
+                if(general_is_a($archive_iterator,
                     "TextArchiveBundleIterator")) {
                     $pages = $archive_iterator->nextChunk();
                     $chunk = true;
@@ -576,4 +576,4 @@ class FetchController extends Controller implements CrawlConstants
         return $list;
     }
 }
-?>
+?>
\ No newline at end of file
diff --git a/controllers/search_controller.php b/controllers/search_controller.php
index aa8d0a5f3..54820450d 100755
--- a/controllers/search_controller.php
+++ b/controllers/search_controller.php
@@ -113,12 +113,12 @@ class SearchController extends Controller implements CrawlConstants
         if(!$format_info) { return;}
         list($view, $web_flag, $raw, $results_per_page, $limit) = $format_info;

-        list($query, $activity, $arg) =
+        list($query, $activity, $arg) =
             $this->initializeUserAndDefaultActivity($data);

         if($activity == "query" && $this->mirrorHandle()) {return; }

-        list($index_timestamp, $index_info, $save_timestamp) =
+        list($index_timestamp, $index_info, $save_timestamp) =
             $this->initializeIndexInfo($web_flag, $raw, $data);

         if(isset($_REQUEST['q']) && strlen($_REQUEST['q']) > 0
@@ -161,7 +161,7 @@ class SearchController extends Controller implements CrawlConstants
             exit();
         }

-        if($web_flag) {
+        if($web_flag) {
             $this->addSearchViewData($index_info, $no_query, $raw, $view,
                 $subsearches, $data);
         }
@@ -178,7 +178,7 @@ class SearchController extends Controller implements CrawlConstants
     /**
      *  Determines how this query is being run and return variables for the view
      *
-     *  A query might be run as a web-based where HTML is expected as the
+     *  A query might be run as a web-based where HTML is expected as the
      *  output, an RSS query, an API query, or as a serial query from a
      *  name_server or mirror instance back to one of the other queue servers
      *  in a Yioop installation. A query might also request different numbers
@@ -288,7 +288,7 @@ class SearchController extends Controller implements CrawlConstants
     function initializeUserAndDefaultActivity(&$data)
     {
         $arg = false;
-        if(!isset($_REQUEST['a']) || !in_array($_REQUEST['a'],
+        if(!isset($_REQUEST['a']) || !in_array($_REQUEST['a'],
             $this->activities)) {
             $activity = "query";
         } else {
@@ -345,12 +345,12 @@ class SearchController extends Controller implements CrawlConstants

     /**
      *  Determines which crawl or mix timestamp should be in use for this
-     *  query. It also determines info and returns associated with this
+     *  query. It also determines info and returns associated with this
      *  timestamp.
      *
      *  @param bool $web_flag whether this is a web based query or one from
      *      the search API
-     *  @param int  and so should validate against list of known crawls or an
+     *  @param int  and so should validate against list of known crawls or an
      *      internal (say network) query that doesn't require validation
      *      (faster without).
      *  @param array &$data that will eventually be sent to the view. We set
@@ -827,7 +827,7 @@ class SearchController extends Controller implements CrawlConstants
         }
         $time = time();
         $rss_feeds = $this->sourceModel->getMediaSources("rss");
-        if(!$rss_feeds || count($rss_feeds) == 0) {
+        if(!$rss_feeds || count($rss_feeds) == 0) {
             $data["LOG_MESSAGES"] =
                 "No news update as no news feeds.";
             return;
@@ -864,8 +864,8 @@ class SearchController extends Controller implements CrawlConstants
         /*  every 3 hours everything older than a week and rebuild index
             do this every four hours so news articles tend to stay in order
          */
-        if($delta > 3 * SourceModel::ONE_HOUR &&
-          $start_delta > SourceModel::ONE_HOUR/12 &&
+        if($delta > 3 * SourceModel::ONE_HOUR &&
+          $start_delta > SourceModel::ONE_HOUR/12 &&
           $lock_delta > SourceModel::TWO_MINUTES) {
             $this->cronModel->updateCronTime("news_lock");
             $this->cronModel->updateCronTime("news_start_delete", true);
@@ -881,10 +881,10 @@ class SearchController extends Controller implements CrawlConstants
         }
         $update_cron_time = $this->cronModel->getCronTime("news_update");
         $try_cron_time = $this->cronModel->getCronTime("news_try_again");
-
+
         $delta = $time - max($update_cron_time, $try_cron_time);
         // each 15 minutes try to re-get feeds that have no items
-        if((($delta > SourceModel::ONE_HOUR/4 &&
+        if((($delta > SourceModel::ONE_HOUR/4 &&
             $delta < SourceModel::ONE_HOUR) || $delta == 0) &&
             $lock_delta > SourceModel::TWO_MINUTES) {
             $this->cronModel->updateCronTime("news_lock");
@@ -895,7 +895,7 @@ class SearchController extends Controller implements CrawlConstants
             $this->cronModel->saveCronTable();
             return;
         }
-
+
         $delta = $time - $update_cron_time;
         // every hour get items from twenty feeds whose newest items are oldest
         if(($delta > SourceModel::ONE_HOUR || $delta == 0)
@@ -1233,7 +1233,7 @@ class SearchController extends Controller implements CrawlConstants
      *
      * @param string $url to get cached page for
      * @param array $ui_flags array of  ui features which
-     *      should be added to the cache page. For example, "highlight"
+     *      should be added to the cache page. For example, "highlight"
      *      would way search terms should be highlighted, "history"
      *      says add history navigation for all copies of this cache page in
      *      yioop system.
@@ -1259,7 +1259,7 @@ class SearchController extends Controller implements CrawlConstants
      *
      * @param string $url the url of the page to find the cached version of
      * @param array $ui_flags array of  ui features which
-     *      should be added to the cache page. For example, "highlight"
+     *      should be added to the cache page. For example, "highlight"
      *      would say search terms should be highlighted, "history"
      *      says add history navigation for all copies of this cache page in
      *      yioop system. "summaries" says add a toggle headers and extracted
@@ -1367,7 +1367,7 @@ class SearchController extends Controller implements CrawlConstants
         $this->crawlModel->index_name = $crawl_time;
         $crawl_item = $this->crawlModel->getCrawlItem($url, $queue_servers);
         // A crawl item is able to override the default UI_FLAGS
-        if(isset($crawl_item[self::UI_FLAGS]) &&
+        if(isset($crawl_item[self::UI_FLAGS]) &&
             is_string($crawl_item[self::UI_FLAGS])) {
             $ui_flags = explode(",", $crawl_item[self::UI_FLAGS]);
         }
@@ -1536,7 +1536,7 @@ class SearchController extends Controller implements CrawlConstants
      *      in Yioop system
      * @param string $terms from orginal query responsible for cache request
      * @param array $ui_flags array of  ui features which
-     *      should be added to the cache page. For example, "highlight"
+     *      should be added to the cache page. For example, "highlight"
      *      would way search terms should be highlighted, "history"
      *      says add history navigation for all copies of this cache page in
      *      yioop system.
@@ -1616,9 +1616,9 @@ class SearchController extends Controller implements CrawlConstants
         } else {
             $summary_toggle_node = $first_child;
         }
-        if(isset($cache_item[self::KEYWORD_LINKS]) &&
+        if(isset($cache_item[self::KEYWORD_LINKS]) &&
             count($cache_item[self::KEYWORD_LINKS]) > 0) {
-            $keyword_node = $this->createDomBoxNode($dom, $text_align,
+            $keyword_node = $this->createDomBoxNode($dom, $text_align,
                 "zIndex: 1");
             $text_node = $dom->createTextNode("Z@key_links@Z");
             $keyword_node->appendChild($text_node);
@@ -1631,7 +1631,7 @@ class SearchController extends Controller implements CrawlConstants
         }

         if(in_array("version", $ui_flags)) {
-            $version_node =
+            $version_node =
                 $this->createDomBoxNode($dom, $text_align, "zIndex: 1");
             $textNode = $dom->createTextNode(
                 tl('search_controller_cached_version', "Z@url@Z", $date));
@@ -1646,7 +1646,7 @@ class SearchController extends Controller implements CrawlConstants

         //UI for showing history
         if(in_array("history", $ui_flags)) {
-            $history_node = $this->historyUI($crawl_time, $all_crawl_times,
+            $history_node = $this->historyUI($crawl_time, $all_crawl_times,
                 $version_node, $dom, $terms, $hist_ui_open, $url);
         } else {
             $history_node = $dom->createElement('div');
@@ -1714,7 +1714,7 @@ class SearchController extends Controller implements CrawlConstants
     }

     /**
-     *  Creates the toggle link and hidden div for extracted header and
+     *  Creates the toggle link and hidden div for extracted header and
      *  summary element on cache pages
      *
      * @param DOMDocument $dom used to create new nodes to add to body object
@@ -1729,13 +1729,13 @@ class SearchController extends Controller implements CrawlConstants
         $summary_string, $cache_item)
     {
         $first_child = $body->firstChild;
-        $summaryNode = $this->createDomBoxNode($dom, $text_align,
+        $summaryNode = $this->createDomBoxNode($dom, $text_align,
             "display:none;", 'pre');
         $summaryNode->setAttributeNS("","id", "summary-page-id");
         $summaryNode = $body->insertBefore($summaryNode, $first_child);

         if(isset($cache_item[self::HEADER])) {
-            $summary_string = $cache_item[self::HEADER]."\n".
+            $summary_string = $cache_item[self::HEADER]."\n".
                 $summary_string;
         }
         $textNode = $dom->createTextNode($summary_string);
@@ -1766,7 +1766,7 @@ class SearchController extends Controller implements CrawlConstants
     }

     /**
-     * Creates a bordered tag (usually div) in which to put meta content on a
+     * Creates a bordered tag (usually div) in which to put meta content on a
      * page when it is displayed
      *
      * @param DOMDocument $dom representing cache page
@@ -2110,4 +2110,4 @@ class SearchController extends Controller implements CrawlConstants
         $node->appendChild($script);
     }
 }
-?>
+?>
\ No newline at end of file
diff --git a/examples/search_api.php b/examples/search_api.php
index fdae973b8..14cdd8113 100644
--- a/examples/search_api.php
+++ b/examples/search_api.php
@@ -64,20 +64,27 @@ if(!PROFILE) {
  * but a crawl into the WORK_DIRECTORY and that would be used to make the
  * query.
  */
-if(!file_exists(BASE_DIR."/examples/Archive1317414322.zip") ||
-   !file_exists(BASE_DIR."/examples/IndexData1317414322.zip")) {
+$archive = BASE_DIR."/examples/Archive1317414322.zip";
+$index_archive = BASE_DIR."/examples/IndexData1317414322.zip";
+$extract_folder = CRAWL_DIR."/cache";
+if(!file_exists($archive) ||
+   !file_exists($index_archive)) {
    echo "\nSearch API test index doesn't exist, so can't run demo\n\n";
    exit();
 }

-$zip = new ZipArchive();
-$zipH = $zip->open("Archive1317414322.zip");
-$zip->extractTo(CRAWL_DIR."/cache");
-$zip->close();
-$zipH = $zip->open("IndexData1317414322.zip");
-$zip->extractTo(CRAWL_DIR."/cache");
-$zip->close();
-
+if(class_exists("ZipArchive")) {
+    $zip = new ZipArchive();
+    $zipH = $zip->open($archive);
+    $zip->extractTo($extract_folder);
+    $zip->close();
+    $zipH = $zip->open($index_archive);
+    $zip->extractTo($extract_folder);
+    $zip->close();
+} else {
+    exec("unzip $archive -d $extract_folder");
+    exec("unzip $index_archive -d $extract_folder");
+}

 /**
  * The next block of code till +++++ is needed only if you want
diff --git a/index.php b/index.php
index 269fc785d..71b2133d0 100755
--- a/index.php
+++ b/index.php
@@ -120,7 +120,9 @@ if (function_exists('lcfirst') === false) {
      *  @return string the lower cased string
      */
     function lcfirst( $str )
-    { return (string)(strtolower(substr($str,0,1)).substr($str,1));}
+    {
+        return (string)(strtolower(substr($str, 0, 1)).substr($str, 1));
+    }
 }

 $available_controllers = array( "admin", "archive",  "cache", "crawl",
diff --git a/lib/archive_bundle_iterators/database_bundle_iterator.php b/lib/archive_bundle_iterators/database_bundle_iterator.php
index d3fefebe7..b3ea0ee55 100644
--- a/lib/archive_bundle_iterators/database_bundle_iterator.php
+++ b/lib/archive_bundle_iterators/database_bundle_iterator.php
@@ -121,7 +121,7 @@ class DatabaseBundleIterator extends ArchiveBundleIterator
         $ini = parse_ini_file("{$this->iterate_dir}/arc_description.ini");

         $this->dbinfo = array("DBMS" => DBMS, "DB_HOST" => DB_HOST,
-            "DB_NAME" => DB_NAME, "DB_USER" => DB_USER,
+            "DB_NAME" => DB_NAME, "DB_USER" => DB_USER,
             "DB_PASSWORD" => DB_PASSWORD);

         foreach($this->dbinfo as $key => $value) {
@@ -273,7 +273,7 @@ class DatabaseBundleIterator extends ArchiveBundleIterator
     /**
      * Restores  the internal state from the file iterate_status.txt in the
      * result dir such that the next call to nextPages will pick up from just
-     * after the last checkpoint.
+     * after the last checkpoint.
      *
      * @return array the data serialized when saveCheckpoint was called

@@ -287,4 +287,4 @@ class DatabaseBundleIterator extends ArchiveBundleIterator
         return $info;
     }
 }
-?>
+?>
\ No newline at end of file
diff --git a/lib/archive_bundle_iterators/mediawiki_bundle_iterator.php b/lib/archive_bundle_iterators/mediawiki_bundle_iterator.php
index 635af5193..910b92ed2 100644
--- a/lib/archive_bundle_iterators/mediawiki_bundle_iterator.php
+++ b/lib/archive_bundle_iterators/mediawiki_bundle_iterator.php
@@ -223,7 +223,7 @@ class MediaWikiArchiveBundleIterator extends TextArchiveBundleIterator
             array("/{{Redirect2?\|([^{}\|]+)\|([^{}\|]+)\|([^{}\|]+)}}/i",
                 "<div class='indent'>\"$1\". ($2 &rarr;<a href=\"".
                 $base_address."$3\">$3</a>)</div>"),
-            array("/{{Redirect\|([^{}\|]+)}}/i",
+            array("/{{Redirect\|([^{}\|]+)}}/i",
                 "<div class='indent'>\"$1\". (<a href=\"".
                 $base_address. "$1_(disambiguation)\">$1???</a>)</div>"),
             array("/#REDIRECT:\s+\[\[(.+?)\]\]/",
@@ -234,7 +234,7 @@ class MediaWikiArchiveBundleIterator extends TextArchiveBundleIterator
                 "<a href=\"{$base_address}$1\">$1</a>"),
             array("/\[(http[^\s\]]+)\s+(.+?)\]/s",
                 "[<a href=\"$1\">$2</a>]"),
-            array("/\[(http[^\]\s]+)\s*\]/","(<a href=\"$1\">&rarr;</a>)"),
+            array("/\[(http[^\]\s]+)\s*\]/","(<a href=\"$1\">&rarr;</a>)"),
             array("/'''''(.+?)'''''/s", "<b><i>$1</i></b>"),
             array("/'''(.+?)'''/s", "<b>$1</b>"),
             array("/''(.+?)''/s", "<i>$1</i>"),
@@ -380,7 +380,7 @@ class MediaWikiArchiveBundleIterator extends TextArchiveBundleIterator
         $pre_page = preg_replace_callback('/(\A|\n){\|(.*?)\n\|}/s',
             "makeTableCallback", $pre_page);
         $pre_page = preg_replace($this->matches, $this->replaces,$pre_page);
-        $pre_page = preg_replace("/{{Other uses}}/i",
+        $pre_page = preg_replace("/{{Other uses}}/i",
                 "<div class='indent'>\"$1\". (<a href='".
                 $site[self::URL]. "_(disambiguation)'>$pre_url</a>)</div>",
                 $pre_page);
@@ -459,7 +459,7 @@ class MediaWikiArchiveBundleIterator extends TextArchiveBundleIterator
                             if(in_array($field, $wiki_fields)) {
                                 $value = preg_replace($this->matches,
                                     $this->replaces, $value);
-                                $value = strip_tags($value,
+                                $value = strip_tags($value,
                                     '<a><b><i><span><img>');
                             }
                             $ref_data[$field] = $value;
@@ -685,4 +685,4 @@ function fixLinksCallback($matches)
     return $out;
 }

-?>
+?>
\ No newline at end of file
diff --git a/lib/archive_bundle_iterators/text_archive_bundle_iterator.php b/lib/archive_bundle_iterators/text_archive_bundle_iterator.php
index a7d749572..405231a81 100644
--- a/lib/archive_bundle_iterators/text_archive_bundle_iterator.php
+++ b/lib/archive_bundle_iterators/text_archive_bundle_iterator.php
@@ -166,7 +166,7 @@ class TextArchiveBundleIterator extends ArchiveBundleIterator
      * How many bytes at a time should be read from the current archive
      * file into the buffer file. 8192 = BZip2BlockIterator::BLOCK_SIZE
      */
-    const BUFFER_SIZE = 16384000;
+    const BUFFER_SIZE = 16384000;

     /**
      *  Estimate of the maximum size of a record stored in a text archive
@@ -218,7 +218,7 @@ class TextArchiveBundleIterator extends ArchiveBundleIterator
             exit();
         }
         if($this->iterate_dir != false) {
-            foreach(glob("{$this->iterate_dir}/*.$extension", GLOB_BRACE)
+            foreach(glob("{$this->iterate_dir}/*.$extension", GLOB_BRACE)
                 as $filename) {
                 $this->partitions[] = $filename;
             }
@@ -405,7 +405,7 @@ class TextArchiveBundleIterator extends ArchiveBundleIterator
      * @param bool $no_process if true then just return page string found
      *      not any additional meta data.
      * @return mixed associative array for doc or just string of doc
-     *
+     *
      */
     function nextPage($no_process = false)
     {
@@ -414,7 +414,7 @@ class TextArchiveBundleIterator extends ArchiveBundleIterator
         while((preg_match($this->delimiter, $this->buffer, $matches,
             PREG_OFFSET_CAPTURE)) != 1) {
             $block = $this->getFileBlock();
-            if(!$block ||
+            if(!$block ||
                 !$this->checkFileHandle() || $this->checkEof()) {
                 return NULL;
             }
@@ -471,7 +471,7 @@ class TextArchiveBundleIterator extends ArchiveBundleIterator
     }

     /**
-     * Acts as gzread($num_bytes, $archive_file), hiding the fact that
+     * Acts as gzread($num_bytes, $archive_file), hiding the fact that
      * buffering of the archive_file is being done to a buffer file
      *
      * @param int $num_bytes to read from archive file
@@ -489,7 +489,7 @@ class TextArchiveBundleIterator extends ArchiveBundleIterator
     }

     /**
-     * Acts as gzgets(), hiding the fact that
+     * Acts as gzgets(), hiding the fact that
      * buffering of the archive_file is being done to a buffer file
      *
      * @return string from archive file up to next line ending or eof
@@ -540,7 +540,7 @@ class TextArchiveBundleIterator extends ArchiveBundleIterator
             if($this->compression == "plain") {
                 $success = fseek($this->fh, $seek_pos);
             }
-            if($success == -1 || !$this->checkFileHandle()
+            if($success == -1 || !$this->checkFileHandle()
                 || $this->checkEof()) { return false; }
             if(is_resource($this->buffer_fh)) {
                 fclose($this->buffer_fh);
@@ -551,7 +551,7 @@ class TextArchiveBundleIterator extends ArchiveBundleIterator
                 case 'bzip2':
                     $buffer = "";
                     while(strlen($buffer) < $padded_buffer_size) {
-                        while(!is_string($block =
+                        while(!is_string($block =
                             $this->bz2_iterator->nextBlock())) {
                             if($this->bz2_iterator->eof()) {
                                 break 2;
@@ -814,4 +814,4 @@ class TextArchiveBundleIterator extends ArchiveBundleIterator
         return array($tag_info, $tag);
     }
 }
-?>
+?>
\ No newline at end of file
diff --git a/lib/archive_bundle_iterators/warc_archive_bundle_iterator.php b/lib/archive_bundle_iterators/warc_archive_bundle_iterator.php
index f6fb46051..b27aba6ed 100644
--- a/lib/archive_bundle_iterators/warc_archive_bundle_iterator.php
+++ b/lib/archive_bundle_iterators/warc_archive_bundle_iterator.php
@@ -41,7 +41,7 @@ require_once BASE_DIR.

 /**
  * Used to iterate through the records of a collection of warc files stored in
- * a WebArchiveBundle folder. Warc is the newer file format of the
+ * a WebArchiveBundle folder. Warc is the newer file format of the
  * Internet Archive and others for digital preservation:
  * http://www.digitalpreservation.gov/formats/fdd/fdd000236.shtml
  * http://archive-access.sourceforge.net/warc/
@@ -101,8 +101,8 @@ class WarcArchiveBundleIterator extends TextArchiveBundleIterator
         } while(!in_array($page_info['warc-type'], $indexable_records) ||
             substr($page_info[self::URL], 0, 4) == 'dns:');
                 //ignore warcinfo, request, metadata, revisit, etc. records
-        if($no_process) {
-            return $header_and_page;
+        if($no_process) {
+            return $header_and_page;
         }
         unset($page_info['line']);
         unset($page_info['warc-type']);
@@ -136,7 +136,7 @@ class WarcArchiveBundleIterator extends TextArchiveBundleIterator
     function getWarcHeaders()
     {
         $warc_headers = array();
-        $warc_fields = array( 'warc-type' => 'warc-type',
+        $warc_fields = array( 'warc-type' => 'warc-type',
             'warc-target-uri' => self::URL, 'warc-date' => self::TIMESTAMP,
             'warc-ip-address' => self::IP_ADDRESSES,
             'content-length' => self::SIZE, 'warc-record-id' => self::WARC_ID,
@@ -166,4 +166,4 @@ class WarcArchiveBundleIterator extends TextArchiveBundleIterator
         return $warc_headers;
     }
 }
-?>
+?>
\ No newline at end of file
diff --git a/lib/crawl_daemon.php b/lib/crawl_daemon.php
index d595ebb57..95c169414 100644
--- a/lib/crawl_daemon.php
+++ b/lib/crawl_daemon.php
@@ -153,7 +153,7 @@ class CrawlDaemon implements CrawlConstants
                 for($i = 3; $i < count($argv); $i++) {
                     $options .= " ".$argv[$i];
                 }
-                $subname = (!isset($argv[2]) || $argv[2] == 'none') ?
+                $subname = (!isset($argv[2]) || $argv[2] == 'none') ?
                     'none' :self::$subname;
                 $name_prefix = (isset($argv[3])) ? $argv[3] : self::$subname;
                 $name_string = CrawlDaemon::getNameString($name,$name_prefix);
@@ -351,4 +351,4 @@ class CrawlDaemon implements CrawlConstants
     }

 }
- ?>
+ ?>
\ No newline at end of file
diff --git a/lib/index_bundle_iterators/group_iterator.php b/lib/index_bundle_iterators/group_iterator.php
index def778f8b..f4b0e582b 100644
--- a/lib/index_bundle_iterators/group_iterator.php
+++ b/lib/index_bundle_iterators/group_iterator.php
@@ -524,7 +524,7 @@ class GroupIterator extends IndexBundleIterator
                 $max = ($max < $current_rank ) ? $current_rank : $max;
                 $alpha = $relevance_boost * $domain_weights[$hash_host];
                 $sum_score += $alpha * $hash_page[self::DOC_RANK];
-
+
                 $sum_rank += $alpha * $hash_page[self::DOC_RANK];
                 $sum_relevance += $alpha * $hash_page[self::RELEVANCE];
                 $max_proximity = max($max_proximity,
@@ -599,4 +599,4 @@ class GroupIterator extends IndexBundleIterator
     }

 }
-?>
+?>
\ No newline at end of file
diff --git a/lib/index_bundle_iterators/network_iterator.php b/lib/index_bundle_iterators/network_iterator.php
index 2686d8137..36ea11604 100644
--- a/lib/index_bundle_iterators/network_iterator.php
+++ b/lib/index_bundle_iterators/network_iterator.php
@@ -119,7 +119,7 @@ class NetworkIterator extends IndexBundleIterator
      *      archive bundles that we look in for results
      * @param array $filter an array of hashes of domains to filter from
      *      results
-     * @param string $save_timestamp if this timestamp is nonzero, then when
+     * @param string $save_timestamp if this timestamp is nonzero, then when
      *      making queries to separate machines the save_timestamp is sent so
      *      the queries on those machine can make savepoints. Note the
      *      format of save_timestamp is timestamp-query_part where query_part
@@ -317,7 +317,7 @@ class NetworkIterator extends IndexBundleIterator
             if(!isset($sites[$index])) {
                 $sites[$index] = array();
             }
-            $tmp = urlencode(print_r($sites[$index],
+            $tmp = urlencode(print_r($sites[$index],
                 true));
             $title = 'URL not set';
             if(trim($tmp) == "") {
@@ -353,4 +353,4 @@ class NetworkIterator extends IndexBundleIterator
     }

 }
- ?>
+ ?>
\ No newline at end of file
diff --git a/lib/index_dictionary.php b/lib/index_dictionary.php
index 70187feef..5b3191b15 100644
--- a/lib/index_dictionary.php
+++ b/lib/index_dictionary.php
@@ -726,7 +726,7 @@ class IndexDictionary implements CrawlConstants
             $ws = substr($word_string, $word_key_len);
             if($extract) {
                 $tmp = IndexShard::getWordInfoFromString($ws, true);
-                if($tmp[3] < $max_entry_count &&
+                if($tmp[3] < $max_entry_count &&
                     $previous_generation != $tmp[0]) {
                     array_unshift($info, $tmp);
                     $previous_generation = $tmp[0];
@@ -841,4 +841,4 @@ class IndexDictionary implements CrawlConstants


 }
- ?>
+ ?>
\ No newline at end of file
diff --git a/lib/indexing_plugins/recipe_plugin.php b/lib/indexing_plugins/recipe_plugin.php
index c93b800a5..fab9d02ba 100644
--- a/lib/indexing_plugins/recipe_plugin.php
+++ b/lib/indexing_plugins/recipe_plugin.php
@@ -116,7 +116,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants

         $xpath = new DOMXPath($dom);
         $recipes_per_page = $xpath->evaluate(
-            /*allr, f.com, brec, fnet*/
+            /*allr, f.com, brec, fnet*/
             "/html//ul[@class = 'ingredient-wrap'] |
             /html//*[@class = 'pod ingredients'] |
             /html//*[@id='recipe_title'] |
@@ -127,7 +127,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
         if(is_object($recipes_per_page) && $recipes_per_page->length != 0) {
             $recipes_count = $recipes_per_page->length;
             $titles = $xpath->evaluate(
-               /* allr, f.com, brec, fnet   */
+               /* allr, f.com, brec, fnet   */
                "/html//*[@id = 'itemTitle']|
                /html//h1[@class = 'fn'] |
                /html//*[@id='recipe_title'] |
@@ -197,7 +197,7 @@ class RecipePlugin extends IndexingPlugin implements CrawlConstants
         while($more_docs) {
             $results = @$search_controller->queryRequest($query,
                 $num, $limit, 1, $index_name);
-            if(isset($results["PAGES"]) &&
+            if(isset($results["PAGES"]) &&
                 ($num_results = count($results["PAGES"])) > 0 ) {
                 $raw_recipes = array_merge($raw_recipes, $results["PAGES"]);
             }
@@ -675,9 +675,9 @@ class Tree
                     }
                 }
             }
-        $i++;
+            $i++;
         }
-    return $cluster;
+        return $cluster;
     }

    /**
diff --git a/lib/page_rule_parser.php b/lib/page_rule_parser.php
index bb038e1f6..589144be1 100644
--- a/lib/page_rule_parser.php
+++ b/lib/page_rule_parser.php
@@ -47,7 +47,7 @@ require_once BASE_DIR."/lib/crawl_constants.php";
  * and does a function call to manipulate that page. Right now the supported
  * commands are to unset that field value, to add the field and field value to
  * the META_WORD array for the page and to split the field on comma, view this
- * as a search keywords => link text association, and add this the
+ * as a search keywords => link text association, and add this the
  * KEYWORD_LINKS array.
  * These have the syntax:
  * unset(field)
@@ -109,7 +109,7 @@ class PageRuleParser implements CrawlConstants
         $end = '(?:\n|\Z)';
         $substitution = '(/[^/\n]+/)([^/\n]*)/';
         $command = '(\w+)\((\w+)\)';
-        $rule =
+        $rule =
             "@(?:$command$blank*($comment)?$end".
             "|$blank*($literal)$blank*($assignment)$blank*".
             "((".$quote_string.")|($literal)|($substitution))".
@@ -117,7 +117,7 @@ class PageRuleParser implements CrawlConstants
         $matches = array();
         preg_match_all($rule, $page_rules, $matches);
         $rule_trees = array();
-        if(!isset($matches[0]) ||
+        if(!isset($matches[0]) ||
             ($num_rules = count($matches[0])) == 0) { return $rule_trees; }
         for($i = 0; $i < $num_rules; $i++) {
             $tree = array();
@@ -177,7 +177,7 @@ class PageRuleParser implements CrawlConstants
     function executeFunctionRule($tree, &$page_data)
     {
         $allowed_functions = array("unset" => "unsetVariable",
-            "addMetaWord" => "addMetaWord",
+            "addMetaWord" => "addMetaWord",
             "addKeywordLink" => "addKeywordLink");
         if(in_array($tree['func_call'], array_keys($allowed_functions))) {
             $func = $allowed_functions[$tree['func_call']];
@@ -238,7 +238,7 @@ class PageRuleParser implements CrawlConstants
     }

     /**
-     *  Unsets the key $field (or the crawl constant it corresponds to)
+     *  Unsets the key $field (or the crawl constant it corresponds to)
      *  in $page_data. If it is a crawlconstant it doesn't unset it --
      *  it just sets it to the empty string
      *
@@ -304,4 +304,4 @@ class PageRuleParser implements CrawlConstants
     }
 }

-?>
+?>
\ No newline at end of file
diff --git a/lib/phrase_parser.php b/lib/phrase_parser.php
index a11b7c6eb..cc6d66ac9 100755
--- a/lib/phrase_parser.php
+++ b/lib/phrase_parser.php
@@ -474,7 +474,7 @@ class PhraseParser
         $meta_ids[] = 'info:'.crawlHash($site[CrawlConstants::URL]);
         $meta_ids[] = 'code:all';
         $meta_ids[] = 'code:'.$site[CrawlConstants::HTTP_CODE];
-        if(UrlParser::getHost($site[CrawlConstants::URL])."/" ==
+        if(UrlParser::getHost($site[CrawlConstants::URL])."/" ==
             $site[CrawlConstants::URL]) {
             $meta_ids[] = 'host:all'; //used to count number of distinct hosts
         }
@@ -509,7 +509,7 @@ class PhraseParser
                     $meta_ids[] = 'link:'.crawlHash($url);
             }
         }
-        if(isset($site[CrawlConstants::LOCATION]) &&
+        if(isset($site[CrawlConstants::LOCATION]) &&
             count($site[CrawlConstants::LOCATION]) > 0){
             foreach($site[CrawlConstants::LOCATION] as $location) {
                 $meta_ids[] = 'info:'.$location;
@@ -528,7 +528,7 @@ class PhraseParser

         $meta_ids[] = 'media:all';
         if($video_sources != array()) {
-            if(UrlParser::isVideoUrl($site[CrawlConstants::URL],
+            if(UrlParser::isVideoUrl($site[CrawlConstants::URL],
                 $video_sources)) {
                 $meta_ids[] = "media:video";
             } else {
@@ -696,4 +696,4 @@ vaffanculo fok hoer kut lul やりまん 打っ掛け
         $score = $num_unsafe_terms * $unsafe_count/($len + 1);
         return $score;
     }
-}
+}
\ No newline at end of file
diff --git a/lib/processors/rss_processor.php b/lib/processors/rss_processor.php
index 9fbb539fa..f850a90bc 100644
--- a/lib/processors/rss_processor.php
+++ b/lib/processors/rss_processor.php
@@ -234,7 +234,7 @@ class RssProcessor extends TextProcessor
             $xpath->registerNamespace('atom', "http://www.w3.org/2005/Atom");
             $link_nodes = array(
                 "/feed/entry" => array( "url" =>"link", "text" => "title"),
-                "/atom:feed/atom:entry"
+                "/atom:feed/atom:entry"
                     => array( "url" =>"link", "text" => "title"),
             );
         }
@@ -310,4 +310,4 @@ class RssProcessor extends TextProcessor

 }

-?>
+?>
\ No newline at end of file
diff --git a/lib/utility.php b/lib/utility.php
index 33eb46158..46dc937a6 100755
--- a/lib/utility.php
+++ b/lib/utility.php
@@ -1072,7 +1072,7 @@ function readMessage()

 /**
  * Checks if class_1 is the same as class_2 or has class_2 as a parent
- * Behaves like 3 param version (last param true) of PHP is_a function
+ * Behaves like 3 param version (last param true) of PHP is_a function
  * that came into being with Version 5.3.9.
  *
  */
@@ -1081,4 +1081,4 @@ function general_is_a($class_1, $class_2)
     if($class_1 == $class_2) return true;
     return (is_a($class_1, $class_2) || is_subclass_of($class_1, $class_2));
 }
-?>
+?>
\ No newline at end of file
diff --git a/locale/en-US/pages/bot.thtml b/locale/en-US/pages/bot.thtml
index cd05c5b11..928772bb8 100755
--- a/locale/en-US/pages/bot.thtml
+++ b/locale/en-US/pages/bot.thtml
@@ -1,6 +1,6 @@
-title=Bot
-
-description=Describes the web crawler used with this
-web site
-END_HEAD_VARS
+title=Bot
+
+description=Describes the web crawler used with this
+web site
+END_HEAD_VARS
 Please Describe Your Robot
\ No newline at end of file
diff --git a/locale/fr-FR/configure.ini b/locale/fr-FR/configure.ini
index b18424747..0fc285b4d 100755
--- a/locale/fr-FR/configure.ini
+++ b/locale/fr-FR/configure.ini
@@ -864,7 +864,7 @@ manageaccount_element_old_password = "Ancien Mot de passe:"
 manageaccount_element_new_password = "Nouveau Mot de passe:"
 ;
 ; manageaccount_element.php line: 76
-manageaccount_element_retype_password = "Veuillez resaisir votre mot de passe: 	"
+manageaccount_element_retype_password = "Veuillez resaisir votre mot de passe:     "
 ;
 ; manageaccount_element.php line: 84
 manageaccount_element_save = ""
@@ -1582,4 +1582,4 @@ statistics_view_url = ""
 statistics_view_number_hosts = ""
 ;
 ; view.php line: 35
-view_locale_version2 = ""
+view_locale_version2 = ""
\ No newline at end of file
diff --git a/models/crawl_model.php b/models/crawl_model.php
index 9316f1a83..eca88989c 100755
--- a/models/crawl_model.php
+++ b/models/crawl_model.php
@@ -538,7 +538,7 @@ EOT;
                 "disallowed_sites" => array(self::DISALLOWED_SITES, "url"),
                 "seed_sites" => array(self::TO_CRAWL, "url"),
                 "page_rules" => array(self::PAGE_RULES, "rule"),
-                "indexed_file_types" => array(self::INDEXED_FILE_TYPES,
+                "indexed_file_types" => array(self::INDEXED_FILE_TYPES,
                     "extensions"),
             );
             foreach($site_types as $type => $info) {
@@ -585,7 +585,7 @@ EOT;
                 "allowed_sites" => array(self::ALLOWED_SITES,'url'),
                 "disallowed_sites" => array(self::DISALLOWED_SITES, 'url'),
                 "page_rules" => array(self::PAGE_RULES, 'rule'),
-                "indexed_file_types" => array(self::INDEXED_FILE_TYPES,
+                "indexed_file_types" => array(self::INDEXED_FILE_TYPES,
                     "extensions")
             );
             foreach($updatable_site_info as $type => $info) {
@@ -1303,4 +1303,4 @@ EOT;
      }

 }
-?>
+?>
\ No newline at end of file
diff --git a/models/locale_model.php b/models/locale_model.php
index 52f50493b..dd9b1ceff 100644
--- a/models/locale_model.php
+++ b/models/locale_model.php
@@ -442,7 +442,7 @@ class LocaleModel extends Model
         if(isset($this->configure['strings'][$msg_id])) {
             $msg_string = $this->configure['strings'][$msg_id];
         }
-        if($msg_string == "" &&
+        if($msg_string == "" &&
             isset($this->default_configure['strings'][$msg_id])) {
             $msg_string = $this->default_configure['strings'][$msg_id];
         }
@@ -549,7 +549,7 @@ class LocaleModel extends Model
      */
     function extractMergeLocales()
     {
-        $list = $this->getLocaleList();
+        $list = $this->getLocaleList();
             // getLocaleList will also create any missing locale dirs
         $strings =
             $this->getTranslateStrings($this->extract_dirs, $this->extensions);
@@ -698,8 +698,8 @@ EOT;
     /**
      *  Computes a string of the form string_id = 'translation' for a string_id
      *  from among translation array data in $new_configure (most preferred,
-     *  probably come from recent web form data), $old_configure
-     *  (probably from work dir), and $fallback_configure (probably from base
+     *  probably come from recent web form data), $old_configure
+     *  (probably from work dir), and $fallback_configure (probably from base
      *  dir of Yioop instance, least preferred).
      *
      *  @param array $new_configure string_id => translation pairs
@@ -710,7 +710,7 @@ EOT;
      *      has a translation for a string_id
      *  @return string translation in format described above
      */
-    function updateTranslation($new_configure, $old_configure,
+    function updateTranslation($new_configure, $old_configure,
         $fallback_configure, $string_id, $default_value = "")
     {
         $translation = $string_id . ' = "'.
@@ -720,8 +720,8 @@ EOT;
     }

     /**
-     *  Translates a string_id from among translation array data in
-     *  $new_configure (most preferred, probably come from recent web form
+     *  Translates a string_id from among translation array data in
+     *  $new_configure (most preferred, probably come from recent web form
      *  data), $old_configure  (probably from work dir), and $fallback_configure
      *  (probably from base  dir of Yioop instance, least preferred).
      *
@@ -756,7 +756,7 @@ EOT;
      */
     function isTranslated($translations, $string_id)
     {
-        return isset($translations[$string_id]) &&
+        return isset($translations[$string_id]) &&
             strlen($translations[$string_id]) > 0;
     }

@@ -893,4 +893,4 @@ EOT;

     }
 }
- ?>
+ ?>
\ No newline at end of file
diff --git a/models/machine_model.php b/models/machine_model.php
index a5dd6d16a..c0c74630b 100644
--- a/models/machine_model.php
+++ b/models/machine_model.php
@@ -222,7 +222,7 @@ class MachineModel extends Model
      *  @return string containing the last MachineController::LOG_LISTING_LEN
      *      bytes of the log record
      */
-    function getLog($machine_name,
+    function getLog($machine_name,
         $fetcher_num = NULL, $filter="", $is_mirror = false)
     {
         $time = time();
@@ -319,4 +319,4 @@ class MachineModel extends Model
     }
 }

- ?>
+ ?>
\ No newline at end of file
diff --git a/models/model.php b/models/model.php
index 010b13988..9333b338e 100755
--- a/models/model.php
+++ b/models/model.php
@@ -410,7 +410,7 @@ class Model implements CrawlConstants
             FROM TRANSLATION T, LOCALE L, TRANSLATION_LOCALE TL
             WHERE T.IDENTIFIER_STRING = '$string_id' AND
                 L.LOCALE_TAG = '$locale_tag' AND
-                L.LOCALE_ID = TL.LOCALE_ID AND
+                L.LOCALE_ID = TL.LOCALE_ID AND
                 T.TRANSLATION_ID = TL.TRANSLATION_ID LIMIT 1
 EOD;
         $result = $this->db->execute($sql);
@@ -421,4 +421,4 @@ EOD;
         return $string_id;
     }
 }
-?>
+?>
\ No newline at end of file
diff --git a/models/phrase_model.php b/models/phrase_model.php
index 95b61b6ac..cd34df2a6 100755
--- a/models/phrase_model.php
+++ b/models/phrase_model.php
@@ -672,7 +672,7 @@ class PhraseModel extends ParallelModel

     /**
      * Idealistically, this function tries to guess from the query what the
-     * user is looking for. For now, we are just doing simple things like
+     * user is looking for. For now, we are just doing simple things like
      * when a query term is a url and rewriting it to the appropriate meta
      * word.
      *
@@ -899,7 +899,7 @@ class PhraseModel extends ParallelModel
      *      be used during lookup
      * @param string $original_query if set, the original query that corresponds
      *      to $word_structs
-     * @param string $save_timestamp_name if this timestamp is not empty, then
+     * @param string $save_timestamp_name if this timestamp is not empty, then
      *      save iterate position, so can resume on future queries that make
      *      use of the timestamp. If used then $limit ignored and get next $num
      *      docs after $save_timestamp 's previous iterate position.
@@ -1119,7 +1119,7 @@ class PhraseModel extends ParallelModel
      * @param array &$pages of page data without text summaries
      * @param array &$queue_servers array of queue server to find data on
      * @param int $raw only lookup locations if 0
-     * @param bool $groups_with_docs whether to return only groups that
+     * @param bool $groups_with_docs whether to return only groups that
      *      contain at least one doc as opposed to groups with only links
      * @return array pages with summaries added
      */
@@ -1257,7 +1257,7 @@ class PhraseModel extends ParallelModel
         if(!$network_flag) {
             $doc_iterate_hash = crawlHash("site:any");
             $doc_iterate_group_hash = crawlHash("site:doc");
-            if($save_timestamp_name != "") {
+            if($save_timestamp_name != "") {
                 // used for archive crawls of crawl mixes
                 $save_file = CRAWL_DIR.'/schedules/'.self::save_point.
                     $save_timestamp_name.".txt";
@@ -1360,4 +1360,4 @@ class PhraseModel extends ParallelModel

 }

-?>
+?>
\ No newline at end of file
diff --git a/models/profile_model.php b/models/profile_model.php
index f05969292..3e953e8ad 100644
--- a/models/profile_model.php
+++ b/models/profile_model.php
@@ -281,19 +281,19 @@ EOT;
                 USER_NAME VARCHAR(16) UNIQUE,  PASSWORD VARCHAR(16))",
             "CREATE TABLE USER_SESSION(USER_ID INTEGER PRIMARY KEY,
                 SESSION VARCHAR(4096))",
-            "CREATE TABLE TRANSLATION (TRANSLATION_ID INTEGER PRIMARY KEY
+            "CREATE TABLE TRANSLATION (TRANSLATION_ID INTEGER PRIMARY KEY
                 $auto_increment, IDENTIFIER_STRING VARCHAR(512) UNIQUE)",
             "CREATE TABLE LOCALE(LOCALE_ID INTEGER PRIMARY KEY
                 $auto_increment, LOCALE_TAG VARCHAR(16),
                 LOCALE_NAME VARCHAR(256),
                 WRITING_MODE CHAR(5))",
-            "CREATE TABLE TRANSLATION_LOCALE (TRANSLATION_ID INTEGER,
+            "CREATE TABLE TRANSLATION_LOCALE (TRANSLATION_ID INTEGER,
                 LOCALE_ID INTEGER, TRANSLATION VARCHAR(4096) )",
             "CREATE TABLE ROLE (ROLE_ID INTEGER PRIMARY KEY $auto_increment,
                 NAME VARCHAR(512))",
             "CREATE TABLE ROLE_ACTIVITY (ROLE_ID INTEGER, ACTIVITY_ID INTEGER)",
-            "CREATE TABLE ACTIVITY (ACTIVITY_ID INTEGER PRIMARY KEY
-                $auto_increment, TRANSLATION_ID INTEGER,
+            "CREATE TABLE ACTIVITY (ACTIVITY_ID INTEGER PRIMARY KEY
+                $auto_increment, TRANSLATION_ID INTEGER,
                 METHOD_NAME VARCHAR(256))",
             "CREATE TABLE USER_ROLE (USER_ID INTEGER, ROLE_ID INTEGER)",
             "CREATE TABLE CURRENT_WEB_INDEX (CRAWL_TIME INT(11) )",
@@ -488,4 +488,4 @@ EOT;
     }

 }
-?>
+?>
\ No newline at end of file
diff --git a/models/source_model.php b/models/source_model.php
index 89727f5b7..b95af9d0f 100644
--- a/models/source_model.php
+++ b/models/source_model.php
@@ -101,7 +101,7 @@ class SourceModel extends Model
             } else {
                 $sql .= " AND ";
             }
-            $sql .= " NOT EXISTS
+            $sql .= " NOT EXISTS
                 (SELECT * FROM FEED_ITEM F
                 WHERE F.SOURCE_NAME = M.NAME)";
         }
@@ -367,7 +367,7 @@ class SourceModel extends Model
                 // maybe we're dealing with atom rather than rss
                 $nodes = $dom->getElementsByTagName('entry');
                 $rss_elements = array(
-                    "title" => "title", "description" => "summary",
+                    "title" => "title", "description" => "summary",
                     "link" => "link", "guid" => "id", "pubDate" => "updated");
             }
             $max_time = min(self::MAX_EXECUTION_TIME,
@@ -609,4 +609,4 @@ class SourceModel extends Model
         return $meta_ids;
     }
 }
- ?>
+ ?>
\ No newline at end of file
diff --git a/models/user_model.php b/models/user_model.php
index 775678cb2..b28168fc2 100755
--- a/models/user_model.php
+++ b/models/user_model.php
@@ -107,7 +107,7 @@ class UserModel extends Model
             if($translate) {
                 $activities[$i]['ACTIVITY_NAME'] = $translate['ACTIVITY_NAME'];
             }
-            if(!isset($activities[$i]['ACTIVITY_NAME']) ||
+            if(!isset($activities[$i]['ACTIVITY_NAME']) ||
                 $activities[$i]['ACTIVITY_NAME'] == "") {
                 $activities[$i]['ACTIVITY_NAME'] = $this->translateDb(
                     $activities[$i]['IDENTIFIER_STRING'], DEFAULT_LOCALE);
@@ -281,4 +281,4 @@ class UserModel extends Model
     }
 }

- ?>
+ ?>
\ No newline at end of file
diff --git a/scripts/suggest.js b/scripts/suggest.js
index c32ef737d..dec3a238b 100644
--- a/scripts/suggest.js
+++ b/scripts/suggest.js
@@ -269,7 +269,7 @@ function correctSpelling(word)
     var trie_subtree;
     var curr_prob = 0;
     var candidates = known(edits1(word));
-
+
     candidates.push(word);
     var corrected_word = "";
     var correct_threshold = 25;
@@ -749,7 +749,7 @@ function spellCheck()
             var spell_link = "?" + token_name + "=" + csrf_token + "&q="
                 +corrected_query;
             corrected_spell.innerHTML = "<b>" + local_strings.spell
-                +": <a rel='nofollow' href='" + spell_link +
+                +": <a rel='nofollow' href='" + spell_link +
                 "'>"  + corrected_query + "</a></b>";
         }
     }
@@ -766,4 +766,4 @@ ip_field.oncut = function(e) {
     setTimeout(function(){
             onTypeTerm(e,ip_field);
             }, 0);
-}
+}
\ No newline at end of file
diff --git a/views/elements/configure_element.php b/views/elements/configure_element.php
index cd7c482b9..9990c0681 100644
--- a/views/elements/configure_element.php
+++ b/views/elements/configure_element.php
@@ -93,7 +93,7 @@ class ConfigureElement extends Element
                 e($data['WORK_DIRECTORY']); ?>" />
         <?php }?>
         <input type="hidden" name="c" value="admin" />
-        <input type="hidden" name="advanced" id='a-settings' value="<?php
+        <input type="hidden" name="advanced" id='a-settings' value="<?php
             e($data['advanced']); ?>" />
         <input type="hidden" name="<?php e(CSRF_TOKEN); ?>" value="<?php
             e($data[CSRF_TOKEN]); ?>" />
@@ -105,7 +105,7 @@ class ConfigureElement extends Element
         </div>
         <h2><?php e(tl('configure_element_profile_settings'))?></h2>
         <?php if($data['PROFILE']) { ?>
-        <div class="top-margin">[<a href="javascript:toggleAdvance()"><?php
+        <div class="top-margin">[<a href="javascript:toggleAdvance()"><?php
             e(tl('configure_element_toggle_advanced')); ?></a>]</div>
         <?php } ?>
         <div class="bold">
@@ -273,4 +273,4 @@ class ConfigureElement extends Element
     <?php
     }
 }
-?>
+?>
\ No newline at end of file
diff --git a/views/elements/pageoptions_element.php b/views/elements/pageoptions_element.php
index 9da4a0265..df9e955a1 100644
--- a/views/elements/pageoptions_element.php
+++ b/views/elements/pageoptions_element.php
@@ -97,8 +97,8 @@ class PageOptionsElement extends Element
             ?></div>
         <div class="top-margin"><b><label for="cache-pages"><?php
             e(tl('pageoptions_element_save_cache'))?>
-            </label><input
-            id='cache-pages' type="checkbox" name="cache_pages"
+            </label><input
+            id='cache-pages' type="checkbox" name="cache_pages"
             value="true"
             <?php if(isset($data['CACHE_PAGES']) && $data['CACHE_PAGES']) {
                 e("checked='checked'");
@@ -180,7 +180,7 @@ class PageOptionsElement extends Element
         <table class="search-page-all"><tr><td>
         <table class="search-page-table">
         <tr>
-        <td><label for="wd-suggest"><?php
+        <td><label for="wd-suggest"><?php
             e(tl('pageoptions_element_wd_suggest')); ?></label></td>
             <td><input id='wd-suggest' type="checkbox"
             name="WORD_SUGGEST" value="true"
@@ -188,7 +188,7 @@ class PageOptionsElement extends Element
                 $data['WORD_SUGGEST']){
                 e("checked='checked'");}?>
             /></td></tr>
-        <tr><td><label for="subsearch-link"><?php
+        <tr><td><label for="subsearch-link"><?php
             e(tl('pageoptions_element_subsearch_link'));?></label></td><td>
             <input id='subsearch-link'
             type="checkbox" name="SUBSEARCH_LINK" value="true"
@@ -197,7 +197,7 @@ class PageOptionsElement extends Element
                 e("checked='checked'");}?>
             /></td>
         </tr>
-        <tr><td><label for="signin-link"><?php
+        <tr><td><label for="signin-link"><?php
             e(tl('pageoptions_element_signin_link')); ?></label></td><td>
             <input id='signin-link' type="checkbox"
             name="SIGNIN_LINK" value="true"
@@ -273,7 +273,7 @@ class PageOptionsElement extends Element
          <h2><?php e(tl('pageoptions_element_test_page'))?></h2>
         <div class="top-margin"><b><label for="page-type"><?php
             e(tl('pageoptions_element_page_type'))?></label></b>
-            <?php
+            <?php
             $types = $data['MIME_TYPES'];
             $this->view->optionsHelper->render("page-type",
             "page_type", array_combine($types, $types),
@@ -286,7 +286,7 @@ class PageOptionsElement extends Element

         </div>

-        <div class="center slight-pad"><button class="button-box"
+        <div class="center slight-pad"><button class="button-box"
             id="page-button"
             type="submit"><?php if($data['test_options_active'] == "") {
                 e(tl('pageoptions_element_save_options'));
@@ -315,7 +315,7 @@ class PageOptionsElement extends Element
                 e("<h3>".tl('pageoptions_element_extracted_metas')."</h3>");
                 e("<pre>\n{$data['EXTRACTED_META_WORDS']}\n</pre>");
             } ?>
-        <?php
+        <?php
         } ?>
         </div>
         </div>
@@ -359,4 +359,4 @@ class PageOptionsElement extends Element
     <?php
     }
 }
-?>
+?>
\ No newline at end of file
diff --git a/views/machinestatus_view.php b/views/machinestatus_view.php
index 8dab2bec4..6fdb94656 100644
--- a/views/machinestatus_view.php
+++ b/views/machinestatus_view.php
@@ -67,7 +67,7 @@ class MachinestatusView extends View
         } else {
         ?>
         <div class="box">
-        <h3 class="nomargin"><?php
+        <h3 class="nomargin"><?php
             e(tl('machinestatus_view_news_updater'));
             $log_url = $base_url ."log&amp;name=news";
         ?></h3>
@@ -82,7 +82,7 @@ class MachinestatusView extends View
         <td><?php $this->optionsHelper->render("news-mode",
             "news_mode", $data['NEWS_MODES'], $data['NEWS_MODE'], true);?>
         </td>
-        <td>[<a href="<?php e($log_url);?>"><?php
+        <td>[<a href="<?php e($log_url);?>"><?php
             e(tl('machinestatus_view_log'));?></a>]</td>
         </tr></table>
         </form>
@@ -179,4 +179,4 @@ class MachinestatusView extends View
     }
     }
 }
-?>
+?>
\ No newline at end of file

ViewGit