Adds support for recrawling existing crawls, indexing arc files. Makes it easier to add stemmers for new languages. Stores HTTP response messages in archive and extracts more meta data from these. Fixes a stemming bug with file_put_contents, a=chris

Chris Pollett [2011-01-19, 08h]
Adds support for recrawling existing crawls, indexing arc files. Makes it easier to add stemmers for new languages. Stores HTTP response messages in archive and extracts more meta data from these. Fixes a stemming bug with file_put_contents, a=chris
Filename
bin/arc_tool.php
bin/fetcher.php
bin/queue_server.php
configs/config.php
configs/default_crawl.ini
controllers/admin_controller.php
controllers/search_controller.php
controllers/settings_controller.php
css/search.css
index.php
lib/archive_bundle_iterators/arc_archive_bundle_iterator.php
lib/archive_bundle_iterators/archive_bundle_iterator.php
lib/archive_bundle_iterators/web_archive_bundle_iterator.php
lib/crawl_constants.php
lib/fetch_url.php
lib/index_archive_bundle.php
lib/index_bundle_iterators/group_iterator.php
lib/phrase_parser.php
lib/processors/html_processor.php
lib/processors/rss_processor.php
lib/processors/sitemap_processor.php
lib/processors/xml_processor.php
lib/porter_stemmer.php
locale/ar/configure.ini
locale/de/configure.ini
locale/en-US/configure.ini
locale/en-US/statistics.txt
locale/es/configure.ini
locale/fr-FR/configure.ini
locale/he/configure.ini
locale/in-ID/configure.ini
locale/it/configure.ini
locale/ja/configure.ini
locale/ja/statistics.txt
locale/ko/configure.ini
locale/ko/statistics.txt
locale/pl/configure.ini
locale/pt/configure.ini
locale/ru/configure.ini
locale/th/configure.ini
locale/vi-VN/configure.ini
locale/zh-CN/configure.ini
models/crawl_model.php
models/datasources/datasource_manager.php
models/phrase_model.php
views/elements/crawloptions_element.php
diff --git a/bin/arc_tool.php b/bin/arc_tool.php
index b78211e5f..e1761d2be 100644
--- a/bin/arc_tool.php
+++ b/bin/arc_tool.php
@@ -268,15 +268,15 @@ class ArcTool implements CrawlConstants
         $generation = 0;
         while($seen < $total && $generation < $num_generations) {
             $partition = $archive->getPartition($generation, false);
-            if($archive->count < $start && $seen < $start) {
+            if($partition->count < $start && $seen < $start) {
                 $generation++;
-                $seen += $this->count;
+                $seen += $partition->count;
                 continue;
             }
             $seen_generation = 0;
-            while($seen < $total && $seen_generation < $archive->count) {
+            while($seen < $total && $seen_generation < $partition->count) {
                 $num_to_get = min($total - $seen,
-                    $archive->count - $seen_generation,
+                    $partition->count - $seen_generation,
                     self::MAX_BUFFER_DOCS);
                 $objects = $partition->nextObjects($num_to_get);
                 $seen += $num_to_get;
diff --git a/bin/fetcher.php b/bin/fetcher.php
index fb9d40254..a088435c0 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -60,13 +60,17 @@ require_once BASE_DIR."/models/datasources/".DBMS."_manager.php";
  */
 require_once BASE_DIR."/lib/web_archive_bundle.php";

+/** get available archive iterators */
+foreach(glob(BASE_DIR."/lib/archive_bundle_iterators/*_bundle_iterator.php")
+    as $filename) {
+    require_once $filename;
+}
+
 /** get processors for different file types */
 foreach(glob(BASE_DIR."/lib/processors/*_processor.php") as $filename) {
     require_once $filename;
 }

-/** To support English language stemming of words (jumps, jumping --> jump)*/
-require_once BASE_DIR."/lib/porter_stemmer.php";
 /** Used to manipulate urls*/
 require_once BASE_DIR."/lib/url_parser.php";
 /** Used to extract summaries from web pages*/
@@ -208,6 +212,21 @@ class Fetcher implements CrawlConstants
      */
     var $crawl_order;

+    /**
+     * Indicates the kind of crawl being performed: self::WEB_CRAWL indicates
+     * a new crawl of the web; self::ARCHIVE_CRAWL indicates a crawl of an
+     * existing web archive
+     * @var string
+     */
+    var $crawl_type;
+
+    /**
+     * If the crawl_type is self::ARCHIVE_CRAWL, then crawl_index is the
+     * timestamp of the existing archive to crawl
+     * @var string
+     */
+    var $crawl_index;
+
     /**
      * Sets up the field variables for that crawling can begin
      *
@@ -229,6 +248,9 @@ class Fetcher implements CrawlConstants
         $this->crawl_time = NULL;
         $this->schedule_time = NULL;

+        $this->crawl_type = self::WEB_CRAWL;
+        $this->crawl_index = NULL;
+
         $this->to_crawl = array();
         $this->to_crawl_again = array();
         $this->found_sites = array();
@@ -275,8 +297,6 @@ class Fetcher implements CrawlConstants
         $this->checkCrawlTime();

         while ($info[self::STATUS] != self::STOP_STATE) {
-
-
             $fetcher_message_file = CRAWL_DIR."/schedules/fetcher_messages.txt";
             if(file_exists($fetcher_message_file)) {
                 $info = unserialize(file_get_contents($fetcher_message_file));
@@ -286,6 +306,7 @@ class Fetcher implements CrawlConstants
             }

             $info = $this->checkScheduler();
+
             if(!isset($info[self::STATUS])) {
                 $info[self::STATUS] = self::CONTINUE_STATE;
             }
@@ -328,38 +349,19 @@ class Fetcher implements CrawlConstants
                 $this->deleteOldCrawls($info[self::SAVED_CRAWL_TIMES]);
             }

-            $start_time = microtime();
-            $can_schedule_again = false;
-            if(count($this->to_crawl) > 0)  {
-                $can_schedule_again = true;
-            }
-            $sites = $this->getFetchSites();
-            if(!$sites) {
-                crawlLog("No seeds to fetch...");
-                sleep(max(0, ceil(
-                    MINIMUM_FETCH_LOOP_TIME - changeInMicrotime($start_time))));
-                continue;
-            }
-
-            $site_pages = FetchUrl::getPages($sites, true);
+            switch($this->crawl_type)
+            {
+                case self::WEB_CRAWL:
+                    $downloaded_pages =  $this->downloadPagesWebCrawl();
+                break;

-            list($downloaded_pages, $schedule_again_pages) =
-                $this->reschedulePages($site_pages);
-
-            if($can_schedule_again == true) {
-                //only schedule to crawl again on fail sites without crawl-delay
-                foreach($schedule_again_pages as $schedule_again_page) {
-                    if($schedule_again_page[self::CRAWL_DELAY] == 0) {
-                        $this->to_crawl_again[] =
-                            array($schedule_again_page[self::URL],
-                                $schedule_again_page[self::WEIGHT],
-                                $schedule_again_page[self::CRAWL_DELAY]
-                            );
-                    }
-                }
+                case self::ARCHIVE_CRAWL:
+                    $downloaded_pages =  $this->downloadPagesArchiveCrawl();
+                break;
             }

             $start_time = microtime();
+
             $summarized_site_pages =
                 $this->processFetchPages($downloaded_pages);

@@ -373,7 +375,93 @@ class Fetcher implements CrawlConstants

         crawlLog("Fetcher shutting down!!");
     }
-
+
+    /**
+     * Get a list of urls from the current fetch batch provided by the queue
+     * server. Then downloads these pages. Finally, reschedules, if
+     * possible, pages that did not successfully get downloaded.
+     *
+     * @return array an associative array of web pages and meta data
+     *  fetched from the internet
+     */
+    function downloadPagesWebCrawl()
+    {
+        $start_time = microtime();
+        $can_schedule_again = false;
+        if(count($this->to_crawl) > 0)  {
+            $can_schedule_again = true;
+        }
+        $sites = $this->getFetchSites();
+
+        if(!$sites) {
+            crawlLog("No seeds to fetch...");
+            sleep(max(0, ceil(
+                MINIMUM_FETCH_LOOP_TIME - changeInMicrotime($start_time))));
+            return array();
+        }
+
+        $site_pages = FetchUrl::getPages($sites, true);
+
+        list($downloaded_pages, $schedule_again_pages) =
+            $this->reschedulePages($site_pages);
+
+        if($can_schedule_again == true) {
+            //only schedule to crawl again on fail sites without crawl-delay
+            foreach($schedule_again_pages as $schedule_again_page) {
+                if($schedule_again_page[self::CRAWL_DELAY] == 0) {
+                    $this->to_crawl_again[] =
+                        array($schedule_again_page[self::URL],
+                            $schedule_again_page[self::WEIGHT],
+                            $schedule_again_page[self::CRAWL_DELAY]
+                        );
+                }
+            }
+        }
+
+        return $downloaded_pages;
+    }
+
+    /**
+     * Extracts NUM_MULTI_CURL_PAGES from the current Archive Bundle that is
+     * being recrawled.
+     *
+     * @return array an associative array of web pages and meta data from
+     *      the archive bundle being iterated over
+     */
+    function downloadPagesArchiveCrawl()
+    {
+        $base_name = CRAWL_DIR.'/cache/'.self::archive_base_name.
+            $this->crawl_index;
+        $pages = array();
+        if(!isset($this->archive_iterator->iterate_timestamp) ||
+            $this->archive_iterator->iterate_timestamp != $this->crawl_index ||
+            $this->archive_iterator->result_timestamp != $this->crawl_time) {
+            if(!file_exists($base_name)){
+                crawlLog("Recrawl archive with timestamp" .
+                    " {$this->crawl_index} does not exist!");
+                return $pages;
+            } else {
+                if(file_exists("$base_name/arc_type.txt")) {
+                    $arctype = trim(file_get_contents(
+                        "$base_name/arc_type.txt"));
+                } else {
+                    $arctype = "WebArchiveBundle";
+                }
+                $iterator_name = $arctype."Iterator";
+                $this->archive_iterator =
+                    new $iterator_name($this->crawl_index, $this->crawl_time);
+                if($this->archive_iterator == NULL) {
+                    crawlLog("Error creating archive iterator!!");
+                    return $pages;
+                }
+            }
+        }
+        if(!$this->archive_iterator->end_of_iterator) {
+            $pages = $this->archive_iterator->nextPages(NUM_MULTI_CURL_PAGES);
+        }
+        return $pages;
+    }
+
     /**
      * Deletes any crawl web archive bundles not in the provided array of crawls
      *
@@ -437,8 +525,9 @@ class Fetcher implements CrawlConstants
     function checkScheduler()
     {
         $info = array();
-
-        if(count($this->to_crawl) > 0 || count($this->to_crawl_again) > 0) {
+        if((count($this->to_crawl) > 0 || count($this->to_crawl_again) > 0) &&
+           (!isset($this->archive_iterator->end_of_iterator) ||
+            !$this->archive_iterator->end_of_iterator)) {
             $info[self::STATUS]  = self::CONTINUE_STATE;
             return;
         }
@@ -456,6 +545,12 @@ class Fetcher implements CrawlConstants
         $tok = strtok($info_string, "\n");
         $info = unserialize(base64_decode($tok));

+        if(isset($info[self::CRAWL_TYPE])) {
+            $this->crawl_type = $info[self::CRAWL_TYPE];
+        }
+        if(isset($info[self::CRAWL_INDEX])) {
+            $this->crawl_index = $info[self::CRAWL_INDEX];
+        }
         if(isset($info[self::CRAWL_ORDER])) {
             $this->crawl_order = $info[self::CRAWL_ORDER];
         }
@@ -618,7 +713,7 @@ class Fetcher implements CrawlConstants
         $num_items = $this->web_archive->count;

         $i = 0;
-
+
         foreach($site_pages as $site) {
             $response_code = $site[self::HTTP_CODE];

@@ -647,6 +742,13 @@ class Fetcher implements CrawlConstants

             if(isset($PAGE_PROCESSORS[$type])) {
                 $page_processor = $PAGE_PROCESSORS[$type];
+                if($page_processor == "TextProcessor" ||
+                    get_parent_class($page_processor) == "TextProcessor") {
+                    $text_data =true;
+                } else {
+                    $text_data =false;
+                }
+
             } else {
                 continue;
             }
@@ -656,6 +758,12 @@ class Fetcher implements CrawlConstants
             $doc_info = $processor->process($site[self::PAGE],
                 $site[self::URL]);

+            $stored_fields = array(self::URL, self::HEADER, self::PAGE);
+            $summary_fields = array(self::IP_ADDRESSES, self::WEIGHT,
+                self::TIMESTAMP, self::TYPE, self::ENCODING, self::HTTP_CODE,
+                self::HASH, self::SERVER, self::SERVER_VERSION,
+                self::OPERATING_SYSTEM, self::MODIFIED);
+
             if($doc_info) {
                 $site[self::DOC_INFO] =  $doc_info;

@@ -666,37 +774,31 @@ class Fetcher implements CrawlConstants

                 }

-                if($site[self::TYPE] != "text/html" ) {
+                if($text_data) {
                     if(isset($doc_info[self::PAGE])) {
                         $site[self::PAGE] = $doc_info[self::PAGE];
                     } else {
                         $site[self::PAGE] = NULL;
                     }
-
+                }
+                if(!isset($site[self::ENCODING])) {
+                    $site[self::ENCODING] = "UTF-8";
                 }

-
-                $stored_site_pages[$i][self::URL] = $site[self::URL];
-                $stored_site_pages[$i][self::IP_ADDRESSES] =
-                    $site[self::IP_ADDRESSES];
-                $stored_site_pages[$i][self::TIMESTAMP] =
-                    $site[self::TIMESTAMP];
-                $stored_site_pages[$i][self::TYPE] = $site[self::TYPE];
-                if(isset($site[self::ENCODING])) {
-                    $encoding = $site[self::ENCODING];
-                } else {
-                    $encoding = "UTF-8";
+                foreach($summary_fields as $field) {
+                    if(isset($site[$field])) {
+                        $stored_site_pages[$i][$field] = $site[$field];
+                        $summarized_site_pages[$i][$field] = $site[$field];
+                    }
+                }
+                foreach($stored_fields as $field) {
+                    if(isset($site[$field])) {
+                        $stored_site_pages[$i][$field] = $site[$field];
+                    }
                 }
-                $stored_site_pages[$i][self::ENCODING] = $encoding;
-                $stored_site_pages[$i][self::HTTP_CODE] =
-                    $site[self::HTTP_CODE];
-                $stored_site_pages[$i][self::HASH] = $site[self::HASH];
-                $stored_site_pages[$i][self::PAGE] = $site[self::PAGE];

                 $summarized_site_pages[$i][self::URL] =
                     strip_tags($site[self::URL]);
-                $summarized_site_pages[$i][self::IP_ADDRESSES] =
-                    $site[self::IP_ADDRESSES];
                 $summarized_site_pages[$i][self::TITLE] = strip_tags(
                     $site[self::DOC_INFO][self::TITLE]);
                     // stripping html to be on the safe side
@@ -705,15 +807,10 @@ class Fetcher implements CrawlConstants
                 if(isset($site[self::DOC_INFO][self::JUST_METAS])) {
                     $summarized_site_pages[$i][self::JUST_METAS] = true;
                 }
-                $summarized_site_pages[$i][self::TIMESTAMP] =
-                    $site[self::TIMESTAMP];
-                $summarized_site_pages[$i][self::ENCODING] = $encoding;
-                $summarized_site_pages[$i][self::HASH] = $site[self::HASH];
-                $summarized_site_pages[$i][self::TYPE] = $site[self::TYPE];
-                $summarized_site_pages[$i][self::HTTP_CODE] =
-                    $site[self::HTTP_CODE];
-                $summarized_site_pages[$i][self::WEIGHT] = $site[self::WEIGHT];
-
+                if(isset($site[self::DOC_INFO][self::LANG])) {
+                    $summarized_site_pages[$i][self::LANG] =
+                        $site[self::DOC_INFO][self::LANG];
+                }
                 if(isset($site[self::DOC_INFO][self::LINKS])) {
                     $summarized_site_pages[$i][self::LINKS] =
                         $site[self::DOC_INFO][self::LINKS];
@@ -847,7 +944,8 @@ class Fetcher implements CrawlConstants
                     $this->found_sites[self::ROBOT_TXT][$host][
                         self::CRAWL_DELAY] = $site[self::CRAWL_DELAY];
                 }
-                if(isset($site[self::LINKS])) {
+                if(isset($site[self::LINKS])
+                    && $this->crawl_type == self::WEB_CRAWL) {
                     $num_links = count($site[self::LINKS]);
                     //robots pages might have sitemaps links on them
                     $this->addToCrawlSites($site[self::LINKS],
@@ -855,7 +953,8 @@ class Fetcher implements CrawlConstants
                 }
             } else {
                 $this->found_sites[self::SEEN_URLS][] = $site;
-                if(isset($site[self::LINKS])) {
+                if(isset($site[self::LINKS])
+                    && $this->crawl_type == self::WEB_CRAWL) {
                     if(!isset($this->found_sites[self::TO_CRAWL])) {
                         $this->found_sites[self::TO_CRAWL] = array();
                     }
@@ -878,7 +977,9 @@ class Fetcher implements CrawlConstants
         if((count($this->to_crawl) <= 0 && count($this->to_crawl_again) <= 0) ||
             ( isset($this->found_sites[self::SEEN_URLS]) &&
             count($this->found_sites[self::SEEN_URLS]) >
-            SEEN_URLS_BEFORE_UPDATE_SCHEDULER)) {
+            SEEN_URLS_BEFORE_UPDATE_SCHEDULER) ||
+            ($this->crawl_type == self::ARCHIVE_CRAWL &&
+            $this->archive_iterator->end_of_iterator)) {
             $this->updateScheduler();
         }

@@ -1095,11 +1196,18 @@ class Fetcher implements CrawlConstants
              */
             if(!isset($site[self::JUST_METAS])) {
                 $phrase_string =
-                    mb_ereg_replace("[[:punct:]]", " ", $site[self::TITLE] .
-                       " ". $site[self::DESCRIPTION]);
+                    mb_ereg_replace(PUNCT, " ", $site[self::TITLE] .
+                       " ". $site[self::DESCRIPTION]);
+                if(isset($site[self::LANG])) {
+                    $lang = $site[self::LANG];
+                } else {
+                    $lang = NULL;
+                }
                 $word_counts =
-                    PhraseParser::extractPhrasesAndCount($phrase_string);
+                    PhraseParser::extractPhrasesAndCount($phrase_string,
+                        MAX_PHRASE_LEN, $lang);
             }
+
             $meta_ids = array();

             /*
@@ -1124,7 +1232,35 @@ class Fetcher implements CrawlConstants
             if(strlen($url_type) > 0) {
                 $meta_ids[] = 'filetype:'.$url_type;
             }
-
+            if(isset($site[self::SERVER])) {
+                $meta_ids[] = 'server:'.strtolower($site[self::SERVER]);
+            }
+            if(isset($site[self::SERVER_VERSION])) {
+                $meta_ids[] = 'version:'.
+                    $site[self::SERVER_VERSION];
+            }
+            if(isset($site[self::OPERATING_SYSTEM])) {
+                $meta_ids[] = 'os:'.strtolower($site[self::OPERATING_SYSTEM]);
+            }
+            if(isset($site[self::MODIFIED])) {
+                $modified = $site[self::MODIFIED];
+                $meta_ids[] = 'modified:'.date('Y', $modified);
+                $meta_ids[] = 'modified:'.date('Y-m', $modified);
+                $meta_ids[] = 'modified:'.date('Y-m-d', $modified);
+            }
+            if(isset($site[self::TIMESTAMP])) {
+                $date = $site[self::TIMESTAMP];
+                $meta_ids[] = 'date:'.date('Y', $date);
+                $meta_ids[] = 'date:'.date('Y-m', $date);
+                $meta_ids[] = 'date:'.date('Y-m-d', $date);
+            }
+            if(isset($site[self::LANG])) {
+                $lang_parts = explode("-", $site[self::LANG]);
+                $meta_ids[] = 'lang:'.$lang_parts[0];
+                if(isset($lang_parts[1])){
+                    $meta_ids[] = 'lang:'.$site[self::LANG];
+                }
+            }
             // handles user added meta words
             if(isset($this->meta_words)) {
                 $matches = array();
@@ -1151,7 +1287,8 @@ class Fetcher implements CrawlConstants
             //store inlinks so they can be searched by
             $num_links = count($site[self::LINKS]);
             if($num_links > 0) {
-                $link_weight = $site[self::WEIGHT]/$num_links;
+                $weight = (isset($site[self::WEIGHT])) ? $site[self::WEIGHT] :1;
+                $link_weight = $weight/$num_links;
             } else {
                 $link_weight = 0;
             }
diff --git a/bin/queue_server.php b/bin/queue_server.php
index d9aaf65cf..1bcb5ca48 100755
--- a/bin/queue_server.php
+++ b/bin/queue_server.php
@@ -116,6 +116,20 @@ class QueueServer implements CrawlConstants
      * @var string
      */
     var $crawl_order;
+    /**
+     * Indicates the kind of crawl being performed: self::WEB_CRAWL indicates
+     * a new crawl of the web; self::ARCHIVE_CRAWL indicates a crawl of an
+     * existing web archive
+     * @var string
+     */
+    var $crawl_type;
+
+    /**
+     * If the crawl_type is self::ARCHIVE_CRAWL, then crawl_index is the
+     * timestamp of the existing archive to crawl
+     * @var string
+     */
+    var $crawl_index;
     /**
      * Says whether the $allowed_sites array is being used or not
      * @var bool
@@ -244,7 +258,6 @@ class QueueServer implements CrawlConstants
             //check for orphaned queue bundles
             $this->deleteOrphanedBundles();

-            $count = $this->web_queue->to_crawl_queue->count;

             $this->processIndexData();
             if(time() - $this->last_index_save_time > FORCE_SAVE_TIME){
@@ -254,28 +267,40 @@ class QueueServer implements CrawlConstants
                 crawlLog("... Save time".(changeInMicrotime($start_time)));
             }

-            $this->processRobotUrls();
+            switch($this->crawl_type)
+            {
+                case self::WEB_CRAWL:
+                    $this->processRobotUrls();

-            if($count < NUM_URLS_QUEUE_RAM -
-                SEEN_URLS_BEFORE_UPDATE_SCHEDULER * MAX_LINKS_PER_PAGE) {
-                $info = $this->processQueueUrls();
-            }
+                    $count = $this->web_queue->to_crawl_queue->count;

-            if($count > 0) {
-                $top = $this->web_queue->peekQueue();
-                if($top[1] < MIN_QUEUE_WEIGHT) {
-                    crawlLog("Normalizing Weights!!\n");
-                    $this->web_queue->normalize();
-                    /* this will undercount the weights of URLS
-                       from fetcher data that have not completed
-                     */
-                 }
-
-                if(!file_exists(CRAWL_DIR."/schedules/schedule.txt")) {
-                    $this->produceFetchBatch();
-                }
-            }
+                    if($count < NUM_URLS_QUEUE_RAM -
+                        SEEN_URLS_BEFORE_UPDATE_SCHEDULER * MAX_LINKS_PER_PAGE) {
+                        $info = $this->processQueueUrls();
+                    }
+
+                    if($count > 0) {
+                        $top = $this->web_queue->peekQueue();
+                        if($top[1] < MIN_QUEUE_WEIGHT) {
+                            crawlLog("Normalizing Weights!!\n");
+                            $this->web_queue->normalize();
+                            /* this will undercount the weights of URLS
+                               from fetcher data that have not completed
+                             */
+                         }

+                        if(!file_exists(CRAWL_DIR."/schedules/schedule.txt")) {
+                            $this->produceFetchBatch();
+                        }
+                    }
+                break;
+                case self::ARCHIVE_CRAWL:
+                    $this->processRecrawlRobotUrls();
+                    if(!file_exists(CRAWL_DIR."/schedules/schedule.txt")) {
+                        $this->writeArchiveCrawlInfo();
+                    }
+                break;
+            }
             crawlLog("Taking five second sleep...");
             sleep(5);
         }
@@ -283,6 +308,77 @@ class QueueServer implements CrawlConstants
         crawlLog("Queue Server shutting down!!");
     }

+    /**
+     *
+     */
+    function writeArchiveCrawlInfo()
+    {
+        $schedule_time = time();
+        $first_line = $this->calculateScheduleMetaInfo($schedule_time);
+        $fh = fopen(CRAWL_DIR."/schedules/schedule.txt", "wb");
+        fwrite($fh, $first_line);
+        fclose($fh);
+
+        $schedule_dir =
+            CRAWL_DIR."/schedules/".
+                self::schedule_data_base_name.$this->crawl_time;
+        $this->processDataFile($schedule_dir, "processRecrawlDataArchive");
+
+    }
+
+    function processRecrawlRobotUrls()
+    {
+        crawlLog("Checking for robots.txt files to process...");
+        $robot_dir =
+            CRAWL_DIR."/schedules/".
+                self::robot_data_base_name.$this->crawl_time;
+
+        $this->processDataFile($robot_dir, "processRecrawlRobotArchive");
+        crawlLog("done. ");
+    }
+
+    function processRecrawlRobotArchive($file)
+    {
+        crawlLog("Deleting unneeded robot schedule files");
+
+        unlink($file);
+    }
+
+    /**
+     *
+     */
+    function &getDataArchiveFileData($file)
+    {
+        crawlLog("Processing File: $file");
+
+        $fh = fopen($file, "rb");
+        $machine_string = fgets($fh);
+        $len = strlen($machine_string);
+        if($len > 0) {
+            $machine_info = unserialize(base64_decode($machine_string));
+        }
+        $sites = unserialize(gzuncompress(base64_decode(
+            urldecode(fread($fh, filesize($file) - $len))
+            )));
+        fclose($fh);
+
+        if(isset($machine_info[self::MACHINE])) {
+            $this->most_recent_fetcher = & $machine_info[self::MACHINE];
+            unset($machine_info);
+        }
+        return $sites;
+    }
+    /**
+     *
+     */
+    function processRecrawlDataArchive($file)
+    {
+        $sites = & $this->getDataArchiveFileData($file);
+        unlink($file);
+        $this->writeCrawlStatus($sites);
+    }
+
+
     /**
      * Handles messages passed via files to the QueueServer.
      *
@@ -307,6 +403,12 @@ class QueueServer implements CrawlConstants
                     $this->startCrawl($info);
                     crawlLog(
                         "Starting new crawl. Timestamp:".$this->crawl_time);
+                    if($this->crawl_type == self::WEB_CRAWL) {
+                        crawlLog("Performing a web crawl!");
+                    } else {
+                        crawlLog("Performing an archive crawl of ".
+                            "archive with timestamp ".$this->crawl_index);
+                    }
                 break;

                 case "STOP_CRAWL":
@@ -367,10 +469,10 @@ class QueueServer implements CrawlConstants
      */
     function indexSave()
     {
+        $this->last_index_save_time = time();
         if(isset($this->index_archive) && $this->index_dirty) {
             $this->index_archive->forceSave();
             $this->index_dirty = false;
-            $this->last_index_save_time = time();
             // chmod so apache can also write to these directories
             $this->db->setWorldPermissionsRecursive(
                 CRAWL_DIR.'/cache/'.
@@ -392,6 +494,8 @@ class QueueServer implements CrawlConstants

         $read_from_info = array(
             "crawl_order" => self::CRAWL_ORDER,
+            "crawl_type" => self::CRAWL_TYPE,
+            "crawl_index" => self::CRAWL_INDEX,
             "restrict_sites_by_url" => self::RESTRICT_SITES_BY_URL,
             "allowed_sites" => self::ALLOWED_SITES,
             "disallowed_sites" => self::DISALLOWED_SITES,
@@ -423,10 +527,13 @@ class QueueServer implements CrawlConstants
         $this->index_archive = NULL;

         gc_collect_cycles(); // garbage collect old crawls
-        $this->web_queue = new WebQueueBundle(
-            CRAWL_DIR.'/cache/'.self::queue_base_name.
-            $this->crawl_time, URL_FILTER_SIZE,
-                NUM_URLS_QUEUE_RAM, $min_or_max);
+
+        if($this->crawl_type == self::WEB_CRAWL) {
+            $this->web_queue = new WebQueueBundle(
+                CRAWL_DIR.'/cache/'.self::queue_base_name.
+                $this->crawl_time, URL_FILTER_SIZE,
+                    NUM_URLS_QUEUE_RAM, $min_or_max);
+        }

         if(!file_exists(
             CRAWL_DIR.'/cache/'.self::index_data_base_name.$this->crawl_time)) {
@@ -454,8 +561,10 @@ class QueueServer implements CrawlConstants
         }

         // chmod so web server can also write to these directories
-        $this->db->setWorldPermissionsRecursive(
-            CRAWL_DIR.'/cache/'.self::queue_base_name.$this->crawl_time);
+        if($this->crawl_type == self::WEB_CRAWL) {
+            $this->db->setWorldPermissionsRecursive(
+                CRAWL_DIR.'/cache/'.self::queue_base_name.$this->crawl_time);
+        }
         $this->db->setWorldPermissionsRecursive(
             CRAWL_DIR.'/cache/'.self::index_data_base_name.$this->crawl_time);
         // initialize, store the description of this crawl in the index archive
@@ -793,23 +902,7 @@ class QueueServer implements CrawlConstants
      */
     function processDataArchive($file)
     {
-        crawlLog("Processing File: $file");
-
-        $fh = fopen($file, "rb");
-        $machine_string = fgets($fh);
-        $len = strlen($machine_string);
-        if($len > 0) {
-            $machine_info = unserialize(base64_decode($machine_string));
-        }
-        $sites = unserialize(gzuncompress(base64_decode(
-            urldecode(fread($fh, filesize($file) - $len))
-            )));
-        fclose($fh);
-
-        if(isset($machine_info[self::MACHINE])) {
-            $this->most_recent_fetcher = & $machine_info[self::MACHINE];
-            unset($machine_info);
-        }
+        $sites = & $this->getDataArchiveFileData($file);

         crawlLog("...Updating Delayed Hosts Array ...");
         $start_time = microtime();
@@ -905,6 +998,15 @@ class QueueServer implements CrawlConstants

         unlink($file);

+
+        $this->writeCrawlStatus($sites);
+    }
+
+    /**
+     *
+     */
+    function writeCrawlStatus(&$sites)
+    {
         $crawl_status = array();
         $stat_file = CRAWL_DIR."/schedules/crawl_status.txt";
         if(file_exists($stat_file) ) {
@@ -939,7 +1041,6 @@ class QueueServer implements CrawlConstants
                 crawlLog("URL: $url");
             }
         }
-
     }

     /**
@@ -952,6 +1053,24 @@ class QueueServer implements CrawlConstants
         $this->web_queue->differenceSeenUrls($sites, 0);
     }

+    /**
+     *
+     */
+    function calculateScheduleMetaInfo($schedule_time)
+    {
+        $sites = array();
+        $sites[self::CRAWL_TIME] = $this->crawl_time;
+        $sites[self::SCHEDULE_TIME] = $schedule_time;
+        $sites[self::SAVED_CRAWL_TIMES] =  $this->getCrawlTimes();
+            // fetcher should delete any crawl time not listed here
+        $sites[self::CRAWL_ORDER] = $this->crawl_order;
+        $sites[self::CRAWL_TYPE] = $this->crawl_type;
+        $sites[self::CRAWL_INDEX] = $this->crawl_index;
+        $sites[self::META_WORDS] = $this->meta_words;
+        $sites[self::SITES] = array();
+
+        return base64_encode(serialize($sites))."\n";
+    }

     /**
      * Produces a schedule.txt file of url data for a fetcher to crawl next.
@@ -976,16 +1095,10 @@ class QueueServer implements CrawlConstants

         $count = $this->web_queue->to_crawl_queue->count;

-        $sites = array();
-        $sites[self::CRAWL_TIME] = $this->crawl_time;
-        $sites[self::SCHEDULE_TIME] = time();
-        $sites[self::SAVED_CRAWL_TIMES] =  $this->getCrawlTimes();
-            // fetcher should delete any crawl time not listed here
-        $sites[self::CRAWL_ORDER] = $this->crawl_order;
-        $sites[self::SITES] = array();
-        $sites[self::META_WORDS] = $this->meta_words;
-        $first_line = base64_encode(serialize($sites))."\n";
+        $schedule_time = time();
+        $first_line = $this->calculateScheduleMetaInfo($schedule_time);

+        $sites = array();

         $delete_urls = array();
         $crawl_delay_hosts = array();
@@ -1017,10 +1130,10 @@ class QueueServer implements CrawlConstants
                 } else {

                     $next_slot = $this->getEarliestSlot($current_crawl_index,
-                        $sites[self::SITES]);
+                        $sites);

                     if($next_slot < MAX_FETCH_SIZE) {
-                        $sites[self::SITES][$next_slot] =
+                        $sites[$next_slot] =
                             array($url, $weight, 0);
                         $delete_urls[$i] = $url;
                         /* note don't add to seen url filter
@@ -1069,11 +1182,11 @@ class QueueServer implements CrawlConstants
                         && $num_waiting < MAX_WAITING_HOSTS)
                         || (isset($this->waiting_hosts[crawlHash($host_url)]) &&
                             $this->waiting_hosts[crawlHash($host_url) ] ==
-                            $sites[self::SCHEDULE_TIME])) {
+                            $schedule_time)) {

                         $this->waiting_hosts[crawlHash($host_url)] =
-                            $sites[self::SCHEDULE_TIME];
-                        $this->waiting_hosts[$sites[self::SCHEDULE_TIME]][] =
+                           $schedule_time;
+                        $this->waiting_hosts[$schedule_time][] =
                             crawlHash($host_url);
                         $request_batches_per_delay =
                             ceil($delay/$time_per_request_guess);
@@ -1089,9 +1202,9 @@ class QueueServer implements CrawlConstants

                         if(($next_slot =
                             $this->getEarliestSlot( $next_earliest_slot,
-                                $sites[self::SITES])) < MAX_FETCH_SIZE) {
+                                $sites)) < MAX_FETCH_SIZE) {
                             $crawl_delay_hosts[$host_url] = $next_slot;
-                            $sites[self::SITES][$next_slot] =
+                            $sites[$next_slot] =
                                 array($url, $weight, $delay);
                             $delete_urls[$i] = $url;
                             $this->web_queue->addSeenUrlFilter($url);
@@ -1104,9 +1217,9 @@ class QueueServer implements CrawlConstants
                     }
                 } else { // add a url no crawl delay
                     $next_slot = $this->getEarliestSlot(
-                        $current_crawl_index, $sites[self::SITES]);
+                        $current_crawl_index, $sites);
                     if($next_slot < MAX_FETCH_SIZE) {
-                        $sites[self::SITES][$next_slot] =
+                        $sites[$next_slot] =
                             array($url, $weight, 0);
                         $delete_urls[$i] = $url;
                         $this->web_queue->addSeenUrlFilter($url);
@@ -1133,28 +1246,28 @@ class QueueServer implements CrawlConstants
             $this->web_queue->removeQueue($delete_url);
         }

-        if(isset($sites[self::SITES]) && count($sites[self::SITES]) > 0 ) {
+        if(isset($sites) && count($sites) > 0 ) {
             $dummy_slot = array(self::DUMMY, 0.0, 0);
             /* dummy's are used for crawl delays of sites with longer delays
                when we don't have much else to crawl
              */
             $cnt = 0;
             for($j = 0; $j < MAX_FETCH_SIZE; $j++) {
-                if(isset( $sites[self::SITES][$j])) {
+                if(isset( $sites[$j])) {
                     $cnt++;
                     if($cnt == $fetch_size) {break; }
                 } else {
                     if($j % NUM_MULTI_CURL_PAGES == 0) {
-                        $sites[self::SITES][$j] = $dummy_slot;
+                        $sites[$j] = $dummy_slot;
                     }
                 }
             }
-            ksort($sites[self::SITES]);
+            ksort($sites);

             //write schedule to disk
             $fh = fopen(CRAWL_DIR."/schedules/schedule.txt", "wb");
             fwrite($fh, $first_line);
-            foreach($sites[self::SITES] as $site) {
+            foreach($sites as $site) {
                 list($url, $weight, $delay) = $site;
                 $out_string = base64_encode(
                     packFloat($weight).packInt($delay).$url)."\n";
@@ -1231,7 +1344,7 @@ class QueueServer implements CrawlConstants
     }

     /**
-     * Checks if the url belongs to one of the sites list in site_array
+     * Checks if the url belongs to one of the sites listed in site_array
      * Sites can be either given in the form domain:host or
      * in the form of a url in which case it is check that the site url
      * is a substring of the passed url.
diff --git a/configs/config.php b/configs/config.php
index 4567d0ac7..2bad1a1cd 100755
--- a/configs/config.php
+++ b/configs/config.php
@@ -162,7 +162,6 @@ define('MAXIMUM_CRAWL_DELAY', 64);
 /** maximum number of active crawl-delayed hosts */
 define('MAX_WAITING_HOSTS', 1000);

-

 /**
  * bloom filters are used to keep track of which urls are visited,
@@ -262,6 +261,7 @@ $PAGE_PROCESSORS = array(   "text/html" => "HtmlProcessor",
                             "text/asp" => "HtmlProcessor",
                             "text/xml" => "XmlProcessor",

+                            "application/xml" => "XmlProcessor",
                             "application/xhtml+xml" => "HtmlProcessor",

                             "application/rss+xml" => "RssProcessor",
@@ -283,7 +283,8 @@ $PAGE_PROCESSORS = array(   "text/html" => "HtmlProcessor",
                             "image/svg+xml"=> "SvgProcessor"
 );

-
+/** Characters we treat as not part of words; not the POSIX [:punct:] set */
+define ('PUNCT', "\.|\,|\:|\;|\"|\'|\`|\[|\]|\{|\}|\(|\)|\!|\||\&");

 /**
  * How many non robot urls the fetcher successfully downloads before
diff --git a/configs/default_crawl.ini b/configs/default_crawl.ini
index bb1577a8e..b428899bf 100644
--- a/configs/default_crawl.ini
+++ b/configs/default_crawl.ini
@@ -24,6 +24,7 @@
 ;
 [general]
 crawl_order = 'ad';
+crawl_type = 'ax';
 restrict_sites_by_url = false;

 [allowed_sites]
diff --git a/controllers/admin_controller.php b/controllers/admin_controller.php
index 9b912ea3b..45a36e4ad 100755
--- a/controllers/admin_controller.php
+++ b/controllers/admin_controller.php
@@ -250,7 +250,7 @@ class AdminController extends Controller implements CrawlConstants
                 $data = array_merge($data, $crawl_status);
             }
         }
-        $data['RECENT_CRAWLS'] = $this->crawlModel->getCrawlList();
+        $data['RECENT_CRAWLS'] = $this->crawlModel->getCrawlList(false, true);
         if(isset($data['CRAWL_TIME'])) {
             //erase from previous crawl list any active crawl
             $num_crawls = count($data['RECENT_CRAWLS']);
@@ -687,6 +687,12 @@ class AdminController extends Controller implements CrawlConstants
                     $info[self::STATUS] = "NEW_CRAWL";
                     $info[self::CRAWL_TIME] = time();
                     $seed_info = $this->crawlModel->getSeedInfo();
+                    $info[self::CRAWL_TYPE] =
+                        $seed_info['general']['crawl_type'];
+                    $info[self::CRAWL_INDEX] =
+                        (isset($seed_info['general']['crawl_index'])) ?
+                        $seed_info['general']['crawl_index'] :
+                        '';
                     $info[self::TO_CRAWL] =
                         $seed_info['seed_sites']['url'];
                     $info[self::CRAWL_ORDER] =
@@ -789,15 +795,22 @@ class AdminController extends Controller implements CrawlConstants
                         (getLocaleDirection() == 'ltr') ? "right": "left";
                     $data["ELEMENT"] = "crawloptionsElement";
                     $crawls = $this->crawlModel->getCrawlList();
+                    $indexes = $this->crawlModel->getCrawlList(true, true);
                     $update_flag = false;
                     $data['available_options'] = array(
                         tl('admin_controller_use_below'),
                         tl('admin_controller_use_defaults'));
+                    $data['available_crawl_indexes'] = array();
                     $data['options_default'] = tl('admin_controller_use_below');
                     foreach($crawls as $crawl) {
                         $data['available_options'][$crawl['CRAWL_TIME']] =
                             tl('admin_controller_previous_crawl')." ".
                             $crawl['DESCRIPTION'];
+
+                    }
+                    foreach($indexes as $crawl) {
+                        $data['available_crawl_indexes'][$crawl['CRAWL_TIME']]
+                            = $crawl['DESCRIPTION'];
                     }
                     $no_further_changes = false;
                     if(isset($_REQUEST['load_option']) &&
@@ -816,6 +829,34 @@ class AdminController extends Controller implements CrawlConstants
                     } else {
                         $seed_info = $this->crawlModel->getSeedInfo();
                     }
+                    if(!$no_further_changes && isset($_REQUEST['crawl_indexes'])
+                        && in_array($_REQUEST['crawl_indexes'],
+                        array_keys($data['available_crawl_indexes']))) {
+                        $seed_info['general']['crawl_index'] =
+                            $_REQUEST['crawl_indexes'];
+                        $update_flag = true;
+                    }
+                    $data['crawl_index'] =
+                        (isset($seed_info['general']['crawl_index'])) ?
+                        $seed_info['general']['crawl_index'] : '';
+                    $data['available_crawl_types'] = array(self::WEB_CRAWL,
+                        self::ARCHIVE_CRAWL);
+                    if(!$no_further_changes && isset($_REQUEST['crawl_type'])
+                        &&  in_array($_REQUEST['crawl_type'],
+                            $data['available_crawl_types'])) {
+                        $seed_info['general']['crawl_type'] =
+                            $_REQUEST['crawl_type'];
+                        $update_flag = true;
+                    }
+                    $data['crawl_type'] = $seed_info['general']['crawl_type'];
+                    if($data['crawl_type'] == self::WEB_CRAWL) {
+                        $data['web_crawl_active'] = "active";
+                        $data['archive_crawl_active'] = "";
+                    } else {
+                        $data['archive_crawl_active'] = "active";
+                        $data['web_crawl_active'] = "";
+                    }
+
                     $data['available_crawl_orders'] = array(
                         self::BREADTH_FIRST =>
                             tl('admin_controller_breadth_first'),
@@ -879,7 +920,13 @@ class AdminController extends Controller implements CrawlConstants
                         " elt('load-options').onchange = ".
                         "function() { if(elt('load-options').selectedIndex !=".
                         " 0) { elt('crawloptionsForm').submit();  }};";
-
+                    if($data['crawl_type'] == CrawlConstants::WEB_CRAWL) {
+                        $data['SCRIPT'] .=
+                            "switchTab('webcrawltab', 'archivetab');";
+                    } else {
+                        $data['SCRIPT'] .=
+                            "switchTab('archivetab', 'webcrawltab');";
+                    }
                     if($update_flag) {
                         $this->crawlModel->setSeedInfo($seed_info);
                         $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".
diff --git a/controllers/search_controller.php b/controllers/search_controller.php
index cc16f5ff0..79b8cc8ee 100755
--- a/controllers/search_controller.php
+++ b/controllers/search_controller.php
@@ -367,39 +367,42 @@ class SearchController extends Controller implements CrawlConstants
             exit();
         }

-
         $machine = $crawl_item[self::MACHINE];
         $machine_uri = $crawl_item[self::MACHINE_URI];
         $page = $crawl_item[self::HASH];
         $offset = $crawl_item[self::OFFSET];
         $cache_item = $this->crawlModel->getCacheFile($machine,
             $machine_uri, $generation, $offset,  $crawl_time);
-
         $cache_file = $cache_item[self::PAGE];

-        $request = $cache_item['REQUEST'];
-
-        $meta_words = array('link\:', 'site\:',
-            'filetype\:', 'info\:', '\-',
-            'index:', 'ip:', 'i:', 'weight:', 'w:', 'u:');
-        foreach($meta_words as $meta_word) {
-            $pattern = "/(\s)($meta_word(\S)+)/";
-            $query = preg_replace($pattern, "", $query);
-        }
-        $query = str_replace("'", " ", $query);
-        $query = str_replace('"', " ", $query);
-        $query = str_replace('\\', " ", $query);
-        $query = str_replace('|', " ", $query);
-        $query = $this->clean($query, "string");
-
-        $page_url = $url;
+        if(!stristr($cache_item[self::TYPE], "image")) {

-        $phrase_string = mb_ereg_replace("[[:punct:]]", " ", $query);
-        $words = mb_split(" ",$phrase_string);
-        if(!$highlight) {
+            $meta_words = array('link\:', 'site\:', 'version\:', 'modified\:',
+                'filetype\:', 'info\:', '\-', 'os\:', 'server\:', 'date\:',
+                'lang\:',
+                'index:', 'ip:', 'i:', 'weight:', 'w:', 'u:');
+            foreach($meta_words as $meta_word) {
+                $pattern = "/(\s)($meta_word(\S)+)/";
+                $query = preg_replace($pattern, "", $query);
+            }
+            $query = str_replace("'", " ", $query);
+            $query = str_replace('"', " ", $query);
+            $query = str_replace('\\', " ", $query);
+            $query = str_replace('|', " ", $query);
+            $query = $this->clean($query, "string");
+
+            $phrase_string = mb_ereg_replace("[[:punct:]]", " ", $query);
+            $words = mb_split(" ",$phrase_string);
+            if(!$highlight) {
+                $words = array();
+            }
+        } else {
+            $type = $cache_item[self::TYPE];
+            $cache_file = "<html><head><title>Yioop! Cache</title></head>".
+                "<body><object data='data:$type;base64,".
+                base64_encode($cache_file)."' type='$type' /></body></html>";
             $words = array();
         }
-
         $date = date ("F d Y H:i:s", $cache_item[self::TIMESTAMP]);

         $dom = new DOMDocument();
@@ -418,17 +421,22 @@ class SearchController extends Controller implements CrawlConstants
             $body =  $dom->getElementsByTagName('body')->item(0);
         }
         $first_child = $body->firstChild;
-
+        $preNode = $dom->createElement('pre');
+        $preNode = $body->insertBefore($preNode, $first_child);
         $divNode = $dom->createElement('div');
-        $divNode = $body->insertBefore($divNode, $first_child);
+        $divNode = $body->insertBefore($divNode, $preNode);
         $divNode->setAttributeNS("","style", "border-color: black; ".
             "border-style:solid; border-width:3px; ".
             "padding: 5px; background-color: white");

         $textNode = $dom->createTextNode(tl('search_controller_cached_version',
-            "$page_url", $date));
-        $textNode = $divNode->appendChild($textNode);
+            "$url", $date));
+        $divNode->appendChild($textNode);
+        if(isset($cache_item[self::HEADER])) {

+            $textNode = $dom->createTextNode($cache_item[self::HEADER]);
+            $preNode->appendChild($textNode);
+        }
         $body = $this->markChildren($body, $words, $dom);

         $newDoc = $dom->saveHTML();
diff --git a/controllers/settings_controller.php b/controllers/settings_controller.php
index 5456248b0..b476c0133 100755
--- a/controllers/settings_controller.php
+++ b/controllers/settings_controller.php
@@ -111,7 +111,7 @@ class SettingsController extends Controller
             $data['PER_PAGE_SELECTED'] = NUM_RESULTS_PER_PAGE;
         }

-        $crawls = $this->crawlModel->getCrawlList();
+        $crawls = $this->crawlModel->getCrawlList(false, true);
         $data['CRAWLS'] = array();
         foreach($crawls as $crawl) {
             $data['CRAWLS'][$crawl['CRAWL_TIME']] = $crawl['DESCRIPTION'].
diff --git a/css/search.css b/css/search.css
index 9fbf45143..ef537c8e0 100755
--- a/css/search.css
+++ b/css/search.css
@@ -614,6 +614,44 @@ p
 }


+.tabmenu-list
+{
+    border-bottom: 2px solid black;
+    padding: 0px;
+    margin-bottom: 0px;
+    z-index: 1;
+}
+.tabmenu-list li
+{
+    display: inline;
+    list-style-type: none;
+}
+
+.tabmenu-list a
+{
+    border: 1px solid black;
+    border-bottom: 0px;
+    padding: 5px 5px 0px 5px;
+    background-color:#EEE;
+    overflow: hidden;
+    margin: 0;
+    text-decoration: none;
+}
+
+.tabmenu-list a.active
+{
+    background-color:white;
+    border-bottom: 3px solid white;
+}
+
+.tabmenu-content
+{
+    border: 1px solid black;
+    border-top: none;
+    padding: 10px;
+    z-index: 2;
+}
+
 .crawlstable, .mixestable, .crawlstable th, .mixestable th,
 .crawlstable td, .mixestable td
 {
diff --git a/index.php b/index.php
index 8a31f6367..0a3911ae8 100755
--- a/index.php
+++ b/index.php
@@ -48,7 +48,6 @@ ini_set("memory_limit","500M");
 header("X-FRAME-OPTIONS: DENY"); //prevent click jacking
 session_name(SESSION_NAME);
 session_start();
-
 /**
  * Sets up DB to be used
  */
diff --git a/lib/archive_bundle_iterators/arc_archive_bundle_iterator.php b/lib/archive_bundle_iterators/arc_archive_bundle_iterator.php
new file mode 100644
index 000000000..28741d7eb
--- /dev/null
+++ b/lib/archive_bundle_iterators/arc_archive_bundle_iterator.php
@@ -0,0 +1,201 @@
+<?php
+/**
+ *  SeekQuarry/Yioop --
+ *  Open Source Pure PHP Search Engine, Crawler, and Indexer
+ *
+ *  Copyright (C) 2009, 2010, 2011  Chris Pollett chris@pollett.org
+ *
+ *  LICENSE:
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  END LICENSE
+ *
+ * @author Chris Pollett chris@pollett.org
+ * @package seek_quarry
+ * @subpackage iterator
+ * @license http://www.gnu.org/licenses/ GPL3
+ * @link http://www.seekquarry.com/
+ * @copyright 2009, 2010, 2011
+ * @filesource
+ */
+
+if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
+
+/**
+ * Loads the base class for archive bundle iterators
+ */
+require_once BASE_DIR.
+    '/lib/archive_bundle_iterators/archive_bundle_iterator.php';
+
+/**
+ * Used to iterate through the records of a collection of arc files stored in
+ * a WebArchiveBundle folder. Arc is the file format of the Internet Archive
+ * http://www.archive.org/web/researcher/ArcFileFormat.php. Iteration would be
+ * for the purpose of making an index of these records
+ *
+ * @author Chris Pollett
+ * @package seek_quarry
+ * @subpackage iterator
+ * @see WebArchiveBundle
+ */
+class ArcArchiveBundleIterator extends ArchiveBundleIterator
+{
+    /**
+     * The number of arc files in this arc archive bundle
+     *  @var int
+     */
+    var $num_partitions;
+    /**
+     *  Counting in glob order for this arc archive bundle directory, the
+     *  current active file number of the arc file being processed.
+     *
+     *  @var int
+     */
+    var $current_partition_num;
+    /**
+     *  Current byte offset into the current arc file
+     *  @var int
+     */
+    var $current_offset;
+    /**
+     *  Array of filenames of arc files in this directory (glob order)
+     *  @var array
+     */
+    var $partitions;
+    /**
+     *  File handle for current arc file
+     *  @var resource
+     */
+    var $fh;
+
+    /**
+     * Creates an arc archive iterator with the given parameters.
+     *
+     * @param string $iterate_timestamp timestamp of the arc archive bundle to
+     *      iterate over the pages of
+     * @param string $result_timestamp timestamp of the arc archive bundle
+     *      results are being stored in
+     */
+    function __construct($iterate_timestamp, $result_timestamp)
+    {
+        $this->iterate_timestamp = $iterate_timestamp;
+        $this->result_timestamp = $result_timestamp;
+        $archive_name = CRAWL_DIR.'/cache/'.self::archive_base_name.
+            $iterate_timestamp;
+        $partitions = glob("$archive_name/*.arc.gz");
+        $this->partitions = ($partitions) ? $partitions : array();
+        $this->num_partitions = count($this->partitions);
+        /* nextPages() saves progress in the result bundle's folder and
+           reset() clears it there, so it must be loaded from there too */
+        $status_file = CRAWL_DIR.'/cache/'.self::archive_base_name.
+            $result_timestamp."/iterate_status.txt";
+        if(file_exists($status_file)) {
+            $info = unserialize(file_get_contents($status_file));
+            $this->end_of_iterator = $info['end_of_iterator'];
+            $this->current_partition_num = $info['current_partition_num'];
+            $this->current_offset = $info['current_offset'];
+        } else {
+            $this->reset();
+        }
+        // with no arc files, or a finished iteration, nothing is opened
+        if(isset($this->partitions[$this->current_partition_num])) {
+            $this->fh = gzopen(
+                $this->partitions[$this->current_partition_num], "rb");
+        }
+        if(is_resource($this->fh)) {
+            gzseek($this->fh, $this->current_offset);
+        }
+    }
+
+    /**
+     * Resets the iterator to the start of the archive bundle
+     */
+    function reset()
+    {
+        $this->current_partition_num = 0;
+        $this->end_of_iterator = false;
+        $this->current_offset = 0;
+        $archive_name = CRAWL_DIR.'/cache/'.self::archive_base_name.
+            $this->result_timestamp;
+        @unlink("$archive_name/iterate_status.txt");
+    }
+
+    /**
+     * Gets the next $num many docs from the iterator
+     * @param int $num number of docs to get
+     * @return array associative arrays for $num pages
+     */
+    function nextPages($num)
+    {
+        $pages = array();
+        for($i = 0; $i < $num; $i++) {
+            $page = $this->nextPage();
+            if(!$page) {
+                if(is_resource($this->fh)) {
+                    gzclose($this->fh);
+                }
+                $this->current_partition_num++;
+                if($this->current_partition_num >= $this->num_partitions) {
+                    $this->end_of_iterator = true;
+                    break;
+                }
+                $this->fh = gzopen(
+                    $this->partitions[$this->current_partition_num], "rb");
+            } else {
+                $pages[] = $page;
+            }
+        }
+        if(is_resource($this->fh)) {
+            $this->current_offset = gztell($this->fh);
+        }
+        $archive_name = CRAWL_DIR.'/cache/'.self::archive_base_name.
+            $this->result_timestamp;
+        $info = array();
+        $info['end_of_iterator'] = $this->end_of_iterator;
+        $info['current_partition_num'] = $this->current_partition_num;
+        $info['current_offset'] = $this->current_offset;
+        file_put_contents("$archive_name/iterate_status.txt",
+            serialize($info));
+        return $pages;
+    }
+
+    /**
+     * Gets the next doc from the iterator
+     * @return array associative array for doc
+     */
+    function nextPage()
+    {
+        if(!is_resource($this->fh)) return NULL;
+        do {
+            if(!$page_info = gzgets($this->fh) ) return NULL;
+            $info_parts = explode(" ", $page_info);
+            $num_parts = count($info_parts);
+            $length = intval($info_parts[$num_parts - 1]);
+            // last header field is the body length; +1 presumably eats "\n"
+            if(!$object = gzread($this->fh, $length + 1)) return NULL;
+        } while(substr($page_info, 0, 3) == 'dns'); //ignore dns entries in arc
+        $site = array();
+        $site[self::URL] = $info_parts[0];
+        $site[self::IP_ADDRESSES] = array($info_parts[1]);
+        $site[self::TIMESTAMP] = date("U", strtotime($info_parts[2]));
+        $site[self::TYPE] = $info_parts[3];
+        $site_contents = FetchUrl::parseHeaderPage($object);
+        $site = array_merge($site, $site_contents);
+        $site[self::HASH] = FetchUrl::computePageHash($site[self::PAGE]);
+        $site[self::WEIGHT] = 1;
+        return $site;
+    }
+}
+?>
diff --git a/lib/archive_bundle_iterators/archive_bundle_iterator.php b/lib/archive_bundle_iterators/archive_bundle_iterator.php
new file mode 100644
index 000000000..938efae4c
--- /dev/null
+++ b/lib/archive_bundle_iterators/archive_bundle_iterator.php
@@ -0,0 +1,83 @@
+<?php
+/**
+ *  SeekQuarry/Yioop --
+ *  Open Source Pure PHP Search Engine, Crawler, and Indexer
+ *
+ *  Copyright (C) 2009, 2010, 2011  Chris Pollett chris@pollett.org
+ *
+ *  LICENSE:
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  END LICENSE
+ *
+ * @author Chris Pollett chris@pollett.org
+ * @package seek_quarry
+ * @subpackage iterator
+ * @license http://www.gnu.org/licenses/ GPL3
+ * @link http://www.seekquarry.com/
+ * @copyright 2009, 2010, 2011
+ * @filesource
+ */
+
+if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
+
+/** Loads common constants for web crawling*/
+require_once BASE_DIR."/lib/crawl_constants.php";
+
+/**
+ * Abstract class used to model iterating documents indexed in
+ * a WebArchiveBundle or set of such bundles. It declares the timestamp and
+ * end-of-iteration properties and the nextPages()/reset() operations.
+ *
+ * @author Chris Pollett
+ * @package seek_quarry
+ * @subpackage iterator
+ * @see WebArchiveBundle
+ */
+abstract class ArchiveBundleIterator implements CrawlConstants
+{
+
+
+    /**
+     * Timestamp of the archive that is being iterated over
+     * @var int
+     */
+     var $iterate_timestamp;
+
+    /**
+     * Timestamp of the archive that is being used to store results in
+     * @var int
+     */
+     var $result_timestamp;
+
+    /**
+     * Whether or not the iterator has run out of documents to return
+     * @var bool
+     */
+     var $end_of_iterator;
+
+    /**
+     * Gets the next $num many docs from the iterator
+     * @param int $num number of docs to get
+     * @return array associative arrays for $num pages
+     */
+    abstract function nextPages($num);
+
+    /**
+     * Resets the iterator to the start of the archive bundle
+     */
+    abstract function reset();
+}
+?>
diff --git a/lib/archive_bundle_iterators/web_archive_bundle_iterator.php b/lib/archive_bundle_iterators/web_archive_bundle_iterator.php
new file mode 100644
index 000000000..83861f537
--- /dev/null
+++ b/lib/archive_bundle_iterators/web_archive_bundle_iterator.php
@@ -0,0 +1,196 @@
+<?php
+/**
+ *  SeekQuarry/Yioop --
+ *  Open Source Pure PHP Search Engine, Crawler, and Indexer
+ *
+ *  Copyright (C) 2009, 2010, 2011  Chris Pollett chris@pollett.org
+ *
+ *  LICENSE:
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  END LICENSE
+ *
+ * @author Chris Pollett chris@pollett.org
+ * @package seek_quarry
+ * @subpackage iterator
+ * @license http://www.gnu.org/licenses/ GPL3
+ * @link http://www.seekquarry.com/
+ * @copyright 2009, 2010, 2011
+ * @filesource
+ */
+
+if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
+
+/**
+ * Loads the base class for archive bundle iterators
+ */
+require_once BASE_DIR.
+    '/lib/archive_bundle_iterators/archive_bundle_iterator.php';
+
+/**
+ * Class used to model iterating documents indexed in
+ * a WebArchiveBundle. This would typically be for the purpose
+ * of re-indexing these documents.
+ *
+ * @author Chris Pollett
+ * @package seek_quarry
+ * @subpackage iterator
+ * @see WebArchiveBundle
+ */
+class WebArchiveBundleIterator extends ArchiveBundleIterator
+{
+    /**
+     * Number of web archive objects in this web archive bundle
+     * @var int
+     */
+    var $num_partitions;
+    /**
+     * The current web archive in the bundle that is being iterated over
+     * @var int
+     */
+    var $partition;
+    /**
+     * The item within the current partition to be returned next
+     * @var int
+     */
+    var $partition_index;
+    /**
+     * Index of web archive in the web archive bundle that the iterator is
+     * currently getting results from
+     * @var int
+     */
+    var $current_partition_num;
+    /**
+     * Index between 0 and $this->count of where the iterator is at
+     * @var int
+     */
+    var $overall_index;
+    /**
+     * Number of documents in the web archive bundle being iterated over
+     * @var int
+     */
+    var $count;
+    /**
+     * The web archive bundle being iterated over
+     * @var object
+     */
+    var $archive;
+
+    /**
+     * Creates a web archive iterator with the given parameters.
+     *
+     * @param string $iterate_timestamp timestamp of the web archive bundle to
+     *      iterate over the pages of
+     * @param string $result_timestamp timestamp of the web archive bundle
+     *      results are being stored in
+     */
+    function __construct($iterate_timestamp, $result_timestamp)
+    {
+        $this->iterate_timestamp = $iterate_timestamp;
+        $this->result_timestamp = $result_timestamp;
+        $archive_name = CRAWL_DIR.'/cache/'.self::archive_base_name.
+            $iterate_timestamp;
+        $this->archive = new WebArchiveBundle($archive_name);
+        $archive_name = CRAWL_DIR.'/cache/'.self::archive_base_name.
+            $result_timestamp;
+        if(file_exists("$archive_name/iterate_status.txt")) {
+            $info = unserialize(file_get_contents(
+                "$archive_name/iterate_status.txt"));
+            $this->count = $this->archive->count;
+            $this->num_partitions = $this->archive->write_partition+1;
+            $this->overall_index = $info['overall_index'];
+            $this->end_of_iterator = $info['end_of_iterator'];
+            $this->partition_index = $info['partition_index'];
+            $this->current_partition_num = $info['current_partition_num'];
+            $this->partition =  $this->archive->getPartition(
+                    $this->current_partition_num, false);
+            $this->partition->iterator_pos = $info['iterator_pos'];
+        } else {
+            $this->reset();
+        }
+
+    }
+
+    /**
+     * Gets the next $num many docs from the iterator
+     *
+     * @param int $num number of docs to get
+     * @return array associative arrays for $num pages
+     */
+    function nextPages($num)
+    {
+        if($num + $this->overall_index >= $this->count) {
+            $num = max($this->count - $this->overall_index, 0);
+        }
+        $num_to_get = 1;
+        $objects = array();
+        for($i = 0; $i < $num; $i += $num_to_get) {
+            // request only what is still owed so at most $num docs come back
+            $num_to_get = min($num - $i, $this->partition->count -
+                $this->partition_index);
+            $pre_new_objects = $this->partition->nextObjects($num_to_get);
+            foreach($pre_new_objects as $object) {
+                $objects[] = $object[1];
+            }
+
+            $this->overall_index += $num_to_get;
+            $this->partition_index += $num_to_get;
+            if($num_to_get <= 0) {
+                $this->current_partition_num++;
+                $this->partition = $this->archive->getPartition(
+                    $this->current_partition_num, false);
+                $this->partition_index = 0;
+            }
+            if($this->current_partition_num > $this->num_partitions) break;
+        }
+        $this->end_of_iterator = ($this->overall_index >= $this->count ) ?
+            true : false;
+
+        $archive_name = CRAWL_DIR.'/cache/'.self::archive_base_name.
+            $this->result_timestamp;
+        $info = array();
+        $info['overall_index'] = $this->overall_index;
+        $info['end_of_iterator'] = $this->end_of_iterator;
+        $info['partition_index'] = $this->partition_index;
+        $info['current_partition_num'] = $this->current_partition_num;
+        $info['iterator_pos'] =$this->partition->iterator_pos;
+        file_put_contents("$archive_name/iterate_status.txt",
+            serialize($info));
+
+        return $objects;
+    }
+
+    /**
+     * Resets the iterator to the start of the archive bundle
+     */
+    function reset()
+    {
+        $this->count = $this->archive->count;
+        $this->num_partitions = $this->archive->write_partition+1;
+        $this->overall_index = 0;
+        $this->end_of_iterator = ($this->overall_index >= $this->count) ?
+            true : false;
+        $this->partition_index = 0;
+        $this->current_partition_num = 0;
+        $this->partition = $this->archive->getPartition(
+            $this->current_partition_num, false);
+        $this->partition->reset();
+        $archive_name = CRAWL_DIR.'/cache/'.self::archive_base_name.
+            $this->result_timestamp;
+        @unlink("$archive_name/iterate_status.txt");
+    }
+
+}
+?>
diff --git a/lib/crawl_constants.php b/lib/crawl_constants.php
index dd82fb006..a668e37c7 100644
--- a/lib/crawl_constants.php
+++ b/lib/crawl_constants.php
@@ -142,6 +142,16 @@ interface CrawlConstants
     const BOOST = 'av';
     const IP_ADDRESSES = 'au';
     const JUST_METAS = 'aw';
+    const WEB_CRAWL = 'ax';
+    const ARCHIVE_CRAWL = 'ay';
+    const CRAWL_TYPE = 'az';
+    const CRAWL_INDEX = 'ba';
+    const HEADER = 'bb';
+    const SERVER = 'bc';
+    const SERVER_VERSION = 'bd';
+    const OPERATING_SYSTEM = 'be';
+    const MODIFIED = 'bf';
+    const LANG = 'bg';

     const NEEDS_OFFSET_FLAG = 0x7FFFFFFF;

diff --git a/lib/fetch_url.php b/lib/fetch_url.php
index 2493b18d2..e53fda044 100755
--- a/lib/fetch_url.php
+++ b/lib/fetch_url.php
@@ -91,6 +91,7 @@ class FetchUrl implements CrawlConstants
             curl_setopt($sites[$i][0], CURLOPT_RETURNTRANSFER, true);
             curl_setopt($sites[$i][0], CURLOPT_CONNECTTIMEOUT, PAGE_TIMEOUT);
             curl_setopt($sites[$i][0], CURLOPT_TIMEOUT, PAGE_TIMEOUT);
+            curl_setopt($sites[$i][0], CURLOPT_HEADER, true);
             curl_setopt($sites[$i][0], CURLOPT_HTTPHEADER,
                 array('Range: bytes=0-'.PAGE_RANGE_REQUEST));
             curl_multi_add_handle($agent_handler, $sites[$i][0]);
@@ -128,9 +129,21 @@ class FetchUrl implements CrawlConstants
             $ip_addresses = self::getCurlIp($ip_holder[$i]);
             fclose($ip_holder[$i]);
             if($sites[$i][0]) {
-
                 // Get Data and Message Code
                 $content = @curl_multi_getcontent($sites[$i][0]);
+
+                if(isset($content)) {
+                    $site = self::parseHeaderPage($content, $value);
+                    $sites[$i] = array_merge($sites[$i], $site);
+                    /*
+                       Store Data into our $sites array, create a hash for
+                       deduplication purposes
+                     */
+                    $sites[$i][$hash] =
+                        self::computePageHash($sites[$i][$value]);
+
+                }
+
                 $sites[$i][self::HTTP_CODE] =
                     curl_getinfo($sites[$i][0], CURLINFO_HTTP_CODE);
                 if(!$sites[$i][self::HTTP_CODE]) {
@@ -141,35 +154,6 @@ class FetchUrl implements CrawlConstants
                 } else {
                     $sites[$i][self::IP_ADDRESSES] = array("0.0.0.0");
                 }
-                /*
-                   Store Data into our $sites array, create a hash for
-                   deduplication purposes
-                 */
-                if(isset($content)) {
-                    $sites[$i][$value] =
-                        mb_substr($content, 0, PAGE_RANGE_REQUEST);
-                    /* to do dedup we strip script, noscript, and style tags
-                       as well as their content, then we strip tags, get rid
-                       of whitespace and hash
-                     */
-                    $strip_array =
-                        array('@<script[^>]*?>.*?</script>@si',
-                            '@<noscript[^>]*?>.*?</noscript>@si',
-                            '@<style[^>]*?>.*?</style>@si');
-                    $dedup_string = preg_replace(
-                        $strip_array, '', $sites[$i][$value]);
-                    $dedup_string_old = preg_replace(
-                        '/\W+/', '', $dedup_string);
-                    $dedup_string = strip_tags($dedup_string_old);
-                    if($dedup_string == "") {
-                        $dedup_string = $dedup_string_old;
-                    }
-                    $dedup_string = preg_replace(
-                        '/\W+/', '', $dedup_string);
-
-                    $sites[$i][$hash] = crawlHash($dedup_string, true);
-
-                }

                 //Get Time, Mime type and Character encoding
                 $sites[$i][self::TIMESTAMP] = time();
@@ -207,6 +191,103 @@ class FetchUrl implements CrawlConstants
         return $sites;
     }

+    /**
+     * Produces the hash used to recognise pages with similar content:
+     * the page is reduced to its word characters before being hashed
+     *
+     *  @param string &$page  web page data
+     *  @return string hash of the reduced page as returned by crawlHash
+     */
+    public static function computePageHash(&$page)
+    {
+        /* script, noscript, and style elements are dropped together with
+           their contents before anything else happens
+         */
+        $ignored_elements = array(
+            '@<script[^>]*?>.*?</script>@si',
+            '@<noscript[^>]*?>.*?</noscript>@si',
+            '@<style[^>]*?>.*?</style>@si');
+        $without_elements = preg_replace(
+            $ignored_elements, '', $page);
+        /* NOTE(review): every non-word character, < and > included, is
+           removed before strip_tags runs -- confirm this order is intended
+         */
+        $word_chars = preg_replace('/\W+/', '', $without_elements);
+        $to_hash = strip_tags($word_chars);
+        if($to_hash == "") {
+            $to_hash = $word_chars;
+        }
+        $to_hash = preg_replace('/\W+/', '', $to_hash);
+
+        return crawlHash($to_hash, true);
+    }
+
+    /**
+     *  Splits an http response into its header and the page after the first
+     *  blank line, then reads the status code, server details, charset and
+     *  last modified time out of the header. Server: and Last-Modified: are
+     *  matched case-insensitively at the start of a header line.
+     *
+     *  @param string &$header_and_page raw response: header, blank line, page
+     *  @param string $value key under which the page is stored in the result
+     *  @return array with keys HEADER, $value, HTTP_CODE, ENCODING, SERVER
+     *      and, when found, SERVER_VERSION, OPERATING_SYSTEM and MODIFIED
+     */
+    public static function parseHeaderPage(&$header_and_page,
+        $value=CrawlConstants::PAGE)
+    {
+        /* the header stops at the earliest blank line: two CRLF (what the
+           spec says) or two LF's to be safe; with neither, all is page */
+        $offsets = array_filter(array(
+            strpos($header_and_page, "\x0D\x0A\x0D\x0A"),
+            strpos($header_and_page, "\x0A\x0A")), 'is_int');
+        $header_offset = (count($offsets) > 0) ? min($offsets) : 0;
+        $site = array();
+        $site[CrawlConstants::HEADER] =
+            substr($header_and_page, 0, $header_offset);
+
+        $site[$value] = ltrim(substr($header_and_page, $header_offset));
+        $lines = explode("\n", $site[CrawlConstants::HEADER]);
+        $first_line = array_shift($lines);
+        $response = preg_split("/(\s+)/", $first_line);
+        $site[CrawlConstants::HTTP_CODE] =
+            isset($response[1]) ? trim($response[1]) : "";
+        foreach($lines as $line) {
+            $line = trim($line);
+            if(stripos($line, 'Server:') === 0) {
+                // value is read as: name/version (operating system)
+                $server_name_parts =
+                    explode("/", substr($line, strlen('Server:')));
+                $site[CrawlConstants::SERVER] = trim($server_name_parts[0]);
+                if(isset($server_name_parts[1])) {
+                    $version_parts = explode("(", $server_name_parts[1]);
+                    $site[CrawlConstants::SERVER_VERSION] =
+                        trim($version_parts[0]);
+                    if(isset($version_parts[1])) {
+                        $os_parts = explode(")", $version_parts[1]);
+                        $site[CrawlConstants::OPERATING_SYSTEM] =
+                            trim($os_parts[0]);
+                    }
+                }
+            }
+            if(preg_match('/charset=\s*"?([^\s;"]+)/i', $line, $matches)) {
+                $site[CrawlConstants::ENCODING] = $matches[1];
+            }
+            if(stripos($line, 'Last-Modified:') === 0) {
+                $site[CrawlConstants::MODIFIED] = strtotime(
+                    trim(substr($line, strlen('Last-Modified:'))));
+            }
+        }
+        if(!isset($site[CrawlConstants::ENCODING]) ) {
+            $site[CrawlConstants::ENCODING] =
+                mb_detect_encoding($site[$value], 'auto');
+        }
+        if(!isset($site[CrawlConstants::SERVER]) ) {
+            $site[CrawlConstants::SERVER] = "unknown";
+        }
+        return $site;
+    }
+
     /**
      * Computes the IP address from a file pointer assumed to be pointing
      * at STDERR output from a curl request
@@ -242,7 +323,7 @@ class FetchUrl implements CrawlConstants
         curl_setopt($agent, CURLOPT_USERAGENT, USER_AGENT);
         curl_setopt($agent, CURLOPT_URL, $site);
         curl_setopt($agent, CURLOPT_AUTOREFERER, true);
-          curl_setopt($agent, CURLOPT_FOLLOWLOCATION, true);
+        curl_setopt($agent, CURLOPT_FOLLOWLOCATION, true);

         curl_setopt($agent, CURLOPT_RETURNTRANSFER, true);
         curl_setopt($agent, CURLOPT_FAILONERROR, true);
diff --git a/lib/index_archive_bundle.php b/lib/index_archive_bundle.php
index b80186bfb..48498ce3b 100644
--- a/lib/index_archive_bundle.php
+++ b/lib/index_archive_bundle.php
@@ -143,7 +143,7 @@ class IndexArchiveBundle implements CrawlConstants
      * @param int $num_partitions_summaries number of WebArchive partitions
      *      to use in the summmaries WebArchiveBundle
      * @param string $description a text name/serialized info about this
-     * IndexArchiveBundle
+     *      IndexArchiveBundle
      */
     public function __construct($dir_name, $read_only_archive = true,
         $description = NULL, $num_docs_per_generation = NUM_DOCS_PER_GENERATION)
@@ -406,18 +406,34 @@ class IndexArchiveBundle implements CrawlConstants


     /**
-     * Gets the description, count of summaries, and number of partions of the
-     * summaries store in the supplied directory
+     * Gets the description, count of summaries, and number of partitions of the
+     * summaries store in the supplied directory. If the file
+     * arc_description.txt exists, this is viewed as a dummy index archive for
+     * the sole purpose of allowing conversions of downloaded data such as arc
+     * files into Yioop! format.
      *
      * @param string path to a directory containing a summaries WebArchiveBundle
      * @return array summary of the given archive
      */
     public static function getArchiveInfo($dir_name)
     {
+        if(file_exists($dir_name."/arc_description.txt")) {
+            $crawl = array();
+            $info = array();
+            $crawl['DESCRIPTION'] = substr(
+                file_get_contents($dir_name."/arc_description.txt"), 0, 256);
+            $crawl['ARCFILE'] = true;
+            $info['VISITED_URLS_COUNT'] = 0;
+            $info['COUNT'] = 0;
+            $info['NUM_DOCS_PER_PARTITION'] = 0;
+            $info['WRITE_PARTITION'] = 0;
+            $info['DESCRIPTION'] = serialize($crawl);
+
+            return $info;
+        }
         return WebArchiveBundle::getArchiveInfo($dir_name."/summaries");
     }


 }
 ?>
-
diff --git a/lib/index_bundle_iterators/group_iterator.php b/lib/index_bundle_iterators/group_iterator.php
index d7f5dc8af..8f2669f25 100644
--- a/lib/index_bundle_iterators/group_iterator.php
+++ b/lib/index_bundle_iterators/group_iterator.php
@@ -110,7 +110,6 @@ class GroupIterator extends IndexBundleIterator
      *
      * @param object $index_bundle_iterator to use as a source of documents
      *      to iterate over
-
      */
     function __construct($index_bundle_iterator)
     {
diff --git a/lib/phrase_parser.php b/lib/phrase_parser.php
index fc8bcdddf..4fb1a501a 100755
--- a/lib/phrase_parser.php
+++ b/lib/phrase_parser.php
@@ -34,9 +34,12 @@
 if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}

 /**
- *  Load the stem word function, if necessary
+ *  Load the stem word functions, if necessary
  */
-require_once BASE_DIR."/lib/porter_stemmer.php";
+foreach(glob(BASE_DIR."/lib/stemmers/*_stemmer.php")
+    as $filename) {
+    require_once $filename;
+}

 /**
  * Reads in constants used as enums used for storing web sites
@@ -54,19 +57,28 @@ require_once BASE_DIR."/lib/crawl_constants.php";
  */
 class PhraseParser
 {
+    /**
+     * Language tags and their corresponding stemmer
+     * @var array
+     */
+     static $STEMMERS = array(
+        'en' => "EnStemmer",
+        'en-US' => "EnStemmer",
+        'en-GB' => "EnStemmer",
+        'en-CA' => "EnStemmer",
+     );
     /**
      * Converts a summary of a web page into a string of space separated words
      *
-     * @param array $page associateive array of page summary data. Contains
+     * @param array $page associative array of page summary data. Contains
      *      title, description, and links fields
      * @return string the concatenated words extracted from the page summary
      */
     static function extractWordStringPageSummary($page)
     {
-        $punct = "\.|\,|\:|\;|\"|\'|\`|\[|\]|\{|\}|\(|\)|\!|\|";
-        $title_phrase_string = mb_ereg_replace($punct, " ",
+        $title_phrase_string = mb_ereg_replace(PUNCT, " ",
             $page[CrawlConstants::TITLE]);
-        $description_phrase_string = mb_ereg_replace($punct, " ",
+        $description_phrase_string = mb_ereg_replace(PUNCT, " ",
             $page[CrawlConstants::DESCRIPTION]);

         $page_string = $title_phrase_string . " " . $description_phrase_string;
@@ -81,17 +93,18 @@ class PhraseParser
      *
      * @param string $string subject to extract phrases from
      * @param int $len longest length of phrases to consider
+     * @param string $lang locale tag for stemming
      * @return array pairs of the form (phrase, number of occurrences)
      */
     static function extractPhrasesAndCount($string,
-        $len =  MAX_PHRASE_LEN)
+        $len =  MAX_PHRASE_LEN, $lang = NULL)
     {
-
         $phrases = array();

         for($i = 0; $i < $len; $i++) {
             $phrases =
-                array_merge($phrases,self::extractPhrasesOfLength($string, $i));
+                array_merge($phrases,
+                    self::extractPhrasesOfLength($string, $i, $lang));
         }

         $phrase_counts = array_count_values($phrases);
@@ -105,15 +118,17 @@ class PhraseParser
      *
      * @param string $string subject to extract phrases from
      * @param int $len length of phrases to consider
+     * @param string $lang locale tag for stemming
      * @return array pairs of the form (phrase, number of occurrences)
      */
-    static function extractPhrasesOfLength($string, $phrase_len)
+    static function extractPhrasesOfLength($string, $phrase_len, $lang = NULL)
     {
         $phrases = array();

         for($i = 0; $i < $phrase_len; $i++) {
             $phrases = array_merge($phrases,
-                self::extractPhrasesOfLengthOffset($string, $phrase_len, $i));
+                self::extractPhrasesOfLengthOffset($string,
+                    $phrase_len, $i, $lang));
         }

         return $phrases;
@@ -128,17 +143,21 @@ class PhraseParser
      * @param string $string subject to extract phrases from
      * @param int $len length of phrases to consider
      * @param int $offset the first word to begin with
+     * @param string $lang locale tag for stemming
      * @return array pairs of the form (phrase, number of occurrences)
      */
     static function extractPhrasesOfLengthOffset($string,
-        $phrase_len, $offset)
+        $phrase_len, $offset, $lang = NULL)
     {
-        $punct = "\.|\,|\:|\;|\"|\'|\`|\[|\]|\{|\}|\(|\)|\!|\||\&";
-        $words = mb_split("[[:space:]]|".$punct, $string);
+        $words = mb_split("[[:space:]]|".PUNCT, $string);

         $stems = array();

-
+        if(isset(self::$STEMMERS[$lang])) {
+            $stemmer = self::$STEMMERS[$lang];
+        } else {
+            $stemmer = NULL;
+        }
         for($i = $offset; $i < count($words); $i++) {
             if($words[$i] == "") {continue;}

@@ -149,8 +168,9 @@ class PhraseParser
             }
             $pre_stem = mb_strtolower($words[$i]);

-            if(strlen($pre_stem) == mb_strlen($pre_stem)) {
-                $stem = PorterStemmer::stem($pre_stem);
+
+            if($stemmer != NULL) {
+                $stem = $stemmer::stem($pre_stem);
             } else {
                 $stem = $pre_stem;
             }
diff --git a/lib/processors/html_processor.php b/lib/processors/html_processor.php
index 9aef48aff..732000193 100755
--- a/lib/processors/html_processor.php
+++ b/lib/processors/html_processor.php
@@ -72,10 +72,11 @@ class HtmlProcessor extends TextProcessor
         if(is_string($page)) {
             $dom = self::dom($page);
             if($dom !==false && self::checkMetaRobots($dom)) {
+                $summary[self::LANG] = self::lang($dom);
                 $summary[self::TITLE] = self::title($dom);
                 $summary[self::DESCRIPTION] = self::description($dom);
                 $summary[self::LINKS] = self::links($dom, $url);
-
+                $summary[self::PAGE] = $page;
                 if(strlen($summary[self::DESCRIPTION] . $summary[self::TITLE])
                     == 0 && count($summary[self::LINKS]) == 0) {
                     //maybe not html? treat as text still try to get urls
@@ -88,9 +89,7 @@ class HtmlProcessor extends TextProcessor

     }

-    static function processDom($dom, $url)
-    {
-    }
+

     /**
      * Return a document object based on a string containing the contents of
@@ -132,6 +131,23 @@ class HtmlProcessor extends TextProcessor
         return true;
     }

+    /**
+     *  Determines the language of the html document by looking at the lang
+     *  attribute of its root html element
+     *
+     *  @param object $dom - a document object to check the language of
+     *  @return string language tag for guessed language; empty string if
+     *      the document has no root html element or no lang attribute
+     */
+    static function lang($dom)
+    {
+        $xpath = new DOMXPath($dom);
+        $html = $xpath->evaluate("/html");
+        if(!is_object($html) || $html->length == 0) {
+            return "";
+        }
+        return $html->item(0)->getAttribute('lang');
+    }
+
     /**
      *  Returns html head title of a webpage based on its document object
      *
diff --git a/lib/processors/rss_processor.php b/lib/processors/rss_processor.php
index 6b908b9b6..0ea3481b4 100644
--- a/lib/processors/rss_processor.php
+++ b/lib/processors/rss_processor.php
@@ -73,6 +73,7 @@ class RssProcessor extends TextProcessor
             $dom = self::dom($page);

             if($dom !==false) {
+                $summary[self::LANG] = self::lang($dom);
                 $summary[self::TITLE] = self::title($dom);
                 $summary[self::DESCRIPTION] = self::description($dom);
                 $summary[self::LINKS] = self::links($dom, $url);
@@ -84,12 +85,28 @@ class RssProcessor extends TextProcessor
                 }
             }
         }
-
         return $summary;

     }

+    /**
+     *  Determines the language of the rss document by looking at the channel
+     *  language tag
+     *
+     *  @param object $dom - a document object to check the language of
+     *  @return string language tag for guessed language, NULL if none given
+     */

+    static function lang($dom)
+    {
+        $xpath = new DOMXPath($dom);
+        $languages = $xpath->evaluate("/rss/channel/language");
+        // an empty node list means the channel declares no language
+        if(is_object($languages) && $languages->length > 0) {
+            return $languages->item(0)->textContent;
+        }
+        return NULL;
+    }

     /**
      * Return a document object based on a string containing the contents of
diff --git a/lib/processors/sitemap_processor.php b/lib/processors/sitemap_processor.php
index 7257b9332..f3be9eded 100644
--- a/lib/processors/sitemap_processor.php
+++ b/lib/processors/sitemap_processor.php
@@ -80,6 +80,9 @@ class SitemapProcessor extends TextProcessor
                     $summary = parent::process($page, $url);
                 }
                 $summary[self::JUST_METAS] = true;
+            } else {
+                $summary = parent::process($page, $url);
+                $summary[self::JUST_METAS] = true;
             }
         }

diff --git a/lib/processors/xml_processor.php b/lib/processors/xml_processor.php
index 0574d5e23..5278c876c 100644
--- a/lib/processors/xml_processor.php
+++ b/lib/processors/xml_processor.php
@@ -89,7 +89,6 @@ class XmlProcessor extends TextProcessor
             $root_name = isset($dom->documentElement->nodeName) ?
                 $dom->documentElement->nodeName : "";
             unset($dom);
-
             switch ($root_name)
             {
                 case "rss":
diff --git a/lib/porter_stemmer.php b/lib/stemmers/en_stemmer.php
similarity index 99%
rename from lib/porter_stemmer.php
rename to lib/stemmers/en_stemmer.php
index 50521d70f..54f57eb02 100755
--- a/lib/porter_stemmer.php
+++ b/lib/stemmers/en_stemmer.php
@@ -46,7 +46,7 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
  * @subpackage library
  */

-class PorterStemmer
+class EnStemmer
 {

     /**
diff --git a/locale/ar/configure.ini b/locale/ar/configure.ini
index 7341daf77..b2ca140e1 100755
--- a/locale/ar/configure.ini
+++ b/locale/ar/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = ""
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = ""
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = ""
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = ""
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = ""
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = ""
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = ""
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = ""
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = ""
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = ""
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = ""
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = ""
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = ""
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = ""
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = ""
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = ""
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = ""
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = ""
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = ""
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = ""
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = ""
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = ""
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = ""
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = ""
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = ""
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = ""
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = ""
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = ""
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = ""
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = ""
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = ""
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = ""
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = ""
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = ""
 ; configure_element.php line: 204
 configure_element_cache_link = ""
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = ""
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = ""
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = ""
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = ""
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = ""
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = ""
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = ""
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = ""
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = ""
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = ""
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = ""
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = ""
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = ""
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = ""
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = ""
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = ""
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = ""
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = ""
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = ""
 ; search_view.php line: 145
 search_view_score = ""
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = ""
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = ""
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = ""
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = ""
 ;
 ; settings_view.php line: 76
diff --git a/locale/de/configure.ini b/locale/de/configure.ini
index 5a0ec1b10..20bb1e1f8 100755
--- a/locale/de/configure.ini
+++ b/locale/de/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = ""
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = ""
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = ""
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = ""
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = ""
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = ""
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = ""
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = ""
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = ""
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = ""
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = ""
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = ""
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = ""
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = ""
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = ""
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = ""
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = ""
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = ""
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = ""
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = ""
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = ""
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = ""
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = ""
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = ""
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = ""
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = ""
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = ""
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = ""
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = ""
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = ""
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = ""
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = ""
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = ""
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = ""
 ; configure_element.php line: 204
 configure_element_cache_link = ""
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = ""
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = ""
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = ""
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = ""
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = ""
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = ""
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = ""
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = ""
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = ""
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = ""
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = ""
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = ""
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = ""
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = ""
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = ""
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = ""
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = ""
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = ""
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = ""
 ; search_view.php line: 145
 search_view_score = ""
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = ""
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = ""
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = ""
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = ""
 ;
 ; settings_view.php line: 76
diff --git a/locale/en-US/configure.ini b/locale/en-US/configure.ini
index 9a972dd4a..b4f5ac38b 100755
--- a/locale/en-US/configure.ini
+++ b/locale/en-US/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = "Use options below"
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = "Previous Crawl:"
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = "Breadth First"
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = "Page Importance"
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = "Updating Seed Site Info!"
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = "Select Crawl"
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = "Unnamed Crawl"
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = "Crawl Mix Created!"
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = "Mix to Delete Does not Exist!"
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = "Delete"
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = "Crawl Mix Changes Saved!"
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = "Setting Crawl To Use as Index"
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = "Mix to Delete Does not Exist!"
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = "Crawl Mix Deleted!"
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = "Select Locale"
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = "Locale Added!"
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = "Locale Does Not Exist!"
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = "Locale Deleted"
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = "Locale Strings Updated!"
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = "configs/config.php not web server writable."
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = "Work directory needs to be writable by web server. "
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = "php.ini directive post_max_size needs to be at least 16M"
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = "The following required items were missing: %s"
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = "The following optional items were missing: %s"
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = "Check Passed."
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = "Using configs/local_config.php so changing work directory above may not work."
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = "Work Directory Set! You may need to re-login!"
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = "Please Name Your robot"
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = "Working Directory and Profile Created!"
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = "Unable to Update config.php File!"
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = "Unable to Create Profile!"
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = "Work Directory is Invalid! Cannot Create Profile!"
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = "Work Directory is Invalid! Cannot Create Profile!"
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = "Problem Updating Database!"
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = "Profile Updated!"
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = "There was a Problem Updating Profile!"
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = "Please Describe Your Robot"
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = "Search Auxiliary Links Displayed"
 ; configure_element.php line: 204
 configure_element_cache_link = "Cache"
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = "Similar"
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = "Inlinks"
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = "IP address"
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = "Crawl Robot Set-up"
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = "Crawl Robot Name:"
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = "Robot Description"
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = "Submit"
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = "Back"
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = "Edit Crawl Options"
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = "Get Crawl Options From:"
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = "Web Crawl"
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = "Archive Crawl"
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = "Crawl Order:"
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = "Restrict Sites By Url:"
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = "Allowed To Crawl Sites"
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = "Disallowed Sites"
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = "Seed Sites"
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = "Crawl or Arc Folder to Re-index:"
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = "Meta Words"
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = "Word"
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = "URL Pattern"
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = "Word"
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = "URL Pattern"
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = "Save Options"
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = "Rel: %s "
 ; search_view.php line: 145
 search_view_score = "Score %s"
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = "Cached"
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = "View&nbsp;as&nbsp;text"
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = "Similar"
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = "Inlinks"
 ;
 ; settings_view.php line: 76
diff --git a/locale/en-US/statistics.txt b/locale/en-US/statistics.txt
index 5a165df53..b6bef56f0 100755
--- a/locale/en-US/statistics.txt
+++ b/locale/en-US/statistics.txt
@@ -1 +1 @@
-d:100;
\ No newline at end of file
+d:99;
\ No newline at end of file
diff --git a/locale/es/configure.ini b/locale/es/configure.ini
index ae51e0557..68c631b15 100755
--- a/locale/es/configure.ini
+++ b/locale/es/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = ""
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = ""
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = ""
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = ""
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = ""
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = ""
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = ""
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = ""
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = ""
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = ""
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = ""
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = ""
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = ""
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = ""
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = ""
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = ""
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = ""
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = ""
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = ""
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = ""
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = ""
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = ""
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = ""
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = ""
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = ""
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = ""
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = ""
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = ""
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = ""
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = ""
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = ""
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = ""
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = ""
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = ""
 ; configure_element.php line: 204
 configure_element_cache_link = ""
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = ""
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = ""
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = ""
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = ""
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = ""
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = ""
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = ""
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = ""
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = ""
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = ""
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = ""
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = ""
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = ""
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = ""
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = ""
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = ""
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = ""
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = ""
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = ""
 ; search_view.php line: 145
 search_view_score = ""
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = ""
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = ""
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = ""
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = ""
 ;
 ; settings_view.php line: 76
diff --git a/locale/fr-FR/configure.ini b/locale/fr-FR/configure.ini
index 8595e2036..d94384292 100755
--- a/locale/fr-FR/configure.ini
+++ b/locale/fr-FR/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = ""
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = ""
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = ""
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = ""
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = ""
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = ""
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = ""
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = ""
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = ""
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = ""
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = ""
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = ""
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = ""
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = ""
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = ""
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = ""
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = ""
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = ""
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = ""
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = ""
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = ""
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = ""
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = ""
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = ""
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = ""
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = ""
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = ""
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = ""
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = ""
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = ""
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = ""
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = ""
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = ""
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = ""
 ; configure_element.php line: 204
 configure_element_cache_link = ""
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = ""
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = ""
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = ""
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = ""
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = ""
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = ""
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = ""
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = ""
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = ""
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = ""
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = ""
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = ""
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = ""
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = ""
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = ""
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = ""
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = ""
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = ""
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = "Pertinence: %s"
 ; search_view.php line: 145
 search_view_score = "Total: %s"
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = "En&nbsp;Cache"
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = "Version&nbsp;texte"
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = "Pages&nbsp;similaires"
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = ""
 ;
 ; settings_view.php line: 76
diff --git a/locale/he/configure.ini b/locale/he/configure.ini
index ec49de587..ebbfe4d2d 100755
--- a/locale/he/configure.ini
+++ b/locale/he/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = ""
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = ""
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = ""
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = ""
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = ""
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = ""
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = ""
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = ""
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = ""
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = ""
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = ""
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = ""
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = ""
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = ""
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = ""
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = ""
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = ""
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = ""
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = ""
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = ""
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = ""
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = ""
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = ""
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = ""
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = ""
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = ""
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = ""
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = ""
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = ""
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = ""
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = ""
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = ""
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = ""
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = ""
 ; configure_element.php line: 204
 configure_element_cache_link = ""
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = ""
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = ""
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = ""
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = ""
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = ""
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = ""
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = ""
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = ""
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = ""
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = ""
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = ""
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = ""
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = ""
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = ""
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = ""
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = ""
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = ""
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = ""
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = ""
 ; search_view.php line: 145
 search_view_score = ""
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = ""
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = ""
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = ""
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = ""
 ;
 ; settings_view.php line: 76
diff --git a/locale/in-ID/configure.ini b/locale/in-ID/configure.ini
index eaa76a04e..0a4a6af5f 100755
--- a/locale/in-ID/configure.ini
+++ b/locale/in-ID/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = ""
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = ""
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = ""
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = ""
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = ""
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = ""
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = ""
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = ""
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = ""
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = ""
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = ""
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = ""
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = "Pilih name locale"
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = "Locale telah ditambah"
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = "Locale tidak ditemukan"
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = "Locale telah dihapus"
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = ""
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = ""
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = ""
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = ""
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = ""
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = ""
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = ""
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = ""
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = ""
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = ""
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = ""
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = ""
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = ""
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = ""
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = ""
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = ""
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = ""
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = ""
 ; configure_element.php line: 204
 configure_element_cache_link = ""
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = ""
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = ""
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = ""
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = ""
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = ""
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = ""
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = ""
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = ""
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = ""
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = ""
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = ""
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = ""
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = ""
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = ""
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = ""
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = ""
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = ""
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = ""
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = ""
 ; search_view.php line: 145
 search_view_score = ""
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = ""
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = ""
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = ""
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = ""
 ;
 ; settings_view.php line: 76
diff --git a/locale/it/configure.ini b/locale/it/configure.ini
index cda233be8..3e58e164b 100755
--- a/locale/it/configure.ini
+++ b/locale/it/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = ""
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = ""
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = ""
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = ""
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = ""
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = ""
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = ""
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = ""
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = ""
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = ""
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = ""
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = ""
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = ""
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = ""
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = ""
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = ""
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = ""
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = ""
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = ""
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = ""
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = ""
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = ""
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = ""
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = ""
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = ""
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = ""
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = ""
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = ""
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = ""
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = ""
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = ""
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = ""
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = ""
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = ""
 ; configure_element.php line: 204
 configure_element_cache_link = ""
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = ""
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = ""
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = ""
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = ""
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = ""
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = ""
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = ""
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = ""
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = ""
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = ""
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = ""
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = ""
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = ""
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = ""
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = ""
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = ""
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = ""
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = ""
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = ""
 ; search_view.php line: 145
 search_view_score = ""
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = ""
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = ""
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = ""
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = ""
 ;
 ; settings_view.php line: 76
diff --git a/locale/ja/configure.ini b/locale/ja/configure.ini
index b09a7258c..fce75371f 100755
--- a/locale/ja/configure.ini
+++ b/locale/ja/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = ""
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = ""
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = "幅優先"
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = "ページの重要性"
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = "シッド情報の更新"
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = ""
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = ""
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = ""
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = ""
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = ""
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = "指数のための検索設定する。"
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = ""
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = "選択ローケル"
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = "ローケルが追加しました"
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = "ローケルは存在しません"
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = "ローケルを削除しました"
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = "ローケルストリングを編集しました"
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = ""
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = ""
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = ""
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = ""
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = ""
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = ""
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = ""
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = "作業ディレクトリの設定しました。もう一度ログインしてください。"
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = "ボット名を入力してください。"
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = "作業ディレクトリとプロフィールの作成しました。"
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = "config.phpファイルのできない。更新"
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = "プロフィールを作成できない。"
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = "無効な作業ディレクト。プロフィールを作成できない。"
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = "無効な作業ディレクト。プロフィールを作成できない。"
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = "ディータベースの更新ない"
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = "プロフィールの変更できました。"
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = "プロフィールの変更できない。"
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = "ロボットの説明してください。"
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = ""
 ; configure_element.php line: 204
 configure_element_cache_link = ""
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = ""
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = ""
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = ""
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = "検索ロボット設定"
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = "ロボット名"
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = "ロボット説明"
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = "サブミット"
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = "戻る"
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = "検索オプションの編集"
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = ""
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = ""
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = ""
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = "検索の順序"
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = "URLで制限"
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = "検索ができます"
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = "検索はできません"
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = "シッドサイト"
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = ""
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = ""
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = "保存オプション"
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = "関連:%s"
 ; search_view.php line: 145
 search_view_score = "スコア %s"
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = "キャッシューしました。"
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = "テクストビュー"
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = "同じビュー"
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = ""
 ;
 ; settings_view.php line: 76
diff --git a/locale/ja/statistics.txt b/locale/ja/statistics.txt
index 2c43a0adb..d1d9ed5ab 100755
--- a/locale/ja/statistics.txt
+++ b/locale/ja/statistics.txt
@@ -1 +1 @@
-d:73;
\ No newline at end of file
+d:72;
\ No newline at end of file
diff --git a/locale/ko/configure.ini b/locale/ko/configure.ini
index 3e355fd94..26375e4ef 100755
--- a/locale/ko/configure.ini
+++ b/locale/ko/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = ""
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = ""
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = "너비 우선"
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = "페이지 중요성"
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = "씨드 사이트 업데이트"
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = ""
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = ""
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = ""
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = ""
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = ""
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = "크롤을 인덱스로써 사용하기 지정"
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = ""
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = "로케일을 선택하여 주십시요."
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = "로케일 추가!!"
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = "로케일이 존재하지 않습니다."
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = "로케일을 삭제 하였습니다."
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = "로케일 지정 문자열을 업데이트 하였습니다."
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = ""
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = ""
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = ""
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = ""
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = ""
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = ""
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = ""
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = "작업 디렉토리가 지정 됐습니다. 다시 로그인이 필요할수 있습니다."
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = "로봇 이름을 정해 주십시요."
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = "작업 디렉토리와 프로필이 생성됐습니다."
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = "config.php 파일을  업데이트 실패했습니다."
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = "프로필을 생성할수 없습니다."
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = "작업 디렉토리가 올바르지 않습니다. 프로필을 생성할수 없습니다."
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = "작업 디렉토리가 올바르지 않습니다. 프로필을 생성할수 없습니다."
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = "데이터베이스를 업데이트하는데 문제가 발생했습니다."
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = "프로필을 업데이트 했습니다."
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = "프로필을 업데이트하는데 문제가 발생했습니다."
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = "당신의 로봇을 기술해 주십시요."
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = ""
 ; configure_element.php line: 204
 configure_element_cache_link = ""
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = ""
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = ""
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = ""
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = "크롤 로봇 설정"
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = "로봇 기술 "
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = "크롤 로봇 이름:"
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = "제출 "
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = "뒤"
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = "크롤 옵션들 편집"
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = ""
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = ""
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = ""
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = "크롤 순서:"
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = "사이트들을 주소로 제한:"
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = "크롤을 허가한 사이트들"
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = "허가 하지않은 사이트들"
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = "씨드 사이트들"
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = ""
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = ""
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = "옵션들 저장하기"
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = "관련성: %s "
 ; search_view.php line: 145
 search_view_score = "점수 %s"
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = "캐시 됀것"
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = "일반 텍스트로써 보기"
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = "유사성"
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = "인링크"
 ;
 ; settings_view.php line: 76
diff --git a/locale/ko/statistics.txt b/locale/ko/statistics.txt
index 23fc7ec4f..77bbfe053 100755
--- a/locale/ko/statistics.txt
+++ b/locale/ko/statistics.txt
@@ -1 +1 @@
-d:75;
\ No newline at end of file
+d:74;
\ No newline at end of file
diff --git a/locale/pl/configure.ini b/locale/pl/configure.ini
index e3a01fb2f..2d54ef5d6 100755
--- a/locale/pl/configure.ini
+++ b/locale/pl/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = ""
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = ""
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = ""
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = ""
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = ""
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = ""
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = ""
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = ""
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = ""
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = ""
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = ""
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = ""
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = ""
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = ""
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = ""
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = ""
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = ""
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = ""
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = ""
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = ""
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = ""
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = ""
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = ""
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = ""
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = ""
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = ""
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = ""
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = ""
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = ""
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = ""
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = ""
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = ""
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = ""
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = ""
 ; configure_element.php line: 204
 configure_element_cache_link = ""
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = ""
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = ""
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = ""
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = ""
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = ""
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = ""
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = ""
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = ""
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = ""
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = ""
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = ""
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = ""
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = ""
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = ""
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = ""
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = ""
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = ""
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = ""
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = ""
 ; search_view.php line: 145
 search_view_score = ""
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = ""
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = ""
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = ""
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = ""
 ;
 ; settings_view.php line: 76
diff --git a/locale/pt/configure.ini b/locale/pt/configure.ini
index a86ee5e49..358ef6893 100755
--- a/locale/pt/configure.ini
+++ b/locale/pt/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = ""
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = ""
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = ""
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = ""
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = ""
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = ""
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = ""
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = ""
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = ""
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = ""
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = ""
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = ""
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = ""
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = ""
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = ""
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = ""
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = ""
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = ""
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = ""
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = ""
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = ""
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = ""
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = ""
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = ""
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = ""
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = ""
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = ""
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = ""
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = ""
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = ""
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = ""
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = ""
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = ""
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = ""
 ; configure_element.php line: 204
 configure_element_cache_link = ""
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = ""
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = ""
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = ""
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = ""
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = ""
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = ""
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = ""
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = ""
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = ""
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = ""
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = ""
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = ""
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = ""
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = ""
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = ""
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = ""
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = ""
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = ""
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = ""
 ; search_view.php line: 145
 search_view_score = ""
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = ""
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = ""
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = ""
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = ""
 ;
 ; settings_view.php line: 76
diff --git a/locale/ru/configure.ini b/locale/ru/configure.ini
index d1a985623..63e9d821d 100755
--- a/locale/ru/configure.ini
+++ b/locale/ru/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = ""
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = ""
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = ""
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = ""
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = ""
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = ""
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = ""
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = ""
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = ""
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = ""
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = ""
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = ""
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = ""
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = ""
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = ""
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = ""
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = ""
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = ""
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = ""
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = ""
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = ""
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = ""
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = ""
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = ""
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = ""
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = ""
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = ""
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = ""
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = ""
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = ""
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = ""
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = ""
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = ""
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = ""
 ; configure_element.php line: 204
 configure_element_cache_link = ""
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = ""
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = ""
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = ""
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = ""
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = ""
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = ""
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = ""
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = ""
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = ""
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = ""
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = ""
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = ""
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = ""
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = ""
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = ""
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = ""
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = ""
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = ""
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = ""
 ; search_view.php line: 145
 search_view_score = ""
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = ""
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = ""
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = ""
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = ""
 ;
 ; settings_view.php line: 76
diff --git a/locale/th/configure.ini b/locale/th/configure.ini
index fc83e6e22..5183309b8 100755
--- a/locale/th/configure.ini
+++ b/locale/th/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = ""
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = ""
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = ""
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = ""
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = ""
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = ""
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = ""
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = ""
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = ""
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = ""
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = ""
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = ""
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = ""
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = ""
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = ""
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = ""
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = ""
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = ""
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = ""
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = ""
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = ""
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = ""
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = ""
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = ""
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = ""
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = ""
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = ""
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = ""
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = ""
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = ""
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = ""
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = ""
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = ""
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = ""
 ; configure_element.php line: 204
 configure_element_cache_link = ""
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = ""
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = ""
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = ""
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = ""
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = ""
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = ""
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = ""
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = ""
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = ""
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = ""
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = ""
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = ""
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = ""
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = ""
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = ""
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = ""
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = ""
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = ""
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = ""
 ; search_view.php line: 145
 search_view_score = ""
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = ""
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = ""
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = ""
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = ""
 ;
 ; settings_view.php line: 76
diff --git a/locale/vi-VN/configure.ini b/locale/vi-VN/configure.ini
index a8262559b..3b153b11d 100755
--- a/locale/vi-VN/configure.ini
+++ b/locale/vi-VN/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = ""
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = ""
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = "Bề rộng đầu ti&ecirc;n"
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = "Trang quan trọng"
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = "Cập nhật th&ocirc;ng tin trang web hạt giống"
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = ""
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = ""
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = ""
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = ""
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = ""
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = "Thiết lập thu thập dữ liệu để sử dụng l&agrave;m chỉ mục"
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = ""
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = "Chọn miền địa phương"
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = "Miền địa phương th&ecirc;m v&agrave;o"
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = "Miền địa phương kh&ocirc;ng tồn tại"
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = "X&oacute;a miền địa phương"
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = "Chuỗi Địa phương được cập nhật"
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = ""
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = ""
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = ""
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = ""
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = ""
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = ""
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = ""
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = "C&ocirc;ng việc thiết lập thư mục bị đ&ocirc;ng cứng (Bạn c&oacute; thể cần phải đăng nhập)"
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = "Đặt t&ecirc;n cho r&ocirc; b&ocirc; của bạn"
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = "Thư mục l&agrave;m việc v&agrave; hồ sơ được tạo ra"
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = "Kh&ocirc;ng thể cập nhật hồ sơ config.php"
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = "Kh&ocirc;ng thể tạo hồ sơ"
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = "C&ocirc;ng t&aacute;c thư mục kh&ocirc;ng hợp lệ"
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = "C&ocirc;ng t&aacute;c thư mục kh&ocirc;ng hợp lệ"
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = "Vấn đề cập nhật cơ sở dữ liệu"
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = "Hồ sơ được cập nhật"
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = "C&oacute; sự trở ngaị về việc cập nhật hồ sơ "
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = "Diễn tả r&ocirc; b&ocirc; của bạn"
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = ""
 ; configure_element.php line: 204
 configure_element_cache_link = ""
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = ""
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = ""
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = ""
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = ""
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = ""
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = "M&ocirc; tả r&ocirc;-bốt"
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = ""
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = "Trở lại"
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = ""
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = ""
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = ""
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = ""
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = ""
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = ""
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = ""
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = ""
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = ""
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = "Lưu những lựa chọn"
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = "Th&iacute;ch hợp:"
 ; search_view.php line: 145
 search_view_score = "Điểm: %s"
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = "Trang&nbsp;gốc"
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = "Trang&nbsp;Web&nbsp;Bắng Chữ"
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = "Tương&nbsp;Tự"
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = ""
 ;
 ; settings_view.php line: 76
diff --git a/locale/zh-CN/configure.ini b/locale/zh-CN/configure.ini
index 709186d4a..73aaa3def 100755
--- a/locale/zh-CN/configure.ini
+++ b/locale/zh-CN/configure.ini
@@ -154,109 +154,109 @@ admin_controller_use_below = ""
 ; admin_controller.php line: 798
 admin_controller_previous_crawl = ""
 ;
-; admin_controller.php line: 820
+; admin_controller.php line: 829
 admin_controller_breadth_first = "深度優先"
 ;
-; admin_controller.php line: 822
+; admin_controller.php line: 831
 admin_controller_page_importance = "網頁重要性"
 ;
-; admin_controller.php line: 885
+; admin_controller.php line: 894
 admin_controller_update_seed_info = ""
 ;
-; admin_controller.php line: 974
+; admin_controller.php line: 983
 admin_controller_select_crawl = ""
 ;
-; admin_controller.php line: 998
+; admin_controller.php line: 1007
 admin_controller_unnamed = ""
 ;
-; admin_controller.php line: 1003
+; admin_controller.php line: 1012
 admin_controller_mix_created = ""
 ;
-; admin_controller.php line: 1012
+; admin_controller.php line: 1021
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1034
+; admin_controller.php line: 1043
 editcrawl_view_delete = ""
 ;
-; admin_controller.php line: 1079
+; admin_controller.php line: 1088
 admin_controller_mix_saved = ""
 ;
-; admin_controller.php line: 1085
+; admin_controller.php line: 1094
 admin_controller_set_index = ""
 ;
-; admin_controller.php line: 1095
+; admin_controller.php line: 1104
 admin_controller_mix_doesnt_exists = ""
 ;
-; admin_controller.php line: 1103
+; admin_controller.php line: 1112
 admin_controller_mix_deleted = ""
 ;
-; admin_controller.php line: 1139
+; admin_controller.php line: 1148
 admin_controller_select_localename = ""
 ;
-; admin_controller.php line: 1182
+; admin_controller.php line: 1191
 admin_controller_locale_added = ""
 ;
-; admin_controller.php line: 1189
+; admin_controller.php line: 1198
 admin_controller_localename_doesnt_exists = ""
 ;
-; admin_controller.php line: 1198
+; admin_controller.php line: 1207
 admin_controller_localename_deleted = ""
 ;
-; admin_controller.php line: 1218
+; admin_controller.php line: 1227
 admin_controller_localestrings_updated = ""
 ;
-; admin_controller.php line: 1272
+; admin_controller.php line: 1281
 admin_controller_no_write_config_php = ""
 ;
-; admin_controller.php line: 1277
+; admin_controller.php line: 1286
 admin_controller_no_write_work_dir = ""
 ;
-; admin_controller.php line: 1282
+; admin_controller.php line: 1291
 admin_controller_post_size_small = ""
 ;
-; admin_controller.php line: 1288
+; admin_controller.php line: 1297
 admin_controller_missing_required = ""
 ;
-; admin_controller.php line: 1304
+; admin_controller.php line: 1313
 admin_controller_missing_optional = ""
 ;
-; admin_controller.php line: 1309
+; admin_controller.php line: 1318
 admin_controller_check_passed = ""
 ;
-; admin_controller.php line: 1314
+; admin_controller.php line: 1323
 admin_controller_using_local_config = ""
 ;
-; admin_controller.php line: 1384
+; admin_controller.php line: 1393
 admin_controller_configure_work_dir_set = ""
 ;
-; admin_controller.php line: 1396
+; admin_controller.php line: 1405
 admin_controller_name_your_bot = ""
 ;
-; admin_controller.php line: 1405
+; admin_controller.php line: 1414
 admin_controller_configure_work_profile_made = ""
 ;
-; admin_controller.php line: 1413
+; admin_controller.php line: 1422
 admin_controller_configure_no_set_config = ""
 ;
-; admin_controller.php line: 1424
+; admin_controller.php line: 1433
 admin_controller_configure_no_create_profile = ""
 ;
-; admin_controller.php line: 1433
+; admin_controller.php line: 1442
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1444
+; admin_controller.php line: 1453
 admin_controller_configure_work_dir_invalid = ""
 ;
-; admin_controller.php line: 1518
+; admin_controller.php line: 1528
 admin_controller_configure_no_change_db = ""
 ;
-; admin_controller.php line: 1532
+; admin_controller.php line: 1542
 admin_controller_configure_profile_change = ""
 ;
-; admin_controller.php line: 1546
+; admin_controller.php line: 1556
 admin_controller_configure_no_change_profile = ""
 ;
-; admin_controller.php line: 1582
+; admin_controller.php line: 1592
 admin_controller_describe_robot = ""
 ;
 ; search_controller.php line: 119
@@ -437,25 +437,25 @@ configure_element_search_results = ""
 ; configure_element.php line: 204
 configure_element_cache_link = ""
 ;
-; configure_element.php line: 209
+; configure_element.php line: 210
 configure_element_similar_link = ""
 ;
-; configure_element.php line: 214
+; configure_element.php line: 215
 configure_element_in_link = ""
 ;
-; configure_element.php line: 219
+; configure_element.php line: 220
 configure_element_ip_link = ""
 ;
-; configure_element.php line: 223
+; configure_element.php line: 224
 configure_element_crawl_robot = ""
 ;
-; configure_element.php line: 225
+; configure_element.php line: 226
 configure_element_robot_name = ""
 ;
-; configure_element.php line: 232
+; configure_element.php line: 233
 configure_element_robot_description = ""
 ;
-; configure_element.php line: 241
+; configure_element.php line: 242
 configure_element_submit = ""
 ;
 ; crawloptions_element.php line: 62
@@ -464,40 +464,49 @@ crawloptions_element_back_to_manage = ""
 ; crawloptions_element.php line: 64
 crawloptions_element_edit_crawl_options = ""
 ;
-; crawloptions_element.php line: 74
+; crawloptions_element.php line: 75
 crawloptions_element_load_options = ""
 ;
-; crawloptions_element.php line: 79
+; crawloptions_element.php line: 83
+crawloptions_element_web_crawl = ""
+;
+; crawloptions_element.php line: 87
+crawloptions_element_archive_crawl = ""
+;
+; crawloptions_element.php line: 92
 crawloptions_element_crawl_order = ""
 ;
-; crawloptions_element.php line: 85
+; crawloptions_element.php line: 98
 crawloptions_element_restrict_by_url = ""
 ;
-; crawloptions_element.php line: 92
+; crawloptions_element.php line: 105
 crawloptions_element_allowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 97
+; crawloptions_element.php line: 110
 crawloptions_element_disallowed_to_crawl = ""
 ;
-; crawloptions_element.php line: 103
+; crawloptions_element.php line: 116
 crawloptions_element_seed_sites = ""
 ;
-; crawloptions_element.php line: 108
+; crawloptions_element.php line: 123
+crawloptions_element_reindex_crawl = ""
+;
+; crawloptions_element.php line: 130
 crawloptions_element_meta_words = ""
 ;
-; crawloptions_element.php line: 110
+; crawloptions_element.php line: 132
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 112
+; crawloptions_element.php line: 134
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 118
+; crawloptions_element.php line: 140
 crawloptions_element_word = ""
 ;
-; crawloptions_element.php line: 124
+; crawloptions_element.php line: 146
 crawloptions_element_url_pattern = ""
 ;
-; crawloptions_element.php line: 143
+; crawloptions_element.php line: 165
 crawloptions_element_save_options = ""
 ;
 ; editlocales_element.php line: 62
@@ -786,16 +795,16 @@ search_view_relevancy = "關聯度:  %s 趴"
 ; search_view.php line: 145
 search_view_score = "分數"
 ;
-; search_view.php line: 158
+; search_view.php line: 159
 search_view_cache = ""
 ;
-; search_view.php line: 161
+; search_view.php line: 162
 search_view_as_text = ""
 ;
-; search_view.php line: 168
+; search_view.php line: 174
 search_view_similar = "相似"
 ;
-; search_view.php line: 173
+; search_view.php line: 184
 search_view_inlink = ""
 ;
 ; settings_view.php line: 76
diff --git a/models/crawl_model.php b/models/crawl_model.php
index 34243e3f4..059be96e9 100755
--- a/models/crawl_model.php
+++ b/models/crawl_model.php
@@ -175,13 +175,18 @@ class CrawlModel extends Model implements CrawlConstants

     /**
      * Gets a list of all index archives of crawls that have been conducted
+     *
+     * @param bool $return_arc_bundles whether index bundles used for indexing
+     *      arc or other archive bundles should be included in the list
+     * @param bool $return_recrawls whether index archive bundles generated as
+     *      a result of recrawling should be included in the result
      *
      * @return array Available IndexArchiveBundle directories and
-     * their meta information this meta information includes the time of the
-     * crawl, its description, the number of pages downloaded, and the number
-     * of partitions used in storing the inverted index
+     *      their meta information; this meta information includes the time of
+     *      the crawl, its description, the number of pages downloaded, and the
+     *      number of partitions used in storing the inverted index
      */
-    function getCrawlList()
+    function getCrawlList($return_arc_bundles = false, $return_recrawls = false)
     {
         $list = array();
         $dirs = glob(CRAWL_DIR.'/cache/*', GLOB_ONLYDIR);
@@ -194,7 +199,23 @@ class CrawlModel extends Model implements CrawlConstants
                     substr($pre_timestamp, strlen(self::index_data_base_name));
                 $info = IndexArchiveBundle::getArchiveInfo($dir);
                 $index_info = unserialize($info['DESCRIPTION']);
-                $crawl['DESCRIPTION'] = $index_info['DESCRIPTION'];
+                $crawl['DESCRIPTION'] = "";
+                if(!$return_arc_bundles && isset($index_info['ARCFILE'])) {
+                    continue;
+                } else if ($return_arc_bundles
+                    && isset($index_info['ARCFILE'])) {
+                    $crawl['DESCRIPTION'] = "ARCFILE::";
+                }
+                if(!$return_recrawls &&
+                    isset($index_info[self::CRAWL_TYPE]) &&
+                    $index_info[self::CRAWL_TYPE] == self::ARCHIVE_CRAWL) {
+                    continue;
+                } else if($return_recrawls  &&
+                    isset($index_info[self::CRAWL_TYPE]) &&
+                    $index_info[self::CRAWL_TYPE] == self::ARCHIVE_CRAWL) {
+                    $crawl['DESCRIPTION'] = "RECRAWL::";
+                }
+                $crawl['DESCRIPTION'] .= $index_info['DESCRIPTION'];
                 $crawl['VISITED_URLS_COUNT'] =
                     isset($info['VISITED_URLS_COUNT']) ?
                     $info['VISITED_URLS_COUNT'] : 0;
@@ -299,7 +320,7 @@ class CrawlModel extends Model implements CrawlConstants
     }

     /**
-     *  Returns whether the supplied timestamp corresponds to a crawl mix
+     * Returns whether the supplied timestamp corresponds to a crawl mix
      *
      * @param string timestamp of the requested crawl mix
      * @return bool true if it does; false otherwise
@@ -378,6 +399,12 @@ class CrawlModel extends Model implements CrawlConstants
             $index_info = unserialize($info['DESCRIPTION']);
             $seed_info['general']["restrict_sites_by_url"] =
                 $index_info[self::RESTRICT_SITES_BY_URL];
+            $seed_info['general']["crawl_type"] =
+                (isset($index_info[self::CRAWL_TYPE])) ?
+                $index_info[self::CRAWL_TYPE] : self::WEB_CRAWL;
+            $seed_info['general']["crawl_index"] =
+                (isset($index_info[self::CRAWL_INDEX])) ?
+                $index_info[self::CRAWL_INDEX] : '';
             $seed_info['general']["crawl_order"] =
                 $index_info[self::CRAWL_ORDER];
             $site_types = array(
@@ -431,6 +458,9 @@ class CrawlModel extends Model implements CrawlConstants
      */
     function setSeedInfo($info)
     {
+        if(!isset($info['general']['crawl_index'])) {
+            $info['general']['crawl_index']='12345678';
+        }
         $n = array();
         $n[] = <<<EOT
 ; ***** BEGIN LICENSE BLOCK *****
@@ -458,6 +488,8 @@ class CrawlModel extends Model implements CrawlConstants
 EOT;
         $n[] = '[general]';
         $n[] = "crawl_order = '".$info['general']['crawl_order']."';";
+        $n[] = "crawl_type = '".$info['general']['crawl_type']."';";
+        $n[] = "crawl_index = '".$info['general']['crawl_index']."';";
         $bool_string =
             ($info['general']['restrict_sites_by_url']) ? "true" : "false";
         $n[] = "restrict_sites_by_url = $bool_string;";
diff --git a/models/datasources/datasource_manager.php b/models/datasources/datasource_manager.php
index ad028b83e..59083677b 100755
--- a/models/datasources/datasource_manager.php
+++ b/models/datasources/datasource_manager.php
@@ -260,4 +260,3 @@ abstract class DatasourceManager

 }
 ?>
-
diff --git a/models/phrase_model.php b/models/phrase_model.php
index ea7b21360..cff0856b6 100755
--- a/models/phrase_model.php
+++ b/models/phrase_model.php
@@ -255,9 +255,10 @@ class PhraseModel extends Model
     {
         $phrase = " ".$phrase;
         $phrase_string = $phrase;
-        $meta_words = array('link\:', 'site\:',
-            'filetype\:', 'info\:', '\-',
-            'index\:', 'i\:', 'ip\:', 'weight\:', 'w\:', 'u\:');
+        $meta_words = array('link\:', 'site\:', 'version\:', 'modified\:',
+            'filetype\:', 'info\:', '\-', 'os\:', 'server\:', 'date\:',
+            'index\:', 'i\:', 'ip\:', 'weight\:', 'w\:', 'u\:',
+            'lang\:');
         $index_name = $this->index_name;
         $weight = 1;
         $found_metas = array();
@@ -265,7 +266,8 @@ class PhraseModel extends Model
         foreach($meta_words as $meta_word) {
             $pattern = "/(\s)($meta_word(\S)+)/";
             preg_match_all($pattern, $phrase, $matches);
-            if(in_array($meta_word, array('link\:', 'site\:',
+            if(in_array($meta_word, array('link\:', 'site\:', 'os\:',
+            'server\:', 'version\:', 'modified\:', 'date\:', 'lang\:',
             'filetype\:', 'ip\:', 'info\:', 'u\:') )) {
                 $found_metas = array_merge($found_metas, $matches[2]);
             } else if($meta_word == '\-') {
@@ -289,8 +291,8 @@ class PhraseModel extends Model
         $index_archive_name = self::index_data_base_name . $index_name;
         $index_archive = new IndexArchiveBundle(
             CRAWL_DIR.'/cache/'.$index_archive_name);
-        $punct = "\.|\,|\:|\;|\"|\'|\`|\[|\]|\{|\}|\(|\)|\!|\||\&";
-        $phrase_string = mb_ereg_replace($punct, " ", $phrase_string);
+
+        $phrase_string = mb_ereg_replace(PUNCT, " ", $phrase_string);
         $phrase_string = preg_replace("/(\s)+/", " ", $phrase_string);
         /*
             we search using the stemmed words, but we format snippets in the
@@ -298,7 +300,8 @@ class PhraseModel extends Model
          */
         $query_words = explode(" ", $phrase_string); //not stemmed
         $base_words =
-            array_keys(PhraseParser::extractPhrasesAndCount($phrase_string));
+            array_keys(PhraseParser::extractPhrasesAndCount($phrase_string,
+            MAX_PHRASE_LEN, getLocaleTag()));
             //stemmed
         $words = array_merge($base_words, $found_metas);
         if(isset($words) && count($words) == 1) {
@@ -372,7 +375,7 @@ class PhraseModel extends Model
      * pretty weak. For now we pick the $num many words which appear in the
      * fewest documents.
      *
-     * @param string $craw_item a page summary
+     * @param string $crawl_item a page summary
      * @param int $num number of key phrase to return
      * @return array  an array of most selective key phrases
      */
@@ -454,7 +457,8 @@ class PhraseModel extends Model
         $query_iterator = $this->getQueryIterator($word_structs);
         $num_retrieved = 0;
         $pages = array();
-        while(is_array($next_docs = $query_iterator->nextDocsWithWord()) &&
+        while(is_object($query_iterator) &&
+            is_array($next_docs = $query_iterator->nextDocsWithWord()) &&
             $num_retrieved < $to_retrieve) {
             foreach($next_docs as $doc_key => $doc_info) {
                 $summary = & $doc_info[CrawlConstants::SUMMARY];
diff --git a/views/elements/crawloptions_element.php b/views/elements/crawloptions_element.php
index a4bab69d1..e93bf93d1 100644
--- a/views/elements/crawloptions_element.php
+++ b/views/elements/crawloptions_element.php
@@ -63,7 +63,6 @@ class CrawloptionsElement extends Element
         ><?php e(tl('crawloptions_element_back_to_manage'))?></a>
         </div>
         <h2><?php e(tl('crawloptions_element_edit_crawl_options'))?></h2>
-
         <form id="crawloptionsForm" method="post" action=''>
         <input type="hidden" name="c" value="admin" />
         <input type="hidden" name="YIOOP_TOKEN" value="<?php
@@ -71,6 +70,20 @@ class CrawloptionsElement extends Element
         <input type="hidden" name="a" value="manageCrawls" />
         <input type="hidden" name="arg" value="options" />
         <input type="hidden" name="posted" value="posted" />
+        <input type="hidden" id='crawl-type' name="crawl_type" value="<?php
+            e($data['crawl_type'])?>" />
+        <ul class='tabmenu-list'>
+        <li><a href="javascript:switchTab('webcrawltab', 'archivetab');"
+            id='webcrawltabitem'
+            class="<?php e($data['web_crawl_active']); ?>"><?php
+            e(tl('crawloptions_element_web_crawl'))?></a></li>
+        <li><a href="javascript:switchTab('archivetab', 'webcrawltab');"
+            id='archivetabitem'
+            class="<?php e($data['archive_crawl_active']); ?>"><?php
+            e(tl('crawloptions_element_archive_crawl'))?></a></li>
+        </ul>
+        <div class='tabmenu-content'>
+        <div id='webcrawltab'>
         <div class="topmargin"><label for="load-options"><b><?php
             e(tl('crawloptions_element_load_options'))?></b></label><?php
             $this->view->optionsHelper->render("load-options", "load_option",
@@ -105,6 +118,15 @@ class CrawloptionsElement extends Element
         <textarea class="talltextarea"  name="seed_sites" ><?php
             e($data['seed_sites']);
         ?></textarea>
+        </div>
+        <div id='archivetab'>
+        <div class="topmargin"><label for="load-options"><b><?php
+            e(tl('crawloptions_element_reindex_crawl'))?></b></label><?php
+            $this->view->optionsHelper->render("crawl-indexes", "crawl_indexes",
+                $data['available_crawl_indexes'], $data['crawl_index']);
+        ?></div>
+        </div>
+        </div>
         <div class="topmargin"><b><?php
             e(tl('crawloptions_element_meta_words'))?></b></div>
         <table class="metawordstable">
@@ -145,7 +167,23 @@ class CrawloptionsElement extends Element
             ?></button></div>
         </form>
         </div>
+        <script type="text/javascript">
+
+        function switchTab(newtab, oldtab)
+        {
+            setDisplay(newtab, true);
+            setDisplay(oldtab, false);
+            ntab = elt(newtab+"item");
+            ntab.className = 'active';
+            otab = elt(oldtab+"item");
+            otab.className = '';
+            ctype = elt('crawl-type');
+            ctype.value = (newtab == 'webcrawltab')
+                ? '<?php e(CrawlConstants::WEB_CRAWL); ?>' :
+                '<?php e(CrawlConstants::ARCHIVE_CRAWL); ?>';
+        }

+        </script>
     <?php
     }
 }
ViewGit