Back out database and log iterator code, fix a glitch where the web archive bundle iterator was not correctly constructed, a=chris

Chris Pollett [2013-01-04 17:Jan:th]
Back out database and log iterator code, fix a glitch where the web archive bundle iterator was not correctly constructed, a=chris
Filename
bin/fetcher.php
controllers/admin_controller.php
css/search.css
lib/archive_bundle_iterators/database_archive_bundle_iterator.php
lib/archive_bundle_iterators/log_archive_bundle_iterator.php
lib/crawl_constants.php
scripts/suggest.js
views/elements/crawloptions_element.php
diff --git a/bin/fetcher.php b/bin/fetcher.php
index 1cd0a367c..faf4e9820 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -529,7 +529,6 @@ class Fetcher implements CrawlConstants

                 crawlLog("New name: ".$this->web_archive->dir_name);
                 crawlLog("Switching archive...");
-
             }

             if(isset($info[self::SAVED_CRAWL_TIMES])) {
@@ -661,19 +660,20 @@ class Fetcher implements CrawlConstants
     function downloadPagesArchiveCrawl()
     {
         $prefix = $this->fetcher_num."-";
-        $base_name = CRAWL_DIR."/cache/{$prefix}".self::archive_base_name.
-            $this->crawl_index;
+        $arc_name = "$prefix" . self::archive_base_name . $this->crawl_index;
+        $base_name = CRAWL_DIR."/cache/$arc_name";
         $pages = array();
         if(!isset($this->archive_iterator->iterate_timestamp) ||
             $this->archive_iterator->iterate_timestamp != $this->crawl_index ||
             $this->archive_iterator->result_timestamp != $this->crawl_time) {
             if(!file_exists($base_name)){
-                crawlLog("Recrawl archive with timestamp" .
-                    " {$this->crawl_index} does not exist!");
+                crawlLog("!!Fetcher web archive $arc_name  does not exist.");
+                crawlLog("  Only fetchers involved in original crawl will ");
+                crawlLog("  participate in a web archive recrawl!!");
                 return $pages;
             } else {
                 $this->archive_iterator =
-                    new WebArchiveBundle($prefix, $this->crawl_index,
+                    new WebArchiveBundleIterator($prefix, $this->crawl_index,
                         $this->crawl_time);
                 if($this->archive_iterator == NULL) {
                     crawlLog("Error creating archive iterator!!");
@@ -843,8 +843,9 @@ class Fetcher implements CrawlConstants
                 } else {
                     $update_num = SEEN_URLS_BEFORE_UPDATE_SCHEDULER;
                     crawlLog("Fetch on crawl {$this->crawl_time} was not ".
-                        "halted properly, dumping $update_num from old fetch ".
-                        "to try to make a clean re-start");
+                        "halted properly.");
+                    crawlLog("  Dumping $update_num from old fetch ".
+                        "to try to make a clean re-start.");
                     $count = count($this->to_crawl);
                     if($count > SEEN_URLS_BEFORE_UPDATE_SCHEDULER) {
                         $this->to_crawl = array_slice($this->to_crawl,
diff --git a/controllers/admin_controller.php b/controllers/admin_controller.php
index 69988a4db..54e2886ca 100755
--- a/controllers/admin_controller.php
+++ b/controllers/admin_controller.php
@@ -860,13 +860,6 @@ class AdminController extends Controller implements CrawlConstants
         $crawl_params[self::META_WORDS] = isset($seed_info['meta_words']) ?
             $seed_info['meta_words'] : array();

-        $crawl_params[self::LOG_RECORDS] = isset($seed_info['log_records']) ?
-            $seed_info['log_records'] : array();
-
-        $crawl_params[self::DATABASE_CONNECTION_DETAILS] =
-            isset($seed_info['database_connection_details']) ?
-            $seed_info['database_connection_details'] : array();
-
         if(isset($seed_info['indexing_plugins']['plugins'])) {
             $crawl_params[self::INDEXING_PLUGINS] =
                 $seed_info['indexing_plugins']['plugins'];
@@ -1085,48 +1078,6 @@ class AdminController extends Controller implements CrawlConstants
                 $data['META_WORDS'] = $seed_info['meta_words'];
         }

-        $data['LOG_RECORDS'] = array();
-        if(!$no_further_changes) {
-            if(isset($_REQUEST["LOG_RECORDS"])){
-                foreach($_REQUEST["LOG_RECORDS"] as $triplet) {
-                    list($field, $field_name,$field_type) =
-                        array_values($triplet);
-                    $field = $this->clean($field, "string");
-                    $field_name =
-                            $this->clean($field_name, "string");
-                    $field_type =
-                            $this->clean($field_type,"string");
-                    $field_nt = $field_name."::".$field_type;
-                    if(trim($field) != "" &&trim($field_nt) !=""){
-                          $data['LOG_RECORDS'][$field] = $field_nt;
-                    }
-                }
-                $seed_info['log_records'] = $data['LOG_RECORDS'];
-                $update_flag = true;
-            } else if(isset($seed_info['log_records'])){
-                $data['LOG_RECORDS'] = $seed_info['log_records'];
-            }
-        } else if(isset($seed_info['log_records'])){
-                $data['LOG_RECORDS'] = $seed_info['log_records'];
-        }
-
-        $data['DATABASE_CONNECTION_DETAILS'] = array();
-        if(!$no_further_changes) {
-            if(isset($_REQUEST["DATABASE_CONNECTION_DETAILS"])){
-                $data['DATABASE_CONNECTION_DETAILS']=
-                           $_REQUEST["DATABASE_CONNECTION_DETAILS"];
-                $seed_info['database_connection_details'] =
-                           $data['DATABASE_CONNECTION_DETAILS'];
-                $update_flag = true;
-            } else if(isset($seed_info['database_connection_details'])) {
-                $data['DATABASE_CONNECTION_DETAILS'] =
-                    $seed_info['database_connection_details'];
-            }
-        } else if(isset($seed_info['database_connection_details'])) {
-            $data['DATABASE_CONNECTION_DETAILS'] =
-                $seed_info['database_connection_details'];
-        }
-
         $data['INDEXING_PLUGINS'] = array();
         $included_plugins = array();
         if(!$no_further_changes && isset($_REQUEST["posted"])) {
diff --git a/css/search.css b/css/search.css
index c94c889e1..65898598b 100755
--- a/css/search.css
+++ b/css/search.css
@@ -1734,107 +1734,6 @@ ul.in-list li
     width: 97%;
 }

-.log-records-table
-{
-   width:100%;
-}
-
-.log-records-table,
-.log-records-table td,
-.log-records-table th
-{
-    border: 1px ridge black;
-}
-
-.log-records-table th
-{
-    padding: 0.03in;
-    text-align: center;
-}
-
-.log-records-table td.input-field
-{
-    width: 1.3in;
-}
-
-.log-records-table td.input-field input
-{
-    margin: 0.05in;
-    width: 1.5in;
-}
-
-.log-records-table td.input-field-name
-{
-    margin: 0.03in;
-    width: 100%;
-}
-
-.log-records-table td.input-field-name input
-{
-    margin: 0.05in;
-    width: 97%;
-}
-
-.log-records-table td.input-field-type
-{
-    margin: 0.03in;
-    width: 100%;
-}
-
-.log-records-table td.input-field-type input
-{
-    margin: 0.05in;
-    width: 97%;
-}
-
-.html-rtl .log-record-new-field
-{
-    position: relative;
-}
-
-.html-ltr .log-record-new-field
-{
-    position: relative;
-}
-
-.database-connection-details-table
-{
-   width:100%;
-}
-
-.database-connection-details-table,
-.database-connection-details-table td
-{
-    border: 1px ridge black;
-}
-
-.database-connection-details-table td.input-name
-{
-    width: 1.3in;
-}
-
-.database-connection-details-table td.input-data
-{
-    margin: 0.03in;
-    width: 100%;
-}
-
-.database-connection-details-table td.input-data input
-{
-    margin: 0.05in;
-    width: 98%;
-}
-
-.html-rtl .database-connection-details-submit
-{
-    position: relative;
-}
-
-.html-ltr .database-connection-details-submit
-{
-    position: relative;
-}
-
 .indexing-plugin-table
 {
    width:100%;
diff --git a/lib/archive_bundle_iterators/database_archive_bundle_iterator.php b/lib/archive_bundle_iterators/database_archive_bundle_iterator.php
deleted file mode 100644
index df94e54d2..000000000
--- a/lib/archive_bundle_iterators/database_archive_bundle_iterator.php
+++ /dev/null
@@ -1,375 +0,0 @@
-<?php
-/**
- *  SeekQuarry/Yioop --
- *  Open Source Pure PHP Search Engine, Crawler, and Indexer
- *
- *  Copyright (C) 2009 - 2013  Chris Pollett chris@pollett.org
- *
- *  LICENSE:
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  END LICENSE
- *
- * @author Tanmayee Potluri
- * @package seek_quarry
- * @subpackage iterator
- * @license http://www.gnu.org/licenses/ GPL3
- * @link http://www.seekquarry.com/
- * @copyright 2009 - 2013
- * @filesource
- */
-
-if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
-
-/**
- *Loads base class for iterating
- */
-require_once BASE_DIR.
-    '/lib/archive_bundle_iterators/archive_bundle_iterator.php';
-
-/**
- * Used to iterate through the records of a database stored in a
- * DatabaseArchiveBundle folder. Database is a collection of tables with
- * various rows in each table. Iteration would be for the purpose of making
- * an index of each of these records.
- *
- * @author Tanmayee Potluri
- * @package seek_quarry
- * @subpackage iterator
- * @see WebArchiveBundle
- */
-class DatabaseArchiveBundleIterator extends ArchiveBundleIterator
-    implements CrawlConstants
-{
-    /**
-     * The path to the directory containing the archive partitions to be
-     * iterated over.
-     * @var string
-     */
-    var $iterate_dir;
-    /**
-     * The path to the directory where the iteration status is stored.
-     * @var string
-     */
-    var $result_dir;
-    /**
-     * The path to the directory where all html files are stored
-     * and used to point to.
-     * @var string
-     */
-    var $index_dir;
-
-    /**
-     * The part of the path to the directory where all html files are stored
-     * and used to point to.
-     * @var string
-     */
-    var $path_for_html_files;
-    /**
-     *  current number of record in the current database
-     *  @var int
-     */
-    var $current_page_num;
-    /**
-     * The number of database records in this database archive bundle
-     *  @var int
-     */
-    var $num_of_records;
-    /**
-     *  Array of database records according to the query specified by the user
-     *  @var array
-     */
-    var $records;
-    /**
-     *  Array of fields of database record specified by the user
-     *  @var array
-     */
-    var $fields;
-    /**
-     *  Array of field names in database record specified by the user
-     *  @var array
-     */
-    var $field_names;
-    /**
-     *  Array of database fieldtypes specified by the user
-     *  @var array
-     */
-    var $field_types;
-    /**
-     *  Database handle for a database
-     *  @var resource
-     */
-    var $db_handle;
-    /**
-     *  Whether database exists or not
-     *  @var resource
-     */
-    var $db_found;
-    /**
-     *  Array of database connection details to connect to database
-     *  @var array
-     */
-    var $databaseConnectionsArray;
-    /**
-     *  Host name for the database
-     *  @var string
-     */
-    var $host;
-    /**
-     *  User Name for the localhost
-     *  @var string
-     */
-    var $user_name;
-    /**
-     *  Password for the connection
-     *  @var string
-     */
-    var $password;
-    /**
-     *  Name of the database for the records
-     *  @var string
-     */
-    var $database;
-    /**
-     *  Query to retrieve the records to be indexed from the database
-     *  @var array
-     */
-    var $query;
-    /**
-     *  Constants required for database archive_bundle_iterator
-     *  @var const
-     */
-    const DATABASE_CONNECTION_DETAILS_FILE = 'database_connection_details.txt';
-
-    /**
-     * Creates a database archive iterator with the given parameters.
-     * @param string $iterate_timestamp timestamp of the arc archive bundle to
-     *      iterate  over the pages of
-     * @param string $result_timestamp timestamp of the arc archive bundle
-     *      results are being stored in
-     */
-    function __construct($iterate_timestamp, $iterate_dir,
-        $result_timestamp, $result_dir)
-    {
-        $this->iterate_timestamp = $iterate_timestamp;
-        $this->iterate_dir = $iterate_dir;
-        $this->result_timestamp = $result_timestamp;
-        $this->result_dir = $result_dir;
-        $this->index_dir = CRAWL_DIR."/cache/IndexData".$this->result_timestamp;
-        $temp_array = explode("/", $this->index_dir);
-        $temp_array_len = count($temp_array);
-
-        for($i = 1;$i<$temp_array_len;$i++){
-            if($temp_array[$i-1]=="htdocs"){
-                while($temp_array[$i] != "cache"){
-                    $this->path_for_html_files .= $temp_array[$i]."/";
-                    $i++;
-                }
-                break;
-            }
-        }
-        if(file_exists("{$this->iterate_dir}/".
-            self::DATABASE_CONNECTION_DETAILS_FILE))
-        {
-            $database_connection_details_info = unserialize
-               (file_get_contents(
-               "{$this->iterate_dir}/".self::DATABASE_CONNECTION_DETAILS_FILE));
-            file_put_contents("{$this->index_dir}/database_connection_details".
-                $this->result_timestamp.".txt",
-                serialize($database_connection_details_info));
-            @unlink("{$this->iterate_dir}/".
-                self::DATABASE_CONNECTION_DETAILS_FILE);
-        }
-        if(!is_dir("{$this->index_dir}/HTML_FILES")){
-            mkdir("{$this->index_dir}/HTML_FILES");
-        }
-        $this->createRecords();
-
-        if(file_exists("{$this->result_dir}/iterate_status.txt")) {
-            $this->restoreCheckpoint();
-        }
-        else {
-            $this->reset();
-        }
-    }
-
-    /**
-     * Estimates the important of the site according to the weighting of
-     * the particular archive iterator
-     * @param $site an associative array containing info about a web page
-     * @return bool false we assume arc files were crawled according to
-     *      OPIC and so we use the default doc_depth to estimate page importance
-     */
-    function weight(&$site)
-    {
-        return false;
-    }
-
-    /**
-     * Resets the iterator to the start of the archive bundle
-     */
-    function reset()
-    {
-        $this->current_page_num = -1;
-        $this->end_of_iterator = false;
-        @unlink("{$this->result_dir}/iterate_status.txt");
-    }
-
-    /**
-     * Saves the current state so that a new instantiation can pick up just
-     * after the last batch of pages extracted.
-     */
-    function saveCheckpoint($info = array())
-    {
-        $info['end_of_iterator'] = $this->end_of_iterator;
-        $info['current_page_num'] = $this->current_page_num;
-        $info['database_iterator'] = $this->database_iterator;
-        file_put_contents("{$this->result_dir}/iterate_status.txt",
-            serialize($info));
-    }
-
-    /**
-     * Restores state from a previous instantiation, after the last batch of
-     * pages extracted.
-     */
-    function restoreCheckpoint()
-    {
-        $info = unserialize(file_get_contents(
-            "{$this->result_dir}/iterate_status.txt"));
-        $this->end_of_iterator = $info['end_of_iterator'];
-        $this->current_page_num = $info['current_page_num'];
-        $this->database_iterator = $info['database_iterator'];
-        return $info;
-    }
-
-    /**
-     * Creates Records array containing all the records to satisfying the query
-     */
-    function createRecords()
-    {
-        $this->databaseConnectionsArray = unserialize(file_get_contents(
-            "{$this->index_dir}/database_connection_details".
-            $this->result_timestamp.".txt"));
-        $this->host = $this->databaseConnectionsArray['HOSTNAME'];
-        $this->user_name = $this->databaseConnectionsArray['USERNAME'];
-        $this->password = $this->databaseConnectionsArray['PASSWORD'];
-        $this->database = $this->databaseConnectionsArray['DATABASENAME'];
-        $this->query = $this->databaseConnectionsArray['QUERY'];
-        $this->db_handle = mysql_connect($this->host, $this->user_name,
-            $this->password);
-        $this->db_found = mysql_select_db($this->database, $this->db_handle);
-
-        /*If database exists*/
-        if ($this->db_found) {
-            $result1 = mysql_query($this->query);
-            $num_fields = mysql_num_fields($result1);
-            for($i = 0; $i < $num_fields; $i++) {
-                $this->field_names[$i] = mysql_field_name($result1, $i);
-            }
-            while ($row = mysql_fetch_row($result1)) {
-                $this->records[] = $row;
-            }
-            $this->num_of_records = count($this->records);
-            mysql_free_result($result1);
-            mysql_close($db_handle);
-        }
-    }
-    /**
-     * Gets the next at most $num many records from the iterator. It might
-     * return less than $num many documents if the end of the bundle is reached.
-     * @param int $num number of docs to get
-     * @return array associative arrays for $num pages
-     */
-    function nextPages($num)
-    {
-        $pages = array();
-        for($i = 0; $i < $num; $i++) {
-            $this->current_page_num++;
-            $page = $this->nextPage();
-            if($this->current_page_num >= $this->num_of_records) {
-                $this->end_of_iterator = true;
-                break;
-            }
-            else {
-                $pages[] = $page;
-            }
-        }
-
-        $this->saveCheckpoint();
-        return $pages;
-    }
-
-
-    /**
-     * Gets the next record from the iterator
-     * @return array associative array for record
-     */
-    function nextPage()
-    {
-        $site = array();
-        $field_nd = "";
-        $html_page = "";
-        $temp_record = array();
-        $temp_record = $this->records[$this->current_page_num];
-        $dom = new DOMDocument('1.0');
-        $root =$dom->createElement('html');
-        $root = $dom->appendChild($root);
-        $head = $dom->createElement('head');
-        $head = $root->appendChild($head);
-        $title = $dom->createElement('title');
-        $title = $head->appendChild($title);
-        $recordTitle = "Database Record".$this->current_page_num;
-        $text = $dom->createTextNode($recordTitle);
-        $text = $title->appendChild($text);
-        $body = $dom->createElement('body');
-        $body = $root->appendChild($body);
-        $field = $dom->createElement('p');
-        $field = $body->appendChild($field);
-        $fieldnames_len = count($this->field_names);
-        for($i = 0; $i < $fieldnames_len; $i++) {
-                $field_nd .= $this->field_names[$i]." : ".$temp_record[$i]
-                ."<br>";
-        }
-        $text1 = $dom->createTextNode($field_nd);
-        $text1 = $field->appendChild($text1);
-        $desc = $dom->createElement('p');
-        $desc = $body->appendChild($desc);
-        $text3 = "This is database record ".$this->current_page_num;
-        $text2 = $dom->createTextNode($text3);
-        $text2 = $desc->appendChild($text2);
-        $site[self::PAGE] =$dom->saveHTML();
-        $html_page = "<html><head><title>DatabaseRecord".
-            $this->current_page_num.
-            "</title></head><body><h1>The details of the database record are:".
-            "<br></h1><h3>".$field_nd."</h3></body></html>";
-        file_put_contents("{$this->index_dir}/HTML_FILES/databaserecord".
-            $this->current_page_num.".php",$html_page);
-        $site[self::URL] = "http://localhost/".$this->path_for_html_files.
-            "cache/IndexData".$this->result_timestamp.
-            "/HTML_FILES/databaserecord".$this->current_page_num.".php";
-        $site[self::TYPE] ="text/html";
-        $site[self::HTTP_CODE] = 200;
-        $site[self::ENCODING] = "UTF-8";
-        $site[self::SERVER] = "unknown";
-        $site[self::SERVER_VERSION] = "unknown";
-        $site[self::OPERATING_SYSTEM] = "unknown";
-        $site[self::HASH] = FetchUrl::computePageHash($site[self::PAGE]);
-        $site[self::WEIGHT] = 1;
-
-        return $site;
-
-    }
-}
-?>
\ No newline at end of file
diff --git a/lib/archive_bundle_iterators/log_archive_bundle_iterator.php b/lib/archive_bundle_iterators/log_archive_bundle_iterator.php
deleted file mode 100644
index b5645b334..000000000
--- a/lib/archive_bundle_iterators/log_archive_bundle_iterator.php
+++ /dev/null
@@ -1,452 +0,0 @@
-<?php
-/**
- *  SeekQuarry/Yioop --
- *  Open Source Pure PHP Search Engine, Crawler, and Indexer
- *
- *  Copyright (C) 2009 - 2013  Chris Pollett chris@pollett.org
- *
- *  LICENSE:
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- *  END LICENSE
- *
- * @author Tanmayee Potluri
- * @package seek_quarry
- * @subpackage iterator
- * @license http://www.gnu.org/licenses/ GPL3
- * @link http://www.seekquarry.com/
- * @copyright 2009 - 2013
- * @filesource
- */
-
-if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
-
-/**
- *Loads base class for iterating
- */
-require_once BASE_DIR.
-    '/lib/archive_bundle_iterators/archive_bundle_iterator.php';
-
-/**
- * Used to iterate through the collection of log files stored in
- * a WebArchiveBundle folder. Log is the file format which has the
- * activities the system or a server performs. Iteration would be
- * for the purpose making an index of these files.
- *
- * @author Tanmayee Potluri
- * @package seek_quarry
- * @subpackage iterator
- * @see WebArchiveBundle
- */
-
-class LogArchiveBundleIterator extends ArchiveBundleIterator
-    implements CrawlConstants
-{
-    /**
-     * The path to the directory containing the archive partitions to be
-     * iterated over.
-     * @var string
-     */
-    var $iterate_dir;
-
-    /**
-     * The path to the directory where the iteration status is stored.
-     * @var string
-     */
-    var $result_dir;
-
-    /**
-     * The path to the directory where all html files are stored
-     * and used to point to.
-     * @var string
-     */
-    var $index_dir;
-
-    /**
-     * The part of the path to the directory where all html files are stored
-     * and used to point to.
-     * @var string
-     */
-    var $path_for_html_files;
-
-    /**
-     * The number of log files in this log archive bundle
-     *  @var int
-     */
-    var $num_partitions;
-
-    /**
-     *  current record number in the master log file
-     *  @var int
-     */
-    var $current_page_num;
-
-    /**
-     *  number of records in the master log file
-     *  @var int
-     */
-    var $num_of_records;
-
-    /**
-     *  Array of log records in the master log file in the directory
-     *  @var array
-     */
-    var $records;
-
-    /**
-     *  Array of filenames of log files in this directory (glob order)
-     *  @var array
-     */
-    var $partitions;
-
-    /**
-     *  Array of fields of log file specified by the user
-     *  @var array
-     */
-    var $fields;
-
-    /**
-     *  Array of fieldnames specified by the user
-     *  @var array
-     */
-    var $field_names;
-
-    /**
-     *  Array of fieldtypes specified by the user
-     *  @var array
-     */
-    var $field_types;
-
-    /**
-     *  Array of fields in each record separately
-     *  @var array
-     */
-    var $page_info;
-
-    /**
-     *  Array of regular expressions for all the data types
-     *  @var array
-     */
-    var $regular_exprs;
-
-    /**
-     *  Array of log fields type in drop down box in the UI
-     *  @var array
-     */
-    var $logfields_type_ddm = array(
-        1=>'IP_Address',
-        2=>'Timestamp',
-        3=>'URL',
-        4=>'Status Code',
-        5=>'User Agent',
-        6=>'Request',
-        7=>'Int');
-
-    /**
-     *  Constants required for log archive_bundle_iterator
-     *  @var const
-     */
-    const FIELDS_DATA_FILE = 'fields_data.txt';
-    const MASTER_LOG_FILE = 'master.log';
-
-
-    /**
-     * Creates a log archive iterator with the given parameters.
-     *
-     * @param string $iterate_timestamp timestamp of the log archive bundle to
-     *      iterate  over the pages of
-     * @param string $result_timestamp timestamp of the log archive bundle
-     *      results are being stored in
-     */
-    function __construct($iterate_timestamp, $iterate_dir,
-        $result_timestamp, $result_dir)
-    {
-        $this->regular_exprs = array(
-        'IP_Address' => '/\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/',
-        'Timestamp' => '/\[[^:]+:\d+:\d+:\d+ [^\]]+\]/',
-        'Request' => '/(GET|HEAD|POST|PUT|DELETE|TRACE|OPTIONS|CONNECT)+[^"]*/',
-        'Status Code'=> '/\s[1-5]\d{2}\s/',
-        'Int' => '/\s[0-9]+\s/',
-        'User Agent' => '/"([a-zA-Z0-9][^"]+)"/');
-        $this->regular_exprs['URL'] = '/(http|https|ftp):\/\/[A-Za-z0-9]'.
-            '[A-Za-z0-9_-]*[\/]*(?:.[A-Za-z0-9][A-Za-z0-9_-]*'.
-            '[\/]*)+:?(d*)[\/]*/';
-        $this->path_for_html_files = "";
-        $this->iterate_timestamp = $iterate_timestamp;
-        $this->iterate_dir = $iterate_dir;
-        $this->result_timestamp = $result_timestamp;
-        $this->result_dir = $result_dir;
-        $this->index_dir = CRAWL_DIR."/cache/IndexData".$this->result_timestamp;
-        $temp_array = explode("/", $this->index_dir);
-        $temp_array_len = count($temp_array);
-
-        for($i = 1; $i < $temp_array_len; $i++) {
-            if($temp_array[$i-1] == "htdocs") {
-                while($temp_array[$i] != "cache") {
-                    $this->path_for_html_files .= $temp_array[$i]."/";
-                    $i++;
-                }
-                break;
-            }
-        }
-        if(file_exists("{$this->iterate_dir}/".self::FIELDS_DATA_FILE)) {
-            $fields_data = unserialize(file_get_contents(
-                               "{$this->iterate_dir}/".self::FIELDS_DATA_FILE));
-            file_put_contents(
-                "{$this->index_dir}/fields_data".$this->result_timestamp.".txt",
-                serialize($fields_data));
-            @unlink("{$this->iterate_dir}/".self::FIELDS_DATA_FILE);
-        }
-        if(!is_dir("{$this->index_dir}/HTML_FILES")) {
-            mkdir("{$this->index_dir}/HTML_FILES");
-        }
-        $this->partitions = array();
-        foreach(glob("{$this->iterate_dir}/*.log") as $filename) {
-            if(strpos($filename,self::MASTER_LOG_FILE)!= true) {
-                $this->partitions[] = $filename;
-            }
-        }
-        $this->num_partitions = count($this->partitions);
-        $this->records = $this->createMasterAndRecords();
-        $this->num_of_records = count($this->records);
-        if(file_exists("{$this->result_dir}/iterate_status.txt")){
-            $this->restoreCheckpoint();
-        }
-        else {
-            $this->reset();
-        }
-    }
-
-    /**
-     * Estimates the important of the site according to the weighting of
-     * the particular archive iterator
-     * @param $site an associative array containing info about a web page
-     * @return value 1 we assume all log files crawled have the same
-     *  page importance
-     */
-    function weight(&$site)
-    {
-        return 1;
-    }
-
-    /**
-     * Resets the iterator to the start of the archive bundle
-     */
-    function reset()
-    {
-        $this->current_page_num = -1;
-        $this->end_of_iterator = false;
-        @unlink("{$this->result_dir}/iterate_status.txt");
-    }
-
-    /**
-     * Saves the current state so that a new instantiation can pick up just
-     * after the last batch of pages extracted.
-     */
-    function saveCheckpoint($info = array())
-    {
-        $info['end_of_iterator'] = $this->end_of_iterator;
-        $info['current_page_num'] = $this->current_page_num;
-        $info['log_iterator'] = $this->log_iterator;
-        file_put_contents("{$this->result_dir}/iterate_status.txt",
-            serialize($info));
-        if($this->end_of_iterator == true){
-            @unlink("{$this->iterate_dir}/".self::MASTER_LOG_FILE);
-        }
-    }
-
-    /**
-     * Restores state from a previous instantiation, after the last batch of
-     * pages extracted.
-     */
-    function restoreCheckpoint()
-    {
-        $info = unserialize(file_get_contents(
-            "{$this->result_dir}/iterate_status.txt"));
-        $this->end_of_iterator = $info['end_of_iterator'];
-        $this->current_page_num = $info['current_page_num'];
-        $this->log_iterator = $info['log_iterator'];
-        return $info;
-    }
-
-    /**
-     * Pulls data from all the log files in the directory and place it in
-     * the master log file and splits the file into log records
-     * @return array  of log records from the master log file
-     */
-    function createMasterAndRecords(){
-        if(file_exists("{$this->iterate_dir}/".self::MASTER_LOG_FILE)) {
-            @unlink("{$this->iterate_dir}/".self::MASTER_LOG_FILE);
-        }
-        for ($i=0; $i < $this->num_partitions; $i++){
-            $file_data = file_get_contents($this->partitions[$i]);
-            file_put_contents("{$this->iterate_dir}/".self::MASTER_LOG_FILE,
-                 $file_data, FILE_APPEND);
-        }
-        $recordArray = explode("\n",
-                file_get_contents("{$this->iterate_dir}/".
-                        self::MASTER_LOG_FILE));
-
-        return $recordArray;
-    }
-
-    /**
-     * Unserializes the array of log records stored when save
-     * options is clicked and stores them in the global arrays.
-     */
-     function getFieldDetails()
-     {
-        $fieldArray = unserialize(file_get_contents(
-            "{$this->index_dir}/fields_data".$this->result_timestamp.".txt"));
-        foreach($fieldArray as $field=>$field_nt){
-            $matches = explode("::",$field_nt);
-            $this->fields[] = $field;
-            $this->field_names[] = $matches[0];
-            $this->field_types[] = $matches[1];
-        }
-     }
-
-    /**
-     * Takes the log record as input and parses the log record and returns
-     * the array containing the details of each record.
-     *
-     * @param string $record single log record in a file
-     * @return array $matches matched fields of a log record
-     */
-     function parseLogRecord($record)
-     {
-        $content = "";
-        $field_types_len = count($this->field_types);
-        for($j = 0; $j < $field_types_len; $j++) {
-            $content = "";
-            $matches = array();
-            preg_match(
-        $this->regular_exprs[$this->logfields_type_ddm[$this->field_types[$j]]],
-        $record,$matches);
-            if (count($matches)>0) {
-                $spaces_removed = trim($matches[0]);
-                $record = str_replace($spaces_removed,"",$record);
-                $return_page[$this->logfields_type_ddm[$this->field_types[$j]]]
-                        = $matches[0];
-                }
-                else {
-                $return_page[$this->logfields_type_ddm[$this->field_types[$j]]]
-                        = "-";
-                }
-        }
-        return $return_page;
-     }
-
-    /**
-     * Gets the next at most $num many docs from the iterator. It might return
-     * less than $num many documents if the partition changes or the end of the
-     * bundle is reached.
-     *
-     * @param int $num number of docs to get
-     * @return array associative arrays for $num pages
-     */
-    function nextPages($num)
-    {
-        $this->getFieldDetails();
-        $pages = array();
-        $page_count = 0;
-        for($i = 0; $i < $num; $i++) {
-            $this->current_page_num++;
-            $this->page_info
-               = $this->parseLogRecord($this->records[$this->current_page_num]);
-            $page = $this->nextPage();
-            if($this->current_page_num >= $this->num_of_records) {
-               $this->end_of_iterator = true;
-               break;
-            }
-            else {
-               $pages[] = $page;
-               $page_count++;
-           }
-        }
-        $this->saveCheckpoint();
-        return $pages;
-    }
-
-
-    /**
-     * Gets the next doc from the iterator
-     * @return array associative array for doc
-     */
-    function nextPage()
-    {
-        $site = array();
-        $field_nt = "";
-        $fields_count = count($this->page_info);
-        $dom = new DOMDocument('1.0');
-        $root =$dom->createElement('html');
-        $root = $dom->appendChild($root);
-        $head = $dom->createElement('head');
-        $head = $root->appendChild($head);
-        $title = $dom->createElement('title');
-        $title = $head->appendChild($title);
-        for($i=0;$i<$fields_count;$i++){
-            if($this->logfields_type_ddm[$this->field_types[$i]] =="Request"
-                && $this->page_info['Request'] !="-") {
-                $recordTitle = "Line ".$this->current_page_num.":".
-        $this->page_info[$this->logfields_type_ddm[$this->field_types[$i]]];
-                break;
-                }
-        }
-        if($recordTitle == ""){
-                $recordTitle = "Line ".$this->current_page_num;
-        }
-        $text = $dom->createTextNode($recordTitle);
-        $text = $title->appendChild($text);
-        $body = $dom->createElement('body');
-        $body = $root->appendChild($body);
-        $field = $dom->createElement('p');
-        $field = $body->appendChild($field);
-        for($i=0;$i<$fields_count;$i++){
-            $field_nt .= $this->field_names[$i].":".
-        $this->page_info[$this->logfields_type_ddm[$this->field_types[$i]]]
-        ."<br/>";
-        }
-        $text1 = $dom->createTextNode($field_nt);
-        $text1 = $field->appendChild($text1);
-        $desc = $dom->createElement('p');
-        $desc = $body->appendChild($desc);
-        $text3 = "This is line ".$this->current_page_num;
-        $text2 = $dom->createTextNode($text3);
-        $text2 = $desc->appendChild($text2);
-        $site[self::PAGE] =$dom->saveHTML();
-        $html_page = "<html><head><title>LogRecord".$this->current_page_num.
-                "</title></head><body><h1>".
-                "The details of the log record are: <br/></h1><h3>".
-                $field_nt."</h3></body></html>";
-        file_put_contents("{$this->index_dir}/HTML_FILES/logrecord".
-                $this->current_page_num.".php",$html_page);
-        $site[self::URL] = "http://localhost/".$this->path_for_html_files.
-                "cache/IndexData".$this->result_timestamp.
-                "/HTML_FILES/logrecord"
-                .$this->current_page_num.".php";
-        $site[self::TYPE] ="text/html";
-        $site[self::HTTP_CODE] = 200;
-        $site[self::ENCODING] = "UTF-8";
-        $site[self::SERVER] = "unknown";
-        $site[self::SERVER_VERSION] = "unknown";
-        $site[self::OPERATING_SYSTEM] = "unknown";
-        $site[self::HASH] = FetchUrl::computePageHash($site[self::PAGE]);
-        $site[self::WEIGHT] = 1;
-
-        return $site;
-    }
-}
-?>
diff --git a/lib/crawl_constants.php b/lib/crawl_constants.php
index 801d960af..6ad21a61d 100644
--- a/lib/crawl_constants.php
+++ b/lib/crawl_constants.php
@@ -210,11 +210,8 @@ interface CrawlConstants
     const LINK_SEEN_URLS = 'cj';
     const POST_MAX_SIZE = 'ck';
     const LOGGING = 'cl';
-    const LOG_RECORDS = 'cm';
-    const DATABASE_RECORDS = 'cn';
-    const DATABASE_CONNECTION_DETAILS = 'co';

     const NEEDS_OFFSET_FLAG = 0x7FFFFFFF;

 }
-?>
\ No newline at end of file
+?>
diff --git a/scripts/suggest.js b/scripts/suggest.js
index 564e50f96..fc693626d 100644
--- a/scripts/suggest.js
+++ b/scripts/suggest.js
@@ -104,7 +104,10 @@ function onTypeTerm(event, text_field)
     search_list_array = new Object();
     scroll_horz = false;

-    out_query = transliterate(query);
+    out_query = false;
+    if(typeof transliterate == 'function') {
+        out_query = transliterate(query);
+    }
     if(out_query && out_query.length > 0)
     {
        input_term = out_query;
diff --git a/views/elements/crawloptions_element.php b/views/elements/crawloptions_element.php
index 26383333d..910aca795 100644
--- a/views/elements/crawloptions_element.php
+++ b/views/elements/crawloptions_element.php
@@ -159,287 +159,6 @@ class CrawloptionsElement extends Element
             <div class="center red"><?php
             e(tl('crawloptions_element_need_api_for_mix')); ?></div>
         <?php } ?>
-
-        <script>
-        obj = document.getElementById("crawl-indexes");
-        obj.onchange = function(){crawloptionsForm.submit();}
-        </script>
-
-        <?php $data['logfields_type'] = array(
-        1=>'IP_Address',
-        2=>'Timestamp',
-        3=>'URL',
-        4=>'Status Code',
-        5=>'User Agent',
-        6=>'Request',
-        7=>'Int');
-
-        $flag = false;
-
-        /* If log files are selected as the option */
-        if(isset($_POST['crawl_indexes'])
-            && $data['available_crawl_indexes'][$_POST['crawl_indexes']]
-            == 'ARCFILE::Log Files') {
-                $LogFolderPath = CRAWL_DIR.'/cache/archives';
-                foreach(glob($LogFolderPath."/*") as $folder){
-                  if(is_dir($folder)){
-                    if(file_exists("$folder/arc_description.ini")){
-                      $contents =
-                        file_get_contents("$folder/arc_description.ini");
-                      if(strpos($contents,"LogArchiveBundle")
-                        == true){
-                        $flag = true;
-                        $LogFolderPath = $folder;
-                      }
-                    }
-                  }
-                  if($flag == true) {break;}
-                }
-
-        /*Get all the file names into an array*/
-        $filenames = glob($LogFolderPath."/*.log");
-        /*Retrieve the first filename*/
-        $firstFile = $filenames[0];
-        /*Split the file content into an array*/
-        $l_delim = "\n";
-        $file_array = explode($l_delim, file_get_contents($firstFile));
-        echo "<br/><b>".tl('crawloptions_element_first_line_text')."</b><br/>";
-        echo "<br/>".$file_array[0]."<br/>";
-
-        ?>
-
-        <div id="Log_Records" class="top-margin"><b><?php
-            e(tl('crawloptions_element_log_records_details'))?></b></div>
-
-        <table class="log-records-table">
-            <tr><th><?php e(tl('crawloptions_element_field'));?></th>
-            <th><?php e(tl('crawloptions_element_field_name')); ?></th>
-            <th><?php e(tl('crawloptions_element_field_type')); ?></th></tr>
-        <?php
-            $i = 0;
-            foreach($data['LOG_RECORDS'] as $field => $field_nt) {
-                $matches = explode("::",$field_nt);
-        ?>
-            <tr><td class="input-field" >
-                <input
-                       title="<?php e(tl('crawloptions_element_field')); ?>"
-                       name="LOG_RECORDS[<?php e($i); ?>][FIELD]"
-                       value="<?php e($field); ?>"
-                />
-                </td>
-                <td class="input-field-name">
-                <input
-                     title="<?php e(tl('crawloptions_element_field_name')); ?>"
-                     name="LOG_RECORDS[<?php e($i); ?>]['FIELD_NAME']"
-                     value="<?php e($matches[0]); ?>"
-                />
-                </td>
-                <td class="input-field-type" >
-                <?php $this->view->optionsHelper->render(
-                        'field-types',
-                        "LOG_RECORDS[$i]['FIELD_TYPE']",
-                        $data['logfields_type'],
-                        $matches[1]);
-                ?>
-                </td>
-            </tr>
-            <?php
-                $i++;
-                }
-                if($i==0){
-            ?>
-            <tr><td class="input-field">
-                <input
-                       type="text"
-                       title="New Field"
-                       name="LOG_RECORDS[<?php e($i); ?>][FIELD]"
-                       value=""
-                />
-                </td>
-                <td class="input-field-name">
-                <input
-                       type="text"
-                       title="New Field Name"
-                       name="LOG_RECORDS[<?php e($i); ?>]['FIELD_NAME']"
-                       value=""
-                />
-                </td>
-                <td class="input-field-type">
-                <?php $this->view->optionsHelper->render(
-                        'field-types',
-                        "LOG_RECORDS[$i]['FIELD_TYPE']",
-                        $data['logfields_type'],
-                        1);
-                ?>
-                </td>
-            </tr>
-
-                <?php } ?>
-
-                <?php
-                    if(isset($_POST['add_fields']) && $i>0){
-                ?>
-            <tr>
-                <td class="input-field">
-                <input
-                       type="text"
-                       title="New Field"
-                       name="LOG_RECORDS[<?php e($i); ?>]['FIELD']"
-                       value=""
-                />
-                </td>
-                <td class="input-field-name">
-                <input
-                       type="text"
-                       title="New Field Name"
-                       name="LOG_RECORDS[<?php e($i); ?>]['FIELD_NAME']"
-                       value=""
-                />
-                </td>
-                <td class="input-field-type">
-                <?php $this->view->optionsHelper->render(
-                        'field-types',
-                        "LOG_RECORDS[$i]['FIELD_TYPE']",
-                        $data['logfields_type'],
-                        1);
-                ?>
-                </td>
-            </tr>
-
-                <?php } ?>
-        </table>
-        <?php
-        if(isset($_POST['save_options'])
-        && $data['available_crawl_indexes'][$_POST['crawl_indexes']]
-        == 'ARCFILE::Log Files'){
-            file_put_contents($LogFolderPath."/fields_data.txt",
-                serialize($data['LOG_RECORDS']));
-        }
-        ?>
-        <div class="log-record-new-field">
-        <input
-               type="submit"
-               id="add-fields"
-               name="add_fields"
-               value="<?php e(tl('crawloptions_element_add_new_field')); ?>"
-        />
-        </div>
-        <?php } ?>
-
-        <?php
-        if(isset($_POST['crawl_indexes']) &&
-            $data['available_crawl_indexes'][$_POST['crawl_indexes']]
-            == 'ARCFILE::Database files') {
-            $flag1 = false;
-            $DatabaseFolderPath = CRAWL_DIR.'/cache/archives';
-            foreach(glob($DatabaseFolderPath."/*") as $folder){
-                if(is_dir($folder)){
-                    if(file_exists("$folder/arc_description.ini")){
-                        $contents =
-                            file_get_contents("$folder/arc_description.ini");
-                        if(strpos($contents,"Database files") == true){
-                            $flag1 = true;
-                            $DatabaseFolderPath = $folder;
-                        }
-                    }
-                }
-                if($flag1 == true) {break;}
-            }
-        ?>
-
-        <div id="Database_Connection_Details" class="top-margin"><b><?php
-            e(tl('crawloptions_element_database_connection_details'))?></b>
-        </div><br/>
-
-        <table class="database-connection-details-table">
-           <tr><td class="input-name"><?php
-                    e(tl('crawloptions_element_hostname'))?>
-                </td>
-                <td class="input-data">
-                <input
-                       title="<?php e(tl('crawloptions_element_hostname')); ?>"
-                       name="DATABASE_CONNECTION_DETAILS[HOSTNAME]"
-                       value=""
-                />
-                </td>
-            </tr>
-            <tr>
-                <td class="input-name"><?php
-                    e(tl('crawloptions_element_username'))?>
-                </td>
-                <td class="input-data">
-                <input
-                       title="<?php e(tl('crawloptions_element_username')); ?>"
-                       name="DATABASE_CONNECTION_DETAILS[USERNAME]"
-                       value=""
-                />
-                </td>
-            </tr>
-            <tr>
-                <td class="input-name"><?php
-                    e(tl('crawloptions_element_password'))?>
-                </td>
-                <td class="input-data">
-                <input
-                       title="<?php e(tl('crawloptions_element_password')); ?>"
-                       name="DATABASE_CONNECTION_DETAILS[PASSWORD]"
-                       value=""
-                />
-                </td>
-            </tr>
-            <tr>
-                <td class="input-name"><?php
-                    e(tl('crawloptions_element_databasename'))?>
-                </td>
-                <td class="input-data">
-                <input
-                   title= "<?php e(tl('crawloptions_element_databasename')); ?>"
-                   name="DATABASE_CONNECTION_DETAILS[DATABASENAME]"
-                   value=""
-                />
-                </td>
-            </tr>
-            <tr>
-                <td class="input-name"><?php
-                    e(tl('crawloptions_element_query'))?>
-                </td>
-                <td class="input-data">
-                <input
-                       title="<?php e(tl('crawloptions_element_query')); ?>"
-                       name="DATABASE_CONNECTION_DETAILS[QUERY]"
-                       value=""
-                />
-                </td>
-            </tr>
-        </table>
-        <div class="database-connection-details-submit">
-            <input type="submit"
-                   id="submit-details"
-                   name="submit_details"
-                   value="<?php e(tl('crawloptions_element_submit')); ?>"
-            />
-        </div>
-
-        <?php
-        if(isset($_POST['submit_details'])){
-            file_put_contents($DatabaseFolderPath.
-                "/database_connection_details.txt",
-                serialize($data['DATABASE_CONNECTION_DETAILS']));
-        }
-        ?>
-        <?php
-        if(isset($_POST['save_options'])
-            && $data['available_crawl_indexes'][$_POST['crawl_indexes']]
-            == 'ARCFILE::Database files'){
-            file_put_contents($DatabaseFolderPath.
-                "/database_connection_details.txt",
-                serialize($data['DATABASE_CONNECTION_DETAILS']));
-        }
-        ?>
-
-        <?php } ?>
-
-
         </div>
         <?php } ?>
         </div>
ViewGit