Fixes Bug 4, also adds more documentation to various files

Chris Pollett [2010-07-19]
Filename
bin/fetcher.php
bin/queue_server.php
bot.php
configs/config.php
controllers/search_controller.php
index.php
lib/bloom_filter_bundle.php
lib/fetch_url.php
lib/phrase_parser.php
lib/porter_stemmer.php
lib/string_array.php
lib/unit_test.php
lib/web_archive_bundle.php
lib/web_queue_bundle.php
locale/extract_merge.php
models/crawl_model.php
tests/index.php
diff --git a/bin/fetcher.php b/bin/fetcher.php
index e633f1690..bdae4bbb5 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -31,6 +31,7 @@
  * @filesource
  */

+/** Calculate base directory of script */
 define("BASE_DIR",
     substr($_SERVER['DOCUMENT_ROOT'].$_SERVER['PWD'].$_SERVER["SCRIPT_NAME"],
     0, -strlen("bin/fetcher.php")));
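
The one-line docblock added here (and in the other entry-point scripts below) documents the BASE_DIR idiom: concatenate the server-supplied path pieces, then chop the script's own relative path off the end with a negative substr() length. A minimal sketch with hypothetical paths (the real values come from $_SERVER and differ between web and command-line runs):

    <?php
    // Hypothetical paths for illustration; fetcher.php builds the string from
    // $_SERVER['DOCUMENT_ROOT'], $_SERVER['PWD'] and $_SERVER['SCRIPT_NAME'].
    $document_root = "/var/www/yioop";
    $script_name = "/bin/fetcher.php";
    // Drop the trailing "bin/fetcher.php" to recover the installation root.
    $base_dir = substr($document_root . $script_name, 0,
        -strlen("bin/fetcher.php"));
    echo $base_dir; // outputs "/var/www/yioop/"
    ?>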
diff --git a/bin/queue_server.php b/bin/queue_server.php
index 39a158124..9b76ad89d 100755
--- a/bin/queue_server.php
+++ b/bin/queue_server.php
@@ -31,6 +31,7 @@
  * @filesource
  */

+/** Calculate base directory of script */
 define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT'].
     $_SERVER['PWD'].$_SERVER["SCRIPT_NAME"], 0,
     -strlen("bin/queue_server.php")));
diff --git a/bot.php b/bot.php
index c0938914f..0398ed170 100755
--- a/bot.php
+++ b/bot.php
@@ -34,6 +34,7 @@
  * @filesource
  */

+/** Calculate base directory of script */
 define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT'].$_SERVER['PWD'].
     $_SERVER["SCRIPT_NAME"], 0,
     -strlen("bot.php")));
diff --git a/configs/config.php b/configs/config.php
index 8a2a9c778..ec23c5acd 100755
--- a/configs/config.php
+++ b/configs/config.php
@@ -35,8 +35,11 @@
  * @filesource
  */
 if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
+/** bit of DEBUG_LEVEL used to indicate test cases should be displayable*/
 define('TEST_INFO', 1);
+/** bit of DEBUG_LEVEL used to indicate query statistics should be displayed*/
 define('QUERY_INFO', 2);
+/** bit of DEBUG_LEVEL used to indicate php messages should be displayed*/
 define('ERROR_INFO', 4);
 date_default_timezone_set('America/Los_Angeles');

@@ -84,105 +87,125 @@ if((DEBUG_LEVEL & ERROR_INFO) == ERROR_INFO) {
     error_reporting(0);
 }

-if( (DEBUG_LEVEL & TEST_INFO) == TEST_INFO) {
-    define('DISPLAY_TESTS', true);
-} else {
-    define('DISPLAY_TESTS', false);
-}
+/** if true tests are displayable */
+define('DISPLAY_TESTS', ((DEBUG_LEVEL & TEST_INFO) == TEST_INFO));

-if( (DEBUG_LEVEL & QUERY_INFO) == QUERY_INFO) {
-    define('QUERY_STATISTICS', true);
-} else {
-    define('QUERY_STATISTICS', false);
-}
+/** if true query statistics are displayed */
+define('QUERY_STATISTICS', ((DEBUG_LEVEL & QUERY_INFO) == QUERY_INFO));

 if(!PROFILE) {
     return;
 }
 /*+++ End machine generated code, feel free to edit the below as desired +++*/

+/** this is the User-Agent name the crawler provides
+ * to a web-server it is crawling
+ */
 define('USER_AGENT',
     'Mozilla/5.0 (compatible; '.USER_AGENT_SHORT.'  +'.QUEUE_SERVER.'bot.php)');
-    /* this is the User-Agent names the crawler provides
-       a web-server it is crawling
-     */
+
+/** name of the cookie used to manage the session
+ * (store language and perpage settings)
+ */
 define ('SESSION_NAME', "yioopbiscuit");
-    /* name of the cookie used to manage the session
-       (store language and perpage settings)
-     */

+/** maximum size of a log file before it is rotated */
 define("MAX_LOG_FILE_SIZE", 5000000);
-    // maximum size of a log file before it is rotated
+
+/** number of log files to rotate amongst */
 define("NUMBER_OF_LOG_FILES", 5);
-    // number of log files to rotate amongst

+/**
+ * how long in seconds to keep a cache of a robots.txt
+ * file before re-requesting it
+ */
 define('CACHE_ROBOT_TXT_TIME', 86400);
-    /* how long in seconds to keep a cache of a robot.txt
-      file before re-requesting it
-     */
+
+/**
+ * if the robots.txt has a Crawl-delay larger than this
+ * value don't crawl the site.
+ * maximum value for this is 255
+ */
 define('MAXIMUM_CRAWL_DELAY', 64);
-    /* if the robots.txt has a Crawl-delay larger than this
-       value don't crawl the site.
-       maximum value for this is 255
-    */
-define('MAX_WAITING_HOSTS', 1000);
-    //maximum number of active crawl-delayed hosts

+/** maximum number of active crawl-delayed hosts */
+define('MAX_WAITING_HOSTS', 1000);

+/**
+ * bloom filters are used to keep track of which urls are visited,
+ * this parameter determines up to how many
+ * urls will be stored in a single filter. Additional filters are
+ * read from and written to disk.
+ */
 define('URL_FILTER_SIZE', 10000000);
-    /* bloom filters are used to keep track of which urls are visited,
-       this parameter determines up to how many
-       urls will be stored in a single filter. Additional filters are
-       read to and from disk.
-     */
+
+/** number of fetchers that will be used in a given crawl */
 define('NUM_FETCHERS', 3);
-    // number of fetchers that will be used in a given crawl
+
+/**
+ * maximum number of urls that will be held in ram
+ * (as opposed to in files) in the priority queue
+ */
 define('NUM_URLS_QUEUE_RAM', 300000);
-    /* maximum number of urls that will be held in ram
-       (as opposed to in files) in the priority queue
-     */

+/** Minimum weight in priority queue before the queue is rebuilt */
 define('MIN_QUEUE_WEIGHT', 1/100000);
-define('NUM_ARCHIVE_PARTITIONS', 10);
-    // number of web archive files to use to store web pages in
+
+/**  number of web archive files to use to store web pages in */
+define('NUM_ARCHIVE_PARTITIONS', 10);
+
+/**
+ * number of web archive files to use for the inverted index of
+ * word->docs in a given generation
+ */
 define('NUM_INDEX_PARTITIONS', 250);
-    /* number of web archive files to use for the inverted index of
-      word->docs in a given generation
-     */
-define('NUM_WORDS_PER_GENERATION', 6*URL_FILTER_SIZE/NUM_INDEX_PARTITIONS);
-    // number of words before next gen

-define('SAMPLE_GENERATIONS', 3);
-    // number of generations to sample in estimating number of urls in a query
+/** number of words before next gen */
+define('NUM_WORDS_PER_GENERATION', 6*URL_FILTER_SIZE/NUM_INDEX_PARTITIONS);

+/** number of generations to sample in estimating number of urls in a query */
+define('SAMPLE_GENERATIONS', 3);

+/** whether to store inlink data in the word inverted index */
 define('STORE_INLINKS_IN_DICTIONARY', false);
-    //store inlink data in word inverted index
+
+/** precision to round floating point document scores */
 define('PRECISION', 10);
-    // precision to round floating points document scores
+
+/**
+ * when indexing data from relatively uncommon words,
+ * how many docs should be grouped together in a block
+ */
 define('BLOCK_SIZE', 50);
-    /* when index data from relatively uncommon words,
-       how many docs should be grouped together in a block
-     */
+
+/** how many documents a word needs to appear in to get its own index file. */
 define('COMMON_WORD_THRESHOLD', 1000);
-    // how many documents a word needs to be to get its own index file.

+/** maximum number of links to consider on any given page */
 define('MAX_LINKS_PER_PAGE', 50);
-    // maximum number of links to consider on any given page
+
+/**  maximum number of words from links to consider on any given page */
 define('MAX_LINKS_WORD_TEXT', 200);
-    // maximum number of words from links to consider on any given page
-define('PAGE_RANGE_REQUEST', 50000); // request this many bytes out of a page
-define('MAX_PHRASE_LEN', 2); //maximum length +1 exact phrase matches

+/** request this many bytes out of a page */
+define('PAGE_RANGE_REQUEST', 50000);
+
+/** maximum length +1 exact phrase matches */
+define('MAX_PHRASE_LEN', 2);
+
+/** number of multi curl page requests in one go */
 define('NUM_MULTI_CURL_PAGES', 100);
-    //number of multi curl page requests in one go
+
+/** time in seconds before we give up on a page */
 define('PAGE_TIMEOUT', 30);
-    //time in seconds before we give up on a page

+/** how often in OPIC we should make the sum of weights total MAX_URLS */
 define('NORMALIZE_FREQUENCY', 10000);
-    // how often should we make in OPIC the sum of weights totals MAX_URLS
-

+/**
+ * @global array file extensions which can be handled by the search engine,
+ * other extensions will be ignored
+ */
 $INDEXED_FILE_TYPES =
     array(  "html",
             "htm",
@@ -207,6 +230,10 @@ $INDEXED_FILE_TYPES =
             "gif",
             "png");

+/**
+ * @global array associates mimetypes that can be processed by the search
+ * engine with the processor class that can process them
+ */
 $PAGE_PROCESSORS = array(   "text/html" => "HtmlProcessor",
                             "text/asp" => "HtmlProcessor",

@@ -229,19 +256,21 @@ $PAGE_PROCESSORS = array(   "text/html" => "HtmlProcessor",



-
+/**
+ * How many non robot urls the fetcher successfully downloads before
+ * data is sent back to the queue server
+ */
 define ('SEEN_URLS_BEFORE_UPDATE_SCHEDULER', 500);
-define ('MAX_FETCH_SIZE', 5000);
-    //maximum number of urls to schedule to a given fetcher in one go
+
+/** maximum number of urls to schedule to a given fetcher in one go */
+define ('MAX_FETCH_SIZE', 5000);
+
+/** fetcher must wait at least this long between multi-curl requests */
 define ('MINIMUM_FETCH_LOOP_TIME', 5);
-    //fetcher must wait at least this long between multi-curl requests

-/*
- * searching and admin
- */
+/** default number of search results to display per page */
 define ('NUM_RESULTS_PER_PAGE', 10);
-    //default number of search results to display per page

+/** Number of recently crawled urls to display on admin screen */
 define ('NUM_RECENT_URLS_TO_DISPLAY', 10);
-    // Number of recently crawled urls to display on admin screen
 ?>
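
The new docblocks make explicit that TEST_INFO, QUERY_INFO and ERROR_INFO are individual bits of DEBUG_LEVEL, and the rewritten defines derive DISPLAY_TESTS and QUERY_STATISTICS straight from bitwise tests instead of if/else blocks. A standalone sketch of the bitmask idea (illustration only, not the machine-generated config code):

    <?php
    // Each flag occupies its own bit, so debug levels combine with | and
    // are tested with &.
    define('TEST_INFO', 1);   // bit 0: test cases displayable
    define('QUERY_INFO', 2);  // bit 1: query statistics displayed
    define('ERROR_INFO', 4);  // bit 2: php messages displayed

    define('DEBUG_LEVEL', TEST_INFO | ERROR_INFO);  // tests and errors on

    define('DISPLAY_TESTS', (DEBUG_LEVEL & TEST_INFO) == TEST_INFO);      // true
    define('QUERY_STATISTICS', (DEBUG_LEVEL & QUERY_INFO) == QUERY_INFO); // false
    ?>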
diff --git a/controllers/search_controller.php b/controllers/search_controller.php
index cc2668737..c3e67715e 100755
--- a/controllers/search_controller.php
+++ b/controllers/search_controller.php
@@ -154,8 +154,14 @@ class SearchController extends Controller implements CrawlConstants
             }
         }

+        $token_okay = $this->checkCSRFToken('YIOOP_TOKEN', $user);
+        if($token_okay === false) {
+            unset($_SESSION['USER_ID']);
+            $user = $_SERVER['REMOTE_ADDR'];
+        }
         $data['YIOOP_TOKEN'] = $this->generateCSRFToken($user);

+
         $data['ELAPSED_TIME'] = changeInMicrotime($start_time);
         $this->displayView($view, $data);
     }
@@ -172,9 +178,9 @@ class SearchController extends Controller implements CrawlConstants
      *      argument provides auxiliary information on how to conduct the
      *      search. For instance on a related web page search, it might provide
      *      the url of the site with which to perform the related search.
-     *  @param int $results_per_page the maixmum number of search results
+     * @param int $results_per_page the maximum number of search results
      *      that can occur on a page
-     *  @return array an array of at most results_per_page many search results
+     * @return array an array of at most results_per_page many search results
      */
     function processQuery($query, $activity, $arg, $results_per_page)
     {
@@ -236,7 +242,7 @@ class SearchController extends Controller implements CrawlConstants
      * This method parses the raw query string for query activities.
      * It parses the name of each activity and its argument
      *
-     *  @return array list of search activities parsed out of the search string
+     * @return array list of search activities parsed out of the search string
      */
     function extractActivityQuery() {

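
The first hunk above now invalidates the session user and falls back to the requester's IP address when checkCSRFToken() fails, before issuing a fresh token with generateCSRFToken(). Those two methods live in the base Controller class and are not shown in this commit; the sketch below is only a hypothetical illustration of a common pattern (a timestamp plus a keyed hash of user and time), not Yioop's implementation:

    <?php
    // Hypothetical CSRF helpers for illustration; names and scheme are
    // assumptions, the real Controller methods may differ.
    function sketch_generateCSRFToken($user, $secret)
    {
        $time = time();
        return $time . "|" . md5($user . $time . $secret);
    }

    function sketch_checkCSRFToken($token, $user, $secret, $max_age = 3600)
    {
        $parts = explode("|", $token);
        if (count($parts) != 2) { return false; }
        list($time, $hash) = $parts;
        return $hash === md5($user . $time . $secret)
            && (time() - intval($time)) < $max_age;
    }
    ?>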
diff --git a/index.php b/index.php
index 2bfa007b2..f2e6e3093 100755
--- a/index.php
+++ b/index.php
@@ -35,6 +35,7 @@
  * @filesource
  */

+/** Calculate base directory of script */
 define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT'].$_SERVER['PWD'].
     $_SERVER["SCRIPT_NAME"], 0, -strlen("index.php")));

diff --git a/lib/bloom_filter_bundle.php b/lib/bloom_filter_bundle.php
index 16d7cc5a7..6710bd8d9 100644
--- a/lib/bloom_filter_bundle.php
+++ b/lib/bloom_filter_bundle.php
@@ -47,12 +47,34 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
 class BloomFilterBundle
 {

+    /**
+     * Reference to the filter which will be used to store new data
+     * @var object
+     */
     var $current_filter;
+    /**
+     * Total number of filters that this filter bundle currently has
+     * @var int
+     */
     var $num_filters;
+    /**
+     * The number of items which have been stored in the current filter
+     * @var int
+     */
     var $current_filter_count;
+    /**
+     * The maximum capacity of a filter in this filter bundle
+     * @var int
+     */
     var $filter_size;
+    /**
+     * The folder name of this filter bundle
+     * @var string
+     */
     var $dir_name;
-
+    /**
+     * The default maximum size of a filter in a filter bundle
+     */
     const default_filter_size = 10000000;

     /**
@@ -156,7 +178,8 @@ class BloomFilterBundle
     }

     /**
-     *
+     * Saves the meta data (number of filters, number of items stored, and size)
+     * of the bundle
      */
     public function saveMetaData()
     {
@@ -169,7 +192,7 @@ class BloomFilterBundle
     }

     /**
-     *
+     * Used to save to disk all the file data associated with this bundle
      */
     public function forceSave()
     {
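
The field docblocks added above describe the bundle's bookkeeping: one active filter of capacity $filter_size, a running $current_filter_count, and $num_filters filters in total. A conceptual sketch of the rollover this implies, using a plain array and a hypothetical helper (the bundle's real add/membership methods are not part of this hunk):

    <?php
    // Conceptual rollover only; an array stands in for a real BloomFilterFile.
    function sketch_bundleAdd(&$bundle, $item)
    {
        if ($bundle['current_filter_count'] >= $bundle['filter_size']) {
            $bundle['filters'][] = array(); // retire the full filter, start anew
            $bundle['num_filters']++;
            $bundle['current_filter_count'] = 0;
        }
        $current = $bundle['num_filters'] - 1;
        $bundle['filters'][$current][$item] = true; // stand-in for a bloom insert
        $bundle['current_filter_count']++;
    }

    $bundle = array('filters' => array(array()), 'num_filters' => 1,
        'current_filter_count' => 0, 'filter_size' => 2);
    foreach (array("a", "b", "c") as $item) {
        sketch_bundleAdd($bundle, $item);
    }
    echo $bundle['num_filters']; // 2: the third item forced a rollover
    ?>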
diff --git a/lib/fetch_url.php b/lib/fetch_url.php
index 0899b9d06..738201f4f 100755
--- a/lib/fetch_url.php
+++ b/lib/fetch_url.php
@@ -33,6 +33,11 @@

 if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}

+/**
+ * Reads in constants used as enums for storing web sites
+ */
+require_once BASE_DIR."/lib/crawl_constants.php";
+
 /**
  *
  * Code used to manage HTTP requests from one or more URLS
@@ -41,10 +46,7 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
  *
  * @package seek_quarry
  * @subpackage library
- */
-
-require_once BASE_DIR."/lib/crawl_constants.php";
-
+ */
 class FetchUrl implements CrawlConstants
 {

diff --git a/lib/phrase_parser.php b/lib/phrase_parser.php
index 3c4d9142a..7cc49d41f 100755
--- a/lib/phrase_parser.php
+++ b/lib/phrase_parser.php
@@ -36,9 +36,13 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
 /**
  *  load the stem word function, if necessary
  */
-
 require_once BASE_DIR."/lib/porter_stemmer.php";

+/**
+ * Reads in constants used as enums for storing web sites
+ */
+require_once BASE_DIR."/lib/crawl_constants.php";
+
 /**
  * library of functions used to manipulate words and phrases
  *
@@ -48,8 +52,6 @@ require_once BASE_DIR."/lib/porter_stemmer.php";
  * @package seek_quarry
  * @subpackage librarys
  */
-require_once BASE_DIR."/lib/crawl_constants.php";
-
 class PhraseParser
 {
     /**
diff --git a/lib/porter_stemmer.php b/lib/porter_stemmer.php
index 8e953dd36..74f6d3f99 100755
--- a/lib/porter_stemmer.php
+++ b/lib/porter_stemmer.php
@@ -71,8 +71,9 @@ class PorterStemmer
     }

     /**
-     *  Checks to see if the ith character in the buffer is a consonant
+     * Checks to see if the ith character in the buffer is a consonant
      *
+     * @param int $i the index of the character to check
      */
     private static function cons($i)
     {
@@ -97,8 +98,6 @@ class PorterStemmer
      *    <c>vcvcvc<v> gives 3
      *    ....
      */
-
-
     private static function m()
     {
         $n = 0;
@@ -270,7 +269,6 @@ class PorterStemmer
     /* step2() maps double suffices to single ones. so -ization ( = -ize plus
        -ation) maps to -ize etc.Note that the string before the suffix must give
        m() > 0. */
-
     private static function step2()
     {
         if(self::$k < 1) return;
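
The m() method whose surrounding blank lines this hunk trims counts vowel-consonant sequences, as its docblock's <c>vcvcvc<v> examples show. A simplified standalone illustration of that count (it ignores the real cons()'s context-sensitive treatment of 'y'):

    <?php
    // Simplified measure: map the word to a c/v pattern, count "vc" transitions.
    function sketch_measure($word)
    {
        $pattern = "";
        foreach (str_split(strtolower($word)) as $ch) {
            $pattern .= (strpos("aeiou", $ch) === false) ? "c" : "v";
        }
        return substr_count($pattern, "vc");
    }
    // sketch_measure("tree") == 0, sketch_measure("trouble") == 1
    ?>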
diff --git a/lib/string_array.php b/lib/string_array.php
index 4355753bf..7d61ec1c5 100755
--- a/lib/string_array.php
+++ b/lib/string_array.php
@@ -51,10 +51,25 @@ require_once "persistent_structure.php";
 class StringArray extends PersistentStructure
 {

+    /**
+     *
+     */
     var $filename;
+    /**
+     *
+     */
     var $num_values;
+    /**
+     *
+     */
     var $array_size;
+    /**
+     *
+     */
     var $data_size;
+    /**
+     *
+     */
     var $string_array;


diff --git a/lib/unit_test.php b/lib/unit_test.php
index 8dcb2dea2..0270714ed 100644
--- a/lib/unit_test.php
+++ b/lib/unit_test.php
@@ -42,20 +42,31 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
  */
 abstract class UnitTest
 {
+
+    /**
+     * Used to store the results for each test sub case
+     */
     var $test_case_results;
+    /**
+     * Used to hold objects to be used in tests
+     * @var array
+     */
     var $test_objects;
-
+    /**
+     * The suffix that all TestCase methods need to have to be called by run()
+     */
     const case_name = "TestCase";
-    const case_name_len = 8;
     /**
-     *
+     * Constructor should be overridden to do any set up that occurs before
+     * any test cases
      */
     public function __construct()
     {
     }

     /**
-     *
+     * Execute each of the test cases of this unit test and return the results
+     * @return array test case results
      */
     public function run()
     {
@@ -68,7 +79,7 @@ abstract class UnitTest
             $len = strlen($method);

             if(substr_compare(
-                $method, self::case_name, $len - self::case_name_len) == 0) {
+                $method, self::case_name, $len - strlen(self::case_name)) == 0){
                 $this->test_case_results = array();
                 $this->$method();
                 $test_results[$method] = $this->test_case_results;
@@ -80,7 +91,11 @@ abstract class UnitTest
     }

     /**
-     *
+     * Checks that $x can be coerced to true, the result of the
+     * test is added to $this->test_case_results
+     *
+     * @param mixed $x item to check
+     * @param string $description information about this test subcase
      */
     public function assertTrue($x, $description = "")
     {
@@ -96,7 +111,11 @@ abstract class UnitTest
     }

     /**
-     *
+     * Checks that $x can be coerced to false, the result of the
+     * test is added to $this->test_case_results
+     *
+     * @param mixed $x item to check
+     * @param string $description information about this test subcase
      */
     public function assertFalse($x, $description = "")
     {
@@ -112,7 +131,12 @@ abstract class UnitTest
     }

     /**
+     * Checks that $x and $y are the same, the result of the
+     * test is added to $this->test_case_results
      *
+     * @param mixed $x a first item to compare
+     * @param mixed $y a second item to compare
+     * @param string $description information about this test subcase
      */
     public function assertEqual($x, $y, $description = "")
     {
@@ -128,7 +152,12 @@ abstract class UnitTest
     }

     /**
+     * Checks that $x and $y are not the same, the result of the
+     * test is added to $this->test_case_results
      *
+     * @param mixed $x a first item to compare
+     * @param mixed $y a second item to compare
+     * @param string $description information about this test subcase
      */
     public function assertNotEqual($x, $y, $description = "")
     {
@@ -144,12 +173,13 @@ abstract class UnitTest
     }

     /**
-     *
+     * This method is called before each test case is run to set up the
+     * given test case
      */
     abstract public function setUp();

     /**
-     *
+     * This method is called after each test case is run to clean up
      */
     abstract public function tearDown();

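
With the case_name_len constant gone, run() now measures the "TestCase" suffix with strlen(self::case_name) directly. A hypothetical subclass showing how the newly documented pieces fit together (the class and method names are made up for illustration):

    <?php
    // Hypothetical test: run() invokes any method whose name ends in "TestCase",
    // setUp()/tearDown() bracket each case, assert* records the subcase results.
    class StackTest extends UnitTest
    {
        public function setUp()
        {
            $this->test_objects['stack'] = array();
        }

        public function tearDown()
        {
            $this->test_objects = array();
        }

        public function pushPopTestCase()
        {
            array_push($this->test_objects['stack'], 7);
            $this->assertEqual(array_pop($this->test_objects['stack']), 7,
                "pop returns the value just pushed");
            $this->assertTrue(empty($this->test_objects['stack']),
                "stack empty after pop");
        }
    }
    ?>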
diff --git a/lib/web_archive_bundle.php b/lib/web_archive_bundle.php
index aefbc891d..72c50d3d3 100755
--- a/lib/web_archive_bundle.php
+++ b/lib/web_archive_bundle.php
@@ -34,17 +34,26 @@
 if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}

 /**
- * Load files we're dependent on if neccesary
+ * A WebArchiveBundle is a collection of WebArchives, so we need the
+ * definition of a web archive
  */
 require_once 'web_archive.php';
+/**
+ *
+ */
 require_once 'bloom_filter_file.php';
+/**
+ *
+ */
 require_once 'bloom_filter_bundle.php';
+/**
+ *
+ */
 require_once 'gzip_compressor.php';



 /**
- *
  * A web archive bundle is a collection of web archives which are managed
  * together.It is useful to split data across several archive files rather than
  * just store it in one, for both read efficiency and to keep filesizes from
@@ -59,13 +68,37 @@ require_once 'gzip_compressor.php';
 class WebArchiveBundle
 {

+    /**
+     *
+     */
     var $dir_name;
+    /**
+     *
+     */
     var $filter_size;
+    /**
+     *
+     */
     var $partition = array();
+    /**
+     *
+     */
     var $page_exists_filter_bundle;
+    /**
+     *
+     */
     var $num_partitions;
+    /**
+     *
+     */
     var $count;
+    /**
+     *
+     */
     var $description;
+    /**
+     *
+     */
     var $compressor;

     /**
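
The surviving class comment explains that a bundle spreads pages over several web archive files for read efficiency and to bound file sizes. How a page is routed to one of the $num_partitions archives is not shown in this hunk; the snippet below is only an assumed illustration of one common approach, hashing a key modulo the partition count:

    <?php
    // Assumption for illustration only: crc32 stands in for whatever hash
    // the real bundle uses to pick among its $num_partitions archives.
    function sketch_selectPartition($key, $num_partitions)
    {
        return abs(crc32($key)) % $num_partitions;
    }
    echo sketch_selectPartition("http://www.example.com/", 10); // value in 0..9
    ?>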
diff --git a/lib/web_queue_bundle.php b/lib/web_queue_bundle.php
index dbdec0bc0..13a5abad3 100755
--- a/lib/web_queue_bundle.php
+++ b/lib/web_queue_bundle.php
@@ -34,14 +34,33 @@
 if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}

 /**
- * Load classes we're dependent on
+ * We use a variety of bloom filters for handling robots.txt data
  */
 require_once 'bloom_filter_file.php';
-require_once 'bloom_filter_bundle.php';
+/**
+ * Data on which urls we've already crawled is stored in a bloom filter bundle
+ */
+require_once 'bloom_filter_bundle.php';
+/**
+ * Priority queue is used to store the 8 byte ids of urls to crawl next
+ */
 require_once 'priority_queue.php';
+/**
+ * Hash table is used to store for each id in the priority queue an offset into
+ * a web archive where that id's actual complete url is stored
+ */
 require_once 'hash_table.php';
-require_once 'non_compressor.php';
+/**
+ * Urls are stored in a web archive using a compressor that does no compression
+ */
+require_once 'non_compressor.php';
+/**
+ *  Used to store the urls to crawl
+ */
 require_once 'web_archive.php';
+/**
+ *  Used for the crawlHash function
+ */
 require_once 'utility.php';

 /**
@@ -54,20 +73,67 @@ require_once 'utility.php';
 class WebQueueBundle implements Notifier
 {

+    /**
+     * The folder name of this WebQueueBundle
+     * @var string
+     */
     var $dir_name;
+    /**
+     *
+     * @var int
+     */
     var $filter_size;
+    /**
+     *
+     * @var int
+     */
     var $num_urls_ram;
+    /**
+     *
+     * @var int
+     */
     var $min_or_max;
-
+    /**
+     *
+     * @var object
+     */
     var $to_crawl_queue;
+    /**
+     *
+     * @var object
+     */
     var $to_crawl_table;
+    /**
+     *
+     * @var int
+     */
     var $hash_rebuild_count;
+    /**
+     *
+     * @var int
+     */
     var $max_hash_ops_before_rebuild;
+    /**
+     *
+     * @var object
+     */
     var $to_crawl_archive;

     var $url_exists_filter_bundle;
+    /**
+     *
+     * @var object
+     */
     var $got_robottxt_filter;
+    /**
+     *
+     * @var object
+     */
     var $dissallowed_robot_filter;
+    /**
+     *
+     * @var object
+     */
     var $crawl_delay_filter;

     const max_url_archive_offset = 1000000000;
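
The require_once docblocks added at the top of this file spell out the division of labor among the bundle's parts: the priority queue holds 8 byte url ids, the hash table maps each id to an offset, and a web archive holds the full url at that offset. A toy illustration of that lookup chain with in-memory arrays (the real structures are on-disk classes, and the id below is only a stand-in for crawlHash):

    <?php
    $url = "http://www.example.com/";
    $id = substr(md5($url, true), 0, 8); // stand-in for an 8 byte crawlHash id

    $to_crawl_archive = array($url);     // web archive: complete urls
    $to_crawl_table = array($id => 0);   // hash table: id -> archive offset
    $to_crawl_queue = array($id);        // priority queue: ids ordered by weight

    // To get the next url to crawl: take an id off the queue, look up its
    // offset, then read the full url out of the archive.
    $next_id = array_shift($to_crawl_queue);
    echo $to_crawl_archive[$to_crawl_table[$next_id]];
    ?>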
diff --git a/locale/extract_merge.php b/locale/extract_merge.php
index 44bffb0ac..91f55b574 100755
--- a/locale/extract_merge.php
+++ b/locale/extract_merge.php
@@ -44,6 +44,7 @@ if(isset($_SERVER['DOCUMENT_ROOT']) && strlen($_SERVER['DOCUMENT_ROOT']) > 0) {
     exit();
 }

+/** Calculate base directory of script */
 define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT'].$_SERVER['PWD'].
     $_SERVER["SCRIPT_NAME"], 0,
     -strlen("locale/extract_merge.php")));
@@ -78,13 +79,13 @@ updateLocales($general_ini, $strings);

 /**
  * Cycles through locale subdirectories in LOCALE_DIR, for each
- * locale it merges out the current gneral_ini and strings data.
+ * locale it merges out the current general_ini and strings data.
  * It deletes identifiers that are not in strings, it adds new identifiers
  * and it leaves existing identifier translation pairs untouched.
  *
  * @param array $general_ini  data that would typically come from the
  *      general.ini file
- * @param array $string lines from what is equivalent to an ini file of
+ * @param array $strings lines from what is equivalent to an ini file of
  *      msg_id msg_string pairs these lines also have comments on the file
  *      that strings were extracted from
  *
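
The corrected docblock describes a three-way merge: identifiers no longer present in the extracted strings are deleted, newly extracted identifiers are added, and existing identifier-translation pairs are left untouched. A toy sketch of that rule on plain arrays (hypothetical helper and identifiers, not the real updateLocales()):

    <?php
    // Keep translations for identifiers still extracted, add new ones
    // untranslated, drop identifiers that no longer appear.
    function sketch_mergeStrings($existing, $extracted_ids)
    {
        $merged = array();
        foreach ($extracted_ids as $id) {
            $merged[$id] = isset($existing[$id]) ? $existing[$id] : "";
        }
        return $merged;
    }

    $old = array("serverlabel_name" => "Name", "serverlabel_stale" => "Old");
    $extracted = array("serverlabel_name", "serverlabel_new");
    print_r(sketch_mergeStrings($old, $extracted));
    // keeps "Name", adds serverlabel_new as "", drops serverlabel_stale
    ?>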
diff --git a/models/crawl_model.php b/models/crawl_model.php
index 6922de4b6..1dcdff303 100755
--- a/models/crawl_model.php
+++ b/models/crawl_model.php
@@ -60,7 +60,7 @@ class CrawlModel extends Model implements CrawlConstants
     /**
      * Stores the name of the current index archive to use to get search
      * results from
-     * @Var string
+     * @var string
      */
     var $index_name;

diff --git a/tests/index.php b/tests/index.php
index 1ea3c119b..f6546c77f 100644
--- a/tests/index.php
+++ b/tests/index.php
@@ -34,6 +34,7 @@
  * @filesource
  */

+/** Calculate base directory of script */
 define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT'].$_SERVER['PWD'].
     $_SERVER["SCRIPT_NAME"], 0,
     -strlen("tests/index.php")));