diff --git a/FAQ b/FAQ index 241699d2c..807cd41cc 100644 --- a/FAQ +++ b/FAQ @@ -57,7 +57,8 @@ Extending Yioop =============== Q: How do I write a page processor for a new mime type? -Q: How do I get the Yioop engine to work with a different database management system? +Q: How do I get the Yioop engine to work with a different database management +system? Q: How do I get the Yioop engine to use a different crawl order algorithm? diff --git a/bin/fetcher.php b/bin/fetcher.php index 5569be9d8..e633f1690 100755 --- a/bin/fetcher.php +++ b/bin/fetcher.php @@ -40,13 +40,16 @@ ini_set("memory_limit","500M"); //so have enough memory to crawl big pages /** Load in global configuration settings */ require_once BASE_DIR.'/configs/config.php'; if(!PROFILE) { - echo "Please configure the search engine instance by visiting its web interface on localhost.\n"; + echo "Please configure the search engine instance by visiting " . + "its web interface on localhost.\n"; exit(); } /** get the database library based on the current database type */ require_once BASE_DIR."/models/datasources/".DBMS."_manager.php"; -/** caches of web pages are stored in a web archive bundle, so we load in its definition */ +/** caches of web pages are stored in a + * web archive bundle, so we load in its definition + */ require_once BASE_DIR."/lib/web_archive_bundle.php"; /** get processors for different file types */ @@ -76,23 +79,25 @@ require_once BASE_DIR."/lib/crawl_constants.php"; mb_internal_encoding("UTF-8"); mb_regex_encoding("UTF-8"); - /** - * This class is responsible for fetching web pages for the SeekQuarry/Yioop search engine + * This class is responsible for fetching web pages for the + * SeekQuarry/Yioop search engine * - * Fetcher periodically queries the queue server asking for web pages to fetch. It gets at most - * MAX_FETCH_SIZE many web pages from the queue_server in one go. It then fetches these - * pages. Pages are fetched in batches of NUM_MULTI_CURL_PAGES many pages. 
- * Each SEEN_URLS_BEFORE_UPDATE_SCHEDULER many downloaded pages (not including robot pages), - * the fetcher sends summaries back to the machine on which the queue_server lives. It does - * this by making a request of the web server on that machine and POSTs the data to the - * seek quarry web app. This data is handled by the FetchController class. The summary data can - * include up to four things: (1) robot.txt data, (2) summaries of each web page downloaded in the - * batch, (3), a list of future urls to add to the to-crawl queue, and (4) a partial inverted index - * saying for each word that occurred in the current SEEN_URLS_BEFORE_UPDATE_SCHEDULER documents - * batch, what documents it occurred in. The inverted index also associates to each word document - * pair several scores. More information on these scores can be found in the documentation for - * {@link buildMiniInvertedIndex()} + * Fetcher periodically queries the queue server asking for web pages to fetch. + * It gets at most MAX_FETCH_SIZE many web pages from the queue_server in one + * go. It then fetches these pages. Pages are fetched in batches of + * NUM_MULTI_CURL_PAGES many pages. Each SEEN_URLS_BEFORE_UPDATE_SCHEDULER many + * downloaded pages (not including robot pages), the fetcher sends summaries + * back to the machine on which the queue_server lives. It does this by making a + * request of the web server on that machine and POSTs the data to the + * yioop web app. This data is handled by the FetchController class. The + * summary data can include up to four things: (1) robot.txt data, (2) summaries + * of each web page downloaded in the batch, (3), a list of future urls to add + * to the to-crawl queue, and (4) a partial inverted index saying for each word + * that occurred in the current SEEN_URLS_BEFORE_UPDATE_SCHEDULER documents + * batch, what documents it occurred in. The inverted index also associates to + * each word document pair several scores. 
More information on these scores can + * be found in the documentation for {@link buildMiniInvertedIndex()} * * @author Chris Pollett * @package seek_quarry @@ -141,14 +146,15 @@ class Fetcher implements CrawlConstants $this->sum_seen_site_link_length = 0; $this->num_seen_sites = 0; - //we will get the correct values for the next two things from the queue_server + //we will get the correct crawl order from the queue_server $this->crawl_order = "OPIC"; } /** - * This is the function that should be called to get the fetcher to start fetching. - * Calls init to handle the command line arguments then enters the fetcher's main loop + * This is the function that should be called to get the fetcher to start + * fetching. Calls init to handle the command line arguments then enters + * the fetcher's main loop */ function start() { @@ -172,10 +178,12 @@ class Fetcher implements CrawlConstants while ($info[self::STATUS] != self::STOP_STATE) { - if(file_exists(CRAWL_DIR."/schedules/fetcher_messages.txt")) { - $info = unserialize(file_get_contents(CRAWL_DIR."/schedules/fetcher_messages.txt")); - unlink(CRAWL_DIR."/schedules/fetcher_messages.txt"); - if(isset($info[self::STATUS]) && $info[self::STATUS] == self::STOP_STATE) {continue;} + $fetcher_message_file = CRAWL_DIR."/schedules/fetcher_messages.txt"; + if(file_exists($fetcher_message_file)) { + $info = unserialize(file_get_contents($fetcher_message_file)); + unlink($fetcher_message_file); + if(isset($info[self::STATUS]) && + $info[self::STATUS] == self::STOP_STATE) {continue;} } $info = $this->checkScheduler(); @@ -189,13 +197,15 @@ class Fetcher implements CrawlConstants continue; } + $tmp_base_name = CRAWL_DIR."/cache/". 
+ self::archive_base_name.$info[self::CRAWL_TIME]; if($this->web_archive == NULL || (isset($info[self::CRAWL_TIME]) && - $this->web_archive->dir_name != CRAWL_DIR."/cache/".self::archive_base_name.$info[self::CRAWL_TIME])) { + $this->web_archive->dir_name != $tmp_base_name)) { if(isset($this->web_archive->dir_name)) { crawlLog("Old name: ".$this->web_archive->dir_name); } - $this->web_archive = new WebArchiveBundle(CRAWL_DIR.'/cache/'.self::archive_base_name.$info[self::CRAWL_TIME], - URL_FILTER_SIZE, NUM_ARCHIVE_PARTITIONS); + $this->web_archive = new WebArchiveBundle($tmp_base_name, + URL_FILTER_SIZE, NUM_ARCHIVE_PARTITIONS); $this->crawl_time = $info[self::CRAWL_TIME]; $this->sum_seen_title_length = 0; $this->sum_seen_description_length = 0; @@ -215,7 +225,8 @@ class Fetcher implements CrawlConstants $sites = $this->getFetchSites(); if(!$sites) { crawlLog("No seeds to fetch..."); - sleep(max(0, ceil(MINIMUM_FETCH_LOOP_TIME - changeInMicrotime($start_time)))); + sleep(max(0, ceil( + MINIMUM_FETCH_LOOP_TIME - changeInMicrotime($start_time)))); continue; } @@ -224,13 +235,15 @@ class Fetcher implements CrawlConstants $deduplicated_pages = $this->deleteSeenPages($site_pages); $start_time = microtime(); - $summarized_site_pages = $this->processFetchPages($deduplicated_pages); - + $summarized_site_pages = + $this->processFetchPages($deduplicated_pages); + crawlLog("Number summarize pages".count($summarized_site_pages)); $this->updateFoundSites($summarized_site_pages); - sleep(max(0, ceil(MINIMUM_FETCH_LOOP_TIME - changeInMicrotime($start_time)))); + sleep(max(0, ceil( + MINIMUM_FETCH_LOOP_TIME - changeInMicrotime($start_time)))); } //end while crawlLog("Fetcher shutting down!!"); @@ -243,7 +256,8 @@ class Fetcher implements CrawlConstants $dirs = glob(CRAWL_DIR.'/cache/*', GLOB_ONLYDIR); foreach($dirs as $dir) { - if(strlen($pre_timestamp = strstr($dir, self::archive_base_name)) > 0) { + if(strlen( + $pre_timestamp = strstr($dir, self::archive_base_name)) > 0) { $time 
= substr($pre_timestamp, strlen(self::archive_base_name)); if(!in_array($time, $still_active_crawls) ){ $this->db->unlinkRecursive($dir); @@ -254,10 +268,11 @@ class Fetcher implements CrawlConstants } /** - * Makes a request of the queue server machine to get the timestamp of the currently running crawl to see if changed + * Makes a request of the queue server machine to get the timestamp of the + * currently running crawl to see if it has changed * - * Get the timestamp from queue_server of the currently running crawl, if the timestamp has changed - * drop the rest of the current fetch batch. + * Get the timestamp from queue_server of the currently running crawl, + * if the timestamp has changed, drop the rest of the current fetch batch. */ function checkCrawlTime() { @@ -267,13 +282,17 @@ class Fetcher implements CrawlConstants $time = time(); $session = md5($time . AUTH_KEY); - //if just restarted, check to make sure the crawl hasn't changed, if it has bail - $request = $queue_server."?c=fetch&a=crawlTime&time=$time&session=$session"; + /* if just restarted, check to make sure the crawl hasn't changed, + if it has, bail + */ + $request = + $queue_server."?c=fetch&a=crawlTime&time=$time&session=$session"; $info_string = FetchUrl::getPage($request); $info = @unserialize(trim($info_string)); - if(isset($info[self::CRAWL_TIME]) && $info[self::CRAWL_TIME] != $this->crawl_time) { + if(isset($info[self::CRAWL_TIME]) + && $info[self::CRAWL_TIME] != $this->crawl_time) { $this->to_crawl = array(); // crawl has changed. Dump rest of batch. } @@ -296,7 +315,8 @@ class Fetcher implements CrawlConstants $time = time(); $session = md5($time . 
AUTH_KEY); - $request = $queue_server."?c=fetch&a=schedule&time=$time&session=$session"; + $request = + $queue_server."?c=fetch&a=schedule&time=$time&session=$session"; $info_string = FetchUrl::getPage($request); $info = unserialize(trim($info_string)); @@ -381,20 +401,24 @@ class Fetcher implements CrawlConstants function deleteSeenPages(&$site_pages) { $start_time = microtime(); - crawlLog(" Delete duplicated pages time".(changeInMicrotime($start_time))); $deduplicated_pages = array(); - $unseen_page_hashes = $this->web_archive->differencePageKeysFilter($site_pages, self::HASH); + $unseen_page_hashes = + $this->web_archive->differencePageKeysFilter($site_pages, + self::HASH); foreach($site_pages as $site) { if( isset($site[self::ROBOT_PATHS])) { $deduplicated_pages[] = $site; - } else if (isset($site[self::HASH]) && in_array($site[self::HASH], $unseen_page_hashes)) { + } else if (isset($site[self::HASH]) && in_array($site[self::HASH], + $unseen_page_hashes)) { $this->web_archive->addPageFilter(self::HASH, $site); $deduplicated_pages[] = $site; } } + crawlLog(" Delete duplicated pages time". 
+ (changeInMicrotime($start_time))); return $deduplicated_pages; } @@ -448,7 +472,8 @@ class Fetcher implements CrawlConstants $processor = new $page_processor(); - $doc_info = $processor->process($site[self::PAGE], $site[self::URL]); + $doc_info = $processor->process($site[self::PAGE], + $site[self::URL]); if($doc_info) { @@ -472,7 +497,8 @@ class Fetcher implements CrawlConstants $stored_site_pages[$i][self::URL] = $site[self::URL]; - $stored_site_pages[$i][self::TIMESTAMP] = $site[self::TIMESTAMP]; + $stored_site_pages[$i][self::TIMESTAMP] = + $site[self::TIMESTAMP]; $stored_site_pages[$i][self::TYPE] = $site[self::TYPE]; if(isset($site[self::ENCODING])) { $encoding = $site[self::ENCODING]; @@ -480,38 +506,49 @@ class Fetcher implements CrawlConstants $encoding = "UTF-8"; } $stored_site_pages[$i][self::ENCODING] = $encoding; - $stored_site_pages[$i][self::HTTP_CODE] = $site[self::HTTP_CODE]; + $stored_site_pages[$i][self::HTTP_CODE] = + $site[self::HTTP_CODE]; $stored_site_pages[$i][self::HASH] = $site[self::HASH]; $stored_site_pages[$i][self::PAGE] = $site[self::PAGE]; - $summarized_site_pages[$i][self::URL] = strip_tags($site[self::URL]); - $summarized_site_pages[$i][self::TITLE] = strip_tags($site[self::DOC_INFO][self::TITLE]); // stripping html to be on the safe side - $summarized_site_pages[$i][self::DESCRIPTION] = strip_tags($site[self::DOC_INFO][self::DESCRIPTION]); - $summarized_site_pages[$i][self::TIMESTAMP] = $site[self::TIMESTAMP]; + $summarized_site_pages[$i][self::URL] = + strip_tags($site[self::URL]); + $summarized_site_pages[$i][self::TITLE] = strip_tags( + $site[self::DOC_INFO][self::TITLE]); + // stripping html to be on the safe side + $summarized_site_pages[$i][self::DESCRIPTION] = + strip_tags($site[self::DOC_INFO][self::DESCRIPTION]); + $summarized_site_pages[$i][self::TIMESTAMP] = + $site[self::TIMESTAMP]; $summarized_site_pages[$i][self::ENCODING] = $encoding; $summarized_site_pages[$i][self::HASH] = $site[self::HASH]; 
$summarized_site_pages[$i][self::TYPE] = $site[self::TYPE]; - $summarized_site_pages[$i][self::HTTP_CODE] = $site[self::HTTP_CODE]; + $summarized_site_pages[$i][self::HTTP_CODE] = + $site[self::HTTP_CODE]; $summarized_site_pages[$i][self::WEIGHT] = $site[self::WEIGHT]; if(isset($site[self::DOC_INFO][self::LINKS])) { - $summarized_site_pages[$i][self::LINKS] = $site[self::DOC_INFO][self::LINKS]; + $summarized_site_pages[$i][self::LINKS] = + $site[self::DOC_INFO][self::LINKS]; } if(isset($site[self::DOC_INFO][self::THUMB])) { - $summarized_site_pages[$i][self::THUMB] = $site[self::DOC_INFO][self::THUMB]; + $summarized_site_pages[$i][self::THUMB] = + $site[self::DOC_INFO][self::THUMB]; } $i++; } } // end for - $stored_site_pages = $this->web_archive->addPages(self::HASH, self::OFFSET, $stored_site_pages); + $stored_site_pages = $this->web_archive->addPages(self::HASH, + self::OFFSET, $stored_site_pages); $num_pages = count($stored_site_pages); for($i = 0; $i < $num_pages; $i++) { $summarized_site_pages[$i][self::INDEX] = $num_items + $i; if(isset($stored_site_pages[$i][self::OFFSET])) { - $summarized_site_pages[$i][self::OFFSET] = $stored_site_pages[$i][self::OFFSET]; + $summarized_site_pages[$i][self::OFFSET] = + $stored_site_pages[$i][self::OFFSET]; } } @@ -540,7 +577,8 @@ class Fetcher implements CrawlConstants $robot_rows = array(); foreach($lines as $line) { if(stristr($line, "User-agent") && (stristr($line, ":*") - || stristr($line, " *") || stristr($line, USER_AGENT_SHORT) || $add_rule_state)) { + || stristr($line, " *") || stristr($line, USER_AGENT_SHORT) + || $add_rule_state)) { $add_rule_state = ($add_rule_state) ? 
false : true; } @@ -557,7 +595,8 @@ class Fetcher implements CrawlConstants if(stristr($line, "Crawl-delay")) { - $delay_string = trim(preg_replace('/Crawl\-delay\:/i', "", $line)); + $delay_string = trim( + preg_replace('/Crawl\-delay\:/i', "", $line)); $delay_flag = true; } } @@ -588,9 +627,11 @@ class Fetcher implements CrawlConstants if(isset($site[self::ROBOT_PATHS])) { $host = UrlParser::getHost($site[self::URL]); - $this->found_sites[self::ROBOT_TXT][$host][self::PATHS] = $site[self::ROBOT_PATHS]; + $this->found_sites[self::ROBOT_TXT][$host][self::PATHS] = + $site[self::ROBOT_PATHS]; if(isset($site[self::CRAWL_DELAY])) { - $this->found_sites[self::ROBOT_TXT][$host][self::CRAWL_DELAY] = $site[self::CRAWL_DELAY]; + $this->found_sites[self::ROBOT_TXT][$host][ + self::CRAWL_DELAY] = $site[self::CRAWL_DELAY]; } } else { $this->found_sites[self::SEEN_URLS][] = $site; @@ -618,7 +659,8 @@ class Fetcher implements CrawlConstants foreach($link_urls as $link_url) { if(strlen($link_url) > 0) { - $this->found_sites[self::TO_CRAWL][] = array($link_url, $weight); + $this->found_sites[self::TO_CRAWL][] = + array($link_url, $weight); } } @@ -626,7 +668,8 @@ class Fetcher implements CrawlConstants } //end else if(isset($this->found_sites[self::TO_CRAWL])) { - $this->found_sites[self::TO_CRAWL] = array_filter($this->found_sites[self::TO_CRAWL]); + $this->found_sites[self::TO_CRAWL] = + array_filter($this->found_sites[self::TO_CRAWL]); } crawlLog($site[self::INDEX].". 
".$site[self::URL]); @@ -634,7 +677,9 @@ class Fetcher implements CrawlConstants if(count($this->to_crawl) <= 0 || - ( isset($this->found_sites[self::SEEN_URLS]) && count($this->found_sites[self::SEEN_URLS]) > SEEN_URLS_BEFORE_UPDATE_SCHEDULER)) { + ( isset($this->found_sites[self::SEEN_URLS]) && + count($this->found_sites[self::SEEN_URLS]) > + SEEN_URLS_BEFORE_UPDATE_SCHEDULER)) { $this->updateScheduler(); } @@ -654,12 +699,15 @@ class Fetcher implements CrawlConstants $this->found_sites[self::SCHEDULE_TIME] = $this->schedule_time; } - if(isset($this->found_sites[self::SEEN_URLS]) && count($this->found_sites[self::SEEN_URLS]) > 0 ) { + if(isset($this->found_sites[self::SEEN_URLS]) && + count($this->found_sites[self::SEEN_URLS]) > 0 ) { $this->buildMiniInvertedIndex(); } - $post_data = array('c'=>'fetch', 'a'=>'update', 'crawl_time' => $this->crawl_time, 'machine_uri' => WEB_URI); - $post_data['found'] = urlencode(base64_encode(gzcompress(serialize($this->found_sites)))); + $post_data = array('c'=>'fetch', 'a'=>'update', + 'crawl_time' => $this->crawl_time, 'machine_uri' => WEB_URI); + $post_data['found'] = urlencode(base64_encode( + gzcompress(serialize($this->found_sites)))); $bytes_to_send = strlen($post_data['found']); $this->found_sites = array(); // reset found_sites so have more space. @@ -679,15 +727,19 @@ class Fetcher implements CrawlConstants $post_data['session'] = $session; $info_string = FetchUrl::getPage($queue_server, $post_data); - crawlLog("Updating Queue Server, sending approximately $bytes_to_send bytes:"); + crawlLog( + "Updating Queue Server, sending approximately" . 
+ " $bytes_to_send bytes:"); $info = unserialize(trim($info_string)); crawlLog("Queue Server info response code: ".$info[self::STATUS]); crawlLog("Queue Server's crawl time is: ".$info[self::CRAWL_TIME]); - } while(!isset($info[self::STATUS]) || $info[self::STATUS] != self::CONTINUE_STATE); + } while(!isset($info[self::STATUS]) || + $info[self::STATUS] != self::CONTINUE_STATE); - if(isset($info[self::CRAWL_TIME]) && $info[self::CRAWL_TIME] != $this->crawl_time) { + if(isset($info[self::CRAWL_TIME]) && + $info[self::CRAWL_TIME] != $this->crawl_time) { $this->to_crawl = array(); // crawl has changed. Dump rest of batch. } @@ -702,8 +754,10 @@ class Fetcher implements CrawlConstants $words = array(); $doc_statistics = $this->computeDocumentStatistics(); $average_title_length = $doc_statistics[self::AVERAGE_TITLE_LENGTH]; - $average_description_length = $doc_statistics[self::AVERAGE_DESCRIPTION_LENGTH]; - $average_total_link_text_length = $doc_statistics[self::AVERAGE_TOTAL_LINK_TEXT_LENGTH]; + $average_description_length = + $doc_statistics[self::AVERAGE_DESCRIPTION_LENGTH]; + $average_total_link_text_length = + $doc_statistics[self::AVERAGE_TOTAL_LINK_TEXT_LENGTH]; foreach($doc_statistics as $doc_key => $info) { @@ -712,49 +766,72 @@ class Fetcher implements CrawlConstants $link_length = $info[self::LINK_LENGTH]; $title_ratio = $title_length/$average_title_length; - $description_ratio = $description_length/$average_description_length; + $description_ratio = + $description_length/$average_description_length; $link_ratio = $link_length/$average_total_link_text_length; if(isset($info[self::TITLE_WORDS])) { - foreach($info[self::TITLE_WORDS] as $word_key => $num_occurrences) { + foreach($info[self::TITLE_WORDS] + as $word_key => $num_occurrences) { $title_frequency = $num_occurrences/$title_length; - $words[crawlHash($word_key)][$doc_key][self::TITLE_WORD_SCORE] = - number_format(3 * $title_frequency/($title_frequency + .5 + 1.5* $title_ratio), - PRECISION); - 
$words[crawlHash($word_key)][$doc_key][self::DESCRIPTION_WORD_SCORE] = 0; // will set in a moment if has value - $words[crawlHash($word_key)][$doc_key][self::LINK_WORD_SCORE] = 0; + $words[crawlHash($word_key)][$doc_key][ + self::TITLE_WORD_SCORE] = + number_format(3 * $title_frequency/ + ($title_frequency + .5 + 1.5* $title_ratio), + PRECISION); + $words[crawlHash($word_key)][$doc_key][ + self::DESCRIPTION_WORD_SCORE] = 0; + // will set in a moment if has value + $words[crawlHash($word_key)][$doc_key][ + self::LINK_WORD_SCORE] = 0; } } if(isset($info[self::DESCRIPTION_WORDS])) { - foreach($info[self::DESCRIPTION_WORDS] as $word_key => $num_occurrences) { - $description_frequency = $num_occurrences/$description_length; - - $words[crawlHash($word_key)][$doc_key][self::DESCRIPTION_WORD_SCORE] = - number_format(3 * $description_frequency/($description_frequency - + .5 + 1.5* $description_ratio), PRECISION); - if(!isset($words[crawlHash($word_key)][$doc_key][self::TITLE_WORD_SCORE])) { - $words[crawlHash($word_key)][$doc_key][self::TITLE_WORD_SCORE] = 0; + foreach($info[self::DESCRIPTION_WORDS] + as $word_key => $num_occurrences) { + $description_frequency = + $num_occurrences/$description_length; + + $words[crawlHash($word_key)][$doc_key][ + self::DESCRIPTION_WORD_SCORE] = + number_format(3 * $description_frequency/ + ($description_frequency + + .5 + 1.5* $description_ratio), PRECISION); + + if(!isset($words[crawlHash($word_key)][$doc_key][ + self::TITLE_WORD_SCORE])) { + $words[crawlHash($word_key)][$doc_key][ + self::TITLE_WORD_SCORE] = 0; } - - $words[crawlHash($word_key)][$doc_key][self::LINK_WORD_SCORE] = 0; + + $words[crawlHash($word_key)][$doc_key][ + self::LINK_WORD_SCORE] = 0; } } if(isset($info[self::LINK_WORDS])) { - foreach($info[self::LINK_WORDS] as $word_key => $num_occurrences) { + foreach($info[self::LINK_WORDS] + as $word_key => $num_occurrences) { $link_frequency = $num_occurrences/$link_length; - 
$words[crawlHash($word_key)][$doc_key][self::LINK_WORD_SCORE] = - number_format(3 * $link_frequency/($link_frequency + .5 + 1.5* $link_ratio), PRECISION); + $words[crawlHash($word_key)][$doc_key][ + self::LINK_WORD_SCORE] = number_format( + 3 * $link_frequency/ + ($link_frequency + .5 + 1.5* $link_ratio), + PRECISION); - if(!isset($words[crawlHash($word_key)][$doc_key][self::TITLE_WORD_SCORE])) { - $words[crawlHash($word_key)][$doc_key][self::TITLE_WORD_SCORE] = 0; + if(!isset($words[crawlHash($word_key)][$doc_key][ + self::TITLE_WORD_SCORE])) { + $words[crawlHash($word_key)][$doc_key][ + self::TITLE_WORD_SCORE] = 0; } - if(!isset($words[crawlHash($word_key)][$doc_key][self::DESCRIPTION_WORD_SCORE])) { - $words[crawlHash($word_key)][$doc_key][self::DESCRIPTION_WORD_SCORE] = 0; + if(!isset($words[crawlHash($word_key)][$doc_key][ + self::DESCRIPTION_WORD_SCORE])) { + $words[crawlHash($word_key)][$doc_key][ + self::DESCRIPTION_WORD_SCORE] = 0; } } } @@ -764,10 +841,13 @@ class Fetcher implements CrawlConstants foreach($words as $word_key => $docs_info) { foreach($docs_info as $doc_key => $info) { $doc_depth = $doc_statistics[$doc_key][self::DOC_DEPTH]; - $doc_rank = (11 - $doc_depth) + $doc_statistics[$doc_key][self::URL_WEIGHT]; - $words[$word_key][$doc_key][self::DOC_RANK] = number_format($doc_rank, PRECISION); //our proxy for page rank + $doc_rank = (11 - $doc_depth) + + $doc_statistics[$doc_key][self::URL_WEIGHT]; + $words[$word_key][$doc_key][self::DOC_RANK] = + number_format($doc_rank, PRECISION); //proxy for page rank - $orphan = (isset($info[self::LINK_WORDS]) && count($info[self::LINK_WORDS]) > 0 ) ? 1 : .5; + $orphan = (isset($info[self::LINK_WORDS]) && + count($info[self::LINK_WORDS]) > 0 ) ? 
1 : .5; $words[$word_key][$doc_key][self::SCORE] = number_format( .8*($doc_rank) @@ -778,22 +858,28 @@ class Fetcher implements CrawlConstants } } - if(STORE_INLINKS_IN_DICTIONARY && isset($doc_statistics[self::INLINKS])) { - foreach($doc_statistics[self::INLINKS] as $url_word_key => $docs_info) { + if(STORE_INLINKS_IN_DICTIONARY && + isset($doc_statistics[self::INLINKS])) { + foreach($doc_statistics[self::INLINKS] + as $url_word_key => $docs_info) { foreach($docs_info as $doc_key) { - $doc_depth = $doc_statistics[$doc_key][self::DOC_DEPTH] + 1; + $doc_depth = $doc_statistics[$doc_key][self::DOC_DEPTH] + 1; $words[$url_word_key][$doc_key][self::TITLE_WORD_SCORE] = 0; - $words[$url_word_key][$doc_key][self::DESCRIPTION_WORD_SCORE] = 0; + $words[$url_word_key][$doc_key][ + self::DESCRIPTION_WORD_SCORE] = 0; $words[$url_word_key][$doc_key][self::LINK_WORD_SCORE] = 0; - $words[$url_word_key][$doc_key][self::DOC_RANK] = number_format(11 - $doc_depth, PRECISION); - $words[$url_word_key][$doc_key][self::SCORE] = number_format(11 - $doc_depth, PRECISION); + $words[$url_word_key][$doc_key][self::DOC_RANK] = + number_format(11 - $doc_depth, PRECISION); + $words[$url_word_key][$doc_key][self::SCORE] = + number_format(11 - $doc_depth, PRECISION); } } } $this->found_sites[self::INVERTED_INDEX] = $words; - crawlLog(" Build mini inverted index time ".(changeInMicrotime($start_time))); + crawlLog(" Build mini inverted index time ". 
+ (changeInMicrotime($start_time))); } @@ -807,43 +893,68 @@ class Fetcher implements CrawlConstants foreach($this->found_sites[self::SEEN_URLS] as $site) { $doc_key = crawlHash($site[self::URL]); - $doc_statistics[$doc_key][self::URL_WEIGHT] = 3 - log(strlen($site[self::URL])); //negative except for short urls - - $title_phrase_string = mb_ereg_replace("[[:punct:]]", " ", $site[self::TITLE]); - $doc_statistics[$doc_key][self::TITLE_WORDS] = PhraseParser::extractPhrasesAndCount($title_phrase_string); - $doc_statistics[$doc_key][self::TITLE_LENGTH] = $this->sumCountArray($doc_statistics[$doc_key][self::TITLE_WORDS]); - $this->sum_seen_site_title_length += $doc_statistics[$doc_key][self::TITLE_LENGTH]; - - $description_phrase_string = mb_ereg_replace("[[:punct:]]", " ", $site[self::DESCRIPTION]); - $doc_statistics[$doc_key][self::DESCRIPTION_WORDS] = PhraseParser::extractPhrasesAndCount($description_phrase_string); - $doc_statistics[$doc_key][self::DESCRIPTION_LENGTH] = $this->sumCountArray($doc_statistics[$doc_key][self::DESCRIPTION_WORDS]); - $this->sum_seen_site_description_length += $doc_statistics[$doc_key][self::DESCRIPTION_LENGTH]; + $doc_statistics[$doc_key][self::URL_WEIGHT] = + 3 - log(strlen($site[self::URL])); //negative except short urls + + $title_phrase_string = + mb_ereg_replace("[[:punct:]]", " ", $site[self::TITLE]); + $doc_statistics[$doc_key][self::TITLE_WORDS] = + PhraseParser::extractPhrasesAndCount($title_phrase_string); + $doc_statistics[$doc_key][self::TITLE_LENGTH] = + $this->sumCountArray( + $doc_statistics[$doc_key][self::TITLE_WORDS]); + $this->sum_seen_site_title_length += + $doc_statistics[$doc_key][self::TITLE_LENGTH]; + + $description_phrase_string = + mb_ereg_replace("[[:punct:]]", " ", $site[self::DESCRIPTION]); + $doc_statistics[$doc_key][self::DESCRIPTION_WORDS] = + PhraseParser::extractPhrasesAndCount( + $description_phrase_string); + $doc_statistics[$doc_key][self::DESCRIPTION_LENGTH] = + $this->sumCountArray( + 
$doc_statistics[$doc_key][self::DESCRIPTION_WORDS]); + $this->sum_seen_site_description_length += + $doc_statistics[$doc_key][self::DESCRIPTION_LENGTH]; $link_phrase_string = ""; $link_urls = array(); foreach($site[self::LINKS] as $url => $link_text) { $link_phrase_string .= " $link_text"; if(STORE_INLINKS_IN_DICTIONARY) { - $doc_statistics[self::INLINKS][crawlHash($url)][] = $doc_key; + $doc_statistics[self::INLINKS][crawlHash($url)][] =$doc_key; } } - $link_phrase_string = mb_ereg_replace("[[:punct:]]", " ", $link_phrase_string); - $doc_statistics[$doc_key][self::LINK_WORDS] = PhraseParser::extractPhrasesAndCount($link_phrase_string); - $doc_statistics[$doc_key][self::LINK_LENGTH] = $this->sumCountArray($doc_statistics[$doc_key][self::LINK_WORDS]); - $this->sum_seen_site_link_length += $doc_statistics[$doc_key][self::LINK_LENGTH]; - - $doc_statistics[$doc_key][self::DOC_DEPTH] = log($site[self::INDEX]*NUM_FETCHERS, 10); //our proxy for page rank, 10=average links/page + $link_phrase_string = + mb_ereg_replace("[[:punct:]]", " ", $link_phrase_string); + $doc_statistics[$doc_key][self::LINK_WORDS] = + PhraseParser::extractPhrasesAndCount($link_phrase_string); + $doc_statistics[$doc_key][self::LINK_LENGTH] = + $this->sumCountArray( + $doc_statistics[$doc_key][self::LINK_WORDS]); + $this->sum_seen_site_link_length += + $doc_statistics[$doc_key][self::LINK_LENGTH]; + + $doc_statistics[$doc_key][self::DOC_DEPTH] = + log($site[self::INDEX]*NUM_FETCHERS, 10); + //our proxy for page rank, 10=average links/page } - $doc_statistics[self::AVERAGE_TITLE_LENGTH] = $this->sum_seen_site_title_length/$this->num_seen_sites; + $doc_statistics[self::AVERAGE_TITLE_LENGTH] = + $this->sum_seen_site_title_length/$this->num_seen_sites; - $doc_statistics[self::AVERAGE_DESCRIPTION_LENGTH] = $this->sum_seen_site_description_length/$this->num_seen_sites; - - $doc_statistics[self::AVERAGE_TOTAL_LINK_TEXT_LENGTH] = $this->sum_seen_site_link_length/$this->num_seen_sites; - - crawlLog("AVERAGE 
TITLE LENGTH".$doc_statistics[self::AVERAGE_TITLE_LENGTH]); - crawlLog("AVERAGE DESCRIPTION LENGTH".$doc_statistics[self::AVERAGE_DESCRIPTION_LENGTH]); - crawlLog("AVERAGE TOTAL LINK TEXT LENGTH".$doc_statistics[self::AVERAGE_TOTAL_LINK_TEXT_LENGTH]); + $doc_statistics[self::AVERAGE_DESCRIPTION_LENGTH] = + $this->sum_seen_site_description_length/$this->num_seen_sites; + + $doc_statistics[self::AVERAGE_TOTAL_LINK_TEXT_LENGTH] = + $this->sum_seen_site_link_length/$this->num_seen_sites; + + crawlLog("AVERAGE TITLE LENGTH". + $doc_statistics[self::AVERAGE_TITLE_LENGTH]); + crawlLog("AVERAGE DESCRIPTION LENGTH". + $doc_statistics[self::AVERAGE_DESCRIPTION_LENGTH]); + crawlLog("AVERAGE TOTAL LINK TEXT LENGTH". + $doc_statistics[self::AVERAGE_TOTAL_LINK_TEXT_LENGTH]); return $doc_statistics; } diff --git a/bin/queue_server.php b/bin/queue_server.php index 48c82ef7c..39a158124 100755 --- a/bin/queue_server.php +++ b/bin/queue_server.php @@ -31,7 +31,8 @@ * @filesource */ -define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT'].$_SERVER['PWD'].$_SERVER["SCRIPT_NAME"], 0, +define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT']. + $_SERVER['PWD'].$_SERVER["SCRIPT_NAME"], 0, -strlen("bin/queue_server.php"))); ini_set("memory_limit","900M"); //so have enough memory to crawl big pages @@ -39,7 +40,8 @@ ini_set("memory_limit","900M"); //so have enough memory to crawl big pages /** Load in global configuration settings */ require_once BASE_DIR.'/configs/config.php'; if(!PROFILE) { - echo "Please configure the search engine instance by visiting its web interface on localhost.\n"; + echo "Please configure the search engine instance ". 
+ "by visiting its web interface on localhost.\n"; exit(); } @@ -158,7 +160,8 @@ class QueueServer implements CrawlConstants $this->processRobotUrls(); - if($count < NUM_URLS_QUEUE_RAM - SEEN_URLS_BEFORE_UPDATE_SCHEDULER * MAX_LINKS_PER_PAGE) { + if($count < NUM_URLS_QUEUE_RAM - + SEEN_URLS_BEFORE_UPDATE_SCHEDULER * MAX_LINKS_PER_PAGE) { $info = $this->processQueueUrls(); } @@ -166,7 +169,10 @@ class QueueServer implements CrawlConstants $top = $this->web_queue->peekQueue(); if($top[1] < MIN_QUEUE_WEIGHT) { crawlLog("Normalizing Weights!!\n"); - $this->web_queue->normalize(); // this will undercount the weights of URLS from fetcher data that have not completed + $this->web_queue->normalize(); + /* this will undercount the weights of URLS + from fetcher data that have not completed + */ } if(!file_exists(CRAWL_DIR."/schedules/schedule.txt")) { @@ -187,14 +193,16 @@ class QueueServer implements CrawlConstants function handleAdminMessages($info) { if(file_exists(CRAWL_DIR."/schedules/queue_server_messages.txt")) { - $info = unserialize(file_get_contents(CRAWL_DIR."/schedules/queue_server_messages.txt")); + $info = unserialize(file_get_contents( + CRAWL_DIR."/schedules/queue_server_messages.txt")); unlink(CRAWL_DIR."/schedules/queue_server_messages.txt"); switch($info[self::STATUS]) { case "NEW_CRAWL": $this->startCrawl($info); - crawlLog("Starting new crawl. Timestamp:".$this->crawl_time); + crawlLog( + "Starting new crawl. Timestamp:".$this->crawl_time); break; case "STOP_CRAWL": @@ -203,24 +211,29 @@ class QueueServer implements CrawlConstants } if(isset($this->index_archive)) { $this->index_archive->forceSave(); - // chmod so web server can also write to these directories - $this->db->setWorldPermissionsRecursive(CRAWL_DIR.'/cache/'.self::index_data_base_name.$this->crawl_time); + // chmod so apache can also write to these directories + $this->db->setWorldPermissionsRecursive( + CRAWL_DIR.'/cache/'. 
+ self::index_data_base_name.$this->crawl_time); } crawlLog("Stopping crawl !!\n"); $info[self::STATUS] = self::WAITING_START_MESSAGE_STATE; break; case "RESUME_CRAWL": - if(isset($info[self::CRAWL_TIME]) && file_exists(CRAWL_DIR.'/cache/'.self::queue_base_name.$info[self::CRAWL_TIME])) { + if(isset($info[self::CRAWL_TIME]) && + file_exists(CRAWL_DIR.'/cache/'. + self::queue_base_name.$info[self::CRAWL_TIME])) { $this->startCrawl($info); crawlLog("Resuming crawl"); } else { $msg = "Restart failed!!! "; if(!isset($info[self::CRAWL_TIME])) { - $msg .= "crawl time of crawl to restart not given\n"; + $msg .="crawl time of crawl to restart not given\n"; } - if(!file_exists(CRAWL_DIR.'/cache/'.self::queue_base_name.$info[self::CRAWL_TIME])) { - $msg .= "queue bundle for crawl to restart does not exist\n"; + if(!file_exists(CRAWL_DIR.'/cache/'. + self::queue_base_name.$info[self::CRAWL_TIME])) { + $msg .= "bundle for crawl restart doesn't exist\n"; } $info["MESSAGE"] = $msg; $crawl_status = array(); @@ -230,7 +243,9 @@ class QueueServer implements CrawlConstants $crawl_status['COUNT'] = 0; $crawl_status['DESCRIPTION'] = $msg; crawlLog($msg); - file_put_contents(CRAWL_DIR."/schedules/crawl_status.txt", serialize($crawl_status)); + file_put_contents( + CRAWL_DIR."/schedules/crawl_status.txt", + serialize($crawl_status)); chmod(CRAWL_DIR."/schedules/crawl_status.txt", 0777); $info[self::STATUS] = self::WAITING_START_MESSAGE_STATE; } @@ -267,20 +282,31 @@ class QueueServer implements CrawlConstants $this->index_archive = NULL; gc_collect_cycles(); // garbage collect old crawls - $this->web_queue = new WebQueueBundle(CRAWL_DIR.'/cache/'.self::queue_base_name.$this->crawl_time, URL_FILTER_SIZE, NUM_URLS_QUEUE_RAM, $min_or_max); - - if(!file_exists(CRAWL_DIR.'/cache/'.self::index_data_base_name.$this->crawl_time)) { - $this->index_archive = new IndexArchiveBundle(CRAWL_DIR.'/cache/'.self::index_data_base_name.$this->crawl_time, - URL_FILTER_SIZE, NUM_ARCHIVE_PARTITIONS, 
NUM_INDEX_PARTITIONS, $info['DESCRIPTION']); + $this->web_queue = new WebQueueBundle( + CRAWL_DIR.'/cache/'.self::queue_base_name. + $this->crawl_time, URL_FILTER_SIZE, + NUM_URLS_QUEUE_RAM, $min_or_max); + + if(!file_exists( + CRAWL_DIR.'/cache/'.self::index_data_base_name.$this->crawl_time)) { + $this->index_archive = new IndexArchiveBundle( + CRAWL_DIR.'/cache/'. + self::index_data_base_name.$this->crawl_time, + URL_FILTER_SIZE, NUM_ARCHIVE_PARTITIONS, + NUM_INDEX_PARTITIONS, $info['DESCRIPTION']); } else { - $this->index_archive = new IndexArchiveBundle(CRAWL_DIR.'/cache/'.self::index_data_base_name.$this->crawl_time, + $this->index_archive = new IndexArchiveBundle( + CRAWL_DIR.'/cache/'. + self::index_data_base_name.$this->crawl_time, URL_FILTER_SIZE); } // chmod so web server can also write to these directories - $this->db->setWorldPermissionsRecursive(CRAWL_DIR.'/cache/'.self::queue_base_name.$this->crawl_time); - $this->db->setWorldPermissionsRecursive(CRAWL_DIR.'/cache/'.self::index_data_base_name.$this->crawl_time); - // initialize and store the description of this crawl in the index archive + $this->db->setWorldPermissionsRecursive( + CRAWL_DIR.'/cache/'.self::queue_base_name.$this->crawl_time); + $this->db->setWorldPermissionsRecursive( + CRAWL_DIR.'/cache/'.self::index_data_base_name.$this->crawl_time); + // initialize, store the description of this crawl in the index archive $info[self::STATUS] = self::CONTINUE_STATE; @@ -295,9 +321,13 @@ class QueueServer implements CrawlConstants $dirs = glob(CRAWL_DIR.'/cache/*', GLOB_ONLYDIR); foreach($dirs as $dir) { - if(strlen($pre_timestamp = strstr($dir, self::queue_base_name)) > 0) { - $timestamp = substr($pre_timestamp, strlen(self::queue_base_name)); - if(!file_exists(CRAWL_DIR.'/cache/'.self::index_data_base_name.$timestamp)) { + if(strlen( + $pre_timestamp = strstr($dir, self::queue_base_name)) > 0) { + $timestamp = + substr($pre_timestamp, strlen(self::queue_base_name)); + if(!file_exists( + 
CRAWL_DIR.'/cache/'. + self::index_data_base_name.$timestamp)) { $this->db->unlinkRecursive($dir, true); } } @@ -316,8 +346,10 @@ class QueueServer implements CrawlConstants if(isset($old_dir)) { crawlLog("Deleting $old_dir\n"); $this->db->unlinkRecursive($old_dir); - /* The idea is that only go through outer loop more than once if earlier data directory empty - * Note: older directories should only have data dirs or deleting like this might cause problems! + /* The idea is that only go through outer loop more than once + if earlier data directory empty. + Note: older directories should only have data dirs or + deleting like this might cause problems! */ } foreach($files as $file) { @@ -343,7 +375,8 @@ class QueueServer implements CrawlConstants { crawlLog("Checking for index data files to process..."); - $index_dir = CRAWL_DIR."/schedules/".self::index_data_base_name.$this->crawl_time; + $index_dir = CRAWL_DIR."/schedules/". + self::index_data_base_name.$this->crawl_time; $this->processDataFile($index_dir, "processIndexArchive"); crawlLog("done."); } @@ -354,7 +387,9 @@ class QueueServer implements CrawlConstants function processIndexArchive($file) { static $first = true; - crawlLog("Start processing index data memory usage".memory_get_usage() . "..."); + crawlLog( + "Start processing index data memory usage". + memory_get_usage() . 
"..."); crawlLog("Processing index data in $file..."); $start_time = microtime(); @@ -369,7 +404,8 @@ class QueueServer implements CrawlConstants $machine = $sites[self::MACHINE]; $machine_uri = $sites[self::MACHINE_URI]; - if(isset($sites[self::SEEN_URLS]) && count($sites[self::SEEN_URLS]) > 0) { + if(isset($sites[self::SEEN_URLS]) && + count($sites[self::SEEN_URLS]) > 0) { $seen_sites = $sites[self::SEEN_URLS]; $index_archive->differenceContainsPages($seen_sites, self::HASH); $seen_sites = array_values($seen_sites); @@ -383,15 +419,19 @@ class QueueServer implements CrawlConstants $index_archive->addPageFilter(self::HASH, $seen_sites[$i]); $seen_sites[$i][self::MACHINE] = $machine; $seen_sites[$i][self::MACHINE_URI] = $machine_uri; - $seen_sites[$i][self::HASH_URL] = crawlHash($seen_sites[$i][self::URL]); + $seen_sites[$i][self::HASH_URL] = + crawlHash($seen_sites[$i][self::URL]); } if(isset($seen_sites)) { - $seen_sites = $index_archive->addPages(self::HASH_URL, self::SUMMARY_OFFSET, $seen_sites); + $seen_sites = + $index_archive->addPages( + self::HASH_URL, self::SUMMARY_OFFSET, $seen_sites); $summary_offsets = array(); foreach($seen_sites as $site) { - $summary_offsets[$site[self::HASH_URL]] = $site[self::SUMMARY_OFFSET]; + $summary_offsets[$site[self::HASH_URL]] = + $site[self::SUMMARY_OFFSET]; } crawlLog("B memory usage".memory_get_usage() . 
" time: ".(changeInMicrotime($start_time))); @@ -402,7 +442,9 @@ class QueueServer implements CrawlConstants foreach( $index_data as $word_key => $docs_info) { foreach($docs_info as $doc_key => $info) { if(isset($summary_offsets[$doc_key])) { - $index_data[$word_key][$doc_key][self::SUMMARY_OFFSET] = $summary_offsets[$doc_key]; + $index_data[$word_key][$doc_key][ + self::SUMMARY_OFFSET] = + $summary_offsets[$doc_key]; } } } @@ -442,7 +484,9 @@ class QueueServer implements CrawlConstants } crawlLog("Checking for Robot.txt files to process..."); - $robot_dir = CRAWL_DIR."/schedules/".self::robot_data_base_name.$this->crawl_time; + $robot_dir = + CRAWL_DIR."/schedules/". + self::robot_data_base_name.$this->crawl_time; $this->processDataFile($robot_dir, "processRobotArchive"); crawlLog("done. "); @@ -469,12 +513,14 @@ class QueueServer implements CrawlConstants } if(isset($robot_info[self::CRAWL_DELAY])) { - $this->web_queue->setCrawlDelay($robot_host, $robot_info[self::CRAWL_DELAY]); + $this->web_queue->setCrawlDelay($robot_host, + $robot_info[self::CRAWL_DELAY]); } if(isset($robot_info[self::ROBOT_PATHS])) { foreach($robot_info[self::ROBOT_PATHS] as $path) { - $this->web_queue->addDisallowedRobotFilter($robot_host.$path); + $this->web_queue->addDisallowedRobotFilter( + $robot_host.$path); } } } @@ -494,7 +540,9 @@ class QueueServer implements CrawlConstants { crawlLog("... unlinking robot schedule files ..."); - $this->db->unlinkRecursive(CRAWL_DIR.'/schedules/'.self::robot_data_base_name.$this->crawl_time, true); + $robot_schedules = CRAWL_DIR.'/schedules/'. + self::robot_data_base_name.$this->crawl_time; + $this->db->unlinkRecursive($robot_schedules, true); crawlLog("... 
reseting robot bloom filters ..."); $this->web_queue->emptyRobotFilters(); @@ -505,18 +553,25 @@ class QueueServer implements CrawlConstants */ function processQueueUrls() { - crawlLog("Start checking for new URLs data memory usage".memory_get_usage()); + crawlLog("Start checking for new URLs data memory usage". + memory_get_usage()); $info = array(); $info[self::STATUS] = self::CONTINUE_STATE; if(file_exists(CRAWL_DIR."/schedules/".self::schedule_start_name)) { - crawlLog("Start schedule urls".CRAWL_DIR."/schedules/".self::schedule_start_name); - $info = array_merge($info, $this->processDataArchive(CRAWL_DIR."/schedules/".self::schedule_start_name)); + crawlLog( + "Start schedule urls".CRAWL_DIR. + "/schedules/".self::schedule_start_name); + $info = array_merge($info, + $this->processDataArchive( + CRAWL_DIR."/schedules/".self::schedule_start_name)); return $info; } - $schedule_dir = CRAWL_DIR."/schedules/".self::schedule_data_base_name.$this->crawl_time; + $schedule_dir = + CRAWL_DIR."/schedules/". 
+ self::schedule_data_base_name.$this->crawl_time; $this->processDataFile($schedule_dir, "processDataArchive"); crawlLog("done."); @@ -543,10 +598,12 @@ class QueueServer implements CrawlConstants $start_time = microtime(); if(isset($sites[self::SCHEDULE_TIME])) { if(isset($this->waiting_hosts[$sites[self::SCHEDULE_TIME]])) { - $delayed_hosts = $this->waiting_hosts[$sites[self::SCHEDULE_TIME]]; + $delayed_hosts = + $this->waiting_hosts[$sites[self::SCHEDULE_TIME]]; unset($this->waiting_hosts[$sites[self::SCHEDULE_TIME]]); foreach($delayed_hosts as $hash_host) { - unset($this->waiting_hosts[$hash_host]); //allows crawl-delayed host to be scheduled again + unset($this->waiting_hosts[$hash_host]); + //allows crawl-delayed host to be scheduled again } } } @@ -560,7 +617,8 @@ class QueueServer implements CrawlConstants $cnt = 0; foreach($sites[self::SEEN_URLS] as $url) { if($this->web_queue->containsUrlQueue($url)) { - crawlLog("Removing $url from Queue (shouldn't still be there!)"); + crawlLog( + "Removing $url from Queue (shouldn't still be there!)"); $this->web_queue->removeQueue($url); } @@ -596,15 +654,18 @@ class QueueServer implements CrawlConstants $weight = $pair[1]; $host_url = UrlParser::getHost($url); $host_with_robots = $host_url."/robots.txt"; - $robots_in_queue = $this->web_queue->containsUrlQueue($host_with_robots); + $robots_in_queue = + $this->web_queue->containsUrlQueue($host_with_robots); if($this->web_queue->containsUrlQueue($url)) { if($robots_in_queue) { - $this->web_queue->adjustQueueWeight($host_with_robots, $weight); + $this->web_queue->adjustQueueWeight( + $host_with_robots, $weight); } $this->web_queue->adjustQueueWeight($url, $weight); - } else if($this->allowedToCrawlSite($url) && !$this->disallowedToCrawlSite($url) ) { + } else if($this->allowedToCrawlSite($url) && + !$this->disallowedToCrawlSite($url) ) { if(!$this->web_queue->containsGotRobotTxt($host_url) && !$robots_in_queue @@ -639,14 +700,18 @@ class QueueServer implements 
CrawlConstants $crawl_status['MOST_RECENT_FETCHER'] = $this->most_recent_fetcher; $crawl_status['MOST_RECENT_URLS_SEEN'] = $most_recent_urls; $crawl_status['CRAWL_TIME'] = $this->crawl_time; - $info_bundle = IndexArchiveBundle::getArchiveInfo(CRAWL_DIR.'/cache/'.self::index_data_base_name.$this->crawl_time); + $info_bundle = IndexArchiveBundle::getArchiveInfo( + CRAWL_DIR.'/cache/'.self::index_data_base_name.$this->crawl_time); $crawl_status['COUNT'] = $info_bundle['COUNT']; $crawl_status['DESCRIPTION'] = $info_bundle['DESCRIPTION']; - file_put_contents(CRAWL_DIR."/schedules/crawl_status.txt", serialize($crawl_status)); + file_put_contents( + CRAWL_DIR."/schedules/crawl_status.txt", serialize($crawl_status)); chmod(CRAWL_DIR."/schedules/crawl_status.txt", 0777); - crawlLog("End checking for new URLs data memory usage".memory_get_usage()); + crawlLog( + "End checking for new URLs data memory usage".memory_get_usage()); - crawlLog("The current crawl description is: ".$info_bundle['DESCRIPTION']); + crawlLog( + "The current crawl description is: ".$info_bundle['DESCRIPTION']); crawlLog("Total seen urls so far: ".$info_bundle['COUNT']); crawlLog("Of these, the most recent urls are:"); foreach($most_recent_urls as $url) { @@ -681,13 +746,15 @@ class QueueServer implements CrawlConstants $sites[self::CRAWL_TIME] = $this->crawl_time; $sites[self::SCHEDULE_TIME] = time(); - $sites[self::SAVED_CRAWL_TIMES] = $this->getCrawlTimes(); // fetcher should delete any crawl time not listed here + $sites[self::SAVED_CRAWL_TIMES] = $this->getCrawlTimes(); + // fetcher should delete any crawl time not listed here $sites[self::CRAWL_ORDER] = $this->crawl_order; $sites[self::SITES] = array(); $delete_urls = array(); $crawl_delay_hosts = array(); - $time_per_request_guess = MINIMUM_FETCH_LOOP_TIME ; // it would be impressive if we can achieve this speed + $time_per_request_guess = MINIMUM_FETCH_LOOP_TIME ; + // it would be impressive if we can achieve this speed $current_crawl_index = 
-1; @@ -713,12 +780,15 @@ class QueueServer implements CrawlConstants $i++; } else { - $next_slot = $this->getEarliestSlot($current_crawl_index, $sites[self::SITES]); + $next_slot = $this->getEarliestSlot($current_crawl_index, + $sites[self::SITES]); if($next_slot < MAX_FETCH_SIZE) { $sites[self::SITES][$next_slot] = array($url, $weight); $delete_urls[$i] = $url; - // note don't add to seen url filter since check robots every 24 hours as needed + /* note don't add to seen url filter + since check robots every 24 hours as needed + */ $current_crawl_index = $next_slot; $fetch_size++; $i++; @@ -738,9 +808,12 @@ class QueueServer implements CrawlConstants foreach($host_paths as $host_path) { if($this->web_queue->containsDisallowedRobot($host_path)) { $robots_okay = false; - $delete_urls[$i] = $url; //we want to remove from queue since robots forbid it + $delete_urls[$i] = $url; + //want to remove from queue since robots forbid it $this->web_queue->addSeenUrlFilter($url); - // at this point we might miss some sites by marking them seen: the robot url might change in 24 hours + /* at this point we might miss some sites by marking + them seen: the robot url might change in 24 hours + */ break; } } @@ -753,38 +826,55 @@ class QueueServer implements CrawlConstants $delay = $this->web_queue->getCrawlDelay($host_url); $num_waiting = count($this->waiting_hosts); - if($delay > 0 ) { // handle adding a url if there is a crawl delay + if($delay > 0 ) { + // handle adding a url if there is a crawl delay if((!isset($this->waiting_hosts[crawlHash($host_url)]) && $num_waiting < MAX_WAITING_HOSTS) || (isset($this->waiting_hosts[crawlHash($host_url)]) && - $this->waiting_hosts[crawlHash($host_url) ] == $sites[self::SCHEDULE_TIME])) { - - $this->waiting_hosts[crawlHash($host_url)] = $sites[self::SCHEDULE_TIME]; - $this->waiting_hosts[$sites[self::SCHEDULE_TIME]][] = crawlHash($host_url); - $request_batches_per_delay = ceil($delay/$time_per_request_guess); + 
$this->waiting_hosts[crawlHash($host_url) ] == + $sites[self::SCHEDULE_TIME])) { + + $this->waiting_hosts[crawlHash($host_url)] = + $sites[self::SCHEDULE_TIME]; + $this->waiting_hosts[$sites[self::SCHEDULE_TIME]][] = + crawlHash($host_url); + $request_batches_per_delay = + ceil($delay/$time_per_request_guess); if(!isset($crawl_delay_hosts[$host_url])) { $next_earliest_slot = $current_crawl_index; $crawl_delay_hosts[$host_url] = $next_earliest_slot; } else { - $next_earliest_slot = $crawl_delay_hosts[$host_url] + $request_batches_per_delay*NUM_MULTI_CURL_PAGES; + $next_earliest_slot = $crawl_delay_hosts[$host_url] + + $request_batches_per_delay + * NUM_MULTI_CURL_PAGES; } - if(($next_slot = $this->getEarliestSlot($next_earliest_slot, $sites[self::SITES])) < MAX_FETCH_SIZE) { + if(($next_slot = + $this->getEarliestSlot( $next_earliest_slot, + $sites[self::SITES])) < MAX_FETCH_SIZE) { $crawl_delay_hosts[$host_url] = $next_slot; - $sites[self::SITES][$next_slot] = array($url, $weight); + $sites[self::SITES][$next_slot] = + array($url, $weight); $delete_urls[$i] = $url; - $this->web_queue->addSeenUrlFilter($url); // we might miss some sites by marking them seen after only scheduling them + $this->web_queue->addSeenUrlFilter($url); + /* we might miss some sites by marking them + seen after only scheduling them + */ $fetch_size++; } } } else { // add a url no crawl delay - $next_slot = $this->getEarliestSlot($current_crawl_index, $sites[self::SITES]); + $next_slot = $this->getEarliestSlot( + $current_crawl_index, $sites[self::SITES]); if($next_slot < MAX_FETCH_SIZE) { $sites[self::SITES][$next_slot] = array($url, $weight); $delete_urls[$i] = $url; - $this->web_queue->addSeenUrlFilter($url); // we might miss some sites by marking them seen after only scheduling them + $this->web_queue->addSeenUrlFilter($url); + /* we might miss some sites by marking them + seen after only scheduling them + */ $current_crawl_index = $next_slot; $fetch_size++; @@ -806,8 +896,10 @@ class 
QueueServer implements CrawlConstants } if(isset($sites[self::SITES]) && count($sites[self::SITES]) > 0 ) { - $dummy_slot = array(self::DUMMY, 0.0); // dummy's are used for crawl delays of sites with longer delays - // when we don't have much else to crawl + $dummy_slot = array(self::DUMMY, 0.0); + /* dummy's are used for crawl delays of sites with longer delays + when we don't have much else to crawl + */ $cnt = 0; for($j = 0; $j < MAX_FETCH_SIZE; $j++) { if(isset( $sites[self::SITES][$j])) { @@ -821,9 +913,13 @@ class QueueServer implements CrawlConstants } ksort($sites[self::SITES]); - file_put_contents(CRAWL_DIR."/schedules/schedule.txt", serialize($sites)); - crawlLog("End Produce Fetch Memory usage".memory_get_usage() ); - crawlLog("Created fetch batch... Queue size is now ".$this->web_queue->to_crawl_queue->count."...Time to create batch: ".(changeInMicrotime($start_time))); + file_put_contents(CRAWL_DIR."/schedules/schedule.txt", + serialize($sites)); + crawlLog("End Produce Fetch Memory usage".memory_get_usage() ); + crawlLog("Created fetch batch... Queue size is now ". + $this->web_queue->to_crawl_queue->count. + "...Time to create batch: ". 
+ (changeInMicrotime($start_time))); } else { crawlLog("No fetch batch created!!"); } @@ -879,7 +975,8 @@ class QueueServer implements CrawlConstants $flag = false; foreach($site_array as $site) { $site_parts = mb_split("domain:", $site); - if(isset($site_parts[1]) && mb_strstr(UrlParser::getHost($url), $site_parts[1]) ) { + if(isset($site_parts[1]) && + mb_strstr(UrlParser::getHost($url), $site_parts[1]) ) { $flag = true; break; } @@ -902,8 +999,10 @@ class QueueServer implements CrawlConstants $dirs = glob(CRAWL_DIR.'/cache/*', GLOB_ONLYDIR); foreach($dirs as $dir) { - if(strlen($pre_timestamp = strstr($dir, self::index_data_base_name)) > 0) { - $list[] = substr($pre_timestamp, strlen(self::index_data_base_name)); + if(strlen($pre_timestamp = strstr($dir, + self::index_data_base_name)) > 0) { + $list[] = substr($pre_timestamp, + strlen(self::index_data_base_name)); } } diff --git a/bot.php b/bot.php index 8698493cb..c0938914f 100755 --- a/bot.php +++ b/bot.php @@ -34,7 +34,8 @@ * @filesource */ -define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT'].$_SERVER['PWD'].$_SERVER["SCRIPT_NAME"], 0, +define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT'].$_SERVER['PWD']. + $_SERVER["SCRIPT_NAME"], 0, -strlen("bot.php"))); /** Load search engine wide configuration file */ @@ -50,7 +51,9 @@ if(!PROFILE) {echo "BAD REQUEST"; exit();} <head> <title><?php echo USER_AGENT_SHORT; ?></title> - <meta name="description" content="A description of a robot based on the SeekQuarry/Yioop! Search Engine" /> + <meta name="description" content= + "A description of a robot based on the SeekQuarry/Yioop! 
Search Engine" + /> <meta charset="utf-8" /> <link rel="stylesheet" type="text/css" href="css/search.css" /> @@ -60,7 +63,8 @@ if(!PROFILE) {echo "BAD REQUEST"; exit();} if(file_exists(WORK_DIRECTORY."/bot.txt")) { echo file_get_contents(WORK_DIRECTORY."/bot.txt"); } else { - echo "Unfortunately, the person who is using this software did not provide a description of their user-agent"; + echo "Unfortunately, the person who is using this software did not ". + "provide a description of their user-agent"; } ?> </body> diff --git a/configs/config.php b/configs/config.php index a813c0cdf..bcc092d64 100755 --- a/configs/config.php +++ b/configs/config.php @@ -40,7 +40,8 @@ define('QUERY_INFO', 2); define('ERROR_INFO', 4); date_default_timezone_set('America/Los_Angeles'); -/////////////////////////////////////////*+++ The next block of code is machine edited, change at your own risk, please use configure web page instead +++*/ +/*+++ The next block of code is machine edited, change at your own risk, + please use configure web page instead +++*/ define('WORK_DIRECTORY', '/Applications/xampp/xamppfiles/htdocs/crawls'); /*++++++*/ @@ -98,48 +99,88 @@ if( (DEBUG_LEVEL & QUERY_INFO) == QUERY_INFO) { if(!PROFILE) { return; } -/*+++ End machine generated code, feel free to edit the settings below as desired +++*/ - -define('USER_AGENT', 'Mozilla/5.0 (compatible; '.USER_AGENT_SHORT.' 
+'.QUEUE_SERVER.'bot.php)'); - //this is the User-Agent names the crawler provides a web-server it is crawling -define ('SESSION_NAME', "yioopbiscuit"); //name of the cookie used to manage the session (store language and perpage settings) - -define("MAX_LOG_FILE_SIZE", 5000000); // maximum size of a log file before it is rotated -define("NUMBER_OF_LOG_FILES", 5); // number of log files to rotate amongst - -define('CACHE_ROBOT_TXT_TIME', 86400); // how long in seconds to keep a cache of a robot.txt file before re-requesting it -define('MAXIMUM_CRAWL_DELAY', 64); // if the robots.txt has a Crawl-delay larger than this value don't crawl the site - // maximum value for this is 255 -define('MAX_WAITING_HOSTS', 1000); //maximum number of active crawl-delayed hosts - - -define('URL_FILTER_SIZE', 10000000); // bloom filters are used to keep track of which urls are visited, this parameter determines up to how many - // urls will be stored in a single filter. Additional filters are read to and from disk. -define('NUM_FETCHERS', 3); // number of fetchers that will be used in a given crawl -define('NUM_URLS_QUEUE_RAM', 300000); // maximum number of urls that will be held in ram (as opposed to in files) in the priority queue +/*+++ End machine generated code, feel free to edit the below as desired +++*/ + +define('USER_AGENT', + 'Mozilla/5.0 (compatible; '.USER_AGENT_SHORT.' 
+'.QUEUE_SERVER.'bot.php)'); + /* this is the User-Agent names the crawler provides + a web-server it is crawling + */ +define ('SESSION_NAME', "yioopbiscuit"); + /* name of the cookie used to manage the session + (store language and perpage settings) + */ + +define("MAX_LOG_FILE_SIZE", 5000000); + // maximum size of a log file before it is rotated +define("NUMBER_OF_LOG_FILES", 5); + // number of log files to rotate amongst + +define('CACHE_ROBOT_TXT_TIME', 86400); + /* how long in seconds to keep a cache of a robot.txt + file before re-requesting it + */ +define('MAXIMUM_CRAWL_DELAY', 64); + /* if the robots.txt has a Crawl-delay larger than this + value don't crawl the site. + maximum value for this is 255 + */ +define('MAX_WAITING_HOSTS', 1000); + //maximum number of active crawl-delayed hosts + + +define('URL_FILTER_SIZE', 10000000); + /* bloom filters are used to keep track of which urls are visited, + this parameter determines up to how many + urls will be stored in a single filter. Additional filters are + read to and from disk. 
+ */ +define('NUM_FETCHERS', 3); + // number of fetchers that will be used in a given crawl +define('NUM_URLS_QUEUE_RAM', 300000); + /* maximum number of urls that will be held in ram + (as opposed to in files) in the priority queue + */ define('MIN_QUEUE_WEIGHT', 1/100000); -define('NUM_ARCHIVE_PARTITIONS', 10); // number of web archive files to use to store web pages in -define('NUM_INDEX_PARTITIONS', 250); // number of web archive files to use for the inverted index of word->docs in a given generation -define('NUM_WORDS_PER_GENERATION', 6*URL_FILTER_SIZE/NUM_INDEX_PARTITIONS); // number of words before next gen - -define('SAMPLE_GENERATIONS', 3); // number of generations to sample in estimating number of urls in a query - - -define('STORE_INLINKS_IN_DICTIONARY', false); //store inlink data in word inverted index -define('PRECISION', 10); // precision to round floating points document scores -define('BLOCK_SIZE', 50); //when index data from relatively uncommon words, how many docs should be grouped together in a block -define('COMMON_WORD_THRESHOLD', 1000); // how many documents a word needs to be to get its own index file. 
- -define('MAX_LINKS_PER_PAGE', 50); // maximum number of links to consider on any given page -define('MAX_LINKS_WORD_TEXT', 200); // maximum number of words from links to consider on any given page +define('NUM_ARCHIVE_PARTITIONS', 10); + // number of web archive files to use to store web pages in +define('NUM_INDEX_PARTITIONS', 250); + /* number of web archive files to use for the inverted index of + word->docs in a given generation + */ +define('NUM_WORDS_PER_GENERATION', 6*URL_FILTER_SIZE/NUM_INDEX_PARTITIONS); + // number of words before next gen + +define('SAMPLE_GENERATIONS', 3); + // number of generations to sample in estimating number of urls in a query + + +define('STORE_INLINKS_IN_DICTIONARY', false); + //store inlink data in word inverted index +define('PRECISION', 10); + // precision to round floating points document scores +define('BLOCK_SIZE', 50); + /* when index data from relatively uncommon words, + how many docs should be grouped together in a block + */ +define('COMMON_WORD_THRESHOLD', 1000); + // how many documents a word needs to be to get its own index file. 
+ +define('MAX_LINKS_PER_PAGE', 50); + // maximum number of links to consider on any given page +define('MAX_LINKS_WORD_TEXT', 200); + // maximum number of words from links to consider on any given page define('PAGE_RANGE_REQUEST', 50000); // request this many bytes out of a page define('MAX_PHRASE_LEN', 2); //maximum length +1 exact phrase matches -define('NUM_MULTI_CURL_PAGES', 100); //number of multi curl page requests in one go -define('PAGE_TIMEOUT', 30); //time in seconds before we give up on a page +define('NUM_MULTI_CURL_PAGES', 100); + //number of multi curl page requests in one go +define('PAGE_TIMEOUT', 30); + //time in seconds before we give up on a page -define('NORMALIZE_FREQUENCY', 10000); // how often should we make in OPIC the sum of weights totals MAX_URLS +define('NORMALIZE_FREQUENCY', 10000); + // how often should we make in OPIC the sum of weights totals MAX_URLS $INDEXED_FILE_TYPES = @@ -190,15 +231,17 @@ $PAGE_PROCESSORS = array( "text/html" => "HtmlProcessor", define ('SEEN_URLS_BEFORE_UPDATE_SCHEDULER', 500); -define ('MAX_FETCH_SIZE', 5000); //maximum number of urls to schedule to a given fetcher in one go -define ('MINIMUM_FETCH_LOOP_TIME', 5); //fetcher must wait at least this long between multi-curl requests - -//searching and admin -define ('NUM_RESULTS_PER_PAGE', 10); //default number of search results to display per page - -define ('NUM_RECENT_URLS_TO_DISPLAY', 10); // Number of recently crawled urls to display on admin screen - - +define ('MAX_FETCH_SIZE', 5000); + //maximum number of urls to schedule to a given fetcher in one go +define ('MINIMUM_FETCH_LOOP_TIME', 5); + //fetcher must wait at least this long between multi-curl requests +/* + * searching and admin + */ +define ('NUM_RESULTS_PER_PAGE', 10); + //default number of search results to display per page +define ('NUM_RECENT_URLS_TO_DISPLAY', 10); + // Number of recently crawled urls to display on admin screen ?> diff --git a/configs/crawl.ini b/configs/crawl.ini index 
89ad55c71..ac2da3f1d 100644 --- a/configs/crawl.ini +++ b/configs/crawl.ini @@ -22,7 +22,7 @@ ; [general] crawl_order = 'ad'; -restrict_sites_by_url = true; +restrict_sites_by_url = false; [allowed_sites] url[] = 'http://www.cs.sjsu.edu/'; diff --git a/configs/createdb.php b/configs/createdb.php index 3e5ab1949..83a577393 100755 --- a/configs/createdb.php +++ b/configs/createdb.php @@ -43,10 +43,12 @@ if(isset($_SERVER['DOCUMENT_ROOT']) && strlen($_SERVER['DOCUMENT_ROOT']) > 0) { * * */ -define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT'].$_SERVER['PWD'].$_SERVER["SCRIPT_NAME"], 0, +define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT']. + $_SERVER['PWD'].$_SERVER["SCRIPT_NAME"], 0, -strlen("configs/createdb.php"))); require_once BASE_DIR.'/configs/config.php'; -require_once BASE_DIR."/models/datasources/".DBMS."_manager.php"; //get the database library +require_once BASE_DIR."/models/datasources/".DBMS."_manager.php"; + //get the database library require_once BASE_DIR."/lib/utility.php"; //for crawlHash function @@ -59,7 +61,10 @@ if(in_array(DBMS, array("mysql"))) { $auto_increment = "AUTO_INCREMENT"; } if(in_array(DBMS, array("sqlite"))) { - $auto_increment = ""; //in sqlite2 a primary key column will act as auto_increment if don't give value + $auto_increment = ""; + /* in sqlite2 a primary key column will act + as auto_increment if don't give value + */ } if(!in_array(DBMS, array('sqlite', 'sqlite3'))) { $db->execute("DROP DATABASE IF EXISTS ".DB_NAME); @@ -69,27 +74,35 @@ if(!in_array(DBMS, array('sqlite', 'sqlite3'))) { } $db->selectDB(DB_NAME); -$db->execute("CREATE TABLE USER( USER_ID INTEGER PRIMARY KEY $auto_increment, USER_NAME VARCHAR(16) UNIQUE, PASSWORD VARCHAR(16))"); +$db->execute("CREATE TABLE USER( USER_ID INTEGER PRIMARY KEY $auto_increment, ". 
+ "USER_NAME VARCHAR(16) UNIQUE, PASSWORD VARCHAR(16))"); //default account is root without a password $sql ="INSERT INTO USER VALUES (1, 'root', '".crawlCrypt('')."' ) "; $db->execute($sql); -$db->execute("CREATE TABLE TRANSLATION (TRANSLATION_ID INTEGER PRIMARY KEY $auto_increment, IDENTIFIER_STRING VARCHAR(512) UNIQUE)"); +$db->execute("CREATE TABLE TRANSLATION (TRANSLATION_ID INTEGER PRIMARY KEY ". + "$auto_increment, IDENTIFIER_STRING VARCHAR(512) UNIQUE)"); -$db->execute("CREATE TABLE LOCALE (LOCALE_ID INTEGER PRIMARY KEY $auto_increment, LOCALE_TAG VARCHAR(16), LOCALE_NAME VARCHAR(256), WRITING_MODE CHAR(5))"); -$db->execute("CREATE TABLE TRANSLATION_LOCALE (TRANSLATION_ID INTEGER, LOCALE_ID INTEGER, TRANSLATION VARCHAR(4096) )"); -//we insert 1 by 1 rather than comma separate as sqlite does not support comma separated inserts +$db->execute("CREATE TABLE LOCALE (LOCALE_ID INTEGER PRIMARY KEY ". + "$auto_increment, LOCALE_TAG VARCHAR(16), LOCALE_NAME VARCHAR(256),". + " WRITING_MODE CHAR(5))"); +$db->execute("CREATE TABLE TRANSLATION_LOCALE (TRANSLATION_ID INTEGER, ". + "LOCALE_ID INTEGER, TRANSLATION VARCHAR(4096) )"); +/* we insert 1 by 1 rather than comma separate as sqlite + does not support comma separated inserts + */ $db->execute("INSERT INTO LOCALE VALUES (1, 'en-US', 'English', 'lr-tb')"); $db->execute("INSERT INTO LOCALE VALUES (2, 'fr-FR', 'Français', 'lr-tb')"); $db->execute("INSERT INTO LOCALE VALUES (3, 'vi-VN', 'Tiếng Việt', 'lr-tb')"); -$db->execute("CREATE TABLE ROLE (ROLE_ID INTEGER PRIMARY KEY $auto_increment, NAME VARCHAR(512))"); +$db->execute("CREATE TABLE ROLE (ROLE_ID INTEGER PRIMARY KEY ". 
+ "$auto_increment, NAME VARCHAR(512))"); $sql ="INSERT INTO ROLE VALUES (1, 'Admin' ) "; $db->execute($sql); -$db->execute("CREATE TABLE ROLE_ACTIVITY (ROLE_ID INTEGER, ACTIVITY_ID INTEGER)"); +$db->execute("CREATE TABLE ROLE_ACTIVITY (ROLE_ID INTEGER,ACTIVITY_ID INTEGER)"); $db->execute("INSERT INTO ROLE_ACTIVITY VALUES (1, 1)"); $db->execute("INSERT INTO ROLE_ACTIVITY VALUES (1, 2)"); $db->execute("INSERT INTO ROLE_ACTIVITY VALUES (1, 3)"); @@ -98,7 +111,8 @@ $db->execute("INSERT INTO ROLE_ACTIVITY VALUES (1, 5)"); $db->execute("INSERT INTO ROLE_ACTIVITY VALUES (1, 6)"); $db->execute( - "CREATE TABLE ACTIVITY (ACTIVITY_ID INTEGER PRIMARY KEY $auto_increment, TRANSLATION_ID INTEGER, METHOD_NAME VARCHAR(256))"); + "CREATE TABLE ACTIVITY (ACTIVITY_ID INTEGER PRIMARY KEY $auto_increment,". + " TRANSLATION_ID INTEGER, METHOD_NAME VARCHAR(256))"); $db->execute("INSERT INTO ACTIVITY VALUES (1, 1, 'manageAccount')"); $db->execute("INSERT INTO ACTIVITY VALUES (2, 2, 'manageUsers')"); $db->execute("INSERT INTO ACTIVITY VALUES (3, 3, 'manageRoles')"); @@ -106,11 +120,11 @@ $db->execute("INSERT INTO ACTIVITY VALUES (4, 4, 'manageCrawl')"); $db->execute("INSERT INTO ACTIVITY VALUES (5, 5, 'manageLocales')"); $db->execute("INSERT INTO ACTIVITY VALUES (6, 6, 'configure')"); -$db->execute("INSERT INTO TRANSLATION VALUES (1, 'db_activity_manage_account' )"); +$db->execute("INSERT INTO TRANSLATION VALUES (1,'db_activity_manage_account')"); $db->execute("INSERT INTO TRANSLATION VALUES (2, 'db_activity_manage_users')"); $db->execute("INSERT INTO TRANSLATION VALUES (3, 'db_activity_manage_roles')"); $db->execute("INSERT INTO TRANSLATION VALUES (4, 'db_activity_manage_crawl')"); -$db->execute("INSERT INTO TRANSLATION VALUES (5, 'db_activity_manage_locales')"); +$db->execute("INSERT INTO TRANSLATION VALUES (5,'db_activity_manage_locales')"); $db->execute("INSERT INTO TRANSLATION VALUES (6, 'db_activity_configure')"); $db->execute("INSERT INTO TRANSLATION_LOCALE VALUES (1, 1, 'Manage 
Account' )"); diff --git a/controllers/admin_controller.php b/controllers/admin_controller.php index 170301f83..03d73efa4 100755 --- a/controllers/admin_controller.php +++ b/controllers/admin_controller.php @@ -58,13 +58,15 @@ class AdminController extends Controller implements CrawlConstants * * @var array */ - var $models = array("signin", "user", "activity", "crawl", "role", "locale", "profile"); + var $models = array( + "signin", "user", "activity", "crawl", "role", "locale", "profile"); /** * * @var array */ var $activities = array("signin", "manageAccount", - "manageUsers", "manageCrawl", "manageRoles", "manageLocales", "crawlStatus", "configure"); + "manageUsers", "manageCrawl", "manageRoles", + "manageLocales", "crawlStatus", "configure"); /** * @@ -96,13 +98,18 @@ class AdminController extends Controller implements CrawlConstants $view = "crawlstatus"; } } else if ($this->checkSignin()){ - $_SESSION['USER_ID'] = $this->signinModel->getUserId($this->clean($_REQUEST['u'], "string")); - $data['YIOOP_TOKEN'] = $this->generateCSRFToken($_SESSION['USER_ID']); // now don't want to use remote address anymore - $data['SCRIPT'] = "doMessage('<h1 class=\"red\" >".tl('admin_controller_login_successful')."</h1>')"; + $_SESSION['USER_ID'] = $this->signinModel->getUserId( + $this->clean($_REQUEST['u'], "string")); + $data['YIOOP_TOKEN'] = $this->generateCSRFToken( + $_SESSION['USER_ID']); + // now don't want to use remote address anymore + $data['SCRIPT'] = "doMessage('<h1 class=\"red\" >". + tl('admin_controller_login_successful')."</h1>')"; $data = array_merge($data, $this->processSession()); $view = "admin"; } else { - $data['SCRIPT'] = "doMessage('<h1 class=\"red\" >".tl('admin_controller_login_failed')."</h1>')"; + $data['SCRIPT'] = "doMessage('<h1 class=\"red\" >". 
+ tl('admin_controller_login_failed')."</h1>')"; unset($_SESSION['USER_ID']); } } @@ -137,7 +144,8 @@ class AdminController extends Controller implements CrawlConstants { if(!PROFILE) { $activity = "configure"; - } else if(isset($_REQUEST['a']) && in_array($_REQUEST['a'], $this->activities)) { + } else if(isset($_REQUEST['a']) && + in_array($_REQUEST['a'], $this->activities)) { $activity = $_REQUEST['a']; } else { $activity = "manageAccount"; @@ -145,11 +153,14 @@ class AdminController extends Controller implements CrawlConstants $allowed = false; if(!PROFILE) { - $allowed_activities = - array( array( "ACTIVITY_NAME" =>$this->activityModel->getActivityNameFromMethodName($activity), 'METHOD_NAME' => $activity)); + $allowed_activities = array( array( + "ACTIVITY_NAME" => + $this->activityModel->getActivityNameFromMethodName($activity), + 'METHOD_NAME' => $activity)); $allowed = true; } else { - $allowed_activities = $this->userModel->getUserActivities($_SESSION['USER_ID']); + $allowed_activities = + $this->userModel->getUserActivities($_SESSION['USER_ID']); } @@ -165,7 +176,8 @@ class AdminController extends Controller implements CrawlConstants $data['ACTIVITIES'] = $allowed_activities; } if($activity != "crawlStatus") { - $data['CURRENT_ACTIVITY'] = $this->activityModel->getActivityNameFromMethodName($activity); + $data['CURRENT_ACTIVITY'] = + $this->activityModel->getActivityNameFromMethodName($activity); } return $data; } @@ -176,7 +188,8 @@ class AdminController extends Controller implements CrawlConstants function signin() { $data = array(); - $_SESSION['USER_ID'] = $this->signinModel->getUserId($_REQUEST['username']); + $_SESSION['USER_ID'] = + $this->signinModel->getUserId($_REQUEST['username']); return $data; } @@ -197,17 +210,22 @@ class AdminController extends Controller implements CrawlConstants if(file_exists(CRAWL_DIR."/schedules/crawl_status.txt")) { - if(filemtime(CRAWL_DIR."/schedules/crawl_status.txt") + 1200 > time()) { + if(filemtime( + 
CRAWL_DIR."/schedules/crawl_status.txt") + 1200 > time()) { //assume if status not updated for 20min crawl not active - $crawl_status = unserialize(file_get_contents(CRAWL_DIR."/schedules/crawl_status.txt")); + $crawl_status = + unserialize(file_get_contents( + CRAWL_DIR."/schedules/crawl_status.txt")); $data = array_merge($data, $crawl_status); } } $data['RECENT_CRAWLS'] = $this->crawlModel->getCrawlList(); - if(isset($data['CRAWL_TIME'])) { //erase from previous crawl list any active crawl + if(isset($data['CRAWL_TIME'])) { + //erase from previous crawl list any active crawl $num_crawls = count($data['RECENT_CRAWLS']); for($i = 0; $i < $num_crawls; $i++) { - if($data['RECENT_CRAWLS'][$i]['CRAWL_TIME'] == $data['CRAWL_TIME']) { + if($data['RECENT_CRAWLS'][$i]['CRAWL_TIME'] == + $data['CRAWL_TIME']) { $data['RECENT_CRAWLS'][$i] = false; } } @@ -227,23 +245,32 @@ class AdminController extends Controller implements CrawlConstants $data["ELEMENT"] = "manageaccountElement"; $data['SCRIPT'] = ""; - if(isset($_REQUEST['arg']) && in_array($_REQUEST['arg'], $possible_arguments)) { + if(isset($_REQUEST['arg']) && + in_array($_REQUEST['arg'], $possible_arguments)) { switch($_REQUEST['arg']) { case "changepassword": - if($_REQUEST['retypepassword'] != $_REQUEST['newpassword']) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_passwords_dont_match')."</h1>')"; + if($_REQUEST['retypepassword'] != $_REQUEST['newpassword']){ + $data['SCRIPT'] .= + "doMessage('<h1 class=\"red\" >". + tl('admin_controller_passwords_dont_match'). 
+ "</h1>')"; return $data; } - $username = $this->signinModel->getUserName($_SESSION['USER_ID']); + $username = + $this->signinModel->getUserName($_SESSION['USER_ID']); $result = $this->signinModel->checkValidSignin($username, $this->clean($_REQUEST['oldpassword'], "string") ); if(!$result) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_invalid_old_password')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_invalid_old_password'). + "</h1>')"; return $data; } - $this->signinModel->changePassword($username, $this->clean($_REQUEST['newpassword'], "string")); - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_change_password')."</h1>')"; + $this->signinModel->changePassword($username, + $this->clean($_REQUEST['newpassword'], "string")); + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_change_password')."</h1>')"; break; } } @@ -256,10 +283,13 @@ class AdminController extends Controller implements CrawlConstants */ function manageUsers() { - $possible_arguments = array("adduser", "deleteuser", "adduserrole", "deleteuserrole"); + $possible_arguments = array("adduser", + "deleteuser", "adduserrole", "deleteuserrole"); $data["ELEMENT"] = "manageusersElement"; - $data['SCRIPT'] = "selectUser = elt('select-user'); selectUser.onchange = submitViewUserRole;"; + $data['SCRIPT'] = + "selectUser = elt('select-user'); ". 
+ "selectUser.onchange = submitViewUserRole;"; $usernames = $this->userModel->getUserList(); if(isset($_REQUEST['username'])) { @@ -297,10 +327,12 @@ class AdminController extends Controller implements CrawlConstants } } - $available_roles = array_diff_assoc($all_roles, $data['SELECT_ROLES']); + $available_roles = array_diff_assoc( + $all_roles, $data['SELECT_ROLES']); - $data['AVAILABLE_ROLES'][-1] = tl('admin_controller_select_rolename'); + $data['AVAILABLE_ROLES'][-1] = + tl('admin_controller_select_rolename'); foreach($available_roles as $role) { $data['AVAILABLE_ROLES'][$role['ROLE_ID']]= $role['ROLE_NAME']; @@ -315,7 +347,8 @@ class AdminController extends Controller implements CrawlConstants $data['SELECT_USER'] = -1; } - if(isset($_REQUEST['arg']) && in_array($_REQUEST['arg'], $possible_arguments)) { + if(isset($_REQUEST['arg']) && + in_array($_REQUEST['arg'], $possible_arguments)) { switch($_REQUEST['arg']) { @@ -324,17 +357,22 @@ class AdminController extends Controller implements CrawlConstants unset($data['AVAILABLE_ROLES']); unset($data['SELECT_ROLES']); if($_REQUEST['retypepassword'] != $_REQUEST['password']) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_passwords_dont_match')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_passwords_dont_match'). + "</h1>')"; return $data; } if($this->signinModel->getUserId($username) > 0) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_username_exists')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". 
+ tl('admin_controller_username_exists')."</h1>')"; return $data; } - $this->userModel->addUser($username, $this->clean($_REQUEST['password'], "string")); + $this->userModel->addUser($username, + $this->clean($_REQUEST['password'], "string")); $data['USER_NAMES'][$username] = $username; - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_username_added')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_username_added')."</h1>')"; break; case "deleteuser": @@ -342,45 +380,61 @@ class AdminController extends Controller implements CrawlConstants unset($data['AVAILABLE_ROLES']); unset($data['SELECT_ROLES']); if(!($this->signinModel->getUserId($username) > 0)) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_username_doesnt_exists')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_username_doesnt_exists'). + "</h1>')"; return $data; } $this->userModel->deleteUser($username); unset($data['USER_NAMES'][$username]); - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_username_deleted')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_username_deleted')."</h1>')"; break; case "adduserrole": if( $userid <= 0 ) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_username_doesnt_exists')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_username_doesnt_exists'). + "</h1>')"; return $data; } if(!in_array($select_role, $role_ids)) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_rolename_doesnt_exists')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_rolename_doesnt_exists'). 
+ "</h1>')"; return $data; } $this->userModel->addUserRole($userid, $select_role); - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_rolename_added')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_rolename_added'). + "</h1>')"; unset($data['AVAILABLE_ROLES'][$select_role]); $data['SELECT_ROLE'] = -1; - $data['SELECT_ROLES'] = $this->userModel->getUserRoles($userid); + $data['SELECT_ROLES'] = + $this->userModel->getUserRoles($userid); break; case "deleteuserrole": if($userid <= 0) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_username_doesnt_exists')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_username_doesnt_exists'). + "</h1>')"; return $data; } if(!in_array($select_role, $role_ids)) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_rolename_doesnt_exists')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_rolename_doesnt_exists'). + "</h1>')"; return $data; } $this->userModel->deleteUserRole($userid, $select_role); - $data['SELECT_ROLES'] = $this->userModel->getUserRoles($userid); + $data['SELECT_ROLES'] = + $this->userModel->getUserRoles($userid); $data['AVAILABLE_ROLES'][$select_role] = $select_rolename; $data['SELECT_ROLE'] = -1; - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_rolename_deleted')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". 
+ tl('admin_controller_rolename_deleted')."</h1>')"; break; } } @@ -393,10 +447,13 @@ class AdminController extends Controller implements CrawlConstants */ function manageRoles() { - $possible_arguments = array("addrole", "deleterole", "addactivity", "deleteactivity"); + $possible_arguments = + array("addrole", "deleterole", "addactivity", "deleteactivity"); $data["ELEMENT"] = "managerolesElement"; - $data['SCRIPT'] = "selectRole = elt('select-role'); selectRole.onchange = submitViewRoleActivities;"; + $data['SCRIPT'] = + "selectRole = elt('select-role'); selectRole.onchange =". + " submitViewRoleActivities;"; $roles = $this->roleModel->getRoleList(); $role_ids = array(); @@ -421,25 +478,31 @@ class AdminController extends Controller implements CrawlConstants if($select_role != "" ) { $data['SELECT_ROLE'] = $select_role; - $data['ROLE_ACTIVITIES'] = $this->roleModel->getRoleActivities($select_role); + $data['ROLE_ACTIVITIES'] = + $this->roleModel->getRoleActivities($select_role); $all_activities = $this->activityModel->getActivityList(); $activity_ids = array(); $activity_names = array(); foreach($all_activities as $activity) { $activity_ids[] = $activity['ACTIVITY_ID']; - $activity_names[$activity['ACTIVITY_ID']] = $activity['ACTIVITY_NAME']; + $activity_names[$activity['ACTIVITY_ID']] = + $activity['ACTIVITY_NAME']; } - $available_activities = array_diff_assoc($all_activities, $data['ROLE_ACTIVITIES']); - $data['AVAILABLE_ACTIVITIES'][-1] = tl('admin_controller_select_activityname'); + $available_activities = + array_diff_assoc($all_activities, $data['ROLE_ACTIVITIES']); + $data['AVAILABLE_ACTIVITIES'][-1] = + tl('admin_controller_select_activityname'); foreach($available_activities as $activity) { - $data['AVAILABLE_ACTIVITIES'][$activity['ACTIVITY_ID']]= $activity['ACTIVITY_NAME']; + $data['AVAILABLE_ACTIVITIES'][$activity['ACTIVITY_ID']] = + $activity['ACTIVITY_NAME']; } if(isset($_REQUEST['selectactivity'])) { - $select_activity = 
$this->clean($_REQUEST['selectactivity'], "int" ); + $select_activity = + $this->clean($_REQUEST['selectactivity'], "int" ); } else { $select_activity = ""; @@ -451,8 +514,8 @@ class AdminController extends Controller implements CrawlConstants } } - if(isset($_REQUEST['arg']) && in_array($_REQUEST['arg'], $possible_arguments)) { - + if(isset($_REQUEST['arg']) && + in_array($_REQUEST['arg'], $possible_arguments)) { switch($_REQUEST['arg']) { @@ -461,7 +524,9 @@ class AdminController extends Controller implements CrawlConstants unset($data['AVAILABLE_ACTIVITIES']); $data['SELECT_ROLE'] = -1; if($this->roleModel->getRoleId($rolename) > 0) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_rolename_exists')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_rolename_exists'). + "</h1>')"; return $data; } @@ -469,7 +534,9 @@ class AdminController extends Controller implements CrawlConstants $roleid = $this->roleModel->getRoleId($rolename); $data['ROLE_NAMES'][$roleid] = $rolename; - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_rolename_added')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_rolename_added'). + "</h1>')"; break; case "deleterole": @@ -478,45 +545,63 @@ class AdminController extends Controller implements CrawlConstants unset($data['AVAILABLE_ACTIVITIES']); if(!in_array($select_role, $role_ids)) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_rolename_doesnt_exists')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_rolename_doesnt_exists'). + "</h1>')"; return $data; } $this->roleModel->deleteRole($select_role); unset($data['ROLE_NAMES'][$select_role]); - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_rolename_deleted')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". 
+ tl('admin_controller_rolename_deleted')."</h1>')"; break; case "addactivity": if(!in_array($select_role, $role_ids)) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_rolename_doesnt_exists')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_rolename_doesnt_exists'). + "</h1>')"; return $data; } if(!in_array($select_activity, $activity_ids)) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_activityname_doesnt_exists')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_activityname_doesnt_exists'). + "</h1>')"; return $data; } - $this->roleModel->addActivityRole($select_role, $select_activity); + $this->roleModel->addActivityRole( + $select_role, $select_activity); unset($data['AVAILABLE_ACTIVITIES'][$select_activity]); - $data['ROLE_ACTIVITIES'] = $this->roleModel->getRoleActivities($select_role); - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_activity_added')."</h1>')"; + $data['ROLE_ACTIVITIES'] = + $this->roleModel->getRoleActivities($select_role); + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_activity_added')."</h1>')"; break; case "deleteactivity": if(!in_array($select_role, $role_ids)) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_rolename_doesnt_exists')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_rolename_doesnt_exists'). + "</h1>')"; return $data; } if(!in_array($select_activity, $activity_ids)) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_activityname_doesnt_exists')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_activityname_doesnt_exists'). 
+ "</h1>')"; return $data; } - $this->roleModel->deleteActivityRole($select_role, $select_activity); - $data['ROLE_ACTIVITIES'] = $this->roleModel->getRoleActivities($select_role); - $data['AVAILABLE_ACTIVITIES'][$select_activity] = $activity_names[$select_activity]; + $this->roleModel->deleteActivityRole( + $select_role, $select_activity); + $data['ROLE_ACTIVITIES'] = + $this->roleModel->getRoleActivities($select_role); + $data['AVAILABLE_ACTIVITIES'][$select_activity] = + $activity_names[$select_activity]; $data['SELECT_ACTIVITY'] = -1; - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_activity_deleted')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_activity_deleted')."</h1>')"; break; } } @@ -529,36 +614,46 @@ class AdminController extends Controller implements CrawlConstants */ function manageCrawl() { - $possible_arguments = array("start", "resume", "delete", "stop", "index", "options"); + $possible_arguments = + array("start", "resume", "delete", "stop", "index", "options"); $data["ELEMENT"] = "managecrawlElement"; $data['SCRIPT'] = "doUpdate();"; - if(isset($_REQUEST['arg']) && in_array($_REQUEST['arg'], $possible_arguments)) { + if(isset($_REQUEST['arg']) && + in_array($_REQUEST['arg'], $possible_arguments)) { switch($_REQUEST['arg']) { case "start": - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_starting_new_crawl')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". 
+ tl('admin_controller_starting_new_crawl')."</h1>')"; $info = array(); $info[self::STATUS] = "NEW_CRAWL"; $info[self::CRAWL_TIME] = time(); $seed_info = $this->crawlModel->getSeedInfo(); - $info[self::CRAWL_ORDER] = $seed_info['general']['crawl_order']; - $info[self::RESTRICT_SITES_BY_URL] = $seed_info['general']['restrict_sites_by_url']; - $info[self::ALLOWED_SITES] = $seed_info['allowed_sites']['url']; - $info[self::DISALLOWED_SITES] = $seed_info['disallowed_sites']['url']; + $info[self::CRAWL_ORDER] = + $seed_info['general']['crawl_order']; + $info[self::RESTRICT_SITES_BY_URL] = + $seed_info['general']['restrict_sites_by_url']; + $info[self::ALLOWED_SITES] = + $seed_info['allowed_sites']['url']; + $info[self::DISALLOWED_SITES] = + $seed_info['disallowed_sites']['url']; if(isset($_REQUEST['description'])) { - $description = $this->clean($_REQUEST['description'], "string"); + $description = + $this->clean($_REQUEST['description'], "string"); } else { $description = tl('admin_controller_no_description'); } $info['DESCRIPTION'] = $description; $info_string = serialize($info); - file_put_contents(CRAWL_DIR."/schedules/queue_server_messages.txt", $info_string); + file_put_contents( + CRAWL_DIR."/schedules/queue_server_messages.txt", + $info_string); $scheduler_info[self::SEEN_URLS] = array(); @@ -568,84 +663,125 @@ class AdminController extends Controller implements CrawlConstants $scheduler_info[self::ROBOT_TXT] = array(); $scheduler_string = serialize($scheduler_info); @unlink(CRAWL_DIR."/schedules/schedule.txt"); - file_put_contents(CRAWL_DIR."/schedules/ScheduleDataStartCrawl.txt", $scheduler_string); + file_put_contents( + CRAWL_DIR."/schedules/ScheduleDataStartCrawl.txt", + $scheduler_string); break; case "stop": - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_stop_crawl')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". 
+ tl('admin_controller_stop_crawl')."</h1>')"; $info = array(); $info[self::STATUS] = "STOP_CRAWL"; $info_string = serialize($info); - file_put_contents(CRAWL_DIR."/schedules/queue_server_messages.txt", $info_string); + file_put_contents( + CRAWL_DIR."/schedules/queue_server_messages.txt", + $info_string); break; case "resume": - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_resume_crawl')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_resume_crawl')."</h1>')"; $info = array(); $info[self::STATUS] = "RESUME_CRAWL"; - $info[self::CRAWL_TIME] = $this->clean($_REQUEST['timestamp'], "int"); + $info[self::CRAWL_TIME] = + $this->clean($_REQUEST['timestamp'], "int"); $info_string = serialize($info); - file_put_contents(CRAWL_DIR."/schedules/queue_server_messages.txt", $info_string); + file_put_contents( + CRAWL_DIR."/schedules/queue_server_messages.txt", + $info_string); break; case "delete": if(isset($_REQUEST['timestamp'])) { - $timestamp = $this->clean($_REQUEST['timestamp'], "int"); - $this->crawlModel->db->unlinkRecursive(CRAWL_DIR.'/cache/'.self::index_data_base_name.$timestamp, true); - $this->crawlModel->db->unlinkRecursive(CRAWL_DIR.'/schedules/'.self::index_data_base_name.$timestamp, true); - $this->crawlModel->db->unlinkRecursive(CRAWL_DIR.'/schedules/'.self::schedule_data_base_name.$timestamp, true); - $this->crawlModel->db->unlinkRecursive(CRAWL_DIR.'/schedules/'.self::robot_data_base_name.$timestamp, true); + $timestamp = + $this->clean($_REQUEST['timestamp'], "int"); + $this->crawlModel->db->unlinkRecursive( + CRAWL_DIR.'/cache/'.self::index_data_base_name . + $timestamp, true); + $this->crawlModel->db->unlinkRecursive( + CRAWL_DIR.'/schedules/'.self::index_data_base_name . + $timestamp, true); + $this->crawlModel->db->unlinkRecursive( + CRAWL_DIR.'/schedules/' . 
+ self::schedule_data_base_name.$timestamp, true); + $this->crawlModel->db->unlinkRecursive( + CRAWL_DIR.'/schedules/'.self::robot_data_base_name. + $timestamp, true); - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_delete_crawl_success')."</h1>'); crawlStatusUpdate(); "; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_delete_crawl_success'). + "</h1>'); crawlStatusUpdate(); "; } else { - $data['SCRIPT'] .= "crawlStatusUpdate(); doMessage('<h1 class=\"red\" >".tl('admin_controller_delete_crawl_fail')."</h1>')"; + $data['SCRIPT'] .= "crawlStatusUpdate(); ". + "doMessage('<h1 class=\"red\" >". + tl('admin_controller_delete_crawl_fail'). + "</h1>')"; } break; case "index": - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_set_index')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_set_index')."</h1>')"; $timestamp = $this->clean($_REQUEST['timestamp'], "int"); $this->crawlModel->setCurrentIndexDatabaseName($timestamp); break; case "options": - $data["leftorright"] = (getLocaleDirection() == 'ltr') ? "right": "left"; + $data["leftorright"] = + (getLocaleDirection() == 'ltr') ? 
"right": "left"; $data["ELEMENT"] = "crawloptionsElement"; $seed_info = $this->crawlModel->getSeedInfo(); $data['available_crawl_orders'] = array( - self::BREADTH_FIRST => tl('admin_controller_breadth_first'), - self::PAGE_IMPORTANCE => tl('admin_controller_page_importance')); + self::BREADTH_FIRST => + tl('admin_controller_breadth_first'), + self::PAGE_IMPORTANCE => + tl('admin_controller_page_importance')); $update_flag = false; - if(isset($_REQUEST['crawl_order']) && in_array($_REQUEST['crawl_order'], array_keys($data['available_crawl_orders']))) { - $seed_info['general']['crawl_order'] = $_REQUEST['crawl_order']; + if(isset($_REQUEST['crawl_order']) && + in_array($_REQUEST['crawl_order'], + array_keys($data['available_crawl_orders']))) { + + $seed_info['general']['crawl_order'] = + $_REQUEST['crawl_order']; $update_flag = true; } $data['crawl_order'] = $seed_info['general']['crawl_order']; if(isset($_REQUEST['posted'])) { - $seed_info['general']['restrict_sites_by_url'] = (isset($_REQUEST['restrict_sites_by_url'])) ? + $seed_info['general']['restrict_sites_by_url'] = + (isset($_REQUEST['restrict_sites_by_url'])) ? true : false; $update_flag = true; } - $data['restrict_sites_by_url'] = $seed_info['general']['restrict_sites_by_url']; - $site_types = array('allowed_sites', 'disallowed_sites', 'seed_sites'); + $data['restrict_sites_by_url'] = + $seed_info['general']['restrict_sites_by_url']; + $site_types = + array('allowed_sites','disallowed_sites', 'seed_sites'); foreach($site_types as $type) { if(isset($_REQUEST[$type])) { - $seed_info[$type]['url'] = $this->convertStringCleanUrlsArray($_REQUEST[$type]); + $seed_info[$type]['url'] = + $this->convertStringCleanUrlsArray( + $_REQUEST[$type]); } - $data[$type] = $this->convertArrayCleanLines($seed_info[$type]['url']); + $data[$type] = $this->convertArrayCleanLines( + $seed_info[$type]['url']); } - $data['TOGGLE_STATE'] = ($data['restrict_sites_by_url']) ? 
"checked='checked'" : ""; - $data['SCRIPT'] = "setDisplay('toggle', '{$data['restrict_sites_by_url']}');"; + $data['TOGGLE_STATE'] = + ($data['restrict_sites_by_url']) ? + "checked='checked'" : ""; + $data['SCRIPT'] = "setDisplay('toggle', ". + "'{$data['restrict_sites_by_url']}');"; if($update_flag) { $this->crawlModel->setSeedInfo($seed_info); - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_update_seed_info')."</h1>');"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_update_seed_info')."</h1>');"; } @@ -700,11 +836,13 @@ class AdminController extends Controller implements CrawlConstants $locale_ids = array(); foreach ($data["LOCALES"] as $locale) { - $data["LOCALE_NAMES"][$locale["LOCALE_TAG"]] = $locale["LOCALE_NAME"]; + $data["LOCALE_NAMES"][$locale["LOCALE_TAG"]] = + $locale["LOCALE_NAME"]; $locale_ids[] = $locale["LOCALE_TAG"]; } - if(isset($_REQUEST['arg']) && in_array($_REQUEST['arg'], $possible_arguments)) { + if(isset($_REQUEST['arg']) && + in_array($_REQUEST['arg'], $possible_arguments)) { if(isset($_REQUEST['localename'])) { $localename = $this->clean($_REQUEST['localename'], "string" ); } else { @@ -716,12 +854,14 @@ class AdminController extends Controller implements CrawlConstants $localetag = ""; } if(isset($_REQUEST['writingmode'])) { - $writingmode = $this->clean($_REQUEST['writingmode'], "string" ); + $writingmode = + $this->clean($_REQUEST['writingmode'], "string" ); } else { $writingmode = ""; } if(isset($_REQUEST['selectlocale'])) { - $select_locale = $this->clean($_REQUEST['selectlocale'], "string" ); + $select_locale = + $this->clean($_REQUEST['selectlocale'], "string" ); } else { $select_locale = ""; } @@ -729,30 +869,37 @@ class AdminController extends Controller implements CrawlConstants switch($_REQUEST['arg']) { case "addlocale": - $this->localeModel->addLocale($localename, $localetag, $writingmode); + $this->localeModel->addLocale( + $localename, $localetag, $writingmode); 
$this->localeModel->extractMergeLocales(); $data["LOCALES"] = $this->localeModel->getLocaleList(); $data['LOCALE_NAMES'][$localetag] = $localename; - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_locale_added')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_locale_added')."</h1>')"; break; case "deletelocale": if(!in_array($select_locale, $locale_ids)) { - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_localename_doesnt_exists')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_localename_doesnt_exists'). + "</h1>')"; return $data; } $this->localeModel->deleteLocale($select_locale); $data["LOCALES"] = $this->localeModel->getLocaleList(); unset($data['LOCALE_NAMES'][$select_locale]); - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_localename_deleted')."</h1>')"; + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_localename_deleted')."</h1>')"; break; case "editlocale": - $data["leftorright"] = (getLocaleDirection() == 'ltr') ? "right": "left"; + $data["leftorright"] = + (getLocaleDirection() == 'ltr') ? 
"right": "left"; $data["ELEMENT"] = "editlocalesElement"; - $data['CURRENT_LOCALE_NAME'] = $data['LOCALE_NAMES'][$select_locale]; + $data['CURRENT_LOCALE_NAME'] = + $data['LOCALE_NAMES'][$select_locale]; $data['CURRENT_LOCALE_TAG'] = $select_locale; if(isset($_REQUEST['STRINGS'])) { $safe_strings = array(); @@ -761,12 +908,16 @@ class AdminController extends Controller implements CrawlConstants $clean_value = $this->clean($value, "string" ); $safe_strings[$clean_key] = $clean_value; } - $this->localeModel->updateStringData($select_locale, $safe_strings); - $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >".tl('admin_controller_localestrings_updated')."</h1>')"; + $this->localeModel->updateStringData( + $select_locale, $safe_strings); + $data['SCRIPT'] .= "doMessage('<h1 class=\"red\" >". + tl('admin_controller_localestrings_updated'). + "</h1>')"; } else { $this->localeModel->extractMergeLocales(); } - $data['STRINGS'] = $this->localeModel->getStringData($select_locale); + $data['STRINGS'] = + $this->localeModel->getStringData($select_locale); break; } @@ -786,7 +937,8 @@ class AdminController extends Controller implements CrawlConstants $languages = $this->localeModel->getLocaleList(); foreach($languages as $language) { - $data['LANGUAGES'][$language['LOCALE_TAG']] = $language['LOCALE_NAME']; + $data['LANGUAGES'][$language['LOCALE_TAG']] = + $language['LOCALE_NAME']; } if(isset($_POST['lang'])) { $data['lang'] = $this->clean($_POST['lang'], "string"); @@ -801,10 +953,12 @@ class AdminController extends Controller implements CrawlConstants if(isset($_REQUEST['WORK_DIRECTORY'])) { - $data['WORK_DIRECTORY'] = $this->clean($_REQUEST['WORK_DIRECTORY'], "string"); + $data['WORK_DIRECTORY'] = + $this->clean($_REQUEST['WORK_DIRECTORY'], "string"); $data['PROFILE'] = true; - } else if (defined("WORK_DIRECTORY") && strlen(WORK_DIRECTORY) > 0 && strcmp(realpath(WORK_DIRECTORY), realpath(BASE_DIR)) != 0 - && (is_dir(WORK_DIRECTORY) || is_dir(WORK_DIRECTORY."../"))) { + } 
else if (defined("WORK_DIRECTORY") && strlen(WORK_DIRECTORY) > 0 && + strcmp(realpath(WORK_DIRECTORY), realpath(BASE_DIR)) != 0 && + (is_dir(WORK_DIRECTORY) || is_dir(WORK_DIRECTORY."../"))) { $data['WORK_DIRECTORY'] = WORK_DIRECTORY; $data['PROFILE'] = true; } @@ -818,56 +972,90 @@ class AdminController extends Controller implements CrawlConstants { case "directory": if(!isset($data['WORK_DIRECTORY'])) {break;} - if($data['PROFILE'] && file_exists($data['WORK_DIRECTORY']."/profile.php")) { - $data = array_merge($data, $this->profileModel->getProfile($data['WORK_DIRECTORY'])); - $this->profileModel->setWorkDirectoryConfigFile($data['WORK_DIRECTORY']); + if($data['PROFILE'] && + file_exists($data['WORK_DIRECTORY']."/profile.php")) { + $data = array_merge($data, + $this->profileModel->getProfile( + $data['WORK_DIRECTORY'])); + $this->profileModel->setWorkDirectoryConfigFile( + $data['WORK_DIRECTORY']); $data['SCRIPT'] .= - "doMessage('<h1 class=\"red\" >".tl('admin_controller_configure_work_dir_set')."</h1>');". - "setTimeout('window.location.href=window.location.href', 3000);"; - } else if ($data['PROFILE'] && strlen($data['WORK_DIRECTORY']) > 0) { - if($this->profileModel->makeWorkDirectory($data['WORK_DIRECTORY'])) { + "doMessage('<h1 class=\"red\" >". + tl('admin_controller_configure_work_dir_set'). + "</h1>');setTimeout(". 
+ "'window.location.href=window.location.href', 3000);"; + } else if ($data['PROFILE'] && + strlen($data['WORK_DIRECTORY']) > 0) { + if($this->profileModel->makeWorkDirectory( + $data['WORK_DIRECTORY'])) { $profile['DBMS'] = 'sqlite3'; $data['DBMS'] = 'sqlite3'; $profile['DB_NAME'] = 'default'; $data['DB_NAME'] = 'default'; - $profile['USER_AGENT_SHORT'] = tl('admin_controller_name_your_bot'); - $data['USER_AGENT_SHORT'] = $profile['USER_AGENT_SHORT']; - if($this->profileModel->updateProfile($data['WORK_DIRECTORY'], array(), $profile)) { - if($this->profileModel->setWorkDirectoryConfigFile($data['WORK_DIRECTORY'])) { + $profile['USER_AGENT_SHORT'] = + tl('admin_controller_name_your_bot'); + $data['USER_AGENT_SHORT'] = + $profile['USER_AGENT_SHORT']; + if($this->profileModel->updateProfile( + $data['WORK_DIRECTORY'], array(), $profile)) { + if($this->profileModel->setWorkDirectoryConfigFile( + $data['WORK_DIRECTORY'])) { $data['SCRIPT'] .= - "doMessage('<h1 class=\"red\" >".tl('admin_controller_configure_work_profile_made')."</h1>');"; + "doMessage('<h1 class=\"red\" >". + tl('admin_controller_configure_work_profile_made'). + "</h1>');"; } else { $data['PROFILE'] = false; $data['SCRIPT'] .= - "doMessage('<h1 class=\"red\" >".tl('admin_controller_configure_no_set_config')."</h1>');" . - "setTimeout('window.location.href=window.location.href', 3000);"; + "doMessage('<h1 class=\"red\" >". + tl('admin_controller_configure_no_set_config'). + "</h1>');" . + "setTimeout('window.location.href= ". + "window.location.href', 3000);"; } } else { - $this->profileModel->setWorkDirectoryConfigFile($data['WORK_DIRECTORY']); + $this->profileModel->setWorkDirectoryConfigFile( + $data['WORK_DIRECTORY']); $data['PROFILE'] = false; $data['SCRIPT'] .= - "doMessage('<h1 class=\"red\" >".tl('admin_controller_configure_no_create_profile')."</h1>');" . - "setTimeout('window.location.href=window.location.href', 3000);"; + "doMessage('<h1 class=\"red\" >". 
+ tl('admin_controller_configure_no_create_profile'). + "</h1>'); setTimeout('window.location.href=". + "window.location.href', 3000);"; } } else { - $this->profileModel->setWorkDirectoryConfigFile($data['WORK_DIRECTORY']); + $this->profileModel->setWorkDirectoryConfigFile( + $data['WORK_DIRECTORY']); $data['SCRIPT'] .= - "doMessage('<h1 class=\"red\" >".tl('admin_controller_configure_work_dir_invalid')."</h1>');". - "setTimeout('window.location.href=window.location.href', 3000);"; + "doMessage('<h1 class=\"red\" >". + tl('admin_controller_configure_work_dir_invalid'). + "</h1>');". + "setTimeout('window.location.href=". + "window.location.href', 3000);"; $data['PROFILE'] = false; } } else { - $this->profileModel->setWorkDirectoryConfigFile($data['WORK_DIRECTORY']); + $this->profileModel->setWorkDirectoryConfigFile( + $data['WORK_DIRECTORY']); $data['SCRIPT'] .= - "doMessage('<h1 class=\"red\" >".tl('admin_controller_configure_work_dir_invalid')."</h1>');" . - "setTimeout('window.location.href=window.location.href', 3000);"; + "doMessage('<h1 class=\"red\" >". + tl('admin_controller_configure_work_dir_invalid'). + "</h1>');" . + "setTimeout('window.location.href=". + "window.location.href', 3000);"; $data['PROFILE'] = false; } break; case "profile": foreach($this->profileModel->profile_fields as $field) { if(isset($_POST[$field])) { - $data[$field] = $this->clean($_POST[$field], "string"); + if($field != "ROBOT_DESCRIPTION") { + $clean_field = + $this->clean($_POST[$field], "string"); + } else { + $clean_field = $_POST[$field]; + } + $data[$field] = $clean_field; $profile[$field] = $data[$field]; } if(!isset($data[$field])) { @@ -875,29 +1063,44 @@ class AdminController extends Controller implements CrawlConstants } } $data['DEBUG_LEVEL'] = 0; - $data['DEBUG_LEVEL'] |= (isset($_POST["ERROR_INFO"])) ? ERROR_INFO : 0; - $data['DEBUG_LEVEL'] |= (isset($_POST["QUERY_INFO"])) ? QUERY_INFO : 0; - $data['DEBUG_LEVEL'] |= (isset($_POST["TEST_INFO"])) ? 
TEST_INFO : 0; + $data['DEBUG_LEVEL'] |= + (isset($_POST["ERROR_INFO"])) ? ERROR_INFO : 0; + $data['DEBUG_LEVEL'] |= + (isset($_POST["QUERY_INFO"])) ? QUERY_INFO : 0; + $data['DEBUG_LEVEL'] |= + (isset($_POST["TEST_INFO"])) ? TEST_INFO : 0; $profile['DEBUG_LEVEL'] = $data['DEBUG_LEVEL']; - $old_profile = $this->profileModel->getProfile($data['WORK_DIRECTORY']); + $old_profile = + $this->profileModel->getProfile($data['WORK_DIRECTORY']); $db_problem = false; - if((isset($profile['DBMS']) && $profile['DBMS'] != $old_profile['DBMS']) || - (isset($profile['DB_NAME']) && $profile['DB_NAME'] != $old_profile['DB_NAME']) || - (isset($profile['DB_URL']) && $profile['DB_URL'] != $old_profile['DB_URL'])) { - if(!$this->profileModel->migrateDatabaseIfNecessary($profile)) { + if((isset($profile['DBMS']) && + $profile['DBMS'] != $old_profile['DBMS']) || + (isset($profile['DB_NAME']) && + $profile['DB_NAME'] != $old_profile['DB_NAME']) || + (isset($profile['DB_URL']) && + $profile['DB_URL'] != $old_profile['DB_URL'])) { + + if(!$this->profileModel->migrateDatabaseIfNecessary( + $profile)) { $db_problem = true; } - } else if ((isset($profile['DB_USER']) && $profile['DB_USER'] != $old_profile['DB_USER']) || - (isset($profile['DB_PASSWORD']) && $profile['DB_PASSWORD'] != $old_profile['DB_PASSWORD'])) { - if($this->profileModel->testDatabaseManager($profile) !== true) { + } else if ((isset($profile['DB_USER']) && + $profile['DB_USER'] != $old_profile['DB_USER']) || + (isset($profile['DB_PASSWORD']) && + $profile['DB_PASSWORD'] != $old_profile['DB_PASSWORD'])) { + + if($this->profileModel->testDatabaseManager( + $profile) !== true) { $db_problem = true; } } if($db_problem) { $data['SCRIPT'] .= - "doMessage('<h1 class=\"red\" >".tl('admin_controller_configure_no_change_db')."</h1>');"; + "doMessage('<h1 class=\"red\" >". + tl('admin_controller_configure_no_change_db'). 
+ "</h1>');"; $data['DBMS'] = $old_profile['DBMS']; $data['DB_NAME'] = $old_profile['DB_NAME']; $data['DB_URL'] = $old_profile['DB_URL']; @@ -906,25 +1109,37 @@ class AdminController extends Controller implements CrawlConstants break; } - if($this->profileModel->updateProfile($data['WORK_DIRECTORY'], $profile, $old_profile)) { + if($this->profileModel->updateProfile( + $data['WORK_DIRECTORY'], $profile, $old_profile)) { $data['SCRIPT'] = - "doMessage('<h1 class=\"red\" >".tl('admin_controller_configure_profile_change')."</h1>');"; - if($old_profile['DEBUG_LEVEL'] != $profile['DEBUG_LEVEL']) { + "doMessage('<h1 class=\"red\" >". + tl('admin_controller_configure_profile_change'). + "</h1>');"; + + if($old_profile['DEBUG_LEVEL'] != + $profile['DEBUG_LEVEL']) { $data['SCRIPT'] .= - "setTimeout('window.location.href=\"?c=admin&a=configure&YIOOP_TOKEN=".$_REQUEST['YIOOP_TOKEN']."\"', 3*sec);"; + "setTimeout('window.location.href=\"". + "?c=admin&a=configure&YIOOP_TOKEN=". + $_REQUEST['YIOOP_TOKEN']."\"', 3*sec);"; } } else { $data['PROFILE'] = false; $data['SCRIPT'] .= - "doMessage('<h1 class=\"red\" >".tl('admin_controller_configure_no_change_profile')."</h1>');"; + "doMessage('<h1 class=\"red\" >". + tl('admin_controller_configure_no_change_profile'). 
+ "</h1>');"; break; } break; default: - if(isset($data['WORK_DIRECTORY']) && file_exists($data['WORK_DIRECTORY']."/profile.php")) { - $data = array_merge($data, $this->profileModel->getProfile($data['WORK_DIRECTORY'])); + if(isset($data['WORK_DIRECTORY']) && + file_exists($data['WORK_DIRECTORY']."/profile.php")) { + $data = array_merge($data, + $this->profileModel->getProfile( + $data['WORK_DIRECTORY'])); } else { $data['WORK_DIRECTORY'] = ""; $data['PROFILE'] = false; @@ -943,17 +1158,22 @@ class AdminController extends Controller implements CrawlConstants } } - if(!isset($data['ROBOT_DESCRIPTION']) || strlen($data['ROBOT_DESCRIPTION']) == "") { - $data['ROBOT_DESCRIPTION'] = tl('admin_controller_describe_robot'); + if(!isset($data['ROBOT_DESCRIPTION']) || + strlen($data['ROBOT_DESCRIPTION']) == "") { + $data['ROBOT_DESCRIPTION'] = + tl('admin_controller_describe_robot'); } $data['SCRIPT'] .= - "elt('database-system').onchange = function () {" . - "setDisplay('login-dbms',self.logindbms[elt('database-system').value]);};" . - "setDisplay('login-dbms', logindbms[elt('database-system').value]);\n"; + "elt('database-system').onchange = function () {" . + "setDisplay('login-dbms',". + "self.logindbms[elt('database-system').value]);};" . + "setDisplay('login-dbms', ". + "logindbms[elt('database-system').value]);\n"; } $data['SCRIPT'] .= - "elt('locale').onchange = function () { elt('configureProfileForm').submit();};\n"; + "elt('locale').onchange = ". 
+ "function () { elt('configureProfileForm').submit();};\n"; return $data; } diff --git a/controllers/archive_controller.php b/controllers/archive_controller.php index 7fb8ee903..c6746dded 100755 --- a/controllers/archive_controller.php +++ b/controllers/archive_controller.php @@ -41,7 +41,8 @@ require_once BASE_DIR."/lib/web_archive_bundle.php"; require_once BASE_DIR."/lib/crawl_constants.php"; /** - * Fetcher machines also act as archives for complete caches of web pages, this controller is used to handle access to these web page caches + * Fetcher machines also act as archives for complete caches of web pages, + * this controller is used to handle access to these web page caches * * @author Chris Pollett * @package seek_quarry @@ -60,15 +61,17 @@ class ArchiveController extends Controller implements CrawlConstants */ var $views = array(); /** - * The only legal activity this controller will accept is a request for the cache of a web page - * @var array + * The only legal activity this controller will accept is a request + * for the cache of a web page + * @var array */ var $activities = array("cache"); /** - * Main method for this controller to handle requests. It first checks the request is valid, and then handles the corresponding activity + * Main method for this controller to handle requests. 
It first checks + * the request is valid, and then handles the corresponding activity * - * For this controller the only activity is to handle a cache request + * For this controller the only activity is to handle a cache request */ function processRequest() { @@ -76,7 +79,9 @@ class ArchiveController extends Controller implements CrawlConstants $data = array(); - // do a quick test to see if this is a request seems like from a legitimate machine + /* do a quick test to see if this request seems to be from a + legitimate machine + */ if(!$this->checkRequest()) {return; } $activity = $_REQUEST['a']; @@ -86,11 +91,14 @@ class ArchiveController extends Controller implements CrawlConstants /** - * Retrieves the requested page from the WebArchiveBundle and echo it page, base64 encoded + * Retrieves the requested page from the WebArchiveBundle and echoes it, + * base64 encoded */ function cache() { - $web_archive = new WebArchiveBundle(CRAWL_DIR.'/cache/'.self::archive_base_name.$_REQUEST['crawl_time'], -1); + $web_archive = new WebArchiveBundle( + CRAWL_DIR.'/cache/'.self::archive_base_name.
+ $_REQUEST['crawl_time'], -1); $page = $web_archive->getPage($_REQUEST['hash'], $_REQUEST['offset']); echo base64_encode(serialize($page)); diff --git a/controllers/controller.php b/controllers/controller.php index f8c520d08..62de89811 100755 --- a/controllers/controller.php +++ b/controllers/controller.php @@ -50,12 +50,14 @@ require_once BASE_DIR."/lib/utility.php"; abstract class Controller { /** - * Array of the model classes used by this controller (contructor loads these) + * Array of the model classes used by this controller + * (contructor loads these) * @var array */ var $models = array(); /** - * Array of the view classes used by this controller (contructor loads these) + * Array of the view classes used by this controller + * (contructor loads these) * @var array */ var $views = array(); @@ -115,20 +117,22 @@ abstract class Controller foreach($this->models as $model) { $model_name = ucfirst($model)."Model"; $model_instance_name = lcfirst($model_name); - $data['QUERY_STATISTICS'] = array_merge($data['QUERY_STATISTICS'], + $data['QUERY_STATISTICS'] = array_merge( + $data['QUERY_STATISTICS'], $this->$model_instance_name->db->query_log); - $data['TOTAL_ELAPSED_TIME'] += $this->$model_instance_name->db->total_time; + $data['TOTAL_ELAPSED_TIME'] += + $this->$model_instance_name->db->total_time; } } $this->$view_instance_name->render($data); } /** - * Generates a cross site request forgery preventing token based on the - * provided user name, the current time and the hidden AUTH_KEY + * Generates a cross site request forgery preventing token based on the + * provided user name, the current time and the hidden AUTH_KEY * - * @param string $user username to use to generate token - * @return string a csrf token + * @param string $user username to use to generate token + * @return string a csrf token */ public function generateCSRFToken($user) { @@ -137,20 +141,22 @@ abstract class Controller } /** - * Checks if the form CSRF (cross-site request forgery 
preventing) token matches - * the given user and has not expired (1 hour till expires) + * Checks if the form CSRF (cross-site request forgery preventing) token + * matches the given user and has not expired (1 hour till expires) * - * @param string $token_name attribute of $_REQUEST that contains the CSRFToken - * @param string $user username - * @return bool whether the CSRF token was valid + * @param string $token_name attribute of $_REQUEST containing CSRFToken + * @param string $user username + * @return bool whether the CSRF token was valid */ public function checkCSRFToken($token_name, $user) { $token_okay = false; - if(isset($_REQUEST[$token_name]) && strlen($_REQUEST[$token_name]) == 22) { + if(isset($_REQUEST[$token_name]) && + strlen($_REQUEST[$token_name]) == 22) { $token_parts = explode("|", $_REQUEST[$token_name]); - if($token_parts[1] + 3600 > time() && crawlHash($user.$token_parts[1].AUTH_KEY) == $token_parts[0]) { + if($token_parts[1] + 3600 > time() && + crawlHash($user.$token_parts[1].AUTH_KEY) == $token_parts[0]) { $token_okay = true; } } @@ -159,13 +165,14 @@ abstract class Controller } /** - * Used to clean strings that might be tainted as they originate from the user + * Used to clean strings that might be tainted as originate from the user * - * @param mixed $value tainted data - * @param string $type the type of the data in value: one of int, hash, or string - * @param mixed $default if $value is not set the default value is returned, this - * isn't used much since if the error_reporting is E_ALL or -1 you would still get a Notice. - * @return string the clean input matching the type provided + * @param mixed $value tainted data + * @param string $type type of data in value: one of int, hash, or string + * @param mixed $default if $value is not set default value is returned, + * this isn't used much since if the error_reporting is E_ALL + * or -1 you would still get a Notice. 
+ * @return string the clean input matching the type provided */ public function clean($value, $type, $default = NULL) { @@ -205,16 +212,19 @@ abstract class Controller } /** - * Checks the request if a request is for a valid activity and if it uses the correct authorization key + * Checks the request if a request is for a valid activity and if it uses + * the correct authorization key * - * @return bool whether the request was valid or not + * @return bool whether the request was valid or not */ function checkRequest() { - if(!isset($_REQUEST['time']) - || !isset($_REQUEST['session']) || !in_array($_REQUEST['a'], $this->activities)) { return; } + if(!isset($_REQUEST['time']) || + !isset($_REQUEST['session']) || + !in_array($_REQUEST['a'], $this->activities)) { return; } - $time = $_REQUEST['time']; // request must be within an hour of this machine's clock + $time = $_REQUEST['time']; + // request must be within an hour of this machine's clock if(abs(time() - $time) > 3600) { return false;} diff --git a/controllers/fetch_controller.php b/controllers/fetch_controller.php index 3d1fee609..ae6143797 100755 --- a/controllers/fetch_controller.php +++ b/controllers/fetch_controller.php @@ -73,7 +73,9 @@ class FetchController extends Controller implements CrawlConstants { $data = array(); - // do a quick test to see if this is a request seems like from a legitimate machine + /* do a quick test to see if this is a request seems like + from a legitimate machine + */ if(!$this->checkRequest()) {return; } $activity = $_REQUEST['a']; @@ -112,7 +114,8 @@ class FetchController extends Controller implements CrawlConstants if(isset($_REQUEST['found'])) { $info =array(); - $sites = unserialize(gzuncompress(base64_decode(urldecode($_REQUEST['found'])))); + $sites = unserialize(gzuncompress( + base64_decode(urldecode($_REQUEST['found'])))); $address = str_replace(".", "-", $_SERVER['REMOTE_ADDR']); $address = str_replace(":", "_", $address); @@ -126,7 +129,8 @@ class FetchController 
extends Controller implements CrawlConstants $info[self::STATUS] = self::CONTINUE_STATE; if(file_exists(CRAWL_DIR."/schedules/crawl_status.txt")) { - $crawl_status = unserialize(file_get_contents(CRAWL_DIR."/schedules/crawl_status.txt")); + $crawl_status = unserialize( + file_get_contents(CRAWL_DIR."/schedules/crawl_status.txt")); $info[self::CRAWL_TIME] = $crawl_status['CRAWL_TIME']; } else { $info[self::CRAWL_TIME] = 0; @@ -157,9 +161,12 @@ class FetchController extends Controller implements CrawlConstants if(isset($sites[self::INVERTED_INDEX])) { $index_sites[self::INVERTED_INDEX] = $sites[self::INVERTED_INDEX]; } - $index_dir = CRAWL_DIR."/schedules/".self::index_data_base_name.$_REQUEST['crawl_time']; + $index_dir = + CRAWL_DIR."/schedules/".self::index_data_base_name. + $_REQUEST['crawl_time']; - $this->addScheduleToScheduleDirectory($index_dir, $index_sites, $address, $day, $time); + $this->addScheduleToScheduleDirectory( + $index_dir, $index_sites, $address, $day, $time); $sites[self::INVERTED_INDEX] = NULL; } @@ -171,7 +178,8 @@ class FetchController extends Controller implements CrawlConstants */ function addToCrawlSchedules(&$sites, $address, $day, $time) { - $base_dir = CRAWL_DIR."/schedules/".self::schedule_data_base_name.$_REQUEST['crawl_time']; + $base_dir = CRAWL_DIR."/schedules/". 
+ self::schedule_data_base_name.$_REQUEST['crawl_time']; $scheduler_info = array(); if(isset($sites[self::TO_CRAWL])) { @@ -189,10 +197,12 @@ class FetchController extends Controller implements CrawlConstants $num_seen = count($seen_sites); for($i = 0; $i < $num_seen; $i++) { - $scheduler_info[self::SEEN_URLS][$i] = $seen_sites[$i][self::URL]; + $scheduler_info[self::SEEN_URLS][$i] = + $seen_sites[$i][self::URL]; } } - $this->addScheduleToScheduleDirectory($base_dir, $scheduler_info, $address, $day, $time); + $this->addScheduleToScheduleDirectory( + $base_dir, $scheduler_info, $address, $day, $time); $sites[self::TO_CRAWL] = NULL; } @@ -204,13 +214,15 @@ class FetchController extends Controller implements CrawlConstants */ function addRobotSchedules(&$sites, $address, $day, $time) { - $robot_dir = CRAWL_DIR."/schedules/".self::robot_data_base_name.$_REQUEST['crawl_time']; + $robot_dir = CRAWL_DIR."/schedules/". + self::robot_data_base_name.$_REQUEST['crawl_time']; if(isset($sites[self::ROBOT_TXT])) { $data = $sites[self::ROBOT_TXT]; } else { $data = array(); } - $this->addScheduleToScheduleDirectory($robot_dir, $data, $address, $day, $time); + $this->addScheduleToScheduleDirectory( + $robot_dir, $data, $address, $day, $time); $sites[self::ROBOT_TXT] = NULL; } @@ -238,20 +250,24 @@ class FetchController extends Controller implements CrawlConstants $data_string = serialize($data); $data_hash = crawlHash($data_string); - file_put_contents($dir."/At".$time."From".$address."WithHash$data_hash.txt", $data_string); + file_put_contents( + $dir."/At".$time."From".$address. 
+ "WithHash$data_hash.txt", $data_string); } /** - * Returns the time in seconds from the start of the current epoch of the active crawl if it exists; 0 otherwise - * - * @return int time of active crawl + * Returns the time in seconds from the start of the current epoch of the + * active crawl if it exists; 0 otherwise + * + * @return int time of active crawl */ function crawlTime() { $info = array(); $info[self::STATUS] = self::CONTINUE_STATE; if(file_exists(CRAWL_DIR."/schedules/crawl_status.txt")) { - $crawl_status = unserialize(file_get_contents(CRAWL_DIR."/schedules/crawl_status.txt")); + $crawl_status = unserialize(file_get_contents( + CRAWL_DIR."/schedules/crawl_status.txt")); $info[self::CRAWL_TIME] = $crawl_status[self::CRAWL_TIME]; } else { $info[self::CRAWL_TIME] = 0; diff --git a/controllers/search_controller.php b/controllers/search_controller.php index 4d2042f8d..cc2668737 100755 --- a/controllers/search_controller.php +++ b/controllers/search_controller.php @@ -55,29 +55,34 @@ class SearchController extends Controller implements CrawlConstants { /** * Says which models to load for this controller. - * PhraseModel is used to extract words from the query; CrawlModel is used for cached web page requests + * PhraseModel is used to extract words from the query; CrawlModel + * is used for cached web page requests * @var array */ var $models = array("phrase", "crawl"); /** * Says which views to load for this controller. 
- * The SearchView is used for displaying general search results as well as the initial search screen; NocacheView + * The SearchView is used for displaying general search results as well + * as the initial search screen; NocacheView * is used on a cached web page request that fails * @var array */ var $views = array("search", "nocache"); /** - * Says which activities (roughly methods invoke from the web) this controller will respond to + * Says which activities (roughly methods invoked from the web) this + * controller will respond to * @var array */ var $activities = array("query", "cache", "related", "signout"); /** - * This is the main entry point for handling a search request. + * This is the main entry point for handling a search request. * - * ProcessRequest determines the type of search request (normal request , cache request, or related request), or if its a - * user is returning from the admin panel via signout. It then calls the appropriate method to handle the given activity. - * Finally, it draw the search screen. + * ProcessRequest determines the type of search request (normal request, + * cache request, or related request), or if a + * user is returning from the admin panel via signout. It then calls the + * appropriate method to handle the given activity. Finally, it draws the + * search screen. */ function processRequest() { @@ -106,7 +111,8 @@ class SearchController extends Controller implements CrawlConstants if($activity == "signout") { unset($_SESSION['USER_ID']); $user = $_SERVER['REMOTE_ADDR']; - $data['SCRIPT'] = "doMessage('<h1 class=\"red\" >".tl('search_controller_logout_successful')."</h1>')"; + $data['SCRIPT'] = "doMessage('<h1 class=\"red\" >".
+ tl('search_controller_logout_successful')."</h1>')"; } if(isset($_REQUEST['arg'])) { @@ -133,7 +139,10 @@ class SearchController extends Controller implements CrawlConstants if(!isset($query)) { $query = NULL; } - $data = $this->processQuery($query, $activity, $arg, $results_per_page); // calculate the results of a search if there is one + $data = + $this->processQuery( + $query, $activity, $arg, $results_per_page); + // calculate the results of a search if there is one } else { $highlight = true; if(!isset($query)) { @@ -152,15 +161,19 @@ class SearchController extends Controller implements CrawlConstants } /** - * Searches the database for the most relevant pages for the supplied search terms - * Renders the results to the HTML page. + * Searches the database for the most relevant pages for the supplied search + * terms. Renders the results to the HTML page. * - * @param string $query a string containing the words to search on - * @param string $activity besides a straight search for words query, one might have other searches, such as a search for related pages. + * @param string $query a string containing the words to search on + * @param string $activity besides a straight search for words query, + * one might have other searches, such as a search for related pages. * this argument says what kind of search to do. - * @param string $arg for a search other than a straight word query this argument provides auxiliary information on how to conduct the - * search. For instance on a related web page search, it might provide the url of the site with which to perform the related search. - * @param int $results_per_page the maixmum number of search results that can occur on a page + * @param string $arg for a search other than a straight word query this + * argument provides auxiliary information on how to conduct the + * search. For instance on a related web page search, it might provide + * the url of the site with which to perform the related search. 
+ * @param int $results_per_page the maximum number of search results + * that can occur on a page * @return array an array of at most results_per_page many search results */ function processQuery($query, $activity, $arg, $results_per_page) @@ -182,18 +195,23 @@ class SearchController extends Controller implements CrawlConstants $data['QUERY'] = "related:$arg"; $url = $arg; $summary_offset = $this->clean($_REQUEST['so'], "int"); - $crawl_item = $this->crawlModel->getCrawlItem(crawlHash($url), $summary_offset); + $crawl_item = $this->crawlModel->getCrawlItem( + crawlHash($url), $summary_offset); - $top_phrases = $this->phraseModel->getTopPhrases($crawl_item, 20); + $top_phrases = + $this->phraseModel->getTopPhrases($crawl_item, 20); $top_query = implode(" ", $top_phrases); - $phrase_results = $this->phraseModel->getPhrasePageResults($top_query, $limit, $results_per_page, false); - $data['PAGING_QUERY'] = "index.php?c=search&a=related&arg=".urlencode($url)."&so=$summary_offset"; + $phrase_results = $this->phraseModel->getPhrasePageResults( + $top_query, $limit, $results_per_page, false); + $data['PAGING_QUERY'] = "index.php?c=search&a=related&arg=". + urlencode($url)."&so=$summary_offset"; break; case "query": default: if(trim($query) != "") { - $phrase_results = $this->phraseModel->getPhrasePageResults($query, $limit, $results_per_page); + $phrase_results = $this->phraseModel->getPhrasePageResults( + $query, $limit, $results_per_page); } $data['PAGING_QUERY'] = "index.php?q=".urlencode($query); $data['QUERY'] = urlencode($this->clean($query,"string")); @@ -212,12 +230,13 @@ class SearchController extends Controller implements CrawlConstants } /** - * This method is responsible for parsing out the kind of query from the raw query string + * This method is responsible for parsing out the kind of query + * from the raw query string * - * This method parses the raw query string for query activities.
It parses the name of each activity and - * its argument + * This method parses the raw query string for query activities. + * It parses the name of each activity and its argument * - * @return array a list of search activities parsed out of the search string + * @return array list of search activities parsed out of the search string */ function extractActivityQuery() { @@ -265,7 +284,8 @@ class SearchController extends Controller implements CrawlConstants $this->crawlModel->index_name = $crawl_time; $summary_offset = $this->clean($_REQUEST['so'], "int"); - if(!$crawl_item = $this->crawlModel->getCrawlItem(crawlHash($url), $summary_offset)) { + if(!$crawl_item = $this->crawlModel->getCrawlItem(crawlHash($url), + $summary_offset)) { $this->displayView("nocache", $data); exit(); @@ -276,7 +296,8 @@ class SearchController extends Controller implements CrawlConstants $machine_uri = $crawl_item[self::MACHINE_URI]; $page = $crawl_item[self::HASH]; $offset = $crawl_item[self::OFFSET]; - $cache_item = $this->crawlModel->getCacheFile($machine, $machine_uri, $page, $offset, $crawl_time); + $cache_item = $this->crawlModel->getCacheFile($machine, + $machine_uri, $page, $offset, $crawl_time); $cache_file = $cache_item[self::PAGE]; $request = $cache_item['REQUEST']; @@ -307,9 +328,12 @@ class SearchController extends Controller implements CrawlConstants $divNode = $dom->createElement('div'); $divNode = $body->insertBefore($divNode, $first_child); - $divNode->setAttributeNS("","style", "border-color: black; border-style:solid; border-width:3px;padding: 5px; background-color: white"); + $divNode->setAttributeNS("","style", "border-color: black; ". + "border-style:solid; border-width:3px; ". 
+ "padding: 5px; background-color: white"); - $textNode = $dom->createTextNode(tl('search_controller_cached_version', "$page_url", $date)); + $textNode = $dom->createTextNode(tl('search_controller_cached_version', + "$page_url", $date)); $textNode = $divNode->appendChild($textNode); $body = $this->markChildren($body, $words, $dom); @@ -323,7 +347,9 @@ class SearchController extends Controller implements CrawlConstants foreach($words as $word) { if(strlen($word) > 0) { $match = crawlHash($word).$word; - $newDoc = preg_replace("/$match/i", '<span style="background-color:'.$colors[$i].'">$0</span>', $newDoc); + $newDoc = preg_replace("/$match/i", + '<span style="background-color:'. + $colors[$i].'">$0</span>', $newDoc); $i = ($i + 1) % $color_count; $newDoc = preg_replace("/".crawlHash($word)."/", "", $newDoc); } @@ -360,7 +386,8 @@ class SearchController extends Controller implements CrawlConstants foreach($words as $word) { if(strlen($word) > 0) { - $text = preg_replace("/$word/i", crawlHash($word).'$0', $text); + $text = preg_replace( + "/$word/i", crawlHash($word).'$0', $text); } } diff --git a/controllers/settings_controller.php b/controllers/settings_controller.php index 564ab56bf..6582ccdae 100755 --- a/controllers/settings_controller.php +++ b/controllers/settings_controller.php @@ -83,10 +83,12 @@ class SettingsController extends Controller $languages = $this->localeModel->getLocaleList(); foreach($languages as $language) { - $data['LANGUAGES'][$language['LOCALE_TAG']] = $language['LOCALE_NAME']; + $data['LANGUAGES'][$language['LOCALE_TAG']] = + $language['LOCALE_NAME']; } - if($token_okay && isset($_REQUEST['lang']) && in_array($_REQUEST['lang'], array_keys($data['LANGUAGES']))) { + if($token_okay && isset($_REQUEST['lang']) && + in_array($_REQUEST['lang'], array_keys($data['LANGUAGES']))) { $_SESSION['l'] = $_REQUEST['lang']; setLocaleObject( $_SESSION['l']); $changed_settings_flag = true; @@ -94,8 +96,10 @@ class SettingsController extends Controller 
$data['LOCALE_TAG'] = getLocaleTag(); $n = NUM_RESULTS_PER_PAGE; - $data['PER_PAGE'] = array($n => $n, 2*$n => 2*$n, 5*$n=> 5*$n, 10*$n=>10*$n); - if($token_okay && isset($_REQUEST['perpage']) && in_array($_REQUEST['perpage'], array_keys($data['PER_PAGE']))) { + $data['PER_PAGE'] = + array($n => $n, 2*$n => 2*$n, 5*$n=> 5*$n, 10*$n=>10*$n); + if($token_okay && isset($_REQUEST['perpage']) && + in_array($_REQUEST['perpage'], array_keys($data['PER_PAGE']))) { $_SESSION['MAX_PAGES_TO_SHOW'] = $_REQUEST['perpage']; $changed_settings_flag = true; } @@ -107,7 +111,8 @@ class SettingsController extends Controller } if($changed_settings_flag) { - $data['SCRIPT'] = "doMessage('<h1 class=\"red\" >".tl('settings_controller_settings_saved')."</h1>')"; + $data['SCRIPT'] = "doMessage('<h1 class=\"red\" >". + tl('settings_controller_settings_saved')."</h1>')"; } $this->displayView($view, $data); diff --git a/index.php b/index.php index 4fb7f7043..2bfa007b2 100755 --- a/index.php +++ b/index.php @@ -35,7 +35,8 @@ * @filesource */ -define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT'].$_SERVER['PWD'].$_SERVER["SCRIPT_NAME"], 0, -strlen("index.php"))); +define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT'].$_SERVER['PWD']. 
+ $_SERVER["SCRIPT_NAME"], 0, -strlen("index.php"))); /** * Load the configuration file @@ -67,7 +68,8 @@ if ( false === function_exists('lcfirst') ) { { return (string)(strtolower(substr($str,0,1)).substr($str,1));} } -$available_controllers = array("search", "fetch", "cache", "settings", "admin", "archive"); +$available_controllers = array("search", "fetch", "cache", + "settings", "admin", "archive"); //the request variable c is used to determine the controller if(!isset($_REQUEST['c'])) { @@ -81,7 +83,7 @@ if(!checkAllowedController($controller_name)) $controller_name = "search"; } -// if the install directory exists we force the page to be the configuration page +// if no profile exists we force the page to be the configuration page if(!PROFILE ) { $controller_name = "admin"; } @@ -113,7 +115,8 @@ setLocaleObject($locale_tag); /** - * Loads controller responsible for calculating the data needed to render the scene + * Loads controller responsible for calculating + * the data needed to render the scene * */ require_once(BASE_DIR."/controllers/".$controller_name."_controller.php"); @@ -124,11 +127,12 @@ $controller = new $controller_class(); $controller->processRequest(); /** - * Verifies that the supplied controller string is a controller for the - * SeekQuarry app + * Verifies that the supplied controller string is a controller for the + * SeekQuarry app * - * @param string $controller_name name of controller (this usually come from the query string) - * @return bool whether it is a valid controller + * @param string $controller_name name of controller + * (this usually come from the query string) + * @return bool whether it is a valid controller */ function checkAllowedController($controller_name) { @@ -138,9 +142,9 @@ function checkAllowedController($controller_name) } /** - * shorthand for echo + * shorthand for echo * - * @param string $text string to send to the current output + * @param string $text string to send to the current output */ function 
e($text) { @@ -173,7 +177,8 @@ function tl() /** * Sets the language to be used for locale settings * - * @param string $locale_tag the tag of the language to use to determine locale settings + * @param string $locale_tag the tag of the language to use to determine + * locale settings */ function setLocaleObject($locale_tag) { @@ -183,10 +188,11 @@ function setLocaleObject($locale_tag) } /** - * Gets the language tag (for instance, en_US for American English) of the locale that - * is currently being used. + * Gets the language tag (for instance, en_US for American English) of the + * locale that is currently being used. * - * @return string the tag of the language currently being used for locale settings + * @return string the tag of the language currently being used for locale + * settings */ function getLocaleTag() { @@ -195,9 +201,10 @@ function getLocaleTag() } /** - * Returns the current language directions. + * Returns the current language directions. * - * @return string ltr or rtl depending on if the language is left-to-right or right-to-left + * @return string ltr or rtl depending on if the language is left-to-right + * or right-to-left */ function getLocaleDirection() { @@ -206,11 +213,12 @@ function getLocaleDirection() } /** - * Returns the current locales method of writing blocks (things like divs or paragraphs). - * A language like English puts blocks one after another from the top of the page - * to the bottom. Other languages like classical Chinese list them from right to left. + * Returns the current locale's method of writing blocks (things like divs or + * paragraphs). A language like English puts blocks one after another from the + * top of the page to the bottom. Other languages like classical Chinese list + * them from right to left.
* - * @return string tb lr rl depending on the current locales block progression + * @return string tb lr rl depending on the current locales block progression */ function getBlockProgression() { @@ -220,9 +228,9 @@ function getBlockProgression() } /** - * Returns the writing mode of the current locale. This is a combination of the locale - * direction and the block progression. For instance, for English the writing mode is - * lr-tb (left-to-right top-to-bottom). + * Returns the writing mode of the current locale. This is a combination of the + * locale direction and the block progression. For instance, for English the + * writing mode is lr-tb (left-to-right top-to-bottom). * * @return string the locales writing mode */ diff --git a/lib/bloom_filter_bundle.php b/lib/bloom_filter_bundle.php index 8c82608d6..16d7cc5a7 100644 --- a/lib/bloom_filter_bundle.php +++ b/lib/bloom_filter_bundle.php @@ -58,7 +58,8 @@ class BloomFilterBundle /** * */ - public function __construct($dir_name, $filter_size = self::default_filter_size ) + public function __construct($dir_name, + $filter_size = self::default_filter_size ) { $this->dir_name = $dir_name; if(!is_dir($dir_name)) { @@ -68,14 +69,16 @@ class BloomFilterBundle $this->loadMetaData(); if($this->num_filters == 0) { - $this->current_filter = new BloomFilterFile($dir_name."/filter_0.ftr", $filter_size); + $this->current_filter = + new BloomFilterFile($dir_name."/filter_0.ftr", $filter_size); $this->num_filters++; $this->filter_size = $filter_size; $this->current_filter->save(); $this->saveMetaData(); } else { $last_filter = $this->num_filters - 1; - $this->current_filter = BloomFilterFile::load($dir_name."/filter_$last_filter.ftr"); + $this->current_filter = + BloomFilterFile::load($dir_name."/filter_$last_filter.ftr"); } @@ -91,7 +94,9 @@ class BloomFilterBundle $this->current_filter = NULL; gc_collect_cycles(); $last_filter = $this->num_filters; - $this->current_filter = new 
BloomFilterFile($this->dir_name."/filter_$last_filter.ftr", $this->filter_size); + $this->current_filter = + new BloomFilterFile($this->dir_name."/filter_$last_filter.ftr", + $this->filter_size); $this->current_filter_count = 0; $this->num_filters++; } @@ -114,7 +119,8 @@ class BloomFilterBundle if($i == $num_filters - 1) { $tmp_filter = $this->current_filter; } else { - $tmp_filter = BloomFilterFile::load($this->dir_name."/filter_$i.ftr"); + $tmp_filter = + BloomFilterFile::load($this->dir_name."/filter_$i.ftr"); } for($j = 0; $j < $count; $j++) { @@ -137,7 +143,8 @@ class BloomFilterBundle public function loadMetaData() { if(file_exists($this->dir_name.'/meta.txt')) { - $meta = unserialize(file_get_contents($this->dir_name.'/meta.txt') ); + $meta = unserialize( + file_get_contents($this->dir_name.'/meta.txt') ); $this->num_filters = $meta['NUM_FILTERS']; $this->current_filter_count = $meta['CURRENT_FILTER_COUNT']; $this->filter_size = $meta['FILTER_SIZE']; diff --git a/lib/bloom_filter_file.php b/lib/bloom_filter_file.php index 223dd4f55..d816928c9 100755 --- a/lib/bloom_filter_file.php +++ b/lib/bloom_filter_file.php @@ -59,14 +59,16 @@ class BloomFilterFile extends PersistentStructure /** * */ - public function __construct($fname, $num_values, $save_frequency = self::DEFAULT_SAVE_FREQUENCY) + public function __construct($fname, $num_values, + $save_frequency = self::DEFAULT_SAVE_FREQUENCY) { $log2 = log(2); $this->num_keys = ceil(log($num_values)/($log2*$log2)); $this->filter_size = ($this->num_keys)*$num_values; $mem_before = memory_get_usage(true); - $this->filter = pack("x". ceil(.125*$this->filter_size)); // 1/8 =.125 = num bits/bytes, want to make things floats + $this->filter = pack("x". 
ceil(.125*$this->filter_size)); + // 1/8 =.125 = num bits/bytes, want to make things floats $mem = memory_get_usage(true) - $mem_before; parent::__construct($fname, $save_frequency); diff --git a/lib/compressor.php b/lib/compressor.php index 7530d9206..08a80f39e 100755 --- a/lib/compressor.php +++ b/lib/compressor.php @@ -34,9 +34,9 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} /** - * A Compressor is used to apply a filter to objects before they are stored into a WebArchive. - * The filter is assumed to be invertible, and the typical intention is the filter carries out - * some kind of string compression. + * A Compressor is used to apply a filter to objects before they are stored + * into a WebArchive. The filter is assumed to be invertible, and the typical + * intention is the filter carries out some kind of string compression. * * @author Chris Pollett * @package seek_quarry @@ -46,19 +46,21 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} interface Compressor { /** - * Applies the Compressor compress filter to a string before it is inserted into a WebArchive. + * Applies the Compressor compress filter to a string before it is + * inserted into a WebArchive. * - * @param string $str string to apply filter to - * @return string the result of applying the filter + * @param string $str string to apply filter to + * @return string the result of applying the filter */ - public function compress($str); + function compress($str); /** - * Used to unapply the compress filter as when data is read out of a WebArchive. + * Used to unapply the compress filter as when data is read out of a + * WebArchive. 
* - * @param string $str data read from a string archive - * @return string result of uncompressing + * @param string $str data read from a string archive + * @return string result of uncompressing */ - public function uncompress($str); + function uncompress($str); } ?> diff --git a/lib/crawl_daemon.php b/lib/crawl_daemon.php index d7c261c88..6c46a3c3c 100644 --- a/lib/crawl_daemon.php +++ b/lib/crawl_daemon.php @@ -62,13 +62,16 @@ class CrawlDaemon implements CrawlConstants // handle shutdown tasks $info = array(); $info[self::STATUS] = self::STOP_STATE; - file_put_contents(CRAWL_DIR."/schedules/".self::$name."_messages.txt", serialize($info)); + file_put_contents( + CRAWL_DIR."/schedules/".self::$name."_messages.txt", + serialize($info)); unlink(CRAWL_DIR."/schedules/".self::$name."_lock.txt"); break; case SIGSEGV: // handle shutdown tasks - crawlLog("Segmentation Fault Caught!! Debug back trace follows:"); + crawlLog( + "Segmentation Fault Caught!! Debug back trace follows:"); crawlLog(var_dump(debug_backtrace(), true)); break; @@ -82,7 +85,8 @@ class CrawlDaemon implements CrawlConstants { self::$name = $name; //don't let our script be run from apache - if(isset($_SERVER['DOCUMENT_ROOT']) && strlen($_SERVER['DOCUMENT_ROOT']) > 0) { + if(isset($_SERVER['DOCUMENT_ROOT']) && + strlen($_SERVER['DOCUMENT_ROOT']) > 0) { echo "BAD REQUEST"; exit(); } @@ -91,7 +95,8 @@ class CrawlDaemon implements CrawlConstants echo "For example,\n"; echo "php $name.php start //starts the $name as a daemon\n"; echo "php $name.php stop //stops the $name daemon\n"; - echo "php $name.php terminal //runs $name within the current process not as a daemon\n"; + echo "php $name.php terminal //runs $name within the current ". 
+ "process not as a daemon\n"; exit(); } @@ -108,7 +113,8 @@ class CrawlDaemon implements CrawlConstants } } else { //for Windows systems we fall back to console operation if(!$terminal_flag) { - echo "pcntl_fork function does not exist falling back to terminal mode\n"; + echo "pcntl_fork function does not exist falling back to ". + "terminal mode\n"; } $argv[1] = "terminal"; } @@ -130,18 +136,24 @@ class CrawlDaemon implements CrawlConstants // setup signal handler pcntl_signal(SIGTERM, "CrawlDaemon::processHandler"); - file_put_contents(CRAWL_DIR."/schedules/$name"."_lock.txt", serialize(getmypid())); + file_put_contents( + CRAWL_DIR."/schedules/$name"."_lock.txt", + serialize(getmypid())); $info = array(); $info[self::STATUS] = self::WAITING_START_MESSAGE_STATE; - file_put_contents(CRAWL_DIR."/schedules/$name"."_messages.txt", serialize($info)); + file_put_contents( + CRAWL_DIR."/schedules/$name"."_messages.txt", + serialize($info)); - define("LOG_TO_FILES", true); // if false log messages are sent to the console + define("LOG_TO_FILES", true); + // if false log messages are sent to the console break; case "stop": if(file_exists(CRAWL_DIR."/schedules/$name"."_lock.txt")) { - $pid = unserialize(file_get_contents(CRAWL_DIR."/schedules/$name"."_lock.txt")); + $pid = unserialize(file_get_contents( + CRAWL_DIR."/schedules/$name"."_lock.txt")); echo "Stopping $name...$pid\n"; posix_kill($pid, SIGTERM); } else { @@ -153,7 +165,9 @@ class CrawlDaemon implements CrawlConstants case "terminal": $info = array(); $info[self::STATUS] = self::WAITING_START_MESSAGE_STATE; - file_put_contents(CRAWL_DIR."/schedules/$name"."_messages.txt", serialize($info)); + file_put_contents( + CRAWL_DIR."/schedules/$name"."_messages.txt", + serialize($info)); define("LOG_TO_FILES", false); break; diff --git a/lib/fetch_url.php b/lib/fetch_url.php index 8170e5c99..0899b9d06 100755 --- a/lib/fetch_url.php +++ b/lib/fetch_url.php @@ -49,20 +49,23 @@ class FetchUrl implements CrawlConstants { /** 
- * Make multi_curl requests for an array of sites with urls + * Make multi_curl requests for an array of sites with urls * - * @param array $sites an array containing urls of pages to request - * @param bool $timer flag, true means print timing statistics to log - * @param string $key the component of $sites[$i] that has the value of a url to get - * defaults to URL - * @param string $value component of $sites[$i] in which to store the page that was gotten - * @param string $hash component of $sites[$i] in which to store a hash of page for de-deuplication - * purposes + * @param array $sites an array containing urls of pages to request + * @param bool $timer flag, true means print timing statistics to log + * @param string $key the component of $sites[$i] that has the value of + * a url to get defaults to URL + * @param string $value component of $sites[$i] in which to store the + * page that was gotten + * @param string $hash component of $sites[$i] in which to store a hash + * of page for de-duplication purposes * * @return array an updated array with the contents of those pages */ - public static function getPages($sites, $timer = false, $key=CrawlConstants::URL, $value=CrawlConstants::PAGE, $hash=CrawlConstants::HASH) + public static function getPages($sites, $timer = false, + $key=CrawlConstants::URL, $value=CrawlConstants::PAGE, + $hash=CrawlConstants::HASH) { static $ex_cnt = 0; @@ -84,7 +87,8 @@ class FetchUrl implements CrawlConstants curl_setopt($sites[$i][0], CURLOPT_RETURNTRANSFER, true); curl_setopt($sites[$i][0], CURLOPT_CONNECTTIMEOUT, PAGE_TIMEOUT); curl_setopt($sites[$i][0], CURLOPT_TIMEOUT, PAGE_TIMEOUT); - curl_setopt($sites[$i][0], CURLOPT_HTTPHEADER, array('Range: bytes=0-'.PAGE_RANGE_REQUEST)); + curl_setopt($sites[$i][0], CURLOPT_HTTPHEADER, + array('Range: bytes=0-'.PAGE_RANGE_REQUEST)); curl_multi_add_handle($agent_handler, $sites[$i][0]); } if($timer) { @@ -96,7 +100,8 @@ class FetchUrl implements CrawlConstants //Wait for responses do {
$mrc = @curl_multi_exec($agent_handler, $active); - } while (time() - $start < PAGE_TIMEOUT && $mrc == CURLM_CALL_MULTI_PERFORM ); + } while (time() - $start < PAGE_TIMEOUT && + $mrc == CURLM_CALL_MULTI_PERFORM ); if(time() - $start > PAGE_TIMEOUT) {crawlLog(" TIMED OUT!!!");} @@ -104,7 +109,8 @@ class FetchUrl implements CrawlConstants if (curl_multi_select($agent_handler, 1) != -1) { do { $mrc = @curl_multi_exec($agent_handler, $active); - } while (time()-$start < PAGE_TIMEOUT && $mrc == CURLM_CALL_MULTI_PERFORM); + } while (time()-$start < PAGE_TIMEOUT && + $mrc == CURLM_CALL_MULTI_PERFORM); } } @@ -119,18 +125,31 @@ class FetchUrl implements CrawlConstants // Get Data and Message Code $content = @curl_multi_getcontent($sites[$i][0]); - $sites[$i][self::HTTP_CODE] = curl_getinfo($sites[$i][0], CURLINFO_HTTP_CODE); + $sites[$i][self::HTTP_CODE] = + curl_getinfo($sites[$i][0], CURLINFO_HTTP_CODE); if(!$sites[$i][self::HTTP_CODE]) { $sites[$i][self::HTTP_CODE] = curl_error($sites[$i][0]); } - // Store Data into our $sites array, create a hash for deduplication purposes + /* + Store Data into our $sites array, create a hash for + deduplication purposes + */ if(isset($content)) { - $sites[$i][$value] = mb_substr($content, 0, PAGE_RANGE_REQUEST); - //to do dedup we strip script, noscript, and style tags as well as their content, then we strip tags, get rid of whitespace and hash - $strip_array = array('@<script[^>]*?>.*?</script>@si', '@<noscript[^>]*?>.*?</noscript>@si', '@<style[^>]*?>.*?</style>@si'); - $dedup_string = preg_replace($strip_array, '', $sites[$i][$value]); - $dedup_string = preg_replace('/\W+/', '', strip_tags($dedup_string)); + $sites[$i][$value] = + mb_substr($content, 0, PAGE_RANGE_REQUEST); + /* to do dedup we strip script, noscript, and style tags + as well as their content, then we strip tags, get rid + of whitespace and hash + */ + $strip_array = + array('@<script[^>]*?>.*?</script>@si', + '@<noscript[^>]*?>.*?</noscript>@si', + 
'@<style[^>]*?>.*?</style>@si'); + $dedup_string = preg_replace( + $strip_array, '', $sites[$i][$value]); + $dedup_string = preg_replace( + '/\W+/', '', strip_tags($dedup_string)); $sites[$i][$hash] = crawlHash($dedup_string); } @@ -138,16 +157,21 @@ class FetchUrl implements CrawlConstants //Get Time, Mime type and Character encoding $sites[$i][self::TIMESTAMP] = time(); - $type_parts = explode(";", curl_getinfo($sites[$i][0], CURLINFO_CONTENT_TYPE)); + $type_parts = + explode(";", curl_getinfo($sites[$i][0], + CURLINFO_CONTENT_TYPE)); $sites[$i][self::TYPE] = trim($type_parts[0]); if(isset($type_parts[1])) { $encoding_parts = explode("charset=", $type_parts[1]); if(isset($encoding_parts[1])) { - $sites[$i][self::ENCODING] = mb_strtoupper(trim($encoding_parts[1])); //hopefuly safe to trust encoding sent + $sites[$i][self::ENCODING] = + mb_strtoupper(trim($encoding_parts[1])); + //hopefuly safe to trust encoding sent } } else { - $sites[$i][self::ENCODING] = mb_detect_encoding($content, 'auto'); + $sites[$i][self::ENCODING] = + mb_detect_encoding($content, 'auto'); } @@ -158,7 +182,8 @@ class FetchUrl implements CrawlConstants } //end for if($timer) { - crawlLog(" Get Page Content time ".(changeInMicrotime($start_time))); + crawlLog(" Get Page Content time ". + (changeInMicrotime($start_time))); } curl_multi_close($agent_handler); diff --git a/lib/gzip_compressor.php b/lib/gzip_compressor.php index f4b8773c4..f09a4dd95 100755 --- a/lib/gzip_compressor.php +++ b/lib/gzip_compressor.php @@ -40,8 +40,9 @@ require_once "compressor.php"; /** - * Implementation of a Compressor using GZIP/GUNZIP as the filter. - * More details on these algorithms can be found at {@link http://en.wikipedia.org/wiki/Gzip} + * Implementation of a Compressor using GZIP/GUNZIP as the filter. 
+ * More details on these algorithms can be found at + * {@link http://en.wikipedia.org/wiki/Gzip} * * @author Chris Pollett * @package seek_quarry @@ -54,25 +55,25 @@ class GzipCompressor implements Compressor function __construct() {} /** - * Applies the Compressor compress filter to a string before it is inserted into a WebArchive. - * In this case, applying the filter means gzipping. + * Applies the Compressor compress filter to a string before it is inserted + * into a WebArchive. In this case, applying the filter means gzipping. * - * @param string $str string to apply filter to - * @return string the result of applying the filter + * @param string $str string to apply filter to + * @return string the result of applying the filter */ - public function compress($str) + function compress($str) { return gzcompress($str, 9); } /** - * Used to unapply the compress filter as when data is read out of a WebArchive. - * In this case, unapplying the filter means gunzipping. + * Used to unapply the compress filter as when data is read out of a + * WebArchive. In this case, unapplying the filter means gunzipping. 
* - * @param string $str data read from a string archive - * @return string result of uncompressing + * @param string $str data read from a string archive + * @return string result of uncompressing */ - public function uncompress($str) + function uncompress($str) { return gzuncompress($str); } diff --git a/lib/hash_table.php b/lib/hash_table.php index 8e36878b3..7a547d06b 100755 --- a/lib/hash_table.php +++ b/lib/hash_table.php @@ -66,7 +66,8 @@ class HashTable extends StringArray /** */ - public function __construct($fname, $num_values, $key_size, $value_size, $save_frequency = self::DEFAULT_SAVE_FREQUENCY) + public function __construct($fname, $num_values, $key_size, $value_size, + $save_frequency = self::DEFAULT_SAVE_FREQUENCY) { $this->key_size = $key_size; $this->value_size = $value_size; @@ -75,7 +76,8 @@ class HashTable extends StringArray $this->count = 0; - parent::__construct($fname, $num_values, $key_size + $value_size, $save_frequency); + parent::__construct($fname, $num_values, + $key_size + $value_size, $save_frequency); } public function insert($key, $value) @@ -87,9 +89,11 @@ class HashTable extends StringArray if($probe === false) { /* this is a little slow - the idea is we can't use deleted slots until we are sure $key isn't in the table + the idea is we can't use deleted slots until we are sure + $key isn't in the table */ - $probe = $this->lookupArray($key, array($null, $deleted), self::ALWAYS_RETURN_PROBE); + $probe = $this->lookupArray( + $key, array($null, $deleted), self::ALWAYS_RETURN_PROBE); if($probe === false) { crawlLog("No space in hash table"); @@ -120,18 +124,21 @@ class HashTable extends StringArray } - public function lookup($key, $return_probe_value = self::RETURN_VALUE) + function lookup($key, $return_probe_value = self::RETURN_VALUE) { - return $this->lookupArray($key, array($this->null), $return_probe_value); + return $this->lookupArray( + $key, array($this->null), $return_probe_value); } - public function lookupArray($key, 
$null_array, $return_probe_value = self::RETURN_VALUE) + function lookupArray($key, $null_array, + $return_probe_value = self::RETURN_VALUE) { $index = $this->hash($key); $num_values = $this->num_values; - $probe_array = array(self::RETURN_PROBE_ON_KEY_FOUND, self::ALWAYS_RETURN_PROBE); + $probe_array = array(self::RETURN_PROBE_ON_KEY_FOUND, + self::ALWAYS_RETURN_PROBE); for($j = 0; $j < $num_values; $j++) { $probe = ($index + $j) % $num_values; diff --git a/lib/index_archive_bundle.php b/lib/index_archive_bundle.php index 0c22e333a..5b349929a 100644 --- a/lib/index_archive_bundle.php +++ b/lib/index_archive_bundle.php @@ -78,10 +78,13 @@ function setOffsetPointers($data, &$objects, $offset_field) if(isset($tmp[1]) ) { list($word_key, $block_num) = $tmp; if(strcmp($word_key, "offset") != 0) { - if(($block_num +1)*BLOCK_SIZE < COMMON_WORD_THRESHOLD) { + if(($block_num +1)*BLOCK_SIZE < + COMMON_WORD_THRESHOLD) { $data[$word_key][$block_num] = $offset; - } else if(isset($docs_info[IndexingConstants::POINT_BLOCK])) { - $data[$word_key][IndexingConstants::LIST_OFFSET] = $offset; + } else if(isset( + $docs_info[IndexingConstants::POINT_BLOCK])) { + $data[$word_key][IndexingConstants::LIST_OFFSET] = + $offset; } } } @@ -137,7 +140,9 @@ class WordIterator implements IndexingConstants, CrawlConstants */ public function reset() { - $partition = WebArchiveBundle::selectPartition($this->word_key, $this->index->num_partitions_index); + $partition = + WebArchiveBundle::selectPartition($this->word_key, + $this->index->num_partitions_index); $this->info_block = $this->index->getPhraseIndexInfo($this->word_key); @@ -147,13 +152,16 @@ class WordIterator implements IndexingConstants, CrawlConstants while($this->limit >= $count_till_generation) { $this->info_block['CURRENT_GENERATION_INDEX']++; - if($this->num_generations <= $this->info_block['CURRENT_GENERATION_INDEX']) { + if($this->num_generations <= + $this->info_block['CURRENT_GENERATION_INDEX']) { $this->num_docs = 0; 
$this->current_pointer = -1; return; } - $info_block = $this->index->getPhraseIndexInfo($this->word_key, - $this->info_block['CURRENT_GENERATION_INDEX'], $this->info_block); + $info_block = $this->index->getPhraseIndexInfo( + $this->word_key, + $this->info_block['CURRENT_GENERATION_INDEX'], + $this->info_block); if($info_block !== NULL) { $this->info_block = $info_block; } @@ -175,7 +183,9 @@ class WordIterator implements IndexingConstants, CrawlConstants { if($this->info_block !== NULL) { - $info_block = $this->index->getPhraseIndexInfo($this->word_key, $this->info_block['CURRENT_GENERATION_INDEX'], $this->info_block); + $info_block = $this->index->getPhraseIndexInfo( + $this->word_key, $this->info_block['CURRENT_GENERATION_INDEX'], + $this->info_block); if($info_block === NULL) { return false; } @@ -186,9 +196,11 @@ class WordIterator implements IndexingConstants, CrawlConstants $this->current_pointer = floor($this->limit / BLOCK_SIZE); $this->last_block = $info_block[self::END_BLOCK]; - $this->num_full_blocks = floor($this->num_docs_generation / BLOCK_SIZE); + $this->num_full_blocks = + floor($this->num_docs_generation / BLOCK_SIZE); if($this->num_docs_generation > COMMON_WORD_THRESHOLD) { - $this->last_pointed_block = floor(COMMON_WORD_THRESHOLD / BLOCK_SIZE); + $this->last_pointed_block = + floor(COMMON_WORD_THRESHOLD / BLOCK_SIZE); } else { $this->last_pointed_block = $this->num_full_blocks; } @@ -204,7 +216,7 @@ class WordIterator implements IndexingConstants, CrawlConstants $this->list_offset = NULL; } else { $this->list_offset = $info_block[self::LIST_OFFSET][0]; - $this->current_block_num = $info_block[self::LIST_OFFSET][1]; + $this->current_block_num =$info_block[self::LIST_OFFSET][1]; } } @@ -221,7 +233,9 @@ class WordIterator implements IndexingConstants, CrawlConstants */ public function currentDocsWithWord($restrict_phrases = NULL) { - $generation = $this->info_block['GENERATIONS'][$this->info_block['CURRENT_GENERATION_INDEX']]; + $generation = + 
$this->info_block['GENERATIONS'][ + $this->info_block['CURRENT_GENERATION_INDEX']]; if($this->current_pointer >= 0) { if($this->current_pointer == $this->num_full_blocks) { $pages = $this->last_block; @@ -236,9 +250,14 @@ class WordIterator implements IndexingConstants, CrawlConstants } else { if(isset($this->block_pointers[$this->current_pointer])) { $doc_block = $this->index->getWordDocBlock($this->word_key, - $this->block_pointers[$this->current_pointer], $generation); - if(isset($doc_block[$this->word_key.":".$this->current_pointer])) { - $pages = $doc_block[$this->word_key.":".$this->current_pointer]; + $this->block_pointers[$this->current_pointer], + $generation); + if(isset( + $doc_block[$this->word_key.":".$this->current_pointer] + )) { + $pages = + $doc_block[ + $this->word_key.":".$this->current_pointer]; } else { $pages = array(); } @@ -259,13 +278,19 @@ class WordIterator implements IndexingConstants, CrawlConstants if(isset($doc_info[self::SUMMARY_OFFSET])) { - $page = $this->index->getPage($doc_key, $doc_info[self::SUMMARY_OFFSET]); - // build a string out of title, links, and description - $page_string = mb_strtolower(PhraseParser::extractWordStringPageSummary($page)); + $page = $this->index->getPage( + $doc_key, $doc_info[self::SUMMARY_OFFSET]); + /* build a string out of title, links, + and description + */ + $page_string = mb_strtolower( + PhraseParser::extractWordStringPageSummary( + $page)); $found = true; foreach($restrict_phrases as $phrase) { - if(mb_strpos($page_string, $phrase) === false) { + if(mb_strpos($page_string, $phrase) + === false) { $found = false; } } @@ -295,18 +320,21 @@ class WordIterator implements IndexingConstants, CrawlConstants if($doc_block == -1 || !is_array($doc_block)) { return NULL; } - if(isset($doc_block[self::LIST_OFFSET]) && $doc_block[self::LIST_OFFSET] != NULL) { + if(isset($doc_block[self::LIST_OFFSET]) && + $doc_block[self::LIST_OFFSET] != NULL) { $this->list_offset = $doc_block[self::LIST_OFFSET]; } 
$this->current_pointer ++; if($this->current_pointer > $this->num_full_blocks) { $flag = false; - while ($this->info_block['CURRENT_GENERATION_INDEX'] < $this->num_generations -1 && !$flag) { + while ($this->info_block['CURRENT_GENERATION_INDEX'] < + $this->num_generations -1 && !$flag) { $this->info_block['CURRENT_GENERATION_INDEX']++; $flag = $this->initGeneration(); } - if ($this->info_block['CURRENT_GENERATION_INDEX'] >= $this->num_generations -1) { + if ($this->info_block['CURRENT_GENERATION_INDEX'] >= + $this->num_generations -1) { $this->current_pointer = -1; } } @@ -338,7 +366,9 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants /** * */ - public function __construct($dir_name, $filter_size = -1, $num_partitions_summaries = NULL, $num_partitions_index = NULL, $description = NULL) + public function __construct($dir_name, $filter_size = -1, + $num_partitions_summaries = NULL, $num_partitions_index = NULL, + $description = NULL) { $this->dir_name = $dir_name; @@ -353,17 +383,21 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants } if(file_exists($this->dir_name."/generation.txt")) { - $this->generation_info = unserialize(file_get_contents($this->dir_name."/generation.txt")); + $this->generation_info = unserialize( + file_get_contents($this->dir_name."/generation.txt")); } else { $this->generation_info['ACTIVE'] = 0; $this->generation_info['NUM_WORDS'] = 0; - file_put_contents($this->dir_name."/generation.txt", serialize($this->generation_info)); + file_put_contents($this->dir_name."/generation.txt", + serialize($this->generation_info)); } $this->summaries = new WebArchiveBundle($dir_name."/summaries", $filter_size, $num_partitions_summaries, $description); $this->num_partitions_summaries = $this->summaries->num_partitions; - $this->index = new WebArchiveBundle($dir_name."/index".$this->generation_info['ACTIVE'], -1, $num_partitions_index); + $this->index = new WebArchiveBundle( + 
$dir_name."/index".$this->generation_info['ACTIVE'], -1, + $num_partitions_index); $this->num_partitions_index = $this->index->num_partitions; $this->description = $this->summaries->description; @@ -392,7 +426,7 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants if(!count($index_data) > 0) return; /* Arrange the words according to the partitions they are in - */ + */ $this->diagnostics['SELECT_TIME'] = 0; $this->diagnostics['INFO_BLOCKS_TIME'] = 0; @@ -401,27 +435,35 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants $start_time = microtime(); foreach($index_data as $word_key => $docs_info) { - $partition = WebArchiveBundle::selectPartition($word_key, $this->num_partitions_index); + $partition = WebArchiveBundle::selectPartition( + $word_key, $this->num_partitions_index); $out_data[$partition][$word_key] = $docs_info; } $this->diagnostics['SELECT_TIME'] += changeInMicrotime($start_time); - //for each partition add the word data for the partition to the partition web archive + /* for each partition add the word data for the partition to the + partition web archive + */ $cnt = 0; foreach($out_data as $partition => $word_data) { $this->addPartitionWordData($partition, $word_data); $cnt++; } - file_put_contents($this->dir_name."/generation.txt", serialize($this->generation_info)); + file_put_contents($this->dir_name."/generation.txt", + serialize($this->generation_info)); $out_data = NULL; gc_collect_cycles(); crawlLog("**ADD INDEX DIAGNOSTIC INFO..."); - crawlLog("**Time calculating select partition functions ".$this->diagnostics['SELECT_TIME']); - crawlLog("**Time reading info blocks ".$this->diagnostics['INFO_BLOCKS_TIME']); - crawlLog("**Time adding objects to index ".$this->diagnostics['ADD_OBJECTS_TIME']); - crawlLog("**Time adding to filters ".$this->diagnostics['ADD_FILTER_TIME']); + crawlLog("**Time calculating select partition functions ". 
+ $this->diagnostics['SELECT_TIME']); + crawlLog("**Time reading info blocks ". + $this->diagnostics['INFO_BLOCKS_TIME']); + crawlLog("**Time adding objects to index ". + $this->diagnostics['ADD_OBJECTS_TIME']); + crawlLog("**Time adding to filters ". + $this->diagnostics['ADD_FILTER_TIME']); crawlLog("**Number of partitions ".$cnt); } @@ -429,14 +471,16 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants /** * */ - public function addPartitionWordData($partition, &$word_data, $overwrite = false) + public function addPartitionWordData($partition, + &$word_data, $overwrite = false) { $start_time = microtime(); $block_data = $this->readPartitionInfoBlock($partition); if(isset($this->diagnostics['INFO_BLOCKS_TIME'])) { - $this->diagnostics['INFO_BLOCKS_TIME'] += changeInMicrotime($start_time); + $this->diagnostics['INFO_BLOCKS_TIME'] += + changeInMicrotime($start_time); } if($block_data == NULL) { @@ -453,9 +497,11 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants $start_time = microtime(); $this->addPartitionIndexFilter($partition, $word_key); - $this->addPartitionIndexFilter($partition, $word_key . $this->generation_info['ACTIVE']); + $this->addPartitionIndexFilter( + $partition, $word_key . 
$this->generation_info['ACTIVE']); if(isset($this->diagnostics['ADD_FILTER_TIME'])) { - $this->diagnostics['ADD_FILTER_TIME'] += changeInMicrotime($start_time); + $this->diagnostics['ADD_FILTER_TIME'] += + changeInMicrotime($start_time); } if(!isset($block_data[$word_key]) || $overwrite == true) { @@ -466,35 +512,42 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants $unfilled_block_num = 0; } else { - $unfilled_block_num = floor($block_data[$word_key][self::COUNT] / BLOCK_SIZE); + $unfilled_block_num = + floor($block_data[$word_key][self::COUNT] / BLOCK_SIZE); } $cnt = count($docs_info); $block_data[$word_key][self::COUNT] += $cnt; - $tmp = array_merge($block_data[$word_key][self::END_BLOCK], $docs_info); + $tmp = + array_merge($block_data[$word_key][self::END_BLOCK],$docs_info); uasort($tmp, "scoreOrderCallback"); $add_cnt = count($tmp); $num_blocks = floor($add_cnt / BLOCK_SIZE); - $block_data[$word_key][self::END_BLOCK] = array_slice($tmp, $num_blocks*BLOCK_SIZE); + $block_data[$word_key][self::END_BLOCK] = + array_slice($tmp, $num_blocks*BLOCK_SIZE); $first_common_flag = true; $min_common = NULL; $slice_cnt = $num_blocks - 1; - for($i = $unfilled_block_num + $num_blocks - 1 ; $i >= $unfilled_block_num ; $i--) { - $out_data[0][$word_key .":". $i] = array_slice($tmp, $slice_cnt*BLOCK_SIZE, BLOCK_SIZE); + for($i = $unfilled_block_num + $num_blocks - 1; + $i >= $unfilled_block_num ; $i--) { + $out_data[0][$word_key .":". $i] = + array_slice($tmp, $slice_cnt*BLOCK_SIZE, BLOCK_SIZE); if(($i+1)*BLOCK_SIZE > COMMON_WORD_THRESHOLD) { $min_common = $i; if($first_common_flag) { if(isset($block_data[$word_key][self::LIST_OFFSET])) { - $out_data[0][$word_key .":". $i][self::LIST_OFFSET] = + $out_data[0][$word_key .":". $i][self::LIST_OFFSET]= $block_data[$word_key][self::LIST_OFFSET]; } else { - $out_data[0][$word_key .":". $i][self::LIST_OFFSET] = NULL; + $out_data[0][$word_key .":". 
$i][self::LIST_OFFSET]= + NULL; } $first_common_flag = false; } else { - $out_data[0][$word_key .":". $i][self::LIST_OFFSET] = NULL; // next in list is in same block + $out_data[0][$word_key .":". $i][self::LIST_OFFSET] = + NULL; // next in list is in same block } } @@ -508,20 +561,25 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants } $start_time = microtime(); - $this->index->addObjectsPartition("offset", $partition, $out_data, $block_data, "setOffsetPointers", false); + $this->index->addObjectsPartition("offset", $partition, + $out_data, $block_data, "setOffsetPointers", false); if(isset($this->diagnostics['ADD_OBJECTS_TIME'])) { - $this->diagnostics['ADD_OBJECTS_TIME'] += changeInMicrotime($start_time); + $this->diagnostics['ADD_OBJECTS_TIME'] += + changeInMicrotime($start_time); } - if($this->generation_info['NUM_WORDS'] > $this->num_words_per_generation) { + if($this->generation_info['NUM_WORDS']>$this->num_words_per_generation){ $index_filter_size = $this->index->filter_size; $this->generation_info['ACTIVE']++; $this->generation_info['NUM_WORDS'] = 0; $this->index = new WebArchiveBundle( - $this->dir_name."/index".$this->generation_info['ACTIVE'], $index_filter_size, $this->num_partitions_index); - file_put_contents($this->dir_name."/generation.txt", serialize($this->generation_info)); + $this->dir_name."/index".$this->generation_info['ACTIVE'], + $index_filter_size, $this->num_partitions_index); + file_put_contents( + $this->dir_name."/generation.txt", + serialize($this->generation_info)); } } @@ -548,13 +606,18 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants public function initPartitionIndexFilter($partition) { if(!isset($this->index_partition_filters[$partition])) { - if(file_exists($this->dir_name."/index_filters/partition$partition.ftr")) { + if(file_exists($this->dir_name. 
+ "/index_filters/partition$partition.ftr")) { $this->index_partition_filters[$partition] = - BloomFilterFile::load($this->dir_name."/index_filters/partition$partition.ftr"); + BloomFilterFile::load( + $this->dir_name . + "/index_filters/partition$partition.ftr"); } else { $filter_size = $this->num_words_per_generation; $this->index_partition_filters[$partition] = - new BloomFilterFile($this->dir_name."/index_filters/partition$partition.ftr", $filter_size); + new BloomFilterFile( + $this->dir_name . + "/index_filters/partition$partition.ftr", $filter_size); } } return true; @@ -563,7 +626,8 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants /** * */ - public function getSummariesByHash($word_key, $limit, $num, $restrict_phrases = NULL, $phrase_key = NULL) + public function getSummariesByHash($word_key, $limit, $num, + $restrict_phrases = NULL, $phrase_key = NULL) { if($phrase_key == NULL) { $phrase_key = $word_key; @@ -574,7 +638,8 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants if($phrase_info == NULL || (isset($phrase_info[self::PARTIAL_COUNT]) && $phrase_info[self::PARTIAL_COUNT] < $limit + $num)) { - $this->addPhraseIndex($word_key, $restrict_phrases, $phrase_key, $limit + $num); + $this->addPhraseIndex( + $word_key, $restrict_phrases, $phrase_key, $limit + $num); } $iterator = new WordIterator($phrase_key, $this, $limit); @@ -582,11 +647,13 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants $num_retrieved = 0; $pages = array(); - while(is_array($next_docs = $iterator->nextDocsWithWord()) && $num_retrieved < $num) { + while(is_array($next_docs = $iterator->nextDocsWithWord()) && + $num_retrieved < $num) { $num_docs_in_block = count($next_docs); foreach($next_docs as $doc_key => $doc_info) { if(isset($doc_info[self::SUMMARY_OFFSET])) { - $page = $this->getPage($doc_key, $doc_info[self::SUMMARY_OFFSET]); + $page = $this->getPage( + $doc_key, $doc_info[self::SUMMARY_OFFSET]); $pages[] = 
array_merge($doc_info, $page); $num_retrieved++; } @@ -616,7 +683,8 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants if($generation == -1) { return $this->index->getPage($word_key, $offset); } else { - $archive = new WebArchiveBundle($this->dir_name."/index".$generation); + $archive = + new WebArchiveBundle($this->dir_name."/index".$generation); return $archive->getPage($word_key, $offset); } } @@ -626,7 +694,8 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants */ public function getPageByPartition($partition, $offset, $file_handle = NULL) { - return $this->index->getPageByPartition($partition, $offset, $file_handle); + return $this->index->getPageByPartition( + $partition, $offset, $file_handle); } /** @@ -642,7 +711,8 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants */ public function differenceContainsPages(&$page_array, $field_name = NULL) { - return $this->summaries->differencePagesFilter($page_array, $field_name); + return $this->summaries->differencePagesFilter( + $page_array, $field_name); } /** @@ -661,10 +731,13 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants /** * */ - public function getPhraseIndexInfo($phrase_key, $generation_index = 0, $info_block = NULL) + public function getPhraseIndexInfo( + $phrase_key, $generation_index = 0, $info_block = NULL) { - $partition = WebArchiveBundle::selectPartition($phrase_key, $this->num_partitions_index); + $partition = + WebArchiveBundle::selectPartition( + $phrase_key, $this->num_partitions_index); $info = array(); if($info_block == NULL) { @@ -684,7 +757,8 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants for($i = 0; $i <= $active_generation; $i++) { if($filter->contains($phrase_key . $i)) { if($filter->contains("delete". $phrase_key . 
$i)) { - $info['GENERATIONS'] = array(); //truncate all previously seen + $info['GENERATIONS'] = array(); + //truncate all previously seen } else { $info['GENERATIONS'][] = $i; } @@ -699,24 +773,30 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants $sample_size = min($num_generations, SAMPLE_GENERATIONS); $sum_count = 0; for($i = 0; $i < $sample_size; $i++) { - $block_info = $this->readPartitionInfoBlock($partition, $info['GENERATIONS'][$i]); + $block_info = + $this->readPartitionInfoBlock( + $partition, $info['GENERATIONS'][$i]); $sum_count += $block_info[$phrase_key][self::COUNT]; } - $info['TOTAL_COUNT'] = ceil(($sum_count*$num_generations)/$sample_size); // this is an estimate + $info['TOTAL_COUNT'] = + ceil(($sum_count*$num_generations)/$sample_size); + // this is an estimate } else { $info['TOTAL_COUNT'] = $info_block['TOTAL_COUNT']; $info['GENERATIONS'] = $info_block['GENERATIONS']; } - $block_info = $this->readPartitionInfoBlock($partition, $info['GENERATIONS'][$generation_index]); + $block_info = $this->readPartitionInfoBlock( + $partition, $info['GENERATIONS'][$generation_index]); $phrase_info = $block_info[$phrase_key]; $info['CURRENT_GENERATION_INDEX'] = $generation_index; if(isset($phrase_info)) { - $phrase_info['CURRENT_GENERATION_INDEX'] = $info['CURRENT_GENERATION_INDEX']; + $phrase_info['CURRENT_GENERATION_INDEX'] = + $info['CURRENT_GENERATION_INDEX']; $phrase_info['TOTAL_COUNT'] = $info['TOTAL_COUNT']; $phrase_info['GENERATIONS'] = $info['GENERATIONS']; @@ -732,7 +812,8 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants */ public function setPhraseIndexInfo($phrase_key, $info) { - $partition = WebArchiveBundle::selectPartition($phrase_key, $this->num_partitions_index); + $partition = WebArchiveBundle::selectPartition( + $phrase_key, $this->num_partitions_index); $partition_block_data = $this->readPartitionInfoBlock($partition); @@ -749,13 +830,16 @@ class IndexArchiveBundle implements IndexingConstants, 
CrawlConstants /** * */ - public function addPhraseIndex($word_key, $restrict_phrases, $phrase_key, $num_needed) + public function addPhraseIndex($word_key, $restrict_phrases, + $phrase_key, $num_needed) { if($phrase_key == NULL) { return; } - $partition = WebArchiveBundle::selectPartition($phrase_key, $this->num_partitions_index); + $partition = + WebArchiveBundle::selectPartition($phrase_key, + $this->num_partitions_index); $iterator = new WordIterator($word_key, $this); $current_count = 0; @@ -764,20 +848,24 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants $partial_flag = false; $first_time = true; - while(is_array($next_docs = $iterator->nextDocsWithWord($restrict_phrases))) { + while(is_array($next_docs = + $iterator->nextDocsWithWord($restrict_phrases))) { $buffer = array_merge($buffer, $next_docs); $cnt = count($buffer); if($cnt > COMMON_WORD_THRESHOLD) { - $word_data[$phrase_key] = array_slice($buffer, 0, COMMON_WORD_THRESHOLD); + $word_data[$phrase_key] = + array_slice($buffer, 0, COMMON_WORD_THRESHOLD); - $this->addPartitionWordData($partition, $word_data, $first_time); + $this->addPartitionWordData($partition,$word_data, $first_time); $first_time = false; $buffer = array_slice($buffer, COMMON_WORD_THRESHOLD); $current_count += COMMON_WORD_THRESHOLD; if($current_count > $num_needed) { - // notice $num_needed only plays a role when greater than COMMON_WORD_THRESHOLD + /* notice $num_needed only plays a role when + greater than COMMON_WORD_THRESHOLD + */ $partial_flag = true; break; } @@ -786,20 +874,25 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants $word_data[$phrase_key] = $buffer; - $this->addPartitionIndexFilter($partition, "delete". $phrase_key . ($this->generation_info['ACTIVE'] - 1)); + $this->addPartitionIndexFilter( + $partition, + "delete". $phrase_key . 
($this->generation_info['ACTIVE'] - 1)); $this->addPartitionWordData($partition, $word_data); $this->addPartitionIndexFilter($partition, $phrase_key); - $this->addPartitionIndexFilter($partition, $phrase_key . $this->generation_info['ACTIVE']); + $this->addPartitionIndexFilter($partition, $phrase_key . + $this->generation_info['ACTIVE']); $this->index_partition_filters[$partition]->save(); - file_put_contents($this->dir_name."/generation.txt", serialize($this->generation_info)); + file_put_contents($this->dir_name."/generation.txt", + serialize($this->generation_info)); $block_info = $this->readPartitionInfoBlock($partition); $info = $block_info[$phrase_key]; $current_count += count($buffer); if($partial_flag) { $info[self::PARTIAL_COUNT] = $current_count; - $info[self::COUNT] = floor($current_count * $iterator->num_docs/$iterator->seen_docs); + $info[self::COUNT] = + floor($current_count*$iterator->num_docs/$iterator->seen_docs); $this->setPhraseIndexInfo($phrase_key, $info); } } @@ -807,7 +900,8 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants /** * */ - public function getSelectiveWords($word_keys, $num, $comparison = "lessThan") //lessThan is in utility.php + public function getSelectiveWords($word_keys, $num, $comparison="lessThan") + //lessThan is in utility.php { $words_array = array(); if(!is_array($word_keys) || count($word_keys) < 1) { return NULL;} @@ -834,7 +928,8 @@ class IndexArchiveBundle implements IndexingConstants, CrawlConstants if($generation == -1) { return $this->index->readPartitionInfoBlock($partition); } else { - $archive = new WebArchiveBundle($this->dir_name."/index".$generation); + $archive = new WebArchiveBundle( + $this->dir_name."/index".$generation); return $archive->readPartitionInfoBlock($partition); } diff --git a/lib/non_compressor.php b/lib/non_compressor.php index 757d98ea2..fc2e07cbb 100755 --- a/lib/non_compressor.php +++ b/lib/non_compressor.php @@ -56,25 +56,25 @@ class NonCompressor implements 
Compressor function __construct() {} /** - * Applies the Compressor compress filter to a string before it is inserted into a WebArchive. - * In this case, the filter does nothing. + * Applies the Compressor compress filter to a string before it is inserted + * into a WebArchive. In this case, the filter does nothing. * - * @param string $str string to apply filter to - * @return string the result of applying the filter + * @param string $str string to apply filter to + * @return string the result of applying the filter */ - public function compress($str) + function compress($str) { return $str; } /** - * Used to unapply the compress filter as when data is read out of a WebArchive. - * In this case, the unapplying filter does nothing. + * Used to unapply the compress filter as when data is read out of a + * WebArchive. In this case, the unapplying filter does nothing. * - * @param string $str data read from a string archive - * @return string result of uncompressing + * @param string $str data read from a string archive + * @return string result of uncompressing */ - public function uncompress($str) + function uncompress($str) { return $str; } diff --git a/lib/notifier.php b/lib/notifier.php index 8ca72932a..0e59a6143 100755 --- a/lib/notifier.php +++ b/lib/notifier.php @@ -34,18 +34,21 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} /** - * A Notifier is an object which will be notified by a priority queue - * when the index in the queue viewed as array of some data item has been changed. + * A Notifier is an object which will be notified by a priority queue + * when the index in the queue viewed as array of some data item has been + * changed. * - * A Notifier is notified when the index in the queue viewed as array of some data - * item has been changed, this gives the Notifier object the ability to update its value of - * the index for that data item. As an example, in the search engine, the WebQueueBundle class - * implements Notifier. 
Web queue bundles store url together with their weights - * and allow one to get out the url of highest weight. This is implemented by - * storing in a PriorityQueue keys consisting of hashes of urls (as fixed length) - * and values consisting of the weight. Then in a web archive the url and its index - * in the priority queue is stored. When the index in the queue changes, the WebQueueBundle's - * notify method is called to adjust the index that is stored in the web archive. + * A Notifier is notified when the index in the queue viewed as array of some + * data item has been changed, this gives the Notifier object the ability to + * update its value of the index for that data item. As an example, in the + * search engine, the WebQueueBundle class implements Notifier. Web queue + * bundles store url together with their weights and allow one to get out the + * url of highest weight. This is implemented by storing in a PriorityQueue + * keys consisting of hashes of urls (as fixed length) and values consisting of + * the weight. Then in a web archive the url and its index in the priority + * queue is stored. When the index in the queue changes, the WebQueueBundle's + * notify method is called to adjust the index that is stored in the web + * archive. * * @author Chris Pollett * @package seek_quarry @@ -56,8 +59,8 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} interface Notifier { /** - * Handles the update of the index of a data item in a queue with respect to the - * Notifier object. + * Handles the update of the index of a data item in a queue with respect + * to the Notifier object. 
* * @param int $index the index of a row in a heap-based priority queue * @param mixed $data the data that is stored at that index diff --git a/lib/persistent_structure.php b/lib/persistent_structure.php index aa364054d..3f6f7c3b6 100755 --- a/lib/persistent_structure.php +++ b/lib/persistent_structure.php @@ -34,9 +34,11 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} /** - * A PersistentStructure is a data structure which every so many operations will be saved to secondary storage (such as disk). - * An operation occurs whenever the PersistentStructure's checkSave method is called. A PersistentStructure - * also supports the ability to be load (read in from) secondary storage. + * A PersistentStructure is a data structure which every so many operations + * will be saved to secondary storage (such as disk). + * An operation occurs whenever the PersistentStructure's checkSave method is + * called. A PersistentStructure also supports the ability to be load + * (read in from) secondary storage. * * @author Chris Pollett * @@ -64,12 +66,15 @@ class PersistentStructure var $save_frequency; /** - * Sets up the file name and save frequency for the PersistentStructure, initializes the oepration count + * Sets up the file name and save frequency for the PersistentStructure, + * initializes the oepration count * - * @param string $fname the name of the file to store the PersistentStructure in - * @param int $save_frequency the number of operation before a save + * @param string $fname the name of the file to store the + * PersistentStructure in + * @param int $save_frequency the number of operation before a save */ - public function __construct($fname, $save_frequency = self::DEFAULT_SAVE_FREQUENCY) + public function __construct($fname, + $save_frequency = self::DEFAULT_SAVE_FREQUENCY) { $this->filename = $fname; $this->save_frequency = $save_frequency; @@ -96,8 +101,8 @@ class PersistentStructure } /** - * Add one to the unsaved_operations count. 
If this goes above the save_frquency - * then save the PersistentStructure to secondary storage + * Add one to the unsaved_operations count. If this goes above the + * save_frquency then save the PersistentStructure to secondary storage */ function checkSave() { diff --git a/lib/phrase_parser.php b/lib/phrase_parser.php index e24f3a4b7..3c4d9142a 100755 --- a/lib/phrase_parser.php +++ b/lib/phrase_parser.php @@ -55,10 +55,12 @@ class PhraseParser /** * */ - public static function extractWordStringPageSummary($page) + static function extractWordStringPageSummary($page) { - $title_phrase_string = mb_ereg_replace("[[:punct:]]", " ", $page[CrawlConstants::TITLE]); - $description_phrase_string = mb_ereg_replace("[[:punct:]]", " ", $page[CrawlConstants::DESCRIPTION]); + $title_phrase_string = mb_ereg_replace("[[:punct:]]", " ", + $page[CrawlConstants::TITLE]); + $description_phrase_string = mb_ereg_replace("[[:punct:]]", " ", + $page[CrawlConstants::DESCRIPTION]); $link_phrase_string = ""; $link_urls = array(); @@ -66,8 +68,10 @@ class PhraseParser $link_phrase_string .= " $link_text"; } - $link_phrase_string = mb_ereg_replace("[[:punct:]]", " ", $link_phrase_string); - $page_string = $title_phrase_string . " " . $description_phrase_string . " " . $link_phrase_string; + $link_phrase_string = mb_ereg_replace("[[:punct:]]", " ", + $link_phrase_string); + $page_string = $title_phrase_string . " " . $description_phrase_string . + " " . 
$link_phrase_string; $page_string = preg_replace("/(\s)+/", " ", $page_string); return $page_string; @@ -76,13 +80,15 @@ class PhraseParser /** * */ - public static function extractPhrasesAndCount($string, $len = MAX_PHRASE_LEN) + static function extractPhrasesAndCount($string, + $len = MAX_PHRASE_LEN) { $phrases = array(); for($i = 0; $i < $len; $i++) { - $phrases = array_merge($phrases, self::extractPhrasesOfLength($string, $i)); + $phrases = + array_merge($phrases,self::extractPhrasesOfLength($string, $i)); } $phrase_counts = array_count_values($phrases); @@ -93,12 +99,13 @@ class PhraseParser /** * */ - public static function extractPhrasesOfLength($string, $phrase_len) + static function extractPhrasesOfLength($string, $phrase_len) { $phrases = array(); for($i = 0; $i < $phrase_len; $i++) { - $phrases = array_merge($phrases, self::extractPhrasesOfLengthOffset($string, $phrase_len, $i)); + $phrases = array_merge($phrases, + self::extractPhrasesOfLengthOffset($string, $phrase_len, $i)); } return $phrases; @@ -107,7 +114,8 @@ class PhraseParser /** * */ - private static function extractPhrasesOfLengthOffset($string, $phrase_len, $offset) + static function extractPhrasesOfLengthOffset($string, + $phrase_len, $offset) { $words = mb_split("[[:space:]]", $string); diff --git a/lib/porter_stemmer.php b/lib/porter_stemmer.php index f611da538..8e953dd36 100755 --- a/lib/porter_stemmer.php +++ b/lib/porter_stemmer.php @@ -88,7 +88,7 @@ class PorterStemmer } /** m() measures the number of consonant sequences between k0 and j. 
if c is - * a consonant sequence and v a vowel sequence, and <..> indicates arbitrary + * a consonant sequence and v a vowel sequence, and <.> indicates arbitrary * presence, * <c><v> gives 0 @@ -161,7 +161,8 @@ class PorterStemmer private static function cvc($i) { - if ($i < 2 || !self::cons($i) || self::cons($i - 1) || !self::cons($i - 2)) return false; + if ($i < 2 || !self::cons($i) || self::cons($i - 1) || + !self::cons($i - 2)) return false; $ch = self::$buffer[$i]; if ($ch == 'w' || $ch == 'x' || $ch == 'y') return false; @@ -176,7 +177,8 @@ class PorterStemmer $len = strlen($s); $loc = self::$k - $len + 1; - if($loc < 0 || substr_compare(self::$buffer, $s, $loc, $len) != 0) return false; + if($loc < 0 || + substr_compare(self::$buffer, $s, $loc, $len) != 0) return false; self::$j = self::$k - $len; @@ -236,7 +238,8 @@ class PorterStemmer } if (self::ends("eed")) { if (self::m() > 0) self::$k--; - } else if ((self::ends("ed") || self::ends("ing")) && self::vowelinstem()) { + } else if ((self::ends("ed") || self::ends("ing")) && + self::vowelinstem()) { self::$k = self::$j; if (self::ends("at")) { self::setto("ate"); @@ -265,7 +268,7 @@ class PorterStemmer /* step2() maps double suffices to single ones. so -ization ( = -ize plus - -ation) maps to -ize etc. note that the string before the suffix must give + -ation) maps to -ize etc.Note that the string before the suffix must give m() > 0. 
*/ private static function step2() @@ -368,7 +371,9 @@ class PorterStemmer if (self::ends("ent")) break; return; case 'o': - if (self::ends("ion") && self::$j >= 0 && (self::$buffer[self::$j] == 's' || self::$buffer[self::$j] == 't')) break; + if (self::ends("ion") && self::$j >= 0 && + (self::$buffer[self::$j] == 's' || + self::$buffer[self::$j] == 't')) break; if (self::ends("ou")) break; return; /* takes care of -ous */ @@ -406,7 +411,8 @@ class PorterStemmer $a = self::m(); if ($a > 1 || $a == 1 && !self::cvc(self::$k - 1)) self::$k--; } - if (self::$buffer[self::$k] == 'l' && self::doublec(self::$k) && self::m() > 1) self::$k--; + if (self::$buffer[self::$k] == 'l' && + self::doublec(self::$k) && self::m() > 1) self::$k--; } diff --git a/lib/priority_queue.php b/lib/priority_queue.php index c48eadbb4..d6973d2ef 100755 --- a/lib/priority_queue.php +++ b/lib/priority_queue.php @@ -34,7 +34,7 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} /** - * Load in base classes and interfaces, get the crawlHash function, if necessary + * Load in base classes and interfaces,get the crawlHash function, if necessary */ require_once "string_array.php"; require_once "notifier.php"; @@ -65,7 +65,9 @@ class PriorityQueue extends StringArray implements CrawlConstants /** * */ - public function __construct($fname, $num_values, $value_size, $min_or_max, $notifier = NULL, $save_frequency = self::DEFAULT_SAVE_FREQUENCY) + public function __construct($fname, $num_values, $value_size, + $min_or_max, $notifier = NULL, + $save_frequency = self::DEFAULT_SAVE_FREQUENCY) { $this->num_values = $num_values; $this->value_size = $value_size; @@ -75,7 +77,8 @@ class PriorityQueue extends StringArray implements CrawlConstants $this->notifier = $notifier; - parent::__construct($fname, $num_values, $value_size + $this->weight_size, $save_frequency); + parent::__construct($fname, $num_values, + $value_size + $this->weight_size, $save_frequency); } @@ -193,7 +196,8 @@ class PriorityQueue 
extends StringArray implements CrawlConstants $total_weight = $this->totalWeight(); if($total_weight <= 0) { - crawlLog("Total queue weight was zero!!! Doing uniform renormalization!"); + crawlLog( + "Total queue weight was zero!! Doing uniform renormalization!"); } for($i = 1; $i <= $count; $i++) { @@ -254,7 +258,7 @@ class PriorityQueue extends StringArray implements CrawlConstants if($child < $count) { // this 'if' checks if there is a right child $right_child_row = $this->getRow($child + 1); - if($this->compare($left_child_row[1], $right_child_row[1]) < 0) { + if($this->compare($left_child_row[1], $right_child_row[1]) <0) { $child++; } } diff --git a/lib/processors/doc_processor.php b/lib/processors/doc_processor.php index 22c9076a3..018887458 100755 --- a/lib/processors/doc_processor.php +++ b/lib/processors/doc_processor.php @@ -81,8 +81,8 @@ class DocProcessor extends TextProcessor * A Word Doc conists of a FIB, Piece Table, and * DocumentStream. The last contains the text. * The piece table is supposed to be used to reconstruct - * the order of the text from the DocumentStream and the FIB, file information block, - * is supposed to tell us where the piece table is. + * the order of the text from the DocumentStream and the FIB, file + * information block,is supposed to tell us where the piece table is. * I am not using any of this for now. I am just brute * force looking for the text which I know has to be at a page (256 byte) * boundary. I then go until I no longer see ASCII. 
So the order @@ -124,7 +124,8 @@ class DocProcessor extends TextProcessor $is_text = true; for($i = 0; $i < 8; $i++) { $ascii = ord($doc[$pos]); - if (!((9 <= $ascii && $ascii <= 13) || (32 <= $ascii && $ascii <= 126)) ){ + if (!((9 <= $ascii && $ascii <= 13) || + (32 <= $ascii && $ascii <= 126)) ){ $is_text = false; break; } @@ -159,7 +160,8 @@ class DocProcessor extends TextProcessor for($i = 0; $i < 8; $i++) { if(isset($doc[$pos])) { $ascii = ord($doc[$pos]); - if((9<= $ascii && $ascii <= 13) ||(32<= $ascii && $ascii <= 126) ) { + if((9<= $ascii && $ascii <= 13) || + (32<= $ascii && $ascii <= 126) ) { $text .= chr($ascii); } } diff --git a/lib/processors/gif_processor.php b/lib/processors/gif_processor.php index 17031eee3..e8c0f417c 100755 --- a/lib/processors/gif_processor.php +++ b/lib/processors/gif_processor.php @@ -56,14 +56,19 @@ class GifProcessor extends ImageProcessor { if(is_string($page)) { file_put_contents(CRAWL_DIR."/cache/tmp.gif", $page); - $image = @imagecreatefromgif(CRAWL_DIR."/cache/tmp.gif"); + $image = @imagecreatefromgif(CRAWL_DIR."/cache/tmp.gif"); $thumb_string = self::createThumb($image); $summary[self::TITLE] = ""; - $summary[self::DESCRIPTION] = "Image of ".UrlParser::getDocumentFilename($url); + $summary[self::DESCRIPTION] = + "Image of ".UrlParser::getDocumentFilename($url); $summary[self::LINKS] = array(); - $summary[self::PAGE] = "<html><body><div><img src='data:image/gif;base64,".base64_encode($page) - . "' alt='".$summary[self::DESCRIPTION]."' /></div></body></html>"; - $summary[self::THUMB] = 'data:image/jpeg;base64,'.base64_encode($thumb_string); + $summary[self::PAGE] = + "<html><body><div><img src='data:image/gif;base64,". + base64_encode($page) . + "' alt='".$summary[self::DESCRIPTION]. + "' /></div></body></html>"; + $summary[self::THUMB] = 'data:image/jpeg;base64,'. 
+ base64_encode($thumb_string); } return $summary; diff --git a/lib/processors/html_processor.php b/lib/processors/html_processor.php index fe17a3978..e24e3ad03 100755 --- a/lib/processors/html_processor.php +++ b/lib/processors/html_processor.php @@ -82,7 +82,8 @@ class HtmlProcessor extends TextProcessor /** - * Return a document object based on a string containing the contents of a web page + * Return a document object based on a string containing the contents of + * a web page * * @param string $page a web page * @@ -98,8 +99,8 @@ class HtmlProcessor extends TextProcessor } /** - * Check if there is a meta tag in the supplied document object that forbids robots - * from crawling the page corresponding to the dom object. + * Check if there is a meta tag in the supplied document object that + * forbids robots from crawling the page corresponding to the dom object. * * @param object $dom - a document object to check the meta tags for * @@ -112,7 +113,8 @@ class HtmlProcessor extends TextProcessor foreach($metas as $meta) { // don't crawl if either noindex or nofollow - if(mb_stristr($meta->getAttribute('content'),"NOINDEX") || mb_stristr($meta->getAttribute('content'), "NOFOLLOW")) + if(mb_stristr($meta->getAttribute('content'),"NOINDEX") || + mb_stristr($meta->getAttribute('content'), "NOFOLLOW")) { return false; } } @@ -123,7 +125,6 @@ class HtmlProcessor extends TextProcessor * Returns html head title of a webpage based on its document object * * @param object $dom a document object to extract a title from. - * * @return string a title of the page * */ @@ -144,11 +145,11 @@ class HtmlProcessor extends TextProcessor } /** - * Returns descriptive text concerning a webpage based on its document object - * - * @param object $dom a document object to extract a description from. 
+ * Returns descriptive text concerning a webpage based on its document + * object * - * @return string a description of the page + * @param object $dom a document object to extract a description from. + * @return string a description of the page */ static function description($dom) { $sites = array(); @@ -166,7 +167,8 @@ class HtmlProcessor extends TextProcessor } //concatenate the contents of all the h1, h2 tags in the document - $headings = $xpath->evaluate("/html/body//h1|/html/body//h2|/html/body//h3|/html/body//p[1]"); + $headings = $xpath->evaluate( + "/html/body//h1|/html/body//h2|/html/body//h3|/html/body//p[1]"); foreach($headings as $h) { $description .= " ".$h->textContent; @@ -197,7 +199,8 @@ class HtmlProcessor extends TextProcessor foreach($hrefs as $href) { if($i < MAX_LINKS_PER_PAGE) { - $url = UrlParser::canonicalLink($href->getAttribute('href'), $site); + $url = UrlParser::canonicalLink( + $href->getAttribute('href'), $site); if(!UrlParser::checkRecursiveUrl($url)) { if(isset($sites[$url])) { $sites[$url] .=" ".strip_tags($href->textContent); @@ -214,7 +217,8 @@ class HtmlProcessor extends TextProcessor $frames = $xpath->evaluate("/html/frameset/frame|/html/body//iframe"); foreach($frames as $frame) { if($i < MAX_LINKS_PER_PAGE) { - $url = UrlParser::canonicalLink($frame->getAttribute('src'), $site); + $url = UrlParser::canonicalLink( + $frame->getAttribute('src'), $site); if(!UrlParser::checkRecursiveUrl($url)) { if(isset($sites[$url])) { @@ -238,7 +242,8 @@ class HtmlProcessor extends TextProcessor if(strlen($alt) < 1) { continue; } - $url = UrlParser::canonicalLink($img->getAttribute('src'), $site); + $url = UrlParser::canonicalLink( + $img->getAttribute('src'), $site); if(!UrlParser::checkRecursiveUrl($url)) { if(isset($sites[$url])) { $sites[$url] .=" ".$alt; diff --git a/lib/processors/image_processor.php b/lib/processors/image_processor.php index 6d65f7c40..052c876b1 100755 --- a/lib/processors/image_processor.php +++ 
b/lib/processors/image_processor.php @@ -34,13 +34,15 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} /** - * Used by subclasses, so have succinct access (i.e., can use self:: rather than CrawlConstants::) to constants like: - * CrawlConstants::TITLE, CrawlConstants::DESCRIPTION, etc. + * Used by subclasses, so have succinct access (i.e., can use self:: rather + * than CrawlConstants::) to constants like: + * CrawlConstants::TITLE, CrawlConstants::DESCRIPTION, etc. */ require_once BASE_DIR."/lib/crawl_constants.php"; /** - * Base abstract class common to all processors used to create crawl summary information from images + * Base abstract class common to all processors used to create crawl summary + * information from images * * @author Chris Pollett * @package seek_quarry @@ -50,12 +52,15 @@ abstract class ImageProcessor implements CrawlConstants { /** - * Extract summary data from the image provided in $page together the url in $url where it was downloaded from - * - * ImageProcessor class defers a proper implementation of this method to subclasses - * @param string $page the image represented as a character string - * @param string $url the url where the image was downloaded from - * @return array summary information including a thumbnail and a description (where the description is just the url) + * Extract summary data from the image provided in $page together the url + * in $url where it was downloaded from + * + * ImageProcessor class defers a proper implementation of this method to + * subclasses + * @param string $page the image represented as a character string + * @param string $url the url where the image was downloaded from + * @return array summary information including a thumbnail and a + * description (where the description is just the url) */ static function process($page, $url) { return NULL;} @@ -72,7 +77,8 @@ abstract class ImageProcessor implements CrawlConstants $size_x = imagesx($image); $size_y = imagesy($image); - 
@imagecopyresampled($thumb, $image, 0,0, 0,0, 50, 50, $size_x, $size_y); + @imagecopyresampled($thumb, + $image, 0,0, 0,0, 50, 50, $size_x, $size_y); imagedestroy($image); } imagejpeg( $thumb, CRAWL_DIR."/cache/thumb.jpg", 100 ); diff --git a/lib/processors/jpg_processor.php b/lib/processors/jpg_processor.php index 7e838719d..443f28f6e 100755 --- a/lib/processors/jpg_processor.php +++ b/lib/processors/jpg_processor.php @@ -59,11 +59,15 @@ class JpgProcessor extends ImageProcessor $image = @imagecreatefromjpeg(CRAWL_DIR."/cache/tmp.jpg"); $thumb_string = self::createThumb($image); $summary[self::TITLE] = ""; - $summary[self::DESCRIPTION] = "Image of ".UrlParser::getDocumentFilename($url); + $summary[self::DESCRIPTION] = "Image of ". + UrlParser::getDocumentFilename($url); $summary[self::LINKS] = array(); - $summary[self::PAGE] = "<html><body><div><img src='data:image/jpeg;base64," - . base64_encode($page)."' alt='".$summary[self::DESCRIPTION]."' /></div></body></html>"; - $summary[self::THUMB] = 'data:image/jpeg;base64,'.base64_encode($thumb_string); + $summary[self::PAGE] = + "<html><body><div><img src='data:image/jpeg;base64," . + base64_encode($page)."' alt='".$summary[self::DESCRIPTION]. + "' /></div></body></html>"; + $summary[self::THUMB] = 'data:image/jpeg;base64,'. 
+ base64_encode($thumb_string); } return $summary; } diff --git a/lib/processors/pdf_processor.php b/lib/processors/pdf_processor.php index f67c487f8..2c5e84112 100755 --- a/lib/processors/pdf_processor.php +++ b/lib/processors/pdf_processor.php @@ -88,11 +88,15 @@ class PdfProcessor extends TextProcessor $i = 0; while($cur_pos < $len) { - list($cur_pos, $object_string) = self::getNextObject($pdf_string, $cur_pos); + list($cur_pos, $object_string) = + self::getNextObject($pdf_string, $cur_pos); $object_dictionary = self::getObjectDictionary($object_string); - if(!self::objectDictionaryHas($object_dictionary, array("Image", "Catalog"))) { - $stream_data = rtrim(ltrim(self::getObjectStream($object_string))); - if(self::objectDictionaryHas($object_dictionary, array("FlateDecode"))) { + if(!self::objectDictionaryHas( + $object_dictionary, array("Image", "Catalog"))) { + $stream_data = + rtrim(ltrim(self::getObjectStream($object_string))); + if(self::objectDictionaryHas( + $object_dictionary, array("FlateDecode"))) { $stream_data = @gzuncompress($stream_data); if(strpos($stream_data, "PS-AdobeFont")){ $out .= $stream_data; @@ -150,7 +154,8 @@ class PdfProcessor extends TextProcessor */ static function getObjectDictionary($object_string) { - list( , $object_dictionary) =self::getBetweenTags($object_string, 0, '<<', '>>'); + list( , $object_dictionary) = + self::getBetweenTags($object_string, 0, '<<', '>>'); return $object_dictionary; } @@ -159,7 +164,8 @@ class PdfProcessor extends TextProcessor */ static function getObjectStream($object_string) { - list( , $stream_data) = self::getBetweenTags($object_string, 0, 'stream', 'endstream'); + list( , $stream_data) = + self::getBetweenTags($object_string, 0, 'stream', 'endstream'); return $stream_data; } @@ -173,10 +179,12 @@ class PdfProcessor extends TextProcessor //replace ASCII codes in decimal with their value $data = preg_replace_callback('/\\\(\d{3})/', - create_function( '$matches', 'return chr(intval($matches[1]));'), 
$data); + create_function( '$matches', 'return chr(intval($matches[1]));'), + $data); //replace ASCII codes in hex with their value $data = preg_replace_callback('/\<([0-9A-F]{2})\>/', - create_function( '$matches', 'return chr(hexdec($matches[1]));'), $data); + create_function( '$matches', 'return chr(hexdec($matches[1]));'), + $data); $len = strlen($data); $out = ""; @@ -226,8 +234,10 @@ class PdfProcessor extends TextProcessor } if(isset($data[$cur_pos]) && isset($data[$cur_pos + 1]) && - ord($data[$cur_pos]) == ord('T') && ord($data[$cur_pos + 1]) == ord('J') ) { - if(isset($data[$cur_pos + 3]) && ord($data[$cur_pos + 3]) != ord('F')) { + ord($data[$cur_pos]) == ord('T') && + ord($data[$cur_pos + 1]) == ord('J') ) { + if(isset($data[$cur_pos + 3]) && + ord($data[$cur_pos + 3]) != ord('F')) { $out .= " "; } else { $out .= "\n"; @@ -257,7 +267,8 @@ class PdfProcessor extends TextProcessor } else { if($escape_flag || $cur_char !=")"){ $ascii = ord($cur_char); - if((9 <= $ascii && $ascii <= 13) || (32 <= $ascii && $ascii <= 126)) { + if((9 <= $ascii && $ascii <= 13) || + (32 <= $ascii && $ascii <= 126)) { $out .= $cur_char; } } diff --git a/lib/processors/png_processor.php b/lib/processors/png_processor.php index f4ec33311..562cd9758 100755 --- a/lib/processors/png_processor.php +++ b/lib/processors/png_processor.php @@ -59,11 +59,16 @@ class PngProcessor extends ImageProcessor $image = @imagecreatefrompng(CRAWL_DIR."/cache/tmp.png"); $thumb_string = self::createThumb($image); $summary[self::TITLE] = ""; - $summary[self::DESCRIPTION] = "Image of ".UrlParser::getDocumentFilename($url); + $summary[self::DESCRIPTION] = "Image of ". + UrlParser::getDocumentFilename($url); $summary[self::LINKS] = array(); - $summary[self::PAGE] = "<html><body><div><img src='data:image/png;base64,".base64_encode($page) - . 
"' alt='".$summary[self::DESCRIPTION]."' /></div></body></html>"; - $summary[self::THUMB] = 'data:image/jpeg;base64,'.base64_encode($thumb_string); + $summary[self::PAGE] = + "<html><body><div><img src='data:image/png;base64,". + base64_encode($page). + "' alt='".$summary[self::DESCRIPTION]. + "' /></div></body></html>"; + $summary[self::THUMB] = 'data:image/jpeg;base64,' . + base64_encode($thumb_string); } return $summary; diff --git a/lib/processors/ppt_processor.php b/lib/processors/ppt_processor.php index 28479ee87..e49d05ada 100755 --- a/lib/processors/ppt_processor.php +++ b/lib/processors/ppt_processor.php @@ -121,10 +121,12 @@ class PptProcessor extends TextProcessor break; case self::SCAN_TEXT_SEG: - if(strpos($out_text, "lick to edit Master title style") > 0) { + if(strpos($out_text, + "lick to edit Master title style") > 0) { $state = self::ALWAYS_IGNORE; } else if($scan_text_pos < $text_len) { - if(($ascii >= 32 && $ascii <= 126) || $ascii == 10) { + if(($ascii >= 32 && $ascii <= 126) || + $ascii == 10) { $out_text .= chr($ascii); $scan_text_pos++; } diff --git a/lib/processors/rtf_processor.php b/lib/processors/rtf_processor.php index 6a770f274..16e504292 100755 --- a/lib/processors/rtf_processor.php +++ b/lib/processors/rtf_processor.php @@ -99,7 +99,8 @@ class RtfProcessor extends TextProcessor $i = 0; while($cur_pos < $len) { - list($cur_pos, $object_string) = self::getNextObject($rtf_string, $cur_pos); + list($cur_pos, $object_string) = + self::getNextObject($rtf_string, $cur_pos); if(strpos($object_string, "{")) { $out .= self::getText($object_string); } else { @@ -108,7 +109,9 @@ class RtfProcessor extends TextProcessor } else if(preg_match('/\\\(par)/', $object_string) > 0) { $text = preg_replace('/\\\(\w)+/', "", $object_string); $out .= $text."\n"; - } else if(preg_match('/(\\\(title)|\\\(author)|\\\(operator)|\\\(company))/', $object_string) > 0) { + } else if(preg_match( + '/(\\\(title)|\\\(author)|\\\(operator)|\\\(company))/', + 
$object_string) > 0) { $text = preg_replace('/\\\(\w)+/', "", $object_string); $out .= $text."\n\n"; } diff --git a/lib/processors/text_processor.php b/lib/processors/text_processor.php index 0f65f4a50..8773576d3 100755 --- a/lib/processors/text_processor.php +++ b/lib/processors/text_processor.php @@ -53,7 +53,7 @@ class TextProcessor implements CrawlConstants /** * */ - public static function process($page, $url) + static function process($page, $url) { if(is_string($page)) { $summary[self::TITLE] = ""; @@ -67,21 +67,24 @@ class TextProcessor implements CrawlConstants /** * */ - public static function getBetweenTags($string, $cur_pos, $start_tag, $end_tag) + static function getBetweenTags($string, $cur_pos, $start_tag, $end_tag) { $len = strlen($string); - if(($between_start = strpos($string, $start_tag, $cur_pos)) === false ) { + if(($between_start = strpos($string, $start_tag, $cur_pos)) === + false ) { return array($len, ""); } $between_start += strlen($start_tag); - if(($between_end = strpos($string, $end_tag, $between_start)) === false ) { + if(($between_end = strpos($string, $end_tag, $between_start)) === + false ) { $between_end = $len; } $cur_pos = $between_end + strlen($end_tag); - $between_string = substr($string, $between_start, $between_end - $between_start); + $between_string = substr($string, $between_start, + $between_end - $between_start); return array($cur_pos, $between_string); } diff --git a/lib/string_array.php b/lib/string_array.php index 45a5933e1..4355753bf 100755 --- a/lib/string_array.php +++ b/lib/string_array.php @@ -61,7 +61,8 @@ class StringArray extends PersistentStructure /** * */ - public function __construct($fname, $num_values, $data_size, $save_frequency = self::DEFAULT_SAVE_FREQUENCY) + public function __construct($fname, $num_values, $data_size, + $save_frequency = self::DEFAULT_SAVE_FREQUENCY) { $this->filename = $fname; $this->num_values = $num_values; diff --git a/lib/unit_test.php b/lib/unit_test.php index 
ec0adc8c8..8dcb2dea2 100644 --- a/lib/unit_test.php +++ b/lib/unit_test.php @@ -67,7 +67,8 @@ abstract class UnitTest $this->setUp(); $len = strlen($method); - if(substr_compare($method, self::case_name, $len - self::case_name_len) == 0) { + if(substr_compare( + $method, self::case_name, $len - self::case_name_len) == 0) { $this->test_case_results = array(); $this->$method(); $test_results[$method] = $this->test_case_results; diff --git a/lib/url_parser.php b/lib/url_parser.php index f0e2017d8..be5623dfd 100755 --- a/lib/url_parser.php +++ b/lib/url_parser.php @@ -47,16 +47,18 @@ class UrlParser { /** - * Checks if the url scheme is either http or https. + * Checks if the url scheme is either http or https. * - * @param string $url the url to check - * @return bool returns true if it is either http or https and false otherwise + * @param string $url the url to check + * @return bool returns true if it is either http or https and false + * otherwise */ - public static function isSchemeHttpOrHttps($url) + static function isSchemeHttpOrHttps($url) { $url_parts = @parse_url($url); - if(isset($url_parts['scheme']) && $url_parts['scheme'] != "http" && $url_parts['scheme'] != "https") { + if(isset($url_parts['scheme']) && $url_parts['scheme'] != "http" && + $url_parts['scheme'] != "https") { return false; } @@ -65,12 +67,12 @@ class UrlParser } /** - * Checks if the url has a host part. + * Checks if the url has a host part. 
* - * @param string $url the url to check - * @return bool true if it does; false otherwise + * @param string $url the url to check + * @return bool true if it does; false otherwise */ - public static function hasHostUrl($url) + static function hasHostUrl($url) { $url_parts = @parse_url($url); @@ -78,12 +80,12 @@ class UrlParser } /** - * Get the host name portion of a url if present; if not return false + * Get the host name portion of a url if present; if not return false * - * @param string $url the url to parse - * @return the host portion of the url if present; false otherwise + * @param string $url the url to parse + * @return the host portion of the url if present; false otherwise */ - public static function getHost($url) + static function getHost($url) { $url_parts = @parse_url($url); @@ -123,12 +125,14 @@ class UrlParser } /** - * Gets an array of prefix urls from a given url. Each prefix contains at least the the hostname of the the start url + * Gets an array of prefix urls from a given url. Each prefix contains at + * least the the hostname of the the start url * - * http://host.com/b/c/ would yield http://host.com/ , http://host.com/b, http://host.com/b/, http://host.com/b/c, http://host.com/b/c/ + * http://host.com/b/c/ would yield http://host.com/ , http://host.com/b, + * http://host.com/b/, http://host.com/b/c, http://host.com/b/c/ * - * @param string $url the url to extract prefixes from - * @return array the array of url prefixes + * @param string $url the url to extract prefixes from + * @return array the array of url prefixes */ public static function getHostPaths($url) { @@ -159,14 +163,14 @@ class UrlParser } /** - * Given a url tries to make a guess at the file type of the file it points to + * Given a url, makes a guess at the file type of the file it points to * - * @param string $url a url to figure out the file type for + * @param string $url a url to figure out the file type for * - * @return string the guessed file type. 
+ * @return string the guessed file type. * */ - public static function getDocumentType($url) + static function getDocumentType($url) { $url_parts = @parse_url($url); @@ -186,12 +190,13 @@ class UrlParser } /** - * Gets the filename portion of a url if present; otherwise returns "Some File" + * Gets the filename portion of a url if present; + * otherwise returns "Some File" * - * @param string $url a url to parse - * @return string the filename portion of this url + * @param string $url a url to parse + * @return string the filename portion of this url */ - public static function getDocumentFilename($url) + static function getDocumentFilename($url) { $url_parts = @parse_url($url); @@ -211,12 +216,12 @@ class UrlParser } /** - * Get the query string component of a url + * Get the query string component of a url * - * @param string $url a url to get the query string out of - * @return string the query string if present; NULL otherwise + * @param string $url a url to get the query string out of + * @return string the query string if present; NULL otherwise */ - public static function getQuery($url) + static function getQuery($url) { $url_parts = @parse_url($url); if(isset($url_parts['query'])) { @@ -271,7 +276,8 @@ class UrlParser } else { $pre_path = ""; } - if(isset($site_path_parts['basename']) && !isset($site_path_parts['extension'])) { + if(isset($site_path_parts['basename']) && + !isset($site_path_parts['extension'])) { $pre_path .="/".$site_path_parts['basename']; } @@ -318,18 +324,23 @@ class UrlParser } /** - * Checks if a url has a repeated set of subdirectories, and if the number of repeats occurs more than some threshold number of times + * Checks if a url has a repeated set of subdirectories, and if the number + * of repeats occurs more than some threshold number of times * - * A pattern like bob/.../bob counts as own reptition. bob/.../alice/.../bob/.../alice would count as two (... 
should be read as ellipsis, - * not a directory name).If the threshold is three and there are at least three repeated mathes this function return true; it returns - * false otherwise. + * A pattern like bob/.../bob counts as own reptition. + * bob/.../alice/.../bob/.../alice would count as two (... should be read + * as ellipsis, not a directory name).If the threshold is three and there + * are at least three repeated mathes this function return true; it returns + * false otherwise. * - * @param string $url the url to check - * @param int $repeat_threshold the number of repeats of a subdir name to trigger a true response - * @return bool whether a repeated subdirectory name with more matches than the threshold was found + * @param string $url the url to check + * @param int $repeat_threshold the number of repeats of a subdir name to + * trigger a true response + * @return bool whether a repeated subdirectory name with more matches than + * the threshold was found * */ - public static function checkRecursiveUrl($url, $repeat_threshold = 3) + static function checkRecursiveUrl($url, $repeat_threshold = 3) { $url_parts = mb_split("/", $url); diff --git a/lib/utility.php b/lib/utility.php index 56a4ea3c9..851f0a2f1 100755 --- a/lib/utility.php +++ b/lib/utility.php @@ -40,7 +40,8 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} * * @param string $msg message to log * @param string $lname name of log file in the LOG_DIR directory, rotated logs - * will also use this as their basename followed by a number followed by bz2 (since they are bzipped). + * will also use this as their basename followed by a number followed by + * bz2 (since they are bzipped). 
*/ function crawlLog($msg, $lname = NULL) @@ -71,7 +72,8 @@ function crawlLog($msg, $lname = NULL) rename("$logfile.".($i-1).".bz2", "$logfile.$i.bz2"); } } - file_put_contents("$logfile.0.bz2", bzcompress(file_get_contents($logfile))); + file_put_contents("$logfile.0.bz2", + bzcompress(file_get_contents($logfile))); unlink($logfile); } error_log($out_msg."\n", 3, $logfile); @@ -106,7 +108,8 @@ function crawlHash($string, $raw=false) if(!$raw) { $hash = rtrim(base64_encode($combine), "="); $hash = str_replace("/", "_", $hash); - $hash = str_replace("+", "-" , $hash); // common variant of base64 safe for urls and paths + $hash = str_replace("+", "-" , $hash); + // common variant of base64 safe for urls and paths } else { $hash = $combine; } @@ -115,13 +118,15 @@ function crawlHash($string, $raw=false) } /** - * The search engine project's variation on the Unix crypt function using the crawlHash function instead of DES + * The search engine project's variation on the Unix crypt function using the + * crawlHash function instead of DES * - * The crawlHash function is used to encrypt passwords stored in the database + * The crawlHash function is used to encrypt passwords stored in the database * - * @param string $string the string to encrypt - * @param int $salt salt value to be used (needed to verify if a password is valid) - * @return string the crypted string where crypting is done using crawlHash + * @param string $string the string to encrypt + * @param int $salt salt value to be used (needed to verify if a password is + * valid) + * @return string the crypted string where crypting is done using crawlHash */ function crawlCrypt($string, $salt = NULL) { @@ -138,13 +143,14 @@ function crawlCrypt($string, $salt = NULL) /** - * Measures the change in time in seconds between two timestamps to microsecond precision - * - * @param string $start starting time with microseconds - * @param string $end ending time with microseconds - * @return float time difference in 
seconds - * @see SigninModel::changePassword() - * @see SigninModel::checkValidSignin() + * Measures the change in time in seconds between two timestamps to microsecond + * precision + * + * @param string $start starting time with microseconds + * @param string $end ending time with microseconds + * @return float time difference in seconds + * @see SigninModel::changePassword() + * @see SigninModel::checkValidSignin() */ function changeInMicrotime( $start, $end=NULL ) { @@ -155,7 +161,8 @@ function changeInMicrotime( $start, $end=NULL ) list($end_microseconds, $end_seconds) = explode(" ", $end); $change_in_seconds = intval($end_seconds) - intval($start_seconds); - $change_in_microseconds = floatval($end_microseconds) - floatval($start_microseconds); + $change_in_microseconds = + floatval($end_microseconds) - floatval($start_microseconds); return floatval( $change_in_seconds ) + $change_in_microseconds; } @@ -163,10 +170,11 @@ function changeInMicrotime( $start, $end=NULL ) // callbacks for Model::traverseDirectory /** - * This is a callback function used in the process of recursively deleting a directory + * This is a callback function used in the process of recursively deleting a + * directory * - * @param string $file_or_dir the filename or directory name to be deleted - * @see DatasourceManager::unlinkRecursive() + * @param string $file_or_dir the filename or directory name to be deleted + * @see DatasourceManager::unlinkRecursive() */ function deleteFileOrDir($file_or_dir) { @@ -196,19 +204,21 @@ function setWorldPermissions($file) */ function scoreOrderCallback($word_doc_a, $word_doc_b) { - return ((float)$word_doc_a[CrawlConstants::SCORE] > (float)$word_doc_b[CrawlConstants::SCORE]) ? -1 : 1; + return ((float)$word_doc_a[CrawlConstants::SCORE] > + (float)$word_doc_b[CrawlConstants::SCORE]) ? 
-1 : 1; } /** - * Callback to check if $a is less than $b + * Callback to check if $a is less than $b * - * Used to help sort document results returned in PhraseModel called in IndexArchiveBundle + * Used to help sort document results returned in PhraseModel called + * in IndexArchiveBundle * - * @param float $a first value to compare - * @param float $b second value to compare - * @return int -1 if $a is less than $b; 1 otherwise - * @see IndexArchiveBundle::getSelectiveWords() - * @see PhraseModel::getPhrasePageResults() + * @param float $a first value to compare + * @param float $b second value to compare + * @return int -1 if $a is less than $b; 1 otherwise + * @see IndexArchiveBundle::getSelectiveWords() + * @see PhraseModel::getPhrasePageResults() */ function lessThan($a, $b) { if ($a == $b) { @@ -220,13 +230,14 @@ function lessThan($a, $b) { /** * Callback to check if $a is greater than $b * - * Used to help sort document results returned in PhraseModel called in IndexArchiveBundle + * Used to help sort document results returned in PhraseModel called in + * IndexArchiveBundle * - * @param float $a first value to compare - * @param float $b second value to compare - * @return int -1 if $a is greater than $b; 1 otherwise - * @see IndexArchiveBundle::getSelectiveWords() - * @see PhraseModel::getTopPhrases() + * @param float $a first value to compare + * @param float $b second value to compare + * @return int -1 if $a is greater than $b; 1 otherwise + * @see IndexArchiveBundle::getSelectiveWords() + * @see PhraseModel::getTopPhrases() */ function greaterThan($a, $b) { if ($a == $b) { diff --git a/lib/web_archive.php b/lib/web_archive.php index 52f7538e4..bf4dabf78 100755 --- a/lib/web_archive.php +++ b/lib/web_archive.php @@ -91,7 +91,8 @@ class WebArchive $info_block = unserialize($info_string); $this->count = $info_block["count"]; if(isset($info_block["data"])) { - return unserialize($this->compressor->uncompress($info_block["data"])); + return unserialize( + 
$this->compressor->uncompress($info_block["data"])); } else { return NULL; } @@ -141,7 +142,8 @@ class WebArchive /** * */ - public function addObjects($offset_field, &$objects, $data = NULL, $callback = NULL, $return_flag = true) + public function addObjects($offset_field, &$objects, + $data = NULL, $callback = NULL, $return_flag = true) { $fh = fopen($this->filename, "r+"); @@ -235,7 +237,8 @@ class WebArchive $offset += $line_length + $len; $objects[] = array($offset, $object); } else { - crawlLog("Web archive saw blank line when looked for offset $offset"); + crawlLog("Web archive saw blank line ". + "when looked for offset $offset"); } } @@ -252,10 +255,11 @@ class WebArchive } /** - * Returns $num many objects from the web archive starting at the current iterator position, leaving the iterator position unchanged + * Returns $num many objects from the web archive starting at the current + * iterator position, leaving the iterator position unchanged * - * @param int $num number of objects to return - * @return array an array of objects from the web archive + * @param int $num number of objects to return + * @return array an array of objects from the web archive */ public function currentObjects($num) { @@ -263,11 +267,12 @@ class WebArchive } /** - * Returns $num many objects from the web archive starting at the current iterator position. - * The iterator is advance to the object after the last one returned + * Returns $num many objects from the web archive starting at the + * current iterator position. 
The iterator is advance to the object + * after the last one returned * - * @param int $num number of objects to return - * @return array an array of objects from the web archive + * @param int $num number of objects to return + * @return array an array of objects from the web archive */ public function nextObjects($num) { @@ -275,7 +280,8 @@ class WebArchive } /** - * Resets the iterator for this web archive to the first object in the archive + * Resets the iterator for this web archive to the first object + * in the archive */ public function reset() { diff --git a/lib/web_archive_bundle.php b/lib/web_archive_bundle.php index e3a1d2d8c..aefbc891d 100755 --- a/lib/web_archive_bundle.php +++ b/lib/web_archive_bundle.php @@ -45,11 +45,11 @@ require_once 'gzip_compressor.php'; /** * - * A web archive bundle is a collection of web archives which are managed together - * It is useful to split data across several archive files rather than just store - * it in one, for both read efficiency and to keep filesizes from getting too big. - * In some places we are using 4 byte int's to store file offset which restricts the - * size of the files we can use for wbe archives. + * A web archive bundle is a collection of web archives which are managed + * together.It is useful to split data across several archive files rather than + * just store it in one, for both read efficiency and to keep filesizes from + * getting too big. In some places we are using 4 byte int's to store file + * offset which restricts the size of the files we can use for wbe archives. 
* * @author Chris Pollett * @@ -71,7 +71,9 @@ class WebArchiveBundle /** * */ - public function __construct($dir_name, $filter_size = -1, $num_partitions = NULL, $description = NULL, $compressor = "GzipCompressor") + public function __construct($dir_name, $filter_size = -1, + $num_partitions = NULL, $description = NULL, + $compressor = "GzipCompressor") { //filter size = -1 used by web server to not get all partitions created @@ -91,7 +93,8 @@ class WebArchiveBundle //store/read archive description $info = NULL; if(file_exists($dir_name."/description.txt")) { - $info = unserialize(file_get_contents($this->dir_name."/description.txt")); + $info = unserialize( + file_get_contents($this->dir_name."/description.txt")); } $this->num_partitions = $num_partitions; @@ -119,12 +122,18 @@ class WebArchiveBundle $info['NUM_PARTITIONS'] = $this->num_partitions; $info['COUNT'] = $this->count; if(!$read_only_archive) { - file_put_contents($this->dir_name."/description.txt", serialize($info)); + file_put_contents( + $this->dir_name."/description.txt", serialize($info)); } - //filter bundle to check if a downloaded page should be put in archive (for de-duplication) + /* + filter bundle to check if a downloaded page should be put in archive + (for de-duplication) + */ if($this->filter_size > 0) { - $this->page_exists_filter_bundle = new BloomFilterBundle($dir_name."/PageExistsFilterBundle", $filter_size); + $this->page_exists_filter_bundle = + new BloomFilterBundle($dir_name."/PageExistsFilterBundle", + $filter_size); } } @@ -139,7 +148,8 @@ class WebArchiveBundle } $num_pages = count($pages); - for($i = 0; $i < $num_pages; $i++) { //we are doing this to preserve the order of the returned array + for($i = 0; $i < $num_pages; $i++) { + //we are doing this to preserve the order of the returned array $pages[$i]['TMP_INDEX'] = $i; } @@ -147,7 +157,8 @@ class WebArchiveBundle if(isset($page[$key_field])) { $this->count++; - $index = WebArchiveBundle::selectPartition($page[$key_field], 
$this->num_partitions); + $index = WebArchiveBundle::selectPartition( + $page[$key_field], $this->num_partitions); $partition_queue[$index][] = $page; } @@ -155,11 +166,14 @@ class WebArchiveBundle $pages_with_offsets = array(); for($i = 0; $i < $this->num_partitions; $i++) { - $pages_with_offsets = array_merge($pages_with_offsets, $this->addObjectsPartition($offset_field, $i, $partition_queue[$i])); + $pages_with_offsets = array_merge($pages_with_offsets, + $this->addObjectsPartition( + $offset_field, $i, $partition_queue[$i])); } foreach($pages_with_offsets as $off_page) { - $pages[$off_page['TMP_INDEX']][$offset_field] = $off_page[$offset_field]; + $pages[$off_page['TMP_INDEX']][$offset_field] = + $off_page[$offset_field]; unset($pages[$off_page['TMP_INDEX']]['TMP_INDEX'] ); } return $pages; @@ -170,7 +184,8 @@ class WebArchiveBundle */ public function getPage($key, $offset) { - $partition = WebArchiveBundle::selectPartition($key, $this->num_partitions); + $partition = + WebArchiveBundle::selectPartition($key, $this->num_partitions); return $this->getPageByPartition($partition, $offset); } @@ -180,7 +195,9 @@ class WebArchiveBundle */ public function getPageByPartition($partition, $offset, $file_handle = NULL) { - $page_array = $this->getPartition($partition)->getObjects($offset, 1, true, $file_handle); + $page_array = + $this->getPartition($partition)->getObjects( + $offset, 1, true, $file_handle); if(isset($page_array[0][1])) { return $page_array[0][1]; @@ -205,12 +222,14 @@ class WebArchiveBundle /** * */ - public function addObjectsPartition($offset_field, $partition, &$objects, $data = NULL, $callback = NULL, $return_flag = true) + public function addObjectsPartition($offset_field, $partition, + &$objects, $data = NULL, $callback = NULL, $return_flag = true) { $num_objects = count($objects); $this->addCount($num_objects); - return $this->getPartition($partition)->addObjects($offset_field, $objects, $data, $callback, $return_flag); + return 
$this->getPartition($partition)->addObjects( + $offset_field, $objects, $data, $callback, $return_flag); } /** @@ -253,7 +272,8 @@ class WebArchiveBundle */ public function differencePagesFilter(&$page_array, $field_name = NULL) { - $this->page_exists_filter_bundle->differenceFilter($page_array, $field_name); + $this->page_exists_filter_bundle->differenceFilter( + $page_array, $field_name); } /** @@ -271,12 +291,15 @@ class WebArchiveBundle */ public function getPartition($index, $fast_construct = true) { - if(!isset($this->partition[$index])) { //this might not have been open yet + if(!isset($this->partition[$index])) { + //this might not have been open yet $create_flag = false; if(!file_exists($this->dir_name."/web_archive_".$index)) { $create_flag = true; } - $this->partition[$index] = new WebArchive($this->dir_name."/web_archive_".$index, new $this->compressor(), $fast_construct); + $this->partition[$index] = + new WebArchive($this->dir_name."/web_archive_".$index, + new $this->compressor(), $fast_construct); if($create_flag) { chmod($this->dir_name."/web_archive_".$index, 0777); } @@ -290,7 +313,8 @@ class WebArchiveBundle */ function addCount($num) { - $info = unserialize(file_get_contents($this->dir_name."/description.txt")); + $info = + unserialize(file_get_contents($this->dir_name."/description.txt")); $info['COUNT'] += $num; file_put_contents($this->dir_name."/description.txt", serialize($info)); } @@ -302,7 +326,8 @@ class WebArchiveBundle { if(!is_dir($dir_name) || !file_exists($dir_name."/description.txt")) { $info = array(); - $info['DESCRIPTION'] = "Archive does not exist OR Archive description file not found"; + $info['DESCRIPTION'] = + "Archive does not exist OR Archive description file not found"; $info['COUNT'] = 0; $info['NUM_PARTITIONS'] = 0; return $info; diff --git a/lib/web_queue_bundle.php b/lib/web_queue_bundle.php index 63c0b00a4..dbdec0bc0 100755 --- a/lib/web_queue_bundle.php +++ b/lib/web_queue_bundle.php @@ -75,7 +75,8 @@ class 
WebQueueBundle implements Notifier /** * */ - public function __construct($dir_name, $filter_size, $num_urls_ram, $min_or_max) + public function __construct($dir_name, + $filter_size, $num_urls_ram, $min_or_max) { $this->dir_name = $dir_name; $this->filter_size = $filter_size; @@ -87,28 +88,38 @@ class WebQueueBundle implements Notifier } /* - if we are resuming a crawl we discard the old priority queue and associated hash table and archive - new queue data will be read in from any existing schedule + if we are resuming a crawl we discard the old priority queue and + associated hash table and archive new queue data will be read in + from any existing schedule */ // set up the priority queue... stores (hash(url), weight) pairs. - $this->to_crawl_queue = new PriorityQueue($dir_name."/queue.dat", $num_urls_ram, 8, $min_or_max, $this); + $this->to_crawl_queue = new PriorityQueue($dir_name."/queue.dat", + $num_urls_ram, 8, $min_or_max, $this); - // set up the hash table... stores (hash(url), offset into url archive, index in priority queue) triples. + /* set up the hash table... stores (hash(url), offset into url archive, i + ndex in priority queue) triples. 
+ */ - /*to ensure we can always insert into table, because of how deletions work we will periodically want to - rebuild our table we will also want to give a little more than the usual twice the number we want to - insert slack + /*to ensure we can always insert into table, because of how deletions + work we will periodically want to + rebuild our table we will also want to give a little more than the + usual twice the number we want to insert slack */ - $this->to_crawl_table = $this->constructHashTable($dir_name."/hash_table.dat", 4*$num_urls_ram); + $this->to_crawl_table = $this->constructHashTable( + $dir_name."/hash_table.dat", 4*$num_urls_ram); - // set up url archive, used to store the full text of the urls which are on the priority queue + /* set up url archive, used to store the full text of the urls which + are on the priority queue + */ if(file_exists($dir_name."/url_archive")) { unlink($dir_name."/url_archive"); } - $this->to_crawl_archive = new WebArchive($dir_name."/url_archive", new NonCompressor()); + $this->to_crawl_archive = new WebArchive( + $dir_name."/url_archive", new NonCompressor()); //filter bundle to check if we have already visited a URL - $this->url_exists_filter_bundle = new BloomFilterBundle($dir_name."/UrlExistsFilterBundle", $filter_size); + $this->url_exists_filter_bundle = new BloomFilterBundle( + $dir_name."/UrlExistsFilterBundle", $filter_size); //timestamp for robot filters (so can delete if get too old) if(!file_exists($dir_name."/robot_timestamp.txt")) { @@ -117,26 +128,33 @@ class WebQueueBundle implements Notifier //filter to check if we have already have a copy of a robot.txt file if(file_exists($dir_name."/got_robottxt.ftr")) { - $this->got_robottxt_filter = BloomFilterFile::load($dir_name."/got_robottxt.ftr"); + $this->got_robottxt_filter = BloomFilterFile::load( + $dir_name."/got_robottxt.ftr"); } else { - $this->got_robottxt_filter = new BloomFilterFile($dir_name."/got_robottxt.ftr", $filter_size); + 
$this->got_robottxt_filter = new BloomFilterFile( + $dir_name."/got_robottxt.ftr", $filter_size); } //filter with disallowed robots.txt paths if(file_exists($dir_name."/dissallowed_robot.ftr")) { - $this->dissallowed_robot_filter = BloomFilterFile::load($dir_name."/dissallowed_robot.ftr"); + $this->dissallowed_robot_filter = + BloomFilterFile::load($dir_name."/dissallowed_robot.ftr"); } else { - $this->dissallowed_robot_filter = new BloomFilterFile($dir_name."/dissallowed_robot.ftr", $filter_size); + $this->dissallowed_robot_filter = + new BloomFilterFile( + $dir_name."/dissallowed_robot.ftr", $filter_size); } //filter to check for and determine crawl delay if(file_exists($dir_name."/crawl_delay.ftr")) { - $this->crawl_delay_filter = BloomFilterFile::load($dir_name."/crawl_delay.ftr"); + $this->crawl_delay_filter = + BloomFilterFile::load($dir_name."/crawl_delay.ftr"); } else { - $this->crawl_delay_filter = new BloomFilterFile($dir_name."/crawl_delay.ftr", $filter_size); + $this->crawl_delay_filter = + new BloomFilterFile($dir_name."/crawl_delay.ftr", $filter_size); } } @@ -163,8 +181,12 @@ class WebQueueBundle implements Notifier $data = pack('N', $offset).pack("N", 0); if($this->insertHashTable(crawlHash($url, true), $data)) { - //we will change 0 to priority queue index in the notify callback - $loc = $this->to_crawl_queue->insert(crawlHash($url, true), $weight); + /* + we will change 0 to priority queue index in the + notify callback + */ + $loc = $this->to_crawl_queue->insert( + crawlHash($url, true), $weight); } else { echo "Error inserting $url into hash table !!"; } @@ -321,7 +343,8 @@ class WebQueueBundle implements Notifier */ public function differenceSeenUrls(&$url_array, $field_name = NULL) { - $this->url_exists_filter_bundle->differenceFilter($url_array, $field_name); + $this->url_exists_filter_bundle->differenceFilter( + $url_array, $field_name); } /** @@ -362,7 +385,8 @@ class WebQueueBundle implements Notifier public function getRobotTxtAge() { - 
$creation_time = intval(file_get_contents($this->dir_name."/robot_timestamp.txt")); + $creation_time = intval( + file_get_contents($this->dir_name."/robot_timestamp.txt")); return (time() - $creation_time); } @@ -372,7 +396,8 @@ class WebQueueBundle implements Notifier */ public function setCrawlDelay($host, $value) { - $this->crawl_delay_filter->add("-1".$host); //used to say a crawl delay has been set + $this->crawl_delay_filter->add("-1".$host); + //used to say a crawl delay has been set for($i = 0; $i < 8; $i++) { if(($value & 1) == 1) { @@ -450,7 +475,8 @@ class WebQueueBundle implements Notifier { crawlLog("Rebuilding Hash table"); $num_values = $this->to_crawl_table->num_values; - $tmp_table = $this->constructHashTable($this->dir_name."/tmp_table.dat", $num_values); + $tmp_table = $this->constructHashTable( + $this->dir_name."/tmp_table.dat", $num_values); $null = $this->to_crawl_table->null; $deleted = $this->to_crawl_table->deleted; @@ -467,7 +493,8 @@ class WebQueueBundle implements Notifier if(file_exists($this->dir_name."/hash_table.dat")) { unlink($this->dir_name."/hash_table.dat"); if(file_exists($this->dir_name."/tmp_table.dat")) { - rename($this->dir_name."/tmp_table.dat", $this->dir_name."/hash_table.dat"); + rename($this->dir_name."/tmp_table.dat", + $this->dir_name."/hash_table.dat"); } } $tmp_table->filename = $this->dir_name."/hash_table.dat"; @@ -475,8 +502,8 @@ class WebQueueBundle implements Notifier } /** - * Since offsets are integers, even if the queue is kept relatively small, periodically - * we will need to rebuild the archive for storing urls. + * Since offsets are integers, even if the queue is kept relatively small, + * periodically we will need to rebuild the archive for storing urls. 
*/ public function rebuildUrlTable() { @@ -484,7 +511,8 @@ class WebQueueBundle implements Notifier $dir_name = $this->dir_name; $count = $this->to_crawl_queue->count; - $tmp_archive = new WebArchive($dir_name."/tmp_archive", new NonCompressor()); + $tmp_archive = + new WebArchive($dir_name."/tmp_archive", new NonCompressor()); for($i = 1; $i <= $count; $i++) { @@ -533,9 +561,15 @@ class WebQueueBundle implements Notifier $this->crawl_delay_filter = NULL; gc_collect_cycles(); - $this->got_robottxt_filter = new BloomFilterFile($this->dir_name."/got_robottxt.ftr", $this->filter_size); - $this->dissallowed_robot_filter = new BloomFilterFile($this->dir_name."/dissallowed_robot.ftr", $this->filter_size); - $this->crawl_delay_filter = new BloomFilterFile($this->dir_name."/crawl_delay.ftr", $this->filter_size); + $this->got_robottxt_filter = + new BloomFilterFile( + $this->dir_name."/got_robottxt.ftr", $this->filter_size); + $this->dissallowed_robot_filter = + new BloomFilterFile( + $this->dir_name."/dissallowed_robot.ftr", $this->filter_size); + $this->crawl_delay_filter = + new BloomFilterFile( + $this->dir_name."/crawl_delay.ftr", $this->filter_size); } /** @@ -551,7 +585,8 @@ class WebQueueBundle implements Notifier $this->insertHashTable($hash_url, $data); } else { - echo "NOTIFY LOOKUP FAILED. INDEX WAS $index. DATA WAS ".bin2hex($data[0])."\n"; + echo "NOTIFY LOOKUP FAILED. INDEX WAS $index. DATA WAS ". + bin2hex($data[0])."\n"; } } diff --git a/locale/en-US/configure.ini b/locale/en-US/configure.ini index 0e0b5da99..fb6114752 100755 --- a/locale/en-US/configure.ini +++ b/locale/en-US/configure.ini @@ -28,250 +28,252 @@ ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//controllers ; -; admin_controller.php line: 100 +; admin_controller.php line: 106 admin_controller_login_successful = "Login Successful!!" ; -; admin_controller.php line: 104 +; admin_controller.php line: 111 admin_controller_login_failed = "Username or Password Incorrect!" 
; -; admin_controller.php line: 234 +; admin_controller.php line: 255 admin_controller_passwords_dont_match = "Typed passwords do not match." ; -; admin_controller.php line: 241 +; admin_controller.php line: 265 admin_controller_invalid_old_password = "Current password incorrect." ; -; admin_controller.php line: 245 +; admin_controller.php line: 272 admin_controller_change_password = "Password change successful!!" ; -; admin_controller.php line: 267 +; admin_controller.php line: 297 admin_controller_select_username = "Select Username" ; -; admin_controller.php line: 302 +; admin_controller.php line: 334 admin_controller_select_rolename = "Select Role" ; -; admin_controller.php line: 326 +; admin_controller.php line: 360 admin_controller_passwords_dont_match = "Typed passwords do not match." ; -; admin_controller.php line: 331 +; admin_controller.php line: 367 admin_controller_username_exists = "Cannot Create User As Username Exists" ; -; admin_controller.php line: 336 +; admin_controller.php line: 374 admin_controller_username_added = "User Created" ; -; admin_controller.php line: 344 +; admin_controller.php line: 383 admin_controller_username_doesnt_exists = "Username Does Not Exist" ; -; admin_controller.php line: 349 +; admin_controller.php line: 390 admin_controller_username_deleted = "User Deleted" ; -; admin_controller.php line: 355 +; admin_controller.php line: 397 admin_controller_username_doesnt_exists = "Username Does Not Exist" ; -; admin_controller.php line: 359 +; admin_controller.php line: 403 admin_controller_rolename_doesnt_exists = "Role Name Does not Exist" ; -; admin_controller.php line: 363 +; admin_controller.php line: 409 admin_controller_rolename_added = "Role Name Added" ; -; admin_controller.php line: 371 +; admin_controller.php line: 420 admin_controller_username_doesnt_exists = "Username Does Not Exist" ; -; admin_controller.php line: 375 +; admin_controller.php line: 426 admin_controller_rolename_doesnt_exists = "Role Name Does not 
Exist" ; -; admin_controller.php line: 382 +; admin_controller.php line: 436 admin_controller_rolename_deleted = "Role Name Deleted" ; -; admin_controller.php line: 402 +; admin_controller.php line: 459 admin_controller_select_rolename = "Select Role" ; -; admin_controller.php line: 433 +; admin_controller.php line: 494 admin_controller_select_activityname = "Select Activity" ; -; admin_controller.php line: 463 +; admin_controller.php line: 527 admin_controller_rolename_exists = "Role Name Exists" ; -; admin_controller.php line: 471 +; admin_controller.php line: 537 admin_controller_rolename_added = "Role Name Added" ; -; admin_controller.php line: 480 +; admin_controller.php line: 548 admin_controller_rolename_doesnt_exists = "Role Name Does not Exist" ; -; admin_controller.php line: 486 +; admin_controller.php line: 556 admin_controller_rolename_deleted = "Role Name Deleted" ; -; admin_controller.php line: 491 +; admin_controller.php line: 562 admin_controller_rolename_doesnt_exists = "Role Name Does not Exist" ; -; admin_controller.php line: 495 +; admin_controller.php line: 568 admin_controller_activityname_doesnt_exists = "Activity Name Does not Exist" ; -; admin_controller.php line: 501 +; admin_controller.php line: 578 admin_controller_activity_added = "Activity Added" ; -; admin_controller.php line: 506 +; admin_controller.php line: 584 admin_controller_rolename_doesnt_exists = "Role Name Does not Exist" ; -; admin_controller.php line: 511 +; admin_controller.php line: 591 admin_controller_activityname_doesnt_exists = "Activity Name Does not Exist" ; -; admin_controller.php line: 518 +; admin_controller.php line: 603 admin_controller_activity_deleted = "Activity Deleted" ; -; admin_controller.php line: 540 +; admin_controller.php line: 628 admin_controller_starting_new_crawl = "Starting New Crawl!" 
; -; admin_controller.php line: 555 +; admin_controller.php line: 648 admin_controller_no_description = "No Description for Crawl" ; -; admin_controller.php line: 575 +; admin_controller.php line: 673 admin_controller_stop_crawl = "Stopping crawl. . .This will take a moment to refresh." ; -; admin_controller.php line: 585 +; admin_controller.php line: 686 admin_controller_resume_crawl = "Resuming crawl. . .This will take a moment to refresh." ; -; admin_controller.php line: 603 +; admin_controller.php line: 717 admin_controller_delete_crawl_success = "Deleting Crawl. . .This will take a moment to refresh." ; -; admin_controller.php line: 605 +; admin_controller.php line: 722 admin_controller_delete_crawl_fail = "Delete Crawl Failed!!" ; -; admin_controller.php line: 610 +; admin_controller.php line: 729 admin_controller_set_index = "Setting Crawl To Use as Index" ; -; admin_controller.php line: 621 +; admin_controller.php line: 742 admin_controller_breadth_first = "Breadth First" ; -; admin_controller.php line: 622 +; admin_controller.php line: 744 admin_controller_page_importance = "Page Importance" ; -; admin_controller.php line: 647 +; admin_controller.php line: 783 admin_controller_update_seed_info = "Updating Seed Site Info!" ; -; admin_controller.php line: 698 +; admin_controller.php line: 834 admin_controller_select_localename = "Select Locale" ; -; admin_controller.php line: 735 +; admin_controller.php line: 877 admin_controller_locale_added = "Locale Added!" ; -; admin_controller.php line: 741 +; admin_controller.php line: 884 admin_controller_localename_doesnt_exists = "Locale Does Not Exist!" ; -; admin_controller.php line: 748 +; admin_controller.php line: 893 admin_controller_localename_deleted = "Locale Deleted" ; -; admin_controller.php line: 764 +; admin_controller.php line: 913 admin_controller_localestrings_updated = "Locale Strings Updated!" 
; -; admin_controller.php line: 824 +; admin_controller.php line: 983 admin_controller_configure_work_dir_set = "Work Directory Set! You may need to re-login!" ; -; admin_controller.php line: 832 +; admin_controller.php line: 995 admin_controller_name_your_bot = "Please Name Your robot" ; -; admin_controller.php line: 837 +; admin_controller.php line: 1004 admin_controller_configure_work_profile_made = "Working Directory and Profile Created!" ; -; admin_controller.php line: 841 +; admin_controller.php line: 1010 admin_controller_configure_no_set_config = "Unable to Update config.php File!" ; -; admin_controller.php line: 848 +; admin_controller.php line: 1021 admin_controller_configure_no_create_profile = "Unable to Create Profile!" ; -; admin_controller.php line: 854 +; admin_controller.php line: 1030 admin_controller_configure_work_dir_invalid = "Work Directory is Invalid! Cannot Create Profile!" ; -; admin_controller.php line: 861 +; admin_controller.php line: 1041 admin_controller_configure_work_dir_invalid = "Work Directory is Invalid! Cannot Create Profile!" ; -; admin_controller.php line: 899 +; admin_controller.php line: 1101 admin_controller_configure_no_change_db = "Problem Updating Database!" ; -; admin_controller.php line: 910 +; admin_controller.php line: 1115 admin_controller_configure_profile_change = "Profile Updated!" ; -; admin_controller.php line: 918 +; admin_controller.php line: 1129 admin_controller_configure_no_change_profile = "There was a Problem Updating Profile!" ; -; admin_controller.php line: 946 +; admin_controller.php line: 1163 admin_controller_describe_robot = "Please Describe Your Robot" ; -; search_controller.php line: 108 +; search_controller.php line: 114 search_controller_logout_successful = "Logout Successful!!" ; -; search_controller.php line: 310 +; search_controller.php line: 334 search_controller_cached_version = "This cached version of %s was obtained by the Yioop crawler on %s." 
; -; settings_controller.php line: 109 +; settings_controller.php line: 114 settings_controller_settings_saved = "Settings Saved!" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views ; -; admin_view.php line: 74 +; admin_view.php line: 76 admin_view_admin = "Admin" ; -; admin_view.php line: 91 +; admin_view.php line: 96 adminview_auto_logout_one_minute = "Auto-logout in One Minute!!" ; -; crawlstatus_view.php line: 59 +; crawlstatus_view.php line: 60 crawlstatus_view_currently_processing = "Currently Processing" ; -; crawlstatus_view.php line: 60 +; crawlstatus_view.php line: 61 crawlstatus_view_description = "Description:" ; -; crawlstatus_view.php line: 64 +; crawlstatus_view.php line: 68 managecrawl_element_stop_crawl = "Stop Crawl" ; -; crawlstatus_view.php line: 67 +; crawlstatus_view.php line: 71 crawlstatus_view_no_description = "No active crawl" ; -; crawlstatus_view.php line: 70 +; crawlstatus_view.php line: 74 crawlstatus_view_time_started = "Time started:" ; -; crawlstatus_view.php line: 72 +; crawlstatus_view.php line: 77 crawlstatus_view_no_crawl_time = "No start time found" ; -; crawlstatus_view.php line: 74 +; crawlstatus_view.php line: 79 crawlstatus_view_total_urls = "Total Urls Seen:" ; -; crawlstatus_view.php line: 75 +; crawlstatus_view.php line: 82 crawlstatus_view_most_recent_fetcher = "Most Recent Fetcher:" ; -; crawlstatus_view.php line: 81 +; crawlstatus_view.php line: 88 crawlstatus_view_no_fetcher = "No Fetcher Queries Yet" ; -; crawlstatus_view.php line: 84 +; crawlstatus_view.php line: 91 crawlstatus_view_most_recent_urls = "Most Recent Urls" ; -; crawlstatus_view.php line: 91 +; crawlstatus_view.php line: 99 crawlstatus_view_no_recent_urls = "No Recent Urls" ; -; crawlstatus_view.php line: 95 +; crawlstatus_view.php line: 103 crawlstatus_view_previous_crawls = "Previous Crawls" ; -; crawlstatus_view.php line: 101 +; crawlstatus_view.php line: 109 crawlstatus_view_description = "Description:" +; +; crawlstatus_view.php line: 110 
crawlstatus_view_time_started = "Time started:" ; -; crawlstatus_view.php line: 102 +; crawlstatus_view.php line: 111 crawlstatus_view_total_urls = "Total Urls Seen:" ; -; crawlstatus_view.php line: 103 +; crawlstatus_view.php line: 112 crawlstatus_view_actions = "Actions:" ; -; crawlstatus_view.php line: 109 +; crawlstatus_view.php line: 121 crawlstatus_view_resume = "Resume" ; -; crawlstatus_view.php line: 113 +; crawlstatus_view.php line: 127 crawlstatus_view_set_index = "Set as Index" ; -; crawlstatus_view.php line: 116 +; crawlstatus_view.php line: 130 crawlstatus_view_search_index = "Search Index" ; -; crawlstatus_view.php line: 121 +; crawlstatus_view.php line: 137 crawlstatus_view_delete = "Delete" ; -; crawlstatus_view.php line: 128 +; crawlstatus_view.php line: 144 crawlstatus_view_no_previous_crawl = "No Previous Crawls" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/elements @@ -342,28 +344,28 @@ configure_element_robot_description = "Robot Description:" ; configure_element.php line: 148 configure_element_submit = "Submit" ; -; crawloptions_element.php line: 60 +; crawloptions_element.php line: 62 crawloptions_element_back_to_manage = "Back" ; -; crawloptions_element.php line: 62 +; crawloptions_element.php line: 64 crawloptions_element_edit_crawl_options = "Edit Crawl Options" ; -; crawloptions_element.php line: 70 +; crawloptions_element.php line: 74 crawloptions_element_crawl_order = "Crawl Order:" ; -; crawloptions_element.php line: 73 +; crawloptions_element.php line: 79 crawloptions_element_restrict_by_url = "Restrict Sites By Url:" ; -; crawloptions_element.php line: 75 +; crawloptions_element.php line: 86 crawloptions_element_allowed_to_crawl = "Allowed To Crawl Sites" ; -; crawloptions_element.php line: 79 +; crawloptions_element.php line: 91 crawloptions_element_disallowed_to_crawl = "Disallowed Sites" ; -; crawloptions_element.php line: 83 +; crawloptions_element.php line: 96 crawloptions_element_seed_sites = "Seed Sites" ; -; 
crawloptions_element.php line: 87 +; crawloptions_element.php line: 101 crawloptions_element_save_options = "Save Options" ; ; editlocales_element.php line: 60 @@ -441,175 +443,177 @@ managelocales_element_localetag = "Locale Tag" managelocales_element_writingmode = "Writing Mode" managelocales_element_percenttranslated = "Percent Translated" ; -; manageroles_element.php line: 55 +; manageroles_element.php line: 57 manageroles_element_add_role = "Add Role" ; -; manageroles_element.php line: 63 +; manageroles_element.php line: 67 manageroles_element_rolename = "Role Name: " ; -; manageroles_element.php line: 64 +; manageroles_element.php line: 71 manageroles_element_submit = "Submit" ; -; manageroles_element.php line: 69 +; manageroles_element.php line: 76 manageroles_element_delete_role = "Delete Role" ; -; manageroles_element.php line: 77 +; manageroles_element.php line: 86 manageusers_element_delete_rolename = "Role Name:" ; -; manageroles_element.php line: 78 +; manageroles_element.php line: 90 manageroles_element_submit = "Submit" ; -; manageroles_element.php line: 82 +; manageroles_element.php line: 94 manageroles_element_view_role_activities = "View Role Activities" ; -; manageroles_element.php line: 89 +; manageroles_element.php line: 103 manageusers_element_select_role = "Role Name: " ; -; manageroles_element.php line: 104 +; manageroles_element.php line: 123 manageusers_element_add_activity = "Add Activity:" ; -; manageroles_element.php line: 106 +; manageroles_element.php line: 128 manageroles_element_submit = "Submit" ; ; manageusers_element.php line: 57 manageusers_element_add_user = "Add User" ; -; manageusers_element.php line: 65 +; manageusers_element.php line: 67 manageusers_element_username = "Username: " ; -; manageusers_element.php line: 67 +; manageusers_element.php line: 71 manageusers_element_password = "Password: " ; -; manageusers_element.php line: 69 +; manageusers_element.php line: 75 manageusers_element_retype_password = "Retype Password: 
" ; -; manageusers_element.php line: 71 +; manageusers_element.php line: 80 manageusers_element_submit = "Submit" ; -; manageusers_element.php line: 76 +; manageusers_element.php line: 86 manageusers_element_delete_user = "Delete User" ; -; manageusers_element.php line: 84 +; manageusers_element.php line: 96 manageusers_element_delete_username = "Username: " ; -; manageusers_element.php line: 85 +; manageusers_element.php line: 100 manageusers_element_submit = "Submit" ; -; manageusers_element.php line: 90 +; manageusers_element.php line: 105 manageusers_element_view_user_roles = "View User Roles" ; -; manageusers_element.php line: 97 +; manageusers_element.php line: 114 manageusers_element_select_user = "Username: " ; -; manageusers_element.php line: 112 +; manageusers_element.php line: 135 manageusers_element_add_role = "Add Role: " ; -; manageusers_element.php line: 114 +; manageusers_element.php line: 140 manageusers_element_submit = "Submit" ; -; signin_element.php line: 56 +; signin_element.php line: 57 signin_element_settings = "Settings" ; -; signin_element.php line: 59 +; signin_element.php line: 61 signin_element_signin = "Sign In" ; -; signin_element.php line: 63 +; signin_element.php line: 67 signin_element_admin = "Admin" ; -; signin_element.php line: 64 +; signin_element.php line: 69 signin_element_signout = "Sign Out" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/helpers ; -; pagination_helper.php line: 86 +; pagination_helper.php line: 93 pagination_helper_previous = "Prev" ; -; pagination_helper.php line: 99 +; pagination_helper.php line: 109 pagination_helper_next = "Next" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/layouts ; -; web_layout.php line: 68 +; web_layout.php line: 70 web_layout_description = "Help find what you are searching for" ; -; web_layout.php line: 80 +; web_layout.php line: 83 web_layout_query_statistics = "Query Statistics" ; -; web_layout.php line: 81 +; web_layout.php line: 84 
web_layout_total_elapsed_time = "Total Elapsed Time for Queries: %s seconds." ; -; web_layout.php line: 84 +; web_layout.php line: 89 web_layout_query_time = "Time: %s seconds." ; ; nocache_view.php line: 65 nocache_view_no_cache = "Did not have cache of requested item." ; -; search_view.php line: 83 -search_view_developed_seek_quarry = "Developed at SeekQuarry" -; -; search_view.php line: 88 +; search_view.php line: 85 search_view_input_label = "Enter the terms you would like to search the web for" ; -; search_view.php line: 89 +; search_view.php line: 88 search_view_input_placeholder = "Type what you are looking for" ; ; search_view.php line: 90 search_view_search = "Search" ; -; search_view.php line: 100 +; search_view.php line: 99 +search_view_developed_seek_quarry = "Developed at SeekQuarry" +; +; search_view.php line: 103 search_view_query_results = "Query Results:" +; +; search_view.php line: 104 search_view_calculated = "Calculated in %s seconds." ; -; search_view.php line: 101 +; search_view.php line: 105 search_view_results = "Showing results %s - %s of %s" ; -; search_view.php line: 117 +; search_view.php line: 126 search_view_rank = "Rank: %s " ; -; search_view.php line: 118 +; search_view.php line: 128 search_view_relevancy = "Rel: %s " ; -; search_view.php line: 119 +; search_view.php line: 131 search_view_score = "Score %s" ; -; search_view.php line: 124 +; search_view.php line: 140 search_view_cache = "Cached" ; -; search_view.php line: 127 +; search_view.php line: 143 search_view_as_text = "View as text" ; -; search_view.php line: 129 +; search_view.php line: 148 search_view_similar = "Similar" ; -; settings_view.php line: 70 +; settings_view.php line: 74 settings_view_settings = "Settings" ; -; settings_view.php line: 75 +; settings_view.php line: 80 settings_view_results_per_page = "Results per Page:" ; -; settings_view.php line: 78 +; settings_view.php line: 85 settings_view_language_label = "Language:" ; -; settings_view.php line: 80 +; 
settings_view.php line: 91 settings_view_return_yioop = "Return to Yioop!" ; -; settings_view.php line: 81 +; settings_view.php line: 94 settings_view_save = "Save Settings" ; -; settings_view.php line: 88 +; settings_view.php line: 102 setting_install_search_plugin = "Install Yioop! Open Search Plugin" ; -; signin_view.php line: 61 +; signin_view.php line: 62 signin_view_signin = "Signin" ; -; signin_view.php line: 66 +; signin_view.php line: 68 signin_view_username = "Username" ; -; signin_view.php line: 70 +; signin_view.php line: 74 signin_view_password = "Password" ; -; signin_view.php line: 76 +; signin_view.php line: 83 signin_view_login = "Login" ; -; signin_view.php line: 82 +; signin_view.php line: 90 signin_view_return_yioop = "Return to Yioop" \ No newline at end of file diff --git a/locale/en-US/statistics.txt b/locale/en-US/statistics.txt index b6bef56f0..5a165df53 100755 --- a/locale/en-US/statistics.txt +++ b/locale/en-US/statistics.txt @@ -1 +1 @@ -d:99; \ No newline at end of file +d:100; \ No newline at end of file diff --git a/locale/extract_merge.php b/locale/extract_merge.php index e5829682b..44bffb0ac 100755 --- a/locale/extract_merge.php +++ b/locale/extract_merge.php @@ -44,7 +44,8 @@ if(isset($_SERVER['DOCUMENT_ROOT']) && strlen($_SERVER['DOCUMENT_ROOT']) > 0) { exit(); } -define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT'].$_SERVER['PWD'].$_SERVER["SCRIPT_NAME"], 0, +define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT'].$_SERVER['PWD']. + $_SERVER["SCRIPT_NAME"], 0, -strlen("locale/extract_merge.php"))); /** Loads config info */ @@ -76,14 +77,16 @@ $general_ini = parse_ini_file(LOCALE_DIR."/general.ini", true); updateLocales($general_ini, $strings); /** - * Cycles through locale subdirectories in LOCALE_DIR, for each - * locale it merges out the current gneral_ini and strings data. - * It deletes identifiers that are not in strings, it adds new identifiers - * and it leaves existing identifier translation pairs untouched. 
+ * Cycles through locale subdirectories in LOCALE_DIR, for each + * locale it merges out the current gneral_ini and strings data. + * It deletes identifiers that are not in strings, it adds new identifiers + * and it leaves existing identifier translation pairs untouched. * - * @param array $general_ini data that would typically come from the general.ini file - * @param array $string lines from what is equivalent to an ini file of msg_id msg_string pairs - * these lines also have comments on the file that strings were extracted from + * @param array $general_ini data that would typically come from the + * general.ini file + * @param array $string lines from what is equivalent to an ini file of + * msg_id msg_string pairs these lines also have comments on the file + * that strings were extracted from * */ function updateLocales($general_ini, $strings) @@ -104,18 +107,21 @@ function updateLocales($general_ini, $strings) } /** - * Updates the configure.ini file for a particular locale. + * Updates the configure.ini file for a particular locale. * - * The configure.ini has general information (at this point not really being used) about - * all locales together with specific msg_id (identifiers to be translated) and msg_string (translation) - * data. updateLocale takes line data coming from the general.ini file, strings extracted from - * documents that might need to be translation, as well as the old configure.ini file (this might have existing translations), - * and combines these to produce a new configure.ini file + * The configure.ini has general information (at this point not really being + * used) about all locales together with specific msg_id (identifiers to be + * translated) and msg_string (translation) data. 
updateLocale takes line data + * coming from the general.ini file, strings extracted from documents that + * might need to be translation, as well as the old configure.ini file (this + * might have existing translations), and combines these to produce a new + * configure.ini file * - * @param array $general_ini data from the general.ini file - * @param array $strings line array data extracted from files in directories that have strings in need of translation - * @param string $dir the directory of all the locales - * @param string $locale the particular locale in $dir to update + * @param array $general_ini data from the general.ini file + * @param array $strings line array data extracted from files in directories + * that have strings in need of translation + * @param string $dir the directory of all the locales + * @param string $locale the particular locale in $dir to update */ function updateLocale($general_ini, $strings, $dir, $locale) { @@ -154,14 +160,16 @@ EOT; $n[] = "[$general_name]"; foreach($general_value as $name => $value) { if(isset($old_configure[$general_name][$name])) { - $n[] = $name.' = "'.addslashes($old_configure[$general_name][$name]).'"'; + $n[] = $name.' = "'. + addslashes($old_configure[$general_name][$name]).'"'; } else { $n[] = $name.' = "'.$value.'"'; } } } else { if(isset($old_configure[$general_name])) { - $n[] = $general_name.' = "'.addslashes($old_configure[$general_name]).'"'; + $n[] = $general_name.' = "'. + addslashes($old_configure[$general_name]).'"'; } else { $n[] = $name.' = "'.$value.'"'; } @@ -175,7 +183,8 @@ EOT; $n[] = $string; } else { if(isset($old_configure['strings'][$string])) { - $n[] = $string.' = "'.addslashes($old_configure['strings'][$string]).'"'; + $n[] = $string.' = "'. + addslashes($old_configure['strings'][$string]).'"'; } else { $n[] = $string.' = ""'; } @@ -188,16 +197,20 @@ EOT; /** - * Searches the directories provided looking for files matching the extensions provided. 
When such - * a file is found it is loaded and scanned for tl() function calls. The identifier string in this - * function call is then extracted and added to a line array of strings to be translated. This line - * array is formatted so that each line looks like a line that might occur in an PHP ini file. - * To understand this format one can look at the parse_ini_string function in the PHP manual or - * look at the configure.ini files in the locale directory + * Searches the directories provided looking for files matching the extensions + * provided. When such a file is found it is loaded and scanned for tl() + * function calls. The identifier string in this function call is then + * extracted and added to a line array of strings to be translated. This line + * array is formatted so that each line looks like a line that might occur in + * an PHP ini file. To understand this format one can look at the + * parse_ini_string function in the PHP manual or look at the configure.ini + * files in the locale directory * - * @param array $extract_dirs directories to start looking for files with strings to be translated - * @param array $extensions file extensions of files which might contain such strings - * @return array of lines for any ini file of msg_id msg_string pairs + * @param array $extract_dirs directories to start looking for files with + * strings to be translated + * @param array $extensions file extensions of files which might contain such + * strings + * @return array of lines for any ini file of msg_id msg_string pairs */ function getTranslateStrings($extract_dirs, $extensions) { @@ -225,9 +238,11 @@ function getTranslateStrings($extract_dirs, $extensions) * the strings array. 
In addition, ini style comments are added givne the * line file and line number of the item to be translated * - * @param string $dir current directory to start looking for files with strings to be translated - * @param array $extensions file extensions of files which might contain such strings - * @return array of lines for any ini file of msg_id msg_string pairs + * @param string $dir current directory to start looking for files with + * strings to be translated + * @param array $extensions file extensions of files which might contain + * such strings + * @return array of lines for any ini file of msg_id msg_string pairs */ function traverseExtractRecursive($dir, $extensions) { @@ -254,12 +269,15 @@ function traverseExtractRecursive($dir, $extensions) if(is_file($cur_path)) { $path_parts = pathinfo($cur_path); - $extension = (isset($path_parts['extension'])) ? $path_parts['extension'] : ""; + $extension = (isset($path_parts['extension'])) ? + $path_parts['extension'] : ""; if(in_array($extension, $extensions)) { $lines = file($cur_path); $num_lines = count($lines); for($i = 0; $i < $num_lines; $i++) { - $num_matches = preg_match_all('/tl\([\'|\"]?([[:word:]]+?)[\'|\"]?[(\))|(\s+\,)]/', $lines[$i], $to_translates); + $num_matches = preg_match_all( + '/tl\([\'|\"]?([[:word:]]+?)[\'|\"]?[(\))|(\s+\,)]/', + $lines[$i], $to_translates); if($num_matches > 0) { $strings[] = ";"; $strings[] = "; $obj line: $i"; diff --git a/locale/fr-FR/configure.ini b/locale/fr-FR/configure.ini index 14b1a61cf..53125d072 100755 --- a/locale/fr-FR/configure.ini +++ b/locale/fr-FR/configure.ini @@ -28,250 +28,252 @@ ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//controllers ; -; admin_controller.php line: 100 +; admin_controller.php line: 106 admin_controller_login_successful = "" ; -; admin_controller.php line: 104 +; admin_controller.php line: 111 admin_controller_login_failed = "" ; -; admin_controller.php line: 234 +; admin_controller.php line: 255 
admin_controller_passwords_dont_match = "" ; -; admin_controller.php line: 241 +; admin_controller.php line: 265 admin_controller_invalid_old_password = "" ; -; admin_controller.php line: 245 +; admin_controller.php line: 272 admin_controller_change_password = "" ; -; admin_controller.php line: 267 +; admin_controller.php line: 297 admin_controller_select_username = "" ; -; admin_controller.php line: 302 +; admin_controller.php line: 334 admin_controller_select_rolename = "" ; -; admin_controller.php line: 326 +; admin_controller.php line: 360 admin_controller_passwords_dont_match = "" ; -; admin_controller.php line: 331 +; admin_controller.php line: 367 admin_controller_username_exists = "" ; -; admin_controller.php line: 336 +; admin_controller.php line: 374 admin_controller_username_added = "" ; -; admin_controller.php line: 344 +; admin_controller.php line: 383 admin_controller_username_doesnt_exists = "" ; -; admin_controller.php line: 349 +; admin_controller.php line: 390 admin_controller_username_deleted = "" ; -; admin_controller.php line: 355 +; admin_controller.php line: 397 admin_controller_username_doesnt_exists = "" ; -; admin_controller.php line: 359 +; admin_controller.php line: 403 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 363 +; admin_controller.php line: 409 admin_controller_rolename_added = "" ; -; admin_controller.php line: 371 +; admin_controller.php line: 420 admin_controller_username_doesnt_exists = "" ; -; admin_controller.php line: 375 +; admin_controller.php line: 426 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 382 +; admin_controller.php line: 436 admin_controller_rolename_deleted = "" ; -; admin_controller.php line: 402 +; admin_controller.php line: 459 admin_controller_select_rolename = "" ; -; admin_controller.php line: 433 +; admin_controller.php line: 494 admin_controller_select_activityname = "" ; -; admin_controller.php line: 463 +; admin_controller.php line: 527 
admin_controller_rolename_exists = "" ; -; admin_controller.php line: 471 +; admin_controller.php line: 537 admin_controller_rolename_added = "" ; -; admin_controller.php line: 480 +; admin_controller.php line: 548 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 486 +; admin_controller.php line: 556 admin_controller_rolename_deleted = "" ; -; admin_controller.php line: 491 +; admin_controller.php line: 562 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 495 +; admin_controller.php line: 568 admin_controller_activityname_doesnt_exists = "" ; -; admin_controller.php line: 501 +; admin_controller.php line: 578 admin_controller_activity_added = "" ; -; admin_controller.php line: 506 +; admin_controller.php line: 584 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 511 +; admin_controller.php line: 591 admin_controller_activityname_doesnt_exists = "" ; -; admin_controller.php line: 518 +; admin_controller.php line: 603 admin_controller_activity_deleted = "" ; -; admin_controller.php line: 540 +; admin_controller.php line: 628 admin_controller_starting_new_crawl = "" ; -; admin_controller.php line: 555 +; admin_controller.php line: 648 admin_controller_no_description = "" ; -; admin_controller.php line: 575 +; admin_controller.php line: 673 admin_controller_stop_crawl = "" ; -; admin_controller.php line: 585 +; admin_controller.php line: 686 admin_controller_resume_crawl = "" ; -; admin_controller.php line: 603 +; admin_controller.php line: 717 admin_controller_delete_crawl_success = "" ; -; admin_controller.php line: 605 +; admin_controller.php line: 722 admin_controller_delete_crawl_fail = "" ; -; admin_controller.php line: 610 +; admin_controller.php line: 729 admin_controller_set_index = "" ; -; admin_controller.php line: 621 +; admin_controller.php line: 742 admin_controller_breadth_first = "" ; -; admin_controller.php line: 622 +; admin_controller.php line: 744 
admin_controller_page_importance = "" ; -; admin_controller.php line: 647 +; admin_controller.php line: 783 admin_controller_update_seed_info = "" ; -; admin_controller.php line: 698 +; admin_controller.php line: 834 admin_controller_select_localename = "" ; -; admin_controller.php line: 735 +; admin_controller.php line: 877 admin_controller_locale_added = "" ; -; admin_controller.php line: 741 +; admin_controller.php line: 884 admin_controller_localename_doesnt_exists = "" ; -; admin_controller.php line: 748 +; admin_controller.php line: 893 admin_controller_localename_deleted = "" ; -; admin_controller.php line: 764 +; admin_controller.php line: 913 admin_controller_localestrings_updated = "" ; -; admin_controller.php line: 824 +; admin_controller.php line: 983 admin_controller_configure_work_dir_set = "" ; -; admin_controller.php line: 832 +; admin_controller.php line: 995 admin_controller_name_your_bot = "" ; -; admin_controller.php line: 837 +; admin_controller.php line: 1004 admin_controller_configure_work_profile_made = "" ; -; admin_controller.php line: 841 +; admin_controller.php line: 1010 admin_controller_configure_no_set_config = "" ; -; admin_controller.php line: 848 +; admin_controller.php line: 1021 admin_controller_configure_no_create_profile = "" ; -; admin_controller.php line: 854 +; admin_controller.php line: 1030 admin_controller_configure_work_dir_invalid = "" ; -; admin_controller.php line: 861 +; admin_controller.php line: 1041 admin_controller_configure_work_dir_invalid = "" ; -; admin_controller.php line: 899 +; admin_controller.php line: 1101 admin_controller_configure_no_change_db = "" ; -; admin_controller.php line: 910 +; admin_controller.php line: 1115 admin_controller_configure_profile_change = "" ; -; admin_controller.php line: 918 +; admin_controller.php line: 1129 admin_controller_configure_no_change_profile = "" ; -; admin_controller.php line: 946 +; admin_controller.php line: 1163 admin_controller_describe_robot = "" ; -; 
search_controller.php line: 108 +; search_controller.php line: 114 search_controller_logout_successful = "" ; -; search_controller.php line: 310 +; search_controller.php line: 334 search_controller_cached_version = "" ; -; settings_controller.php line: 109 +; settings_controller.php line: 114 settings_controller_settings_saved = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views ; -; admin_view.php line: 74 +; admin_view.php line: 76 admin_view_admin = "" ; -; admin_view.php line: 91 +; admin_view.php line: 96 adminview_auto_logout_one_minute = "" ; -; crawlstatus_view.php line: 59 +; crawlstatus_view.php line: 60 crawlstatus_view_currently_processing = "" ; -; crawlstatus_view.php line: 60 +; crawlstatus_view.php line: 61 crawlstatus_view_description = "" ; -; crawlstatus_view.php line: 64 +; crawlstatus_view.php line: 68 managecrawl_element_stop_crawl = "" ; -; crawlstatus_view.php line: 67 +; crawlstatus_view.php line: 71 crawlstatus_view_no_description = "" ; -; crawlstatus_view.php line: 70 +; crawlstatus_view.php line: 74 crawlstatus_view_time_started = "" ; -; crawlstatus_view.php line: 72 +; crawlstatus_view.php line: 77 crawlstatus_view_no_crawl_time = "" ; -; crawlstatus_view.php line: 74 +; crawlstatus_view.php line: 79 crawlstatus_view_total_urls = "" ; -; crawlstatus_view.php line: 75 +; crawlstatus_view.php line: 82 crawlstatus_view_most_recent_fetcher = "" ; -; crawlstatus_view.php line: 81 +; crawlstatus_view.php line: 88 crawlstatus_view_no_fetcher = "" ; -; crawlstatus_view.php line: 84 +; crawlstatus_view.php line: 91 crawlstatus_view_most_recent_urls = "" ; -; crawlstatus_view.php line: 91 +; crawlstatus_view.php line: 99 crawlstatus_view_no_recent_urls = "" ; -; crawlstatus_view.php line: 95 +; crawlstatus_view.php line: 103 crawlstatus_view_previous_crawls = "" ; -; crawlstatus_view.php line: 101 +; crawlstatus_view.php line: 109 crawlstatus_view_description = "" +; +; crawlstatus_view.php line: 110 crawlstatus_view_time_started = 
"" ; -; crawlstatus_view.php line: 102 +; crawlstatus_view.php line: 111 crawlstatus_view_total_urls = "" ; -; crawlstatus_view.php line: 103 +; crawlstatus_view.php line: 112 crawlstatus_view_actions = "" ; -; crawlstatus_view.php line: 109 +; crawlstatus_view.php line: 121 crawlstatus_view_resume = "" ; -; crawlstatus_view.php line: 113 +; crawlstatus_view.php line: 127 crawlstatus_view_set_index = "" ; -; crawlstatus_view.php line: 116 +; crawlstatus_view.php line: 130 crawlstatus_view_search_index = "" ; -; crawlstatus_view.php line: 121 +; crawlstatus_view.php line: 137 crawlstatus_view_delete = "" ; -; crawlstatus_view.php line: 128 +; crawlstatus_view.php line: 144 crawlstatus_view_no_previous_crawl = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/elements @@ -342,28 +344,28 @@ configure_element_robot_description = "" ; configure_element.php line: 148 configure_element_submit = "" ; -; crawloptions_element.php line: 60 +; crawloptions_element.php line: 62 crawloptions_element_back_to_manage = "" ; -; crawloptions_element.php line: 62 +; crawloptions_element.php line: 64 crawloptions_element_edit_crawl_options = "" ; -; crawloptions_element.php line: 70 +; crawloptions_element.php line: 74 crawloptions_element_crawl_order = "" ; -; crawloptions_element.php line: 73 +; crawloptions_element.php line: 79 crawloptions_element_restrict_by_url = "" ; -; crawloptions_element.php line: 75 +; crawloptions_element.php line: 86 crawloptions_element_allowed_to_crawl = "" ; -; crawloptions_element.php line: 79 +; crawloptions_element.php line: 91 crawloptions_element_disallowed_to_crawl = "" ; -; crawloptions_element.php line: 83 +; crawloptions_element.php line: 96 crawloptions_element_seed_sites = "" ; -; crawloptions_element.php line: 87 +; crawloptions_element.php line: 101 crawloptions_element_save_options = "" ; ; editlocales_element.php line: 60 @@ -441,175 +443,177 @@ managelocales_element_localetag = "" managelocales_element_writingmode = "" 
managelocales_element_percenttranslated = "" ; -; manageroles_element.php line: 55 +; manageroles_element.php line: 57 manageroles_element_add_role = "" ; -; manageroles_element.php line: 63 +; manageroles_element.php line: 67 manageroles_element_rolename = "" ; -; manageroles_element.php line: 64 +; manageroles_element.php line: 71 manageroles_element_submit = "" ; -; manageroles_element.php line: 69 +; manageroles_element.php line: 76 manageroles_element_delete_role = "" ; -; manageroles_element.php line: 77 +; manageroles_element.php line: 86 manageusers_element_delete_rolename = "" ; -; manageroles_element.php line: 78 +; manageroles_element.php line: 90 manageroles_element_submit = "" ; -; manageroles_element.php line: 82 +; manageroles_element.php line: 94 manageroles_element_view_role_activities = "" ; -; manageroles_element.php line: 89 +; manageroles_element.php line: 103 manageusers_element_select_role = "" ; -; manageroles_element.php line: 104 +; manageroles_element.php line: 123 manageusers_element_add_activity = "" ; -; manageroles_element.php line: 106 +; manageroles_element.php line: 128 manageroles_element_submit = "" ; ; manageusers_element.php line: 57 manageusers_element_add_user = "" ; -; manageusers_element.php line: 65 +; manageusers_element.php line: 67 manageusers_element_username = "" ; -; manageusers_element.php line: 67 +; manageusers_element.php line: 71 manageusers_element_password = "" ; -; manageusers_element.php line: 69 +; manageusers_element.php line: 75 manageusers_element_retype_password = "" ; -; manageusers_element.php line: 71 +; manageusers_element.php line: 80 manageusers_element_submit = "" ; -; manageusers_element.php line: 76 +; manageusers_element.php line: 86 manageusers_element_delete_user = "" ; -; manageusers_element.php line: 84 +; manageusers_element.php line: 96 manageusers_element_delete_username = "" ; -; manageusers_element.php line: 85 +; manageusers_element.php line: 100 manageusers_element_submit = "" ; -; 
manageusers_element.php line: 90 +; manageusers_element.php line: 105 manageusers_element_view_user_roles = "" ; -; manageusers_element.php line: 97 +; manageusers_element.php line: 114 manageusers_element_select_user = "" ; -; manageusers_element.php line: 112 +; manageusers_element.php line: 135 manageusers_element_add_role = "" ; -; manageusers_element.php line: 114 +; manageusers_element.php line: 140 manageusers_element_submit = "" ; -; signin_element.php line: 56 +; signin_element.php line: 57 signin_element_settings = "" ; -; signin_element.php line: 59 +; signin_element.php line: 61 signin_element_signin = "" ; -; signin_element.php line: 63 +; signin_element.php line: 67 signin_element_admin = "" ; -; signin_element.php line: 64 +; signin_element.php line: 69 signin_element_signout = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/helpers ; -; pagination_helper.php line: 86 +; pagination_helper.php line: 93 pagination_helper_previous = "Préc." ; -; pagination_helper.php line: 99 +; pagination_helper.php line: 109 pagination_helper_next = "Proch." 
; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/layouts ; -; web_layout.php line: 68 +; web_layout.php line: 70 web_layout_description = "Trouvez ce que vous cherchez" ; -; web_layout.php line: 80 +; web_layout.php line: 83 web_layout_query_statistics = "" ; -; web_layout.php line: 81 +; web_layout.php line: 84 web_layout_total_elapsed_time = "" ; -; web_layout.php line: 84 +; web_layout.php line: 89 web_layout_query_time = "" ; ; nocache_view.php line: 65 nocache_view_no_cache = "" ; -; search_view.php line: 83 -search_view_developed_seek_quarry = "" -; -; search_view.php line: 88 +; search_view.php line: 85 search_view_input_label = "" ; -; search_view.php line: 89 +; search_view.php line: 88 search_view_input_placeholder = "" ; ; search_view.php line: 90 search_view_search = "Rechercher" ; -; search_view.php line: 100 +; search_view.php line: 99 +search_view_developed_seek_quarry = "" +; +; search_view.php line: 103 search_view_query_results = "Résultats:" +; +; search_view.php line: 104 search_view_calculated = "Trouver en %s secondes." 
; -; search_view.php line: 101 +; search_view.php line: 105 search_view_results = "Affichage de %s - %s sur %s résultats" ; -; search_view.php line: 117 +; search_view.php line: 126 search_view_rank = "Rang:" ; -; search_view.php line: 118 +; search_view.php line: 128 search_view_relevancy = "Pertinence:" ; -; search_view.php line: 119 +; search_view.php line: 131 search_view_score = "" ; -; search_view.php line: 124 +; search_view.php line: 140 search_view_cache = "En Cache" ; -; search_view.php line: 127 +; search_view.php line: 143 search_view_as_text = "Version texte" ; -; search_view.php line: 129 +; search_view.php line: 148 search_view_similar = "Pages similaires" ; -; settings_view.php line: 70 +; settings_view.php line: 74 settings_view_settings = "Préférences" ; -; settings_view.php line: 75 +; settings_view.php line: 80 settings_view_results_per_page = "" ; -; settings_view.php line: 78 +; settings_view.php line: 85 settings_view_language_label = "" ; -; settings_view.php line: 80 +; settings_view.php line: 91 settings_view_return_yioop = "Retourner à Yioop" ; -; settings_view.php line: 81 +; settings_view.php line: 94 settings_view_save = "Enregistrer les préférences" ; -; settings_view.php line: 88 +; settings_view.php line: 102 setting_install_search_plugin = "" ; -; signin_view.php line: 61 +; signin_view.php line: 62 signin_view_signin = "" ; -; signin_view.php line: 66 +; signin_view.php line: 68 signin_view_username = "" ; -; signin_view.php line: 70 +; signin_view.php line: 74 signin_view_password = "" ; -; signin_view.php line: 76 +; signin_view.php line: 83 signin_view_login = "" ; -; signin_view.php line: 82 +; signin_view.php line: 90 signin_view_return_yioop = "" \ No newline at end of file diff --git a/locale/ko/configure.ini b/locale/ko/configure.ini index 6411482dc..b4038bc85 100755 --- a/locale/ko/configure.ini +++ b/locale/ko/configure.ini @@ -28,250 +28,252 @@ ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//controllers ; -; 
admin_controller.php line: 100 +; admin_controller.php line: 106 admin_controller_login_successful = "" ; -; admin_controller.php line: 104 +; admin_controller.php line: 111 admin_controller_login_failed = "" ; -; admin_controller.php line: 234 +; admin_controller.php line: 255 admin_controller_passwords_dont_match = "" ; -; admin_controller.php line: 241 +; admin_controller.php line: 265 admin_controller_invalid_old_password = "" ; -; admin_controller.php line: 245 +; admin_controller.php line: 272 admin_controller_change_password = "" ; -; admin_controller.php line: 267 +; admin_controller.php line: 297 admin_controller_select_username = "" ; -; admin_controller.php line: 302 +; admin_controller.php line: 334 admin_controller_select_rolename = "" ; -; admin_controller.php line: 326 +; admin_controller.php line: 360 admin_controller_passwords_dont_match = "" ; -; admin_controller.php line: 331 +; admin_controller.php line: 367 admin_controller_username_exists = "" ; -; admin_controller.php line: 336 +; admin_controller.php line: 374 admin_controller_username_added = "" ; -; admin_controller.php line: 344 +; admin_controller.php line: 383 admin_controller_username_doesnt_exists = "" ; -; admin_controller.php line: 349 +; admin_controller.php line: 390 admin_controller_username_deleted = "" ; -; admin_controller.php line: 355 +; admin_controller.php line: 397 admin_controller_username_doesnt_exists = "" ; -; admin_controller.php line: 359 +; admin_controller.php line: 403 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 363 +; admin_controller.php line: 409 admin_controller_rolename_added = "" ; -; admin_controller.php line: 371 +; admin_controller.php line: 420 admin_controller_username_doesnt_exists = "" ; -; admin_controller.php line: 375 +; admin_controller.php line: 426 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 382 +; admin_controller.php line: 436 admin_controller_rolename_deleted = "" ; -; 
admin_controller.php line: 402 +; admin_controller.php line: 459 admin_controller_select_rolename = "" ; -; admin_controller.php line: 433 +; admin_controller.php line: 494 admin_controller_select_activityname = "" ; -; admin_controller.php line: 463 +; admin_controller.php line: 527 admin_controller_rolename_exists = "" ; -; admin_controller.php line: 471 +; admin_controller.php line: 537 admin_controller_rolename_added = "" ; -; admin_controller.php line: 480 +; admin_controller.php line: 548 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 486 +; admin_controller.php line: 556 admin_controller_rolename_deleted = "" ; -; admin_controller.php line: 491 +; admin_controller.php line: 562 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 495 +; admin_controller.php line: 568 admin_controller_activityname_doesnt_exists = "" ; -; admin_controller.php line: 501 +; admin_controller.php line: 578 admin_controller_activity_added = "" ; -; admin_controller.php line: 506 +; admin_controller.php line: 584 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 511 +; admin_controller.php line: 591 admin_controller_activityname_doesnt_exists = "" ; -; admin_controller.php line: 518 +; admin_controller.php line: 603 admin_controller_activity_deleted = "" ; -; admin_controller.php line: 540 +; admin_controller.php line: 628 admin_controller_starting_new_crawl = "" ; -; admin_controller.php line: 555 +; admin_controller.php line: 648 admin_controller_no_description = "" ; -; admin_controller.php line: 575 +; admin_controller.php line: 673 admin_controller_stop_crawl = "" ; -; admin_controller.php line: 585 +; admin_controller.php line: 686 admin_controller_resume_crawl = "" ; -; admin_controller.php line: 603 +; admin_controller.php line: 717 admin_controller_delete_crawl_success = "" ; -; admin_controller.php line: 605 +; admin_controller.php line: 722 admin_controller_delete_crawl_fail = "" ; -; 
admin_controller.php line: 610 +; admin_controller.php line: 729 admin_controller_set_index = "" ; -; admin_controller.php line: 621 +; admin_controller.php line: 742 admin_controller_breadth_first = "" ; -; admin_controller.php line: 622 +; admin_controller.php line: 744 admin_controller_page_importance = "" ; -; admin_controller.php line: 647 +; admin_controller.php line: 783 admin_controller_update_seed_info = "" ; -; admin_controller.php line: 698 +; admin_controller.php line: 834 admin_controller_select_localename = "" ; -; admin_controller.php line: 735 +; admin_controller.php line: 877 admin_controller_locale_added = "" ; -; admin_controller.php line: 741 +; admin_controller.php line: 884 admin_controller_localename_doesnt_exists = "" ; -; admin_controller.php line: 748 +; admin_controller.php line: 893 admin_controller_localename_deleted = "" ; -; admin_controller.php line: 764 +; admin_controller.php line: 913 admin_controller_localestrings_updated = "" ; -; admin_controller.php line: 824 +; admin_controller.php line: 983 admin_controller_configure_work_dir_set = "" ; -; admin_controller.php line: 832 +; admin_controller.php line: 995 admin_controller_name_your_bot = "" ; -; admin_controller.php line: 837 +; admin_controller.php line: 1004 admin_controller_configure_work_profile_made = "" ; -; admin_controller.php line: 841 +; admin_controller.php line: 1010 admin_controller_configure_no_set_config = "" ; -; admin_controller.php line: 848 +; admin_controller.php line: 1021 admin_controller_configure_no_create_profile = "" ; -; admin_controller.php line: 854 +; admin_controller.php line: 1030 admin_controller_configure_work_dir_invalid = "" ; -; admin_controller.php line: 861 +; admin_controller.php line: 1041 admin_controller_configure_work_dir_invalid = "" ; -; admin_controller.php line: 899 +; admin_controller.php line: 1101 admin_controller_configure_no_change_db = "" ; -; admin_controller.php line: 910 +; admin_controller.php line: 1115 
admin_controller_configure_profile_change = "" ; -; admin_controller.php line: 918 +; admin_controller.php line: 1129 admin_controller_configure_no_change_profile = "" ; -; admin_controller.php line: 946 +; admin_controller.php line: 1163 admin_controller_describe_robot = "" ; -; search_controller.php line: 108 +; search_controller.php line: 114 search_controller_logout_successful = "" ; -; search_controller.php line: 310 +; search_controller.php line: 334 search_controller_cached_version = "" ; -; settings_controller.php line: 109 +; settings_controller.php line: 114 settings_controller_settings_saved = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views ; -; admin_view.php line: 74 +; admin_view.php line: 76 admin_view_admin = "" ; -; admin_view.php line: 91 +; admin_view.php line: 96 adminview_auto_logout_one_minute = "" ; -; crawlstatus_view.php line: 59 +; crawlstatus_view.php line: 60 crawlstatus_view_currently_processing = "" ; -; crawlstatus_view.php line: 60 +; crawlstatus_view.php line: 61 crawlstatus_view_description = "" ; -; crawlstatus_view.php line: 64 +; crawlstatus_view.php line: 68 managecrawl_element_stop_crawl = "" ; -; crawlstatus_view.php line: 67 +; crawlstatus_view.php line: 71 crawlstatus_view_no_description = "" ; -; crawlstatus_view.php line: 70 +; crawlstatus_view.php line: 74 crawlstatus_view_time_started = "" ; -; crawlstatus_view.php line: 72 +; crawlstatus_view.php line: 77 crawlstatus_view_no_crawl_time = "" ; -; crawlstatus_view.php line: 74 +; crawlstatus_view.php line: 79 crawlstatus_view_total_urls = "" ; -; crawlstatus_view.php line: 75 +; crawlstatus_view.php line: 82 crawlstatus_view_most_recent_fetcher = "" ; -; crawlstatus_view.php line: 81 +; crawlstatus_view.php line: 88 crawlstatus_view_no_fetcher = "" ; -; crawlstatus_view.php line: 84 +; crawlstatus_view.php line: 91 crawlstatus_view_most_recent_urls = "" ; -; crawlstatus_view.php line: 91 +; crawlstatus_view.php line: 99 crawlstatus_view_no_recent_urls = "" 
; -; crawlstatus_view.php line: 95 +; crawlstatus_view.php line: 103 crawlstatus_view_previous_crawls = "" ; -; crawlstatus_view.php line: 101 +; crawlstatus_view.php line: 109 crawlstatus_view_description = "" +; +; crawlstatus_view.php line: 110 crawlstatus_view_time_started = "" ; -; crawlstatus_view.php line: 102 +; crawlstatus_view.php line: 111 crawlstatus_view_total_urls = "" ; -; crawlstatus_view.php line: 103 +; crawlstatus_view.php line: 112 crawlstatus_view_actions = "" ; -; crawlstatus_view.php line: 109 +; crawlstatus_view.php line: 121 crawlstatus_view_resume = "" ; -; crawlstatus_view.php line: 113 +; crawlstatus_view.php line: 127 crawlstatus_view_set_index = "" ; -; crawlstatus_view.php line: 116 +; crawlstatus_view.php line: 130 crawlstatus_view_search_index = "" ; -; crawlstatus_view.php line: 121 +; crawlstatus_view.php line: 137 crawlstatus_view_delete = "" ; -; crawlstatus_view.php line: 128 +; crawlstatus_view.php line: 144 crawlstatus_view_no_previous_crawl = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/elements @@ -342,28 +344,28 @@ configure_element_robot_description = "" ; configure_element.php line: 148 configure_element_submit = "" ; -; crawloptions_element.php line: 60 +; crawloptions_element.php line: 62 crawloptions_element_back_to_manage = "" ; -; crawloptions_element.php line: 62 +; crawloptions_element.php line: 64 crawloptions_element_edit_crawl_options = "" ; -; crawloptions_element.php line: 70 +; crawloptions_element.php line: 74 crawloptions_element_crawl_order = "" ; -; crawloptions_element.php line: 73 +; crawloptions_element.php line: 79 crawloptions_element_restrict_by_url = "" ; -; crawloptions_element.php line: 75 +; crawloptions_element.php line: 86 crawloptions_element_allowed_to_crawl = "" ; -; crawloptions_element.php line: 79 +; crawloptions_element.php line: 91 crawloptions_element_disallowed_to_crawl = "" ; -; crawloptions_element.php line: 83 +; crawloptions_element.php line: 96 
crawloptions_element_seed_sites = "" ; -; crawloptions_element.php line: 87 +; crawloptions_element.php line: 101 crawloptions_element_save_options = "" ; ; editlocales_element.php line: 60 @@ -441,175 +443,177 @@ managelocales_element_localetag = "" managelocales_element_writingmode = "" managelocales_element_percenttranslated = "" ; -; manageroles_element.php line: 55 +; manageroles_element.php line: 57 manageroles_element_add_role = "" ; -; manageroles_element.php line: 63 +; manageroles_element.php line: 67 manageroles_element_rolename = "" ; -; manageroles_element.php line: 64 +; manageroles_element.php line: 71 manageroles_element_submit = "" ; -; manageroles_element.php line: 69 +; manageroles_element.php line: 76 manageroles_element_delete_role = "" ; -; manageroles_element.php line: 77 +; manageroles_element.php line: 86 manageusers_element_delete_rolename = "" ; -; manageroles_element.php line: 78 +; manageroles_element.php line: 90 manageroles_element_submit = "" ; -; manageroles_element.php line: 82 +; manageroles_element.php line: 94 manageroles_element_view_role_activities = "" ; -; manageroles_element.php line: 89 +; manageroles_element.php line: 103 manageusers_element_select_role = "" ; -; manageroles_element.php line: 104 +; manageroles_element.php line: 123 manageusers_element_add_activity = "" ; -; manageroles_element.php line: 106 +; manageroles_element.php line: 128 manageroles_element_submit = "" ; ; manageusers_element.php line: 57 manageusers_element_add_user = "" ; -; manageusers_element.php line: 65 +; manageusers_element.php line: 67 manageusers_element_username = "" ; -; manageusers_element.php line: 67 +; manageusers_element.php line: 71 manageusers_element_password = "" ; -; manageusers_element.php line: 69 +; manageusers_element.php line: 75 manageusers_element_retype_password = "" ; -; manageusers_element.php line: 71 +; manageusers_element.php line: 80 manageusers_element_submit = "" ; -; manageusers_element.php line: 76 +; 
manageusers_element.php line: 86 manageusers_element_delete_user = "" ; -; manageusers_element.php line: 84 +; manageusers_element.php line: 96 manageusers_element_delete_username = "" ; -; manageusers_element.php line: 85 +; manageusers_element.php line: 100 manageusers_element_submit = "" ; -; manageusers_element.php line: 90 +; manageusers_element.php line: 105 manageusers_element_view_user_roles = "" ; -; manageusers_element.php line: 97 +; manageusers_element.php line: 114 manageusers_element_select_user = "" ; -; manageusers_element.php line: 112 +; manageusers_element.php line: 135 manageusers_element_add_role = "" ; -; manageusers_element.php line: 114 +; manageusers_element.php line: 140 manageusers_element_submit = "" ; -; signin_element.php line: 56 +; signin_element.php line: 57 signin_element_settings = "" ; -; signin_element.php line: 59 +; signin_element.php line: 61 signin_element_signin = "" ; -; signin_element.php line: 63 +; signin_element.php line: 67 signin_element_admin = "" ; -; signin_element.php line: 64 +; signin_element.php line: 69 signin_element_signout = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/helpers ; -; pagination_helper.php line: 86 +; pagination_helper.php line: 93 pagination_helper_previous = "" ; -; pagination_helper.php line: 99 +; pagination_helper.php line: 109 pagination_helper_next = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/layouts ; -; web_layout.php line: 68 +; web_layout.php line: 70 web_layout_description = "" ; -; web_layout.php line: 80 +; web_layout.php line: 83 web_layout_query_statistics = "" ; -; web_layout.php line: 81 +; web_layout.php line: 84 web_layout_total_elapsed_time = "" ; -; web_layout.php line: 84 +; web_layout.php line: 89 web_layout_query_time = "" ; ; nocache_view.php line: 65 nocache_view_no_cache = "" ; -; search_view.php line: 83 -search_view_developed_seek_quarry = "" -; -; search_view.php line: 88 +; search_view.php line: 85 search_view_input_label = 
"" ; -; search_view.php line: 89 +; search_view.php line: 88 search_view_input_placeholder = "" ; ; search_view.php line: 90 search_view_search = "" ; -; search_view.php line: 100 +; search_view.php line: 99 +search_view_developed_seek_quarry = "" +; +; search_view.php line: 103 search_view_query_results = "" +; +; search_view.php line: 104 search_view_calculated = "" ; -; search_view.php line: 101 +; search_view.php line: 105 search_view_results = "" ; -; search_view.php line: 117 +; search_view.php line: 126 search_view_rank = "" ; -; search_view.php line: 118 +; search_view.php line: 128 search_view_relevancy = "" ; -; search_view.php line: 119 +; search_view.php line: 131 search_view_score = "" ; -; search_view.php line: 124 +; search_view.php line: 140 search_view_cache = "" ; -; search_view.php line: 127 +; search_view.php line: 143 search_view_as_text = "" ; -; search_view.php line: 129 +; search_view.php line: 148 search_view_similar = "" ; -; settings_view.php line: 70 +; settings_view.php line: 74 settings_view_settings = "" ; -; settings_view.php line: 75 +; settings_view.php line: 80 settings_view_results_per_page = "" ; -; settings_view.php line: 78 +; settings_view.php line: 85 settings_view_language_label = "" ; -; settings_view.php line: 80 +; settings_view.php line: 91 settings_view_return_yioop = "" ; -; settings_view.php line: 81 +; settings_view.php line: 94 settings_view_save = "" ; -; settings_view.php line: 88 +; settings_view.php line: 102 setting_install_search_plugin = "" ; -; signin_view.php line: 61 +; signin_view.php line: 62 signin_view_signin = "" ; -; signin_view.php line: 66 +; signin_view.php line: 68 signin_view_username = "" ; -; signin_view.php line: 70 +; signin_view.php line: 74 signin_view_password = "" ; -; signin_view.php line: 76 +; signin_view.php line: 83 signin_view_login = "" ; -; signin_view.php line: 82 +; signin_view.php line: 90 signin_view_return_yioop = "" \ No newline at end of file diff --git 
a/locale/rn-US/configure.ini b/locale/rn-US/configure.ini index a150ad712..a413eda27 100755 --- a/locale/rn-US/configure.ini +++ b/locale/rn-US/configure.ini @@ -28,250 +28,252 @@ ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//controllers ; -; admin_controller.php line: 100 +; admin_controller.php line: 106 admin_controller_login_successful = "" ; -; admin_controller.php line: 104 +; admin_controller.php line: 111 admin_controller_login_failed = "" ; -; admin_controller.php line: 234 +; admin_controller.php line: 255 admin_controller_passwords_dont_match = "" ; -; admin_controller.php line: 241 +; admin_controller.php line: 265 admin_controller_invalid_old_password = "" ; -; admin_controller.php line: 245 +; admin_controller.php line: 272 admin_controller_change_password = "" ; -; admin_controller.php line: 267 +; admin_controller.php line: 297 admin_controller_select_username = "" ; -; admin_controller.php line: 302 +; admin_controller.php line: 334 admin_controller_select_rolename = "" ; -; admin_controller.php line: 326 +; admin_controller.php line: 360 admin_controller_passwords_dont_match = "" ; -; admin_controller.php line: 331 +; admin_controller.php line: 367 admin_controller_username_exists = "" ; -; admin_controller.php line: 336 +; admin_controller.php line: 374 admin_controller_username_added = "" ; -; admin_controller.php line: 344 +; admin_controller.php line: 383 admin_controller_username_doesnt_exists = "" ; -; admin_controller.php line: 349 +; admin_controller.php line: 390 admin_controller_username_deleted = "" ; -; admin_controller.php line: 355 +; admin_controller.php line: 397 admin_controller_username_doesnt_exists = "" ; -; admin_controller.php line: 359 +; admin_controller.php line: 403 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 363 +; admin_controller.php line: 409 admin_controller_rolename_added = "" ; -; admin_controller.php line: 371 +; admin_controller.php line: 420 
admin_controller_username_doesnt_exists = "" ; -; admin_controller.php line: 375 +; admin_controller.php line: 426 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 382 +; admin_controller.php line: 436 admin_controller_rolename_deleted = "" ; -; admin_controller.php line: 402 +; admin_controller.php line: 459 admin_controller_select_rolename = "" ; -; admin_controller.php line: 433 +; admin_controller.php line: 494 admin_controller_select_activityname = "" ; -; admin_controller.php line: 463 +; admin_controller.php line: 527 admin_controller_rolename_exists = "" ; -; admin_controller.php line: 471 +; admin_controller.php line: 537 admin_controller_rolename_added = "" ; -; admin_controller.php line: 480 +; admin_controller.php line: 548 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 486 +; admin_controller.php line: 556 admin_controller_rolename_deleted = "" ; -; admin_controller.php line: 491 +; admin_controller.php line: 562 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 495 +; admin_controller.php line: 568 admin_controller_activityname_doesnt_exists = "" ; -; admin_controller.php line: 501 +; admin_controller.php line: 578 admin_controller_activity_added = "" ; -; admin_controller.php line: 506 +; admin_controller.php line: 584 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 511 +; admin_controller.php line: 591 admin_controller_activityname_doesnt_exists = "" ; -; admin_controller.php line: 518 +; admin_controller.php line: 603 admin_controller_activity_deleted = "" ; -; admin_controller.php line: 540 +; admin_controller.php line: 628 admin_controller_starting_new_crawl = "" ; -; admin_controller.php line: 555 +; admin_controller.php line: 648 admin_controller_no_description = "" ; -; admin_controller.php line: 575 +; admin_controller.php line: 673 admin_controller_stop_crawl = "" ; -; admin_controller.php line: 585 +; admin_controller.php line: 
686 admin_controller_resume_crawl = "" ; -; admin_controller.php line: 603 +; admin_controller.php line: 717 admin_controller_delete_crawl_success = "" ; -; admin_controller.php line: 605 +; admin_controller.php line: 722 admin_controller_delete_crawl_fail = "" ; -; admin_controller.php line: 610 +; admin_controller.php line: 729 admin_controller_set_index = "" ; -; admin_controller.php line: 621 +; admin_controller.php line: 742 admin_controller_breadth_first = "" ; -; admin_controller.php line: 622 +; admin_controller.php line: 744 admin_controller_page_importance = "" ; -; admin_controller.php line: 647 +; admin_controller.php line: 783 admin_controller_update_seed_info = "" ; -; admin_controller.php line: 698 +; admin_controller.php line: 834 admin_controller_select_localename = "" ; -; admin_controller.php line: 735 +; admin_controller.php line: 877 admin_controller_locale_added = "" ; -; admin_controller.php line: 741 +; admin_controller.php line: 884 admin_controller_localename_doesnt_exists = "" ; -; admin_controller.php line: 748 +; admin_controller.php line: 893 admin_controller_localename_deleted = "" ; -; admin_controller.php line: 764 +; admin_controller.php line: 913 admin_controller_localestrings_updated = "" ; -; admin_controller.php line: 824 +; admin_controller.php line: 983 admin_controller_configure_work_dir_set = "" ; -; admin_controller.php line: 832 +; admin_controller.php line: 995 admin_controller_name_your_bot = "" ; -; admin_controller.php line: 837 +; admin_controller.php line: 1004 admin_controller_configure_work_profile_made = "" ; -; admin_controller.php line: 841 +; admin_controller.php line: 1010 admin_controller_configure_no_set_config = "" ; -; admin_controller.php line: 848 +; admin_controller.php line: 1021 admin_controller_configure_no_create_profile = "" ; -; admin_controller.php line: 854 +; admin_controller.php line: 1030 admin_controller_configure_work_dir_invalid = "" ; -; admin_controller.php line: 861 +; 
admin_controller.php line: 1041 admin_controller_configure_work_dir_invalid = "" ; -; admin_controller.php line: 899 +; admin_controller.php line: 1101 admin_controller_configure_no_change_db = "" ; -; admin_controller.php line: 910 +; admin_controller.php line: 1115 admin_controller_configure_profile_change = "" ; -; admin_controller.php line: 918 +; admin_controller.php line: 1129 admin_controller_configure_no_change_profile = "" ; -; admin_controller.php line: 946 +; admin_controller.php line: 1163 admin_controller_describe_robot = "" ; -; search_controller.php line: 108 +; search_controller.php line: 114 search_controller_logout_successful = "Logout Successful!" ; -; search_controller.php line: 310 +; search_controller.php line: 334 search_controller_cached_version = "" ; -; settings_controller.php line: 109 +; settings_controller.php line: 114 settings_controller_settings_saved = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views ; -; admin_view.php line: 74 +; admin_view.php line: 76 admin_view_admin = "" ; -; admin_view.php line: 91 +; admin_view.php line: 96 adminview_auto_logout_one_minute = "" ; -; crawlstatus_view.php line: 59 +; crawlstatus_view.php line: 60 crawlstatus_view_currently_processing = "" ; -; crawlstatus_view.php line: 60 +; crawlstatus_view.php line: 61 crawlstatus_view_description = "" ; -; crawlstatus_view.php line: 64 +; crawlstatus_view.php line: 68 managecrawl_element_stop_crawl = "" ; -; crawlstatus_view.php line: 67 +; crawlstatus_view.php line: 71 crawlstatus_view_no_description = "" ; -; crawlstatus_view.php line: 70 +; crawlstatus_view.php line: 74 crawlstatus_view_time_started = "" ; -; crawlstatus_view.php line: 72 +; crawlstatus_view.php line: 77 crawlstatus_view_no_crawl_time = "" ; -; crawlstatus_view.php line: 74 +; crawlstatus_view.php line: 79 crawlstatus_view_total_urls = "" ; -; crawlstatus_view.php line: 75 +; crawlstatus_view.php line: 82 crawlstatus_view_most_recent_fetcher = "" ; -; crawlstatus_view.php 
line: 81 +; crawlstatus_view.php line: 88 crawlstatus_view_no_fetcher = "" ; -; crawlstatus_view.php line: 84 +; crawlstatus_view.php line: 91 crawlstatus_view_most_recent_urls = "" ; -; crawlstatus_view.php line: 91 +; crawlstatus_view.php line: 99 crawlstatus_view_no_recent_urls = "" ; -; crawlstatus_view.php line: 95 +; crawlstatus_view.php line: 103 crawlstatus_view_previous_crawls = "" ; -; crawlstatus_view.php line: 101 +; crawlstatus_view.php line: 109 crawlstatus_view_description = "" +; +; crawlstatus_view.php line: 110 crawlstatus_view_time_started = "" ; -; crawlstatus_view.php line: 102 +; crawlstatus_view.php line: 111 crawlstatus_view_total_urls = "" ; -; crawlstatus_view.php line: 103 +; crawlstatus_view.php line: 112 crawlstatus_view_actions = "" ; -; crawlstatus_view.php line: 109 +; crawlstatus_view.php line: 121 crawlstatus_view_resume = "" ; -; crawlstatus_view.php line: 113 +; crawlstatus_view.php line: 127 crawlstatus_view_set_index = "" ; -; crawlstatus_view.php line: 116 +; crawlstatus_view.php line: 130 crawlstatus_view_search_index = "" ; -; crawlstatus_view.php line: 121 +; crawlstatus_view.php line: 137 crawlstatus_view_delete = "" ; -; crawlstatus_view.php line: 128 +; crawlstatus_view.php line: 144 crawlstatus_view_no_previous_crawl = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/elements @@ -342,28 +344,28 @@ configure_element_robot_description = "" ; configure_element.php line: 148 configure_element_submit = "" ; -; crawloptions_element.php line: 60 +; crawloptions_element.php line: 62 crawloptions_element_back_to_manage = "" ; -; crawloptions_element.php line: 62 +; crawloptions_element.php line: 64 crawloptions_element_edit_crawl_options = "" ; -; crawloptions_element.php line: 70 +; crawloptions_element.php line: 74 crawloptions_element_crawl_order = "" ; -; crawloptions_element.php line: 73 +; crawloptions_element.php line: 79 crawloptions_element_restrict_by_url = "" ; -; crawloptions_element.php line: 75 +; 
crawloptions_element.php line: 86 crawloptions_element_allowed_to_crawl = "" ; -; crawloptions_element.php line: 79 +; crawloptions_element.php line: 91 crawloptions_element_disallowed_to_crawl = "" ; -; crawloptions_element.php line: 83 +; crawloptions_element.php line: 96 crawloptions_element_seed_sites = "" ; -; crawloptions_element.php line: 87 +; crawloptions_element.php line: 101 crawloptions_element_save_options = "" ; ; editlocales_element.php line: 60 @@ -441,175 +443,177 @@ managelocales_element_localetag = "" managelocales_element_writingmode = "" managelocales_element_percenttranslated = "" ; -; manageroles_element.php line: 55 +; manageroles_element.php line: 57 manageroles_element_add_role = "" ; -; manageroles_element.php line: 63 +; manageroles_element.php line: 67 manageroles_element_rolename = "" ; -; manageroles_element.php line: 64 +; manageroles_element.php line: 71 manageroles_element_submit = "" ; -; manageroles_element.php line: 69 +; manageroles_element.php line: 76 manageroles_element_delete_role = "" ; -; manageroles_element.php line: 77 +; manageroles_element.php line: 86 manageusers_element_delete_rolename = "" ; -; manageroles_element.php line: 78 +; manageroles_element.php line: 90 manageroles_element_submit = "" ; -; manageroles_element.php line: 82 +; manageroles_element.php line: 94 manageroles_element_view_role_activities = "" ; -; manageroles_element.php line: 89 +; manageroles_element.php line: 103 manageusers_element_select_role = "" ; -; manageroles_element.php line: 104 +; manageroles_element.php line: 123 manageusers_element_add_activity = "" ; -; manageroles_element.php line: 106 +; manageroles_element.php line: 128 manageroles_element_submit = "" ; ; manageusers_element.php line: 57 manageusers_element_add_user = "" ; -; manageusers_element.php line: 65 +; manageusers_element.php line: 67 manageusers_element_username = "" ; -; manageusers_element.php line: 67 +; manageusers_element.php line: 71 manageusers_element_password 
= "" ; -; manageusers_element.php line: 69 +; manageusers_element.php line: 75 manageusers_element_retype_password = "" ; -; manageusers_element.php line: 71 +; manageusers_element.php line: 80 manageusers_element_submit = "" ; -; manageusers_element.php line: 76 +; manageusers_element.php line: 86 manageusers_element_delete_user = "" ; -; manageusers_element.php line: 84 +; manageusers_element.php line: 96 manageusers_element_delete_username = "" ; -; manageusers_element.php line: 85 +; manageusers_element.php line: 100 manageusers_element_submit = "" ; -; manageusers_element.php line: 90 +; manageusers_element.php line: 105 manageusers_element_view_user_roles = "" ; -; manageusers_element.php line: 97 +; manageusers_element.php line: 114 manageusers_element_select_user = "" ; -; manageusers_element.php line: 112 +; manageusers_element.php line: 135 manageusers_element_add_role = "" ; -; manageusers_element.php line: 114 +; manageusers_element.php line: 140 manageusers_element_submit = "" ; -; signin_element.php line: 56 +; signin_element.php line: 57 signin_element_settings = "" ; -; signin_element.php line: 59 +; signin_element.php line: 61 signin_element_signin = "" ; -; signin_element.php line: 63 +; signin_element.php line: 67 signin_element_admin = "" ; -; signin_element.php line: 64 +; signin_element.php line: 69 signin_element_signout = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/helpers ; -; pagination_helper.php line: 86 +; pagination_helper.php line: 93 pagination_helper_previous = "" ; -; pagination_helper.php line: 99 +; pagination_helper.php line: 109 pagination_helper_next = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/layouts ; -; web_layout.php line: 68 +; web_layout.php line: 70 web_layout_description = "" ; -; web_layout.php line: 80 +; web_layout.php line: 83 web_layout_query_statistics = "" ; -; web_layout.php line: 81 +; web_layout.php line: 84 web_layout_total_elapsed_time = "" ; -; web_layout.php line: 84 
+; web_layout.php line: 89 web_layout_query_time = "" ; ; nocache_view.php line: 65 nocache_view_no_cache = "" ; -; search_view.php line: 83 -search_view_developed_seek_quarry = "" -; -; search_view.php line: 88 +; search_view.php line: 85 search_view_input_label = "" ; -; search_view.php line: 89 +; search_view.php line: 88 search_view_input_placeholder = "" ; ; search_view.php line: 90 search_view_search = "Search" ; -; search_view.php line: 100 +; search_view.php line: 99 +search_view_developed_seek_quarry = "" +; +; search_view.php line: 103 search_view_query_results = "" +; +; search_view.php line: 104 search_view_calculated = "" ; -; search_view.php line: 101 +; search_view.php line: 105 search_view_results = "" ; -; search_view.php line: 117 +; search_view.php line: 126 search_view_rank = "" ; -; search_view.php line: 118 +; search_view.php line: 128 search_view_relevancy = "" ; -; search_view.php line: 119 +; search_view.php line: 131 search_view_score = "" ; -; search_view.php line: 124 +; search_view.php line: 140 search_view_cache = "" ; -; search_view.php line: 127 +; search_view.php line: 143 search_view_as_text = "" ; -; search_view.php line: 129 +; search_view.php line: 148 search_view_similar = "" ; -; settings_view.php line: 70 +; settings_view.php line: 74 settings_view_settings = "" ; -; settings_view.php line: 75 +; settings_view.php line: 80 settings_view_results_per_page = "" ; -; settings_view.php line: 78 +; settings_view.php line: 85 settings_view_language_label = "" ; -; settings_view.php line: 80 +; settings_view.php line: 91 settings_view_return_yioop = "" ; -; settings_view.php line: 81 +; settings_view.php line: 94 settings_view_save = "" ; -; settings_view.php line: 88 +; settings_view.php line: 102 setting_install_search_plugin = "" ; -; signin_view.php line: 61 +; signin_view.php line: 62 signin_view_signin = "" ; -; signin_view.php line: 66 +; signin_view.php line: 68 signin_view_username = "" ; -; signin_view.php line: 70 +; 
signin_view.php line: 74 signin_view_password = "" ; -; signin_view.php line: 76 +; signin_view.php line: 83 signin_view_login = "" ; -; signin_view.php line: 82 +; signin_view.php line: 90 signin_view_return_yioop = "" \ No newline at end of file diff --git a/locale/vi-VN/configure.ini b/locale/vi-VN/configure.ini index 157fb6c36..a03825825 100755 --- a/locale/vi-VN/configure.ini +++ b/locale/vi-VN/configure.ini @@ -28,250 +28,252 @@ ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//controllers ; -; admin_controller.php line: 100 +; admin_controller.php line: 106 admin_controller_login_successful = "" ; -; admin_controller.php line: 104 +; admin_controller.php line: 111 admin_controller_login_failed = "" ; -; admin_controller.php line: 234 +; admin_controller.php line: 255 admin_controller_passwords_dont_match = "" ; -; admin_controller.php line: 241 +; admin_controller.php line: 265 admin_controller_invalid_old_password = "" ; -; admin_controller.php line: 245 +; admin_controller.php line: 272 admin_controller_change_password = "" ; -; admin_controller.php line: 267 +; admin_controller.php line: 297 admin_controller_select_username = "" ; -; admin_controller.php line: 302 +; admin_controller.php line: 334 admin_controller_select_rolename = "" ; -; admin_controller.php line: 326 +; admin_controller.php line: 360 admin_controller_passwords_dont_match = "" ; -; admin_controller.php line: 331 +; admin_controller.php line: 367 admin_controller_username_exists = "" ; -; admin_controller.php line: 336 +; admin_controller.php line: 374 admin_controller_username_added = "" ; -; admin_controller.php line: 344 +; admin_controller.php line: 383 admin_controller_username_doesnt_exists = "" ; -; admin_controller.php line: 349 +; admin_controller.php line: 390 admin_controller_username_deleted = "" ; -; admin_controller.php line: 355 +; admin_controller.php line: 397 admin_controller_username_doesnt_exists = "" ; -; admin_controller.php line: 359 +; admin_controller.php 
line: 403 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 363 +; admin_controller.php line: 409 admin_controller_rolename_added = "" ; -; admin_controller.php line: 371 +; admin_controller.php line: 420 admin_controller_username_doesnt_exists = "" ; -; admin_controller.php line: 375 +; admin_controller.php line: 426 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 382 +; admin_controller.php line: 436 admin_controller_rolename_deleted = "" ; -; admin_controller.php line: 402 +; admin_controller.php line: 459 admin_controller_select_rolename = "" ; -; admin_controller.php line: 433 +; admin_controller.php line: 494 admin_controller_select_activityname = "" ; -; admin_controller.php line: 463 +; admin_controller.php line: 527 admin_controller_rolename_exists = "" ; -; admin_controller.php line: 471 +; admin_controller.php line: 537 admin_controller_rolename_added = "" ; -; admin_controller.php line: 480 +; admin_controller.php line: 548 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 486 +; admin_controller.php line: 556 admin_controller_rolename_deleted = "" ; -; admin_controller.php line: 491 +; admin_controller.php line: 562 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 495 +; admin_controller.php line: 568 admin_controller_activityname_doesnt_exists = "" ; -; admin_controller.php line: 501 +; admin_controller.php line: 578 admin_controller_activity_added = "" ; -; admin_controller.php line: 506 +; admin_controller.php line: 584 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 511 +; admin_controller.php line: 591 admin_controller_activityname_doesnt_exists = "" ; -; admin_controller.php line: 518 +; admin_controller.php line: 603 admin_controller_activity_deleted = "" ; -; admin_controller.php line: 540 +; admin_controller.php line: 628 admin_controller_starting_new_crawl = "" ; -; admin_controller.php line: 555 +; 
admin_controller.php line: 648 admin_controller_no_description = "" ; -; admin_controller.php line: 575 +; admin_controller.php line: 673 admin_controller_stop_crawl = "" ; -; admin_controller.php line: 585 +; admin_controller.php line: 686 admin_controller_resume_crawl = "" ; -; admin_controller.php line: 603 +; admin_controller.php line: 717 admin_controller_delete_crawl_success = "" ; -; admin_controller.php line: 605 +; admin_controller.php line: 722 admin_controller_delete_crawl_fail = "" ; -; admin_controller.php line: 610 +; admin_controller.php line: 729 admin_controller_set_index = "" ; -; admin_controller.php line: 621 +; admin_controller.php line: 742 admin_controller_breadth_first = "" ; -; admin_controller.php line: 622 +; admin_controller.php line: 744 admin_controller_page_importance = "" ; -; admin_controller.php line: 647 +; admin_controller.php line: 783 admin_controller_update_seed_info = "" ; -; admin_controller.php line: 698 +; admin_controller.php line: 834 admin_controller_select_localename = "" ; -; admin_controller.php line: 735 +; admin_controller.php line: 877 admin_controller_locale_added = "" ; -; admin_controller.php line: 741 +; admin_controller.php line: 884 admin_controller_localename_doesnt_exists = "" ; -; admin_controller.php line: 748 +; admin_controller.php line: 893 admin_controller_localename_deleted = "" ; -; admin_controller.php line: 764 +; admin_controller.php line: 913 admin_controller_localestrings_updated = "" ; -; admin_controller.php line: 824 +; admin_controller.php line: 983 admin_controller_configure_work_dir_set = "" ; -; admin_controller.php line: 832 +; admin_controller.php line: 995 admin_controller_name_your_bot = "" ; -; admin_controller.php line: 837 +; admin_controller.php line: 1004 admin_controller_configure_work_profile_made = "" ; -; admin_controller.php line: 841 +; admin_controller.php line: 1010 admin_controller_configure_no_set_config = "" ; -; admin_controller.php line: 848 +; admin_controller.php 
line: 1021 admin_controller_configure_no_create_profile = "" ; -; admin_controller.php line: 854 +; admin_controller.php line: 1030 admin_controller_configure_work_dir_invalid = "" ; -; admin_controller.php line: 861 +; admin_controller.php line: 1041 admin_controller_configure_work_dir_invalid = "" ; -; admin_controller.php line: 899 +; admin_controller.php line: 1101 admin_controller_configure_no_change_db = "" ; -; admin_controller.php line: 910 +; admin_controller.php line: 1115 admin_controller_configure_profile_change = "" ; -; admin_controller.php line: 918 +; admin_controller.php line: 1129 admin_controller_configure_no_change_profile = "" ; -; admin_controller.php line: 946 +; admin_controller.php line: 1163 admin_controller_describe_robot = "" ; -; search_controller.php line: 108 +; search_controller.php line: 114 search_controller_logout_successful = "" ; -; search_controller.php line: 310 +; search_controller.php line: 334 search_controller_cached_version = "Trang gốc này: %s đã tìm được bởi công cụ tìm kiẽm Yioop vào ngày %s." 
; -; settings_controller.php line: 109 +; settings_controller.php line: 114 settings_controller_settings_saved = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views ; -; admin_view.php line: 74 +; admin_view.php line: 76 admin_view_admin = "" ; -; admin_view.php line: 91 +; admin_view.php line: 96 adminview_auto_logout_one_minute = "" ; -; crawlstatus_view.php line: 59 +; crawlstatus_view.php line: 60 crawlstatus_view_currently_processing = "" ; -; crawlstatus_view.php line: 60 +; crawlstatus_view.php line: 61 crawlstatus_view_description = "" ; -; crawlstatus_view.php line: 64 +; crawlstatus_view.php line: 68 managecrawl_element_stop_crawl = "" ; -; crawlstatus_view.php line: 67 +; crawlstatus_view.php line: 71 crawlstatus_view_no_description = "" ; -; crawlstatus_view.php line: 70 +; crawlstatus_view.php line: 74 crawlstatus_view_time_started = "" ; -; crawlstatus_view.php line: 72 +; crawlstatus_view.php line: 77 crawlstatus_view_no_crawl_time = "" ; -; crawlstatus_view.php line: 74 +; crawlstatus_view.php line: 79 crawlstatus_view_total_urls = "" ; -; crawlstatus_view.php line: 75 +; crawlstatus_view.php line: 82 crawlstatus_view_most_recent_fetcher = "" ; -; crawlstatus_view.php line: 81 +; crawlstatus_view.php line: 88 crawlstatus_view_no_fetcher = "" ; -; crawlstatus_view.php line: 84 +; crawlstatus_view.php line: 91 crawlstatus_view_most_recent_urls = "" ; -; crawlstatus_view.php line: 91 +; crawlstatus_view.php line: 99 crawlstatus_view_no_recent_urls = "" ; -; crawlstatus_view.php line: 95 +; crawlstatus_view.php line: 103 crawlstatus_view_previous_crawls = "" ; -; crawlstatus_view.php line: 101 +; crawlstatus_view.php line: 109 crawlstatus_view_description = "" +; +; crawlstatus_view.php line: 110 crawlstatus_view_time_started = "" ; -; crawlstatus_view.php line: 102 +; crawlstatus_view.php line: 111 crawlstatus_view_total_urls = "" ; -; crawlstatus_view.php line: 103 +; crawlstatus_view.php line: 112 crawlstatus_view_actions = "" ; -; 
crawlstatus_view.php line: 109 +; crawlstatus_view.php line: 121 crawlstatus_view_resume = "" ; -; crawlstatus_view.php line: 113 +; crawlstatus_view.php line: 127 crawlstatus_view_set_index = "" ; -; crawlstatus_view.php line: 116 +; crawlstatus_view.php line: 130 crawlstatus_view_search_index = "" ; -; crawlstatus_view.php line: 121 +; crawlstatus_view.php line: 137 crawlstatus_view_delete = "" ; -; crawlstatus_view.php line: 128 +; crawlstatus_view.php line: 144 crawlstatus_view_no_previous_crawl = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/elements @@ -342,28 +344,28 @@ configure_element_robot_description = "" ; configure_element.php line: 148 configure_element_submit = "" ; -; crawloptions_element.php line: 60 +; crawloptions_element.php line: 62 crawloptions_element_back_to_manage = "" ; -; crawloptions_element.php line: 62 +; crawloptions_element.php line: 64 crawloptions_element_edit_crawl_options = "" ; -; crawloptions_element.php line: 70 +; crawloptions_element.php line: 74 crawloptions_element_crawl_order = "" ; -; crawloptions_element.php line: 73 +; crawloptions_element.php line: 79 crawloptions_element_restrict_by_url = "" ; -; crawloptions_element.php line: 75 +; crawloptions_element.php line: 86 crawloptions_element_allowed_to_crawl = "" ; -; crawloptions_element.php line: 79 +; crawloptions_element.php line: 91 crawloptions_element_disallowed_to_crawl = "" ; -; crawloptions_element.php line: 83 +; crawloptions_element.php line: 96 crawloptions_element_seed_sites = "" ; -; crawloptions_element.php line: 87 +; crawloptions_element.php line: 101 crawloptions_element_save_options = "" ; ; editlocales_element.php line: 60 @@ -441,175 +443,177 @@ managelocales_element_localetag = "" managelocales_element_writingmode = "" managelocales_element_percenttranslated = "" ; -; manageroles_element.php line: 55 +; manageroles_element.php line: 57 manageroles_element_add_role = "" ; -; manageroles_element.php line: 63 +; manageroles_element.php 
line: 67 manageroles_element_rolename = "" ; -; manageroles_element.php line: 64 +; manageroles_element.php line: 71 manageroles_element_submit = "" ; -; manageroles_element.php line: 69 +; manageroles_element.php line: 76 manageroles_element_delete_role = "" ; -; manageroles_element.php line: 77 +; manageroles_element.php line: 86 manageusers_element_delete_rolename = "" ; -; manageroles_element.php line: 78 +; manageroles_element.php line: 90 manageroles_element_submit = "" ; -; manageroles_element.php line: 82 +; manageroles_element.php line: 94 manageroles_element_view_role_activities = "" ; -; manageroles_element.php line: 89 +; manageroles_element.php line: 103 manageusers_element_select_role = "" ; -; manageroles_element.php line: 104 +; manageroles_element.php line: 123 manageusers_element_add_activity = "" ; -; manageroles_element.php line: 106 +; manageroles_element.php line: 128 manageroles_element_submit = "" ; ; manageusers_element.php line: 57 manageusers_element_add_user = "" ; -; manageusers_element.php line: 65 +; manageusers_element.php line: 67 manageusers_element_username = "" ; -; manageusers_element.php line: 67 +; manageusers_element.php line: 71 manageusers_element_password = "" ; -; manageusers_element.php line: 69 +; manageusers_element.php line: 75 manageusers_element_retype_password = "" ; -; manageusers_element.php line: 71 +; manageusers_element.php line: 80 manageusers_element_submit = "" ; -; manageusers_element.php line: 76 +; manageusers_element.php line: 86 manageusers_element_delete_user = "" ; -; manageusers_element.php line: 84 +; manageusers_element.php line: 96 manageusers_element_delete_username = "" ; -; manageusers_element.php line: 85 +; manageusers_element.php line: 100 manageusers_element_submit = "" ; -; manageusers_element.php line: 90 +; manageusers_element.php line: 105 manageusers_element_view_user_roles = "" ; -; manageusers_element.php line: 97 +; manageusers_element.php line: 114 manageusers_element_select_user 
= "" ; -; manageusers_element.php line: 112 +; manageusers_element.php line: 135 manageusers_element_add_role = "" ; -; manageusers_element.php line: 114 +; manageusers_element.php line: 140 manageusers_element_submit = "" ; -; signin_element.php line: 56 +; signin_element.php line: 57 signin_element_settings = "" ; -; signin_element.php line: 59 +; signin_element.php line: 61 signin_element_signin = "" ; -; signin_element.php line: 63 +; signin_element.php line: 67 signin_element_admin = "" ; -; signin_element.php line: 64 +; signin_element.php line: 69 signin_element_signout = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/helpers ; -; pagination_helper.php line: 86 +; pagination_helper.php line: 93 pagination_helper_previous = "Trang trước" ; -; pagination_helper.php line: 99 +; pagination_helper.php line: 109 pagination_helper_next = "Trang kế tiếp" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/layouts ; -; web_layout.php line: 68 +; web_layout.php line: 70 web_layout_description = "Giúp tìm caí mà bạn muốn tìm kiếm" ; -; web_layout.php line: 80 +; web_layout.php line: 83 web_layout_query_statistics = "" ; -; web_layout.php line: 81 +; web_layout.php line: 84 web_layout_total_elapsed_time = "" ; -; web_layout.php line: 84 +; web_layout.php line: 89 web_layout_query_time = "" ; ; nocache_view.php line: 65 nocache_view_no_cache = "Không tìm thấy trang gốc mà bạn muốn tìm kiếm." ; -; search_view.php line: 83 -search_view_developed_seek_quarry = "" -; -; search_view.php line: 88 +; search_view.php line: 85 search_view_input_label = "" ; -; search_view.php line: 89 +; search_view.php line: 88 search_view_input_placeholder = "" ; ; search_view.php line: 90 search_view_search = "Tìm Kiếm" ; -; search_view.php line: 100 +; search_view.php line: 99 +search_view_developed_seek_quarry = "" +; +; search_view.php line: 103 search_view_query_results = "Kết Quả:" +; +; search_view.php line: 104 search_view_calculated = "Đã tính toán trong %s giây." 
; -; search_view.php line: 101 +; search_view.php line: 105 search_view_results = "Cho kết quả tứ %s - %s của %s" ; -; search_view.php line: 117 +; search_view.php line: 126 search_view_rank = "Thứ Tự:" ; -; search_view.php line: 118 +; search_view.php line: 128 search_view_relevancy = "Thích hợp:" ; -; search_view.php line: 119 +; search_view.php line: 131 search_view_score = "" ; -; search_view.php line: 124 +; search_view.php line: 140 search_view_cache = "Trang gốc" ; -; search_view.php line: 127 +; search_view.php line: 143 search_view_as_text = "Trang Web Bắng Chữ" ; -; search_view.php line: 129 +; search_view.php line: 148 search_view_similar = "Tương Tự" ; -; settings_view.php line: 70 +; settings_view.php line: 74 settings_view_settings = "Sự sắp đặt" ; -; settings_view.php line: 75 +; settings_view.php line: 80 settings_view_results_per_page = "" ; -; settings_view.php line: 78 +; settings_view.php line: 85 settings_view_language_label = "" ; -; settings_view.php line: 80 +; settings_view.php line: 91 settings_view_return_yioop = "Trở lại trang Yioop" ; -; settings_view.php line: 81 +; settings_view.php line: 94 settings_view_save = "Giữ sự sắp đặt" ; -; settings_view.php line: 88 +; settings_view.php line: 102 setting_install_search_plugin = "" ; -; signin_view.php line: 61 +; signin_view.php line: 62 signin_view_signin = "Đăng nhâp" ; -; signin_view.php line: 66 +; signin_view.php line: 68 signin_view_username = "Tài khoản" ; -; signin_view.php line: 70 +; signin_view.php line: 74 signin_view_password = "Mật khẩu" ; -; signin_view.php line: 76 +; signin_view.php line: 83 signin_view_login = "Tài khoản" ; -; signin_view.php line: 82 +; signin_view.php line: 90 signin_view_return_yioop = "Trở lại trang Yioop" \ No newline at end of file diff --git a/locale/vn-US/configure.ini b/locale/vn-US/configure.ini index 35b433eea..955fab387 100755 --- a/locale/vn-US/configure.ini +++ b/locale/vn-US/configure.ini @@ -28,250 +28,252 @@ ; ; 
/Applications/XAMPP/xamppfiles/htdocs/git/yioop//controllers ; -; admin_controller.php line: 100 +; admin_controller.php line: 106 admin_controller_login_successful = "" ; -; admin_controller.php line: 104 +; admin_controller.php line: 111 admin_controller_login_failed = "" ; -; admin_controller.php line: 234 +; admin_controller.php line: 255 admin_controller_passwords_dont_match = "" ; -; admin_controller.php line: 241 +; admin_controller.php line: 265 admin_controller_invalid_old_password = "" ; -; admin_controller.php line: 245 +; admin_controller.php line: 272 admin_controller_change_password = "" ; -; admin_controller.php line: 267 +; admin_controller.php line: 297 admin_controller_select_username = "" ; -; admin_controller.php line: 302 +; admin_controller.php line: 334 admin_controller_select_rolename = "" ; -; admin_controller.php line: 326 +; admin_controller.php line: 360 admin_controller_passwords_dont_match = "" ; -; admin_controller.php line: 331 +; admin_controller.php line: 367 admin_controller_username_exists = "" ; -; admin_controller.php line: 336 +; admin_controller.php line: 374 admin_controller_username_added = "" ; -; admin_controller.php line: 344 +; admin_controller.php line: 383 admin_controller_username_doesnt_exists = "" ; -; admin_controller.php line: 349 +; admin_controller.php line: 390 admin_controller_username_deleted = "" ; -; admin_controller.php line: 355 +; admin_controller.php line: 397 admin_controller_username_doesnt_exists = "" ; -; admin_controller.php line: 359 +; admin_controller.php line: 403 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 363 +; admin_controller.php line: 409 admin_controller_rolename_added = "" ; -; admin_controller.php line: 371 +; admin_controller.php line: 420 admin_controller_username_doesnt_exists = "" ; -; admin_controller.php line: 375 +; admin_controller.php line: 426 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 382 +; 
admin_controller.php line: 436 admin_controller_rolename_deleted = "" ; -; admin_controller.php line: 402 +; admin_controller.php line: 459 admin_controller_select_rolename = "" ; -; admin_controller.php line: 433 +; admin_controller.php line: 494 admin_controller_select_activityname = "" ; -; admin_controller.php line: 463 +; admin_controller.php line: 527 admin_controller_rolename_exists = "" ; -; admin_controller.php line: 471 +; admin_controller.php line: 537 admin_controller_rolename_added = "" ; -; admin_controller.php line: 480 +; admin_controller.php line: 548 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 486 +; admin_controller.php line: 556 admin_controller_rolename_deleted = "" ; -; admin_controller.php line: 491 +; admin_controller.php line: 562 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 495 +; admin_controller.php line: 568 admin_controller_activityname_doesnt_exists = "" ; -; admin_controller.php line: 501 +; admin_controller.php line: 578 admin_controller_activity_added = "" ; -; admin_controller.php line: 506 +; admin_controller.php line: 584 admin_controller_rolename_doesnt_exists = "" ; -; admin_controller.php line: 511 +; admin_controller.php line: 591 admin_controller_activityname_doesnt_exists = "" ; -; admin_controller.php line: 518 +; admin_controller.php line: 603 admin_controller_activity_deleted = "" ; -; admin_controller.php line: 540 +; admin_controller.php line: 628 admin_controller_starting_new_crawl = "" ; -; admin_controller.php line: 555 +; admin_controller.php line: 648 admin_controller_no_description = "" ; -; admin_controller.php line: 575 +; admin_controller.php line: 673 admin_controller_stop_crawl = "" ; -; admin_controller.php line: 585 +; admin_controller.php line: 686 admin_controller_resume_crawl = "" ; -; admin_controller.php line: 603 +; admin_controller.php line: 717 admin_controller_delete_crawl_success = "" ; -; admin_controller.php line: 605 +; 
admin_controller.php line: 722 admin_controller_delete_crawl_fail = "" ; -; admin_controller.php line: 610 +; admin_controller.php line: 729 admin_controller_set_index = "" ; -; admin_controller.php line: 621 +; admin_controller.php line: 742 admin_controller_breadth_first = "" ; -; admin_controller.php line: 622 +; admin_controller.php line: 744 admin_controller_page_importance = "" ; -; admin_controller.php line: 647 +; admin_controller.php line: 783 admin_controller_update_seed_info = "" ; -; admin_controller.php line: 698 +; admin_controller.php line: 834 admin_controller_select_localename = "" ; -; admin_controller.php line: 735 +; admin_controller.php line: 877 admin_controller_locale_added = "" ; -; admin_controller.php line: 741 +; admin_controller.php line: 884 admin_controller_localename_doesnt_exists = "" ; -; admin_controller.php line: 748 +; admin_controller.php line: 893 admin_controller_localename_deleted = "" ; -; admin_controller.php line: 764 +; admin_controller.php line: 913 admin_controller_localestrings_updated = "" ; -; admin_controller.php line: 824 +; admin_controller.php line: 983 admin_controller_configure_work_dir_set = "" ; -; admin_controller.php line: 832 +; admin_controller.php line: 995 admin_controller_name_your_bot = "" ; -; admin_controller.php line: 837 +; admin_controller.php line: 1004 admin_controller_configure_work_profile_made = "" ; -; admin_controller.php line: 841 +; admin_controller.php line: 1010 admin_controller_configure_no_set_config = "" ; -; admin_controller.php line: 848 +; admin_controller.php line: 1021 admin_controller_configure_no_create_profile = "" ; -; admin_controller.php line: 854 +; admin_controller.php line: 1030 admin_controller_configure_work_dir_invalid = "" ; -; admin_controller.php line: 861 +; admin_controller.php line: 1041 admin_controller_configure_work_dir_invalid = "" ; -; admin_controller.php line: 899 +; admin_controller.php line: 1101 admin_controller_configure_no_change_db = "" ; -; 
admin_controller.php line: 910 +; admin_controller.php line: 1115 admin_controller_configure_profile_change = "" ; -; admin_controller.php line: 918 +; admin_controller.php line: 1129 admin_controller_configure_no_change_profile = "" ; -; admin_controller.php line: 946 +; admin_controller.php line: 1163 admin_controller_describe_robot = "" ; -; search_controller.php line: 108 +; search_controller.php line: 114 search_controller_logout_successful = "" ; -; search_controller.php line: 310 +; search_controller.php line: 334 search_controller_cached_version = "" ; -; settings_controller.php line: 109 +; settings_controller.php line: 114 settings_controller_settings_saved = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views ; -; admin_view.php line: 74 +; admin_view.php line: 76 admin_view_admin = "" ; -; admin_view.php line: 91 +; admin_view.php line: 96 adminview_auto_logout_one_minute = "" ; -; crawlstatus_view.php line: 59 +; crawlstatus_view.php line: 60 crawlstatus_view_currently_processing = "" ; -; crawlstatus_view.php line: 60 +; crawlstatus_view.php line: 61 crawlstatus_view_description = "" ; -; crawlstatus_view.php line: 64 +; crawlstatus_view.php line: 68 managecrawl_element_stop_crawl = "" ; -; crawlstatus_view.php line: 67 +; crawlstatus_view.php line: 71 crawlstatus_view_no_description = "" ; -; crawlstatus_view.php line: 70 +; crawlstatus_view.php line: 74 crawlstatus_view_time_started = "" ; -; crawlstatus_view.php line: 72 +; crawlstatus_view.php line: 77 crawlstatus_view_no_crawl_time = "" ; -; crawlstatus_view.php line: 74 +; crawlstatus_view.php line: 79 crawlstatus_view_total_urls = "" ; -; crawlstatus_view.php line: 75 +; crawlstatus_view.php line: 82 crawlstatus_view_most_recent_fetcher = "" ; -; crawlstatus_view.php line: 81 +; crawlstatus_view.php line: 88 crawlstatus_view_no_fetcher = "" ; -; crawlstatus_view.php line: 84 +; crawlstatus_view.php line: 91 crawlstatus_view_most_recent_urls = "" ; -; crawlstatus_view.php line: 91 +; 
crawlstatus_view.php line: 99 crawlstatus_view_no_recent_urls = "" ; -; crawlstatus_view.php line: 95 +; crawlstatus_view.php line: 103 crawlstatus_view_previous_crawls = "" ; -; crawlstatus_view.php line: 101 +; crawlstatus_view.php line: 109 crawlstatus_view_description = "" +; +; crawlstatus_view.php line: 110 crawlstatus_view_time_started = "" ; -; crawlstatus_view.php line: 102 +; crawlstatus_view.php line: 111 crawlstatus_view_total_urls = "" ; -; crawlstatus_view.php line: 103 +; crawlstatus_view.php line: 112 crawlstatus_view_actions = "" ; -; crawlstatus_view.php line: 109 +; crawlstatus_view.php line: 121 crawlstatus_view_resume = "" ; -; crawlstatus_view.php line: 113 +; crawlstatus_view.php line: 127 crawlstatus_view_set_index = "" ; -; crawlstatus_view.php line: 116 +; crawlstatus_view.php line: 130 crawlstatus_view_search_index = "" ; -; crawlstatus_view.php line: 121 +; crawlstatus_view.php line: 137 crawlstatus_view_delete = "" ; -; crawlstatus_view.php line: 128 +; crawlstatus_view.php line: 144 crawlstatus_view_no_previous_crawl = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/elements @@ -342,28 +344,28 @@ configure_element_robot_description = "" ; configure_element.php line: 148 configure_element_submit = "" ; -; crawloptions_element.php line: 60 +; crawloptions_element.php line: 62 crawloptions_element_back_to_manage = "" ; -; crawloptions_element.php line: 62 +; crawloptions_element.php line: 64 crawloptions_element_edit_crawl_options = "" ; -; crawloptions_element.php line: 70 +; crawloptions_element.php line: 74 crawloptions_element_crawl_order = "" ; -; crawloptions_element.php line: 73 +; crawloptions_element.php line: 79 crawloptions_element_restrict_by_url = "" ; -; crawloptions_element.php line: 75 +; crawloptions_element.php line: 86 crawloptions_element_allowed_to_crawl = "" ; -; crawloptions_element.php line: 79 +; crawloptions_element.php line: 91 crawloptions_element_disallowed_to_crawl = "" ; -; 
crawloptions_element.php line: 83 +; crawloptions_element.php line: 96 crawloptions_element_seed_sites = "" ; -; crawloptions_element.php line: 87 +; crawloptions_element.php line: 101 crawloptions_element_save_options = "" ; ; editlocales_element.php line: 60 @@ -441,175 +443,177 @@ managelocales_element_localetag = "" managelocales_element_writingmode = "" managelocales_element_percenttranslated = "" ; -; manageroles_element.php line: 55 +; manageroles_element.php line: 57 manageroles_element_add_role = "" ; -; manageroles_element.php line: 63 +; manageroles_element.php line: 67 manageroles_element_rolename = "" ; -; manageroles_element.php line: 64 +; manageroles_element.php line: 71 manageroles_element_submit = "" ; -; manageroles_element.php line: 69 +; manageroles_element.php line: 76 manageroles_element_delete_role = "" ; -; manageroles_element.php line: 77 +; manageroles_element.php line: 86 manageusers_element_delete_rolename = "" ; -; manageroles_element.php line: 78 +; manageroles_element.php line: 90 manageroles_element_submit = "" ; -; manageroles_element.php line: 82 +; manageroles_element.php line: 94 manageroles_element_view_role_activities = "" ; -; manageroles_element.php line: 89 +; manageroles_element.php line: 103 manageusers_element_select_role = "" ; -; manageroles_element.php line: 104 +; manageroles_element.php line: 123 manageusers_element_add_activity = "" ; -; manageroles_element.php line: 106 +; manageroles_element.php line: 128 manageroles_element_submit = "" ; ; manageusers_element.php line: 57 manageusers_element_add_user = "" ; -; manageusers_element.php line: 65 +; manageusers_element.php line: 67 manageusers_element_username = "" ; -; manageusers_element.php line: 67 +; manageusers_element.php line: 71 manageusers_element_password = "" ; -; manageusers_element.php line: 69 +; manageusers_element.php line: 75 manageusers_element_retype_password = "" ; -; manageusers_element.php line: 71 +; manageusers_element.php line: 80 
manageusers_element_submit = "" ; -; manageusers_element.php line: 76 +; manageusers_element.php line: 86 manageusers_element_delete_user = "" ; -; manageusers_element.php line: 84 +; manageusers_element.php line: 96 manageusers_element_delete_username = "" ; -; manageusers_element.php line: 85 +; manageusers_element.php line: 100 manageusers_element_submit = "" ; -; manageusers_element.php line: 90 +; manageusers_element.php line: 105 manageusers_element_view_user_roles = "" ; -; manageusers_element.php line: 97 +; manageusers_element.php line: 114 manageusers_element_select_user = "" ; -; manageusers_element.php line: 112 +; manageusers_element.php line: 135 manageusers_element_add_role = "" ; -; manageusers_element.php line: 114 +; manageusers_element.php line: 140 manageusers_element_submit = "" ; -; signin_element.php line: 56 +; signin_element.php line: 57 signin_element_settings = "" ; -; signin_element.php line: 59 +; signin_element.php line: 61 signin_element_signin = "" ; -; signin_element.php line: 63 +; signin_element.php line: 67 signin_element_admin = "" ; -; signin_element.php line: 64 +; signin_element.php line: 69 signin_element_signout = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/helpers ; -; pagination_helper.php line: 86 +; pagination_helper.php line: 93 pagination_helper_previous = "" ; -; pagination_helper.php line: 99 +; pagination_helper.php line: 109 pagination_helper_next = "" ; ; /Applications/XAMPP/xamppfiles/htdocs/git/yioop//views/layouts ; -; web_layout.php line: 68 +; web_layout.php line: 70 web_layout_description = "" ; -; web_layout.php line: 80 +; web_layout.php line: 83 web_layout_query_statistics = "" ; -; web_layout.php line: 81 +; web_layout.php line: 84 web_layout_total_elapsed_time = "" ; -; web_layout.php line: 84 +; web_layout.php line: 89 web_layout_query_time = "" ; ; nocache_view.php line: 65 nocache_view_no_cache = "" ; -; search_view.php line: 83 -search_view_developed_seek_quarry = "" -; -; 
search_view.php line: 88 +; search_view.php line: 85 search_view_input_label = "" ; -; search_view.php line: 89 +; search_view.php line: 88 search_view_input_placeholder = "" ; ; search_view.php line: 90 search_view_search = "" ; -; search_view.php line: 100 +; search_view.php line: 99 +search_view_developed_seek_quarry = "" +; +; search_view.php line: 103 search_view_query_results = "" +; +; search_view.php line: 104 search_view_calculated = "" ; -; search_view.php line: 101 +; search_view.php line: 105 search_view_results = "" ; -; search_view.php line: 117 +; search_view.php line: 126 search_view_rank = "" ; -; search_view.php line: 118 +; search_view.php line: 128 search_view_relevancy = "" ; -; search_view.php line: 119 +; search_view.php line: 131 search_view_score = "" ; -; search_view.php line: 124 +; search_view.php line: 140 search_view_cache = "" ; -; search_view.php line: 127 +; search_view.php line: 143 search_view_as_text = "" ; -; search_view.php line: 129 +; search_view.php line: 148 search_view_similar = "" ; -; settings_view.php line: 70 +; settings_view.php line: 74 settings_view_settings = "" ; -; settings_view.php line: 75 +; settings_view.php line: 80 settings_view_results_per_page = "" ; -; settings_view.php line: 78 +; settings_view.php line: 85 settings_view_language_label = "" ; -; settings_view.php line: 80 +; settings_view.php line: 91 settings_view_return_yioop = "" ; -; settings_view.php line: 81 +; settings_view.php line: 94 settings_view_save = "" ; -; settings_view.php line: 88 +; settings_view.php line: 102 setting_install_search_plugin = "" ; -; signin_view.php line: 61 +; signin_view.php line: 62 signin_view_signin = "" ; -; signin_view.php line: 66 +; signin_view.php line: 68 signin_view_username = "" ; -; signin_view.php line: 70 +; signin_view.php line: 74 signin_view_password = "" ; -; signin_view.php line: 76 +; signin_view.php line: 83 signin_view_login = "" ; -; signin_view.php line: 82 +; signin_view.php line: 90 
signin_view_return_yioop = "" \ No newline at end of file diff --git a/models/activity_model.php b/models/activity_model.php index c336afece..c96b4f98d 100755 --- a/models/activity_model.php +++ b/models/activity_model.php @@ -59,10 +59,11 @@ class ActivityModel extends Model /** - * Given the method name of a method to perform an activity return the translated activity name + * Given the method name of a method to perform an activity return the + * translated activity name * - * @param string $method_name string with the name of the activity method - * @return string the translated name of the activity + * @param string $method_name string with the name of the activity method + * @return string the translated name of the activity */ function getActivityNameFromMethodName($method_name) { @@ -74,16 +75,19 @@ class ActivityModel extends Model $roles = array(); $locale_tag = getLocaleTag(); - $sql = "SELECT LOCALE_ID FROM LOCALE WHERE LOCALE_TAG = '$locale_tag' LIMIT 1"; + $sql = "SELECT LOCALE_ID FROM LOCALE ". + "WHERE LOCALE_TAG = '$locale_tag' LIMIT 1"; $result = $db->execute($sql); $row = $db->fetchArray($result); $locale_id = $row['LOCALE_ID']; $sql = "SELECT TL.TRANSLATION AS ACTIVITY_NAME FROM ". - " TRANSLATION_LOCALE TL, LOCALE L, ACTIVITY A WHERE A.METHOD_NAME = '$method_name' ". - " AND TL.TRANSLATION_ID = A.TRANSLATION_ID AND L.LOCALE_ID='$locale_id' AND ". - " L.LOCALE_ID = TL.LOCALE_ID LIMIT 1"; + " TRANSLATION_LOCALE TL, LOCALE L, ACTIVITY A ". + "WHERE A.METHOD_NAME = '$method_name' ". + "AND TL.TRANSLATION_ID = A.TRANSLATION_ID ". + "AND L.LOCALE_ID='$locale_id' AND ". + "L.LOCALE_ID = TL.LOCALE_ID LIMIT 1"; $result = $db->execute($sql); $row = $db->fetchArray($result); @@ -91,7 +95,8 @@ class ActivityModel extends Model if($row == NULL) { $sql = "SELECT T.IDENTIFIER_STRING AS ACTIVITY_NAME FROM ". - " ACTIVITY A, TRANSLATION T WHERE A.METHOD_NAME = '$method_name' ". + " ACTIVITY A, TRANSLATION T ". + "WHERE A.METHOD_NAME = '$method_name' ". 
" AND T.TRANSLATION_ID = A.TRANSLATION_ID LIMIT 1"; $result = $db->execute($sql); @@ -107,9 +112,10 @@ class ActivityModel extends Model /** - * Gets a list of activity ids, method names, and translated name of each available activity + * Gets a list of activity ids, method names, and translated + * name of each available activity * - * @return array activities + * @return array activities */ function getActivityList() { @@ -119,12 +125,14 @@ class ActivityModel extends Model $activities = array(); $locale_tag = getLocaleTag(); - $sql = "SELECT LOCALE_ID FROM LOCALE WHERE LOCALE_TAG = '$locale_tag' LIMIT 1"; + $sql = "SELECT LOCALE_ID FROM LOCALE ". + "WHERE LOCALE_TAG = '$locale_tag' LIMIT 1"; $result = $db->execute($sql); $row = $db->fetchArray($result); $locale_id = $row['LOCALE_ID']; - $sql = "SELECT A.ACTIVITY_ID AS ACTIVITY_ID, A.METHOD_NAME AS METHOD_NAME, ". + $sql = "SELECT A.ACTIVITY_ID AS ACTIVITY_ID, ". + "A.METHOD_NAME AS METHOD_NAME, ". " T.IDENTIFIER_STRING AS IDENTIFIER_STRING FROM ". " ACTIVITY A, TRANSLATION T WHERE ". " T.TRANSLATION_ID = A.TRANSLATION_ID"; @@ -134,8 +142,10 @@ class ActivityModel extends Model while($activities[$i] = $db->fetchArray($result)) { $id = $activities[$i]['ACTIVITY_ID']; - $sub_sql = "SELECT TRANSLATION AS ACTIVITY_NAME FROM TRANSLATION_LOCALE ". - " WHERE TRANSLATION_ID=$id AND LOCALE_ID=$locale_id LIMIT 1"; // maybe do left join at some point + $sub_sql = "SELECT TRANSLATION AS ACTIVITY_NAME ". + "FROM TRANSLATION_LOCALE ". 
+ " WHERE TRANSLATION_ID=$id AND LOCALE_ID=$locale_id LIMIT 1"; + // maybe do left join at some point $result_sub = $db->execute($sub_sql); $translate = $db->fetchArray($result_sub); @@ -143,7 +153,8 @@ class ActivityModel extends Model if($translate) { $activities[$i]['ACTIVITY_NAME'] = $translate['ACTIVITY_NAME']; } else { - $activities[$i]['ACTIVITY_NAME'] = $activities['IDENTIFIER_STRING']; + $activities[$i]['ACTIVITY_NAME'] = + $activities['IDENTIFIER_STRING']; } $i++; } diff --git a/models/crawl_model.php b/models/crawl_model.php index a07db6f1e..6922de4b6 100755 --- a/models/crawl_model.php +++ b/models/crawl_model.php @@ -35,9 +35,15 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} /** For crawlHash function */ require_once BASE_DIR."/lib/utility.php"; -/** Loads common constants for web crawling, used for index_data_base_name and schedule_data_base_name */ +/** + * Loads common constants for web crawling, used for index_data_base_name and + * schedule_data_base_name + */ require_once BASE_DIR."/lib/crawl_constants.php"; -/** Crawl data is stored in an IndexArchiveBundle, so load the definition of this class*/ +/** + * Crawl data is stored in an IndexArchiveBundle, + * so load the definition of this class + */ require_once BASE_DIR."/lib/index_archive_bundle.php"; /** @@ -52,8 +58,9 @@ require_once BASE_DIR."/lib/index_archive_bundle.php"; class CrawlModel extends Model implements CrawlConstants { /** - * Stores the name of the current index archive to use to get search results from - * var string + * Stores the name of the current index archive to use to get search + * results from + * @Var string */ var $index_name; @@ -68,17 +75,19 @@ class CrawlModel extends Model implements CrawlConstants /** - * Get a summary of a document by it document id (a string hash value) and its offset + * Get a summary of a document by it document id (a string hash value) + * and its offset * - * @param string $ukey document id hash string - * @param int 
$summary_offset offset into a partition in a WebArchiveBundle - * @return array summary data of the matching document + * @param string $ukey document id hash string + * @param int $summary_offset offset into a partition in a WebArchiveBundle + * @return array summary data of the matching document */ function getCrawlItem($ukey, $summary_offset) { $index_archive_name = self::index_data_base_name . $this->index_name; - $index_archive = new IndexArchiveBundle(CRAWL_DIR.'/cache/'.$index_archive_name); + $index_archive = + new IndexArchiveBundle(CRAWL_DIR.'/cache/'.$index_archive_name); $summary = $index_archive->getPage($ukey, $summary_offset); @@ -87,28 +96,38 @@ class CrawlModel extends Model implements CrawlConstants /** - * Gets the cached version of a web page from the machine on which it was fetched. + * Gets the cached version of a web page from the machine on which it was + * fetched. * - * Complete cached versions of web pages typically only live on a fetcher machine. The - * queue server machine typically only maintains summaries. This method makes a REST - * request of a fetcher machine for a cached page and get the results back. + * Complete cached versions of web pages typically only live on a fetcher + * machine. The queue server machine typically only maintains summaries. + * This method makes a REST request of a fetcher machine for a cached page + * and get the results back. * - * @param string $machine the ip address of domain name of the machine the cached page lives on - * @param string $machine_uri the path from document root on $machine where the yioop scripts live - * @param string $hash the hash that was used to represent the page in the WebArchiveBundle - * @param int $offset the offset in bytes into the WebArchive partition in the WebArchiveBundle at which the cached page lives. 
- * @param string $crawl_time the timestamp of the crawl the cache page is from - * @return array page data of the cached page + * @param string $machine the ip address of domain name of the machine the + * cached page lives on + * @param string $machine_uri the path from document root on $machine where + * the yioop scripts live + * @param string $hash the hash that was used to represent the page in the + * WebArchiveBundle + * @param int $offset the offset in bytes into the WebArchive partition in + * the WebArchiveBundle at which the cached page lives. + * @param string $crawl_time the timestamp of the crawl the cache page is + * from + * @return array page data of the cached page */ function getCacheFile($machine, $machine_uri, $hash, $offset, $crawl_time) { $time = time(); $session = md5($time . AUTH_KEY); if($machine == '::1') { - $machine = "localhost"; //used if the fetching and queue serving were on the same machine + $machine = "localhost"; + //used if the fetching and queue serving were on the same machine } - $request= "http://$machine$machine_uri?c=archive&a=cache&time=$time&session=$session&hash=$hash&offset=$offset&crawl_time=$crawl_time"; + $request= "http://$machine$machine_uri?c=archive&a=cache&time=$time". + "&session=$session&hash=$hash&offset=$offset". 
+ "&crawl_time=$crawl_time"; $page = @unserialize(base64_decode(FetchUrl::getPage($request))); $page['REQUEST'] = $request; @@ -117,9 +136,10 @@ class CrawlModel extends Model implements CrawlConstants /** - * Gets the name (aka timestamp) of the current index archive to be used to handle search queries + * Gets the name (aka timestamp) of the current index archive to be used to + * handle search queries * - * @return string the timestamp of the archive + * @return string the timestamp of the archive */ function getCurrentIndexDatabaseName() { @@ -134,10 +154,11 @@ class CrawlModel extends Model implements CrawlConstants /** - * Sets the IndexArchive that will be used for search results + * Sets the IndexArchive that will be used for search results * - * @param $timestamp the timestamp of the index archive. The timestamp is when the crawl was started - * Currently, the timestamp appears as substring of the index archives directory name + * @param $timestamp the timestamp of the index archive. The timestamp is + * when the crawl was started. 
Currently, the timestamp appears as substring + * of the index archives directory name */ function setCurrentIndexDatabaseName($timestamp) { @@ -150,11 +171,12 @@ class CrawlModel extends Model implements CrawlConstants /** - * Gets a list of all index archives of crawls that have been conducted + * Gets a list of all index archives of crawls that have been conducted * - * @return array Available IndexArchiveBundle directories and their meta information - * this meta information includes the time of the crawl, its description, the number of - * pages downloaded, and the number of partitions used in storing the inverted index + * @return array Available IndexArchiveBundle directories and + * their meta information this meta information includes the time of the + * crawl, its description, the number of pages downloaded, and the number + * of partitions used in storing the inverted index */ function getCrawlList() { @@ -162,9 +184,11 @@ class CrawlModel extends Model implements CrawlConstants $dirs = glob(CRAWL_DIR.'/cache/*', GLOB_ONLYDIR); foreach($dirs as $dir) { - if(strlen($pre_timestamp = strstr($dir, self::index_data_base_name)) > 0) { + if(strlen($pre_timestamp = + strstr($dir, self::index_data_base_name)) > 0) { $crawl = array(); - $crawl['CRAWL_TIME'] = substr($pre_timestamp, strlen(self::index_data_base_name)); + $crawl['CRAWL_TIME'] = + substr($pre_timestamp, strlen(self::index_data_base_name)); $info = IndexArchiveBundle::getArchiveInfo($dir); $crawl['DESCRIPTION'] = $info['DESCRIPTION']; $crawl['COUNT'] = $info['COUNT']; @@ -221,7 +245,8 @@ class CrawlModel extends Model implements CrawlConstants EOT; $n[] = '[general]'; $n[] = "crawl_order = '".$info['general']['crawl_order']."';"; - $bool_string = ($info['general']['restrict_sites_by_url']) ? "true" : "false"; + $bool_string = + ($info['general']['restrict_sites_by_url']) ? 
"true" : "false"; $n[] = "restrict_sites_by_url = $bool_string;"; $n[] = ""; diff --git a/models/datasources/datasource_manager.php b/models/datasources/datasource_manager.php index f4ab98408..f9c5a2ea3 100755 --- a/models/datasources/datasource_manager.php +++ b/models/datasources/datasource_manager.php @@ -49,8 +49,16 @@ require_once BASE_DIR."/lib/utility.php"; */ abstract class DatasourceManager { - /** Used to store statistics about what queries have been run depending on the debug level*/ + /** + * Used to store statistics about what queries have been run depending on + * the debug level + * @var string + */ var $query_log; + /** + * Used to store the total time taken to execute queries + * @var int + */ var $total_time; /** Sets up the query_log for query statistics */ @@ -60,15 +68,18 @@ abstract class DatasourceManager } /** - * Connects to a DBMS using data provided or from config.php - * - * @param string $db_url the url of where the database is located (not used in all dbms's) - * @param string $db_user the user to connect as - * @param string $db_password the password of the user to connect as - * @return mixed return false if not successful and some kind of connection object/identifier otherwise + * Connects to a DBMS using data provided or from config.php + * + * @param string $db_url the url of where the database is located + * (not used in all dbms's) + * @param string $db_user the user to connect as + * @param string $db_password the password of the user to connect as + * @return mixed return false if not successful and some kind of + * connection object/identifier otherwise */ - abstract function connect($db_url = DB_URL, $db_user = DB_USER, $db_password = DB_PASSWORD); + abstract function connect($db_url = DB_URL, $db_user = DB_USER, + $db_password = DB_PASSWORD); /** * Connects to the correct DB on that system @@ -83,12 +94,13 @@ abstract class DatasourceManager /** - * Executes the supplied sql command on the database, depending on debug 
levels computes query statistics + * Executes the supplied sql command on the database, depending on debug + * levels computes query statistics * - * This method operates either query or data manipulation statements + * This method operates either query or data manipulation statements * - * @param string $sql SQL statement to execute - * @return mixed false if query fails, resource or true otherwise + * @param string $sql SQL statement to execute + * @return mixed false if query fails, resource or true otherwise */ function execute($sql) @@ -109,49 +121,48 @@ abstract class DatasourceManager /** - * Hook Method for execute(). Executes the supplied sql command on the database + * Hook Method for execute(). Executes the sql command on the database * - * This method operates on either query or data manipulation statements + * This method operates on either query or data manipulation statements * - * @param string $sql SQL statement to execute - * @return mixed false if query fails, resource or true otherwise + * @param string $sql SQL statement to execute + * @return mixed false if query fails, resource or true otherwise */ abstract function exec($sql); /** - * Returns the number of rows affected by the last sql statement + * Returns the number of rows affected by the last sql statement * - * @return int the number of rows affected by the last - * insert, update, delete + * @return int the number of rows affected by the last + * insert, update, delete */ abstract function affectedRows(); /** * Returns the ID generated by the last insert statement - * if table has an auto increment key column + * if table has an auto increment key column * - * @return string the ID of the insert + * @return string the ID of the insert */ abstract function insertID(); /** - * Returns the next row from the provided result set + * Returns the next row from the provided result set * - * @param resource $result result set reference of a query - * - * @return array the next row from the 
result set as an - * associative array in the form column_name => value + * @param resource $result result set reference of a query + * @return array the next row from the result set as an + * associative array in the form column_name => value */ abstract function fetchArray($result); /** - * Used to escape strings before insertion in the - * database to avoid SQL injection - * - * @param string $str string to escape - * @return string a string which is safe to insert into the db + * Used to escape strings before insertion in the + * database to avoid SQL injection + * + * @param string $str string to escape + * @return string a string which is safe to insert into the db */ abstract function escapeString($str); @@ -160,7 +171,7 @@ abstract class DatasourceManager * Recursively delete a directory * * @param string $dir Directory name - * @param boolean $deleteRootToo Delete specified top-level directory as well + * @param boolean $deleteRootToo Delete specified top directory as well */ function unlinkRecursive($dir, $deleteRootToo = true) { @@ -171,7 +182,7 @@ abstract class DatasourceManager * Recursively chmod a directory to 0777 * * @param string $dir Directory name - * @param boolean $chmodRootToo Delete specified top-level directory as well + * @param boolean $chmodRootToo chmod specified top-level directory as well */ function setWorldPermissionsRecursive($dir, $chmodRootToo = true) { @@ -181,8 +192,8 @@ abstract class DatasourceManager /** * Recursively copies a source directory to a destination directory * - * It would have been coon to use traverseDirectory to implement this, - * but it was a little bit too much of a stretch to showhorn the code to match + * It would have been coon to use traverseDirectory to implement this, but + * it was a little bit too much of a stretch to showhorn the code to match * * @param string $source_dir the name of the source directory * @param string $desitnation_dir the name of the destination directory @@ -202,10 +213,12 @@ 
abstract class DatasourceManager while(false !== ( $obj = readdir($dh)) ) { if (( $obj != '.' ) && ( $obj != '..' )) { if ( is_dir($source_dir . '/' . $obj) ) { - $this->copyRecursive($source_dir . '/' . $obj, $destination_dir . '/' . $obj); + $this->copyRecursive($source_dir . '/' . + $obj, $destination_dir . '/' . $obj); } else { - copy($source_dir . '/' . $obj, $destination_dir . '/' . $obj); + copy($source_dir . '/' . + $obj, $destination_dir . '/' . $obj); chmod($destination_dir . '/' . $obj, 0777); } } @@ -219,7 +232,7 @@ abstract class DatasourceManager * * @param string $dir Directory name * @param function $callback Function to call as traverse structure - * @param boolean $deleteRootToo Delete specified top-level directory as well + * @param boolean $rootToo do op on top-level directory as well */ public function traverseDirectory($dir, $callback, $rootToo = true) diff --git a/models/datasources/mysql_manager.php b/models/datasources/mysql_manager.php index aa8b93d1f..d6a4d66ef 100755 --- a/models/datasources/mysql_manager.php +++ b/models/datasources/mysql_manager.php @@ -62,7 +62,8 @@ class MysqlManager extends DatasourceManager } /** {@inheritdoc} */ - function connect($db_url = DB_URL, $db_user = DB_USER, $db_password = DB_PASSWORD) + function connect($db_url = DB_URL, $db_user = DB_USER, + $db_password = DB_PASSWORD) { return mysql_connect($db_url, $db_user, $db_password); } diff --git a/models/datasources/sqlite3_manager.php b/models/datasources/sqlite3_manager.php index 08ff3cc6e..a129e7df4 100644 --- a/models/datasources/sqlite3_manager.php +++ b/models/datasources/sqlite3_manager.php @@ -40,7 +40,7 @@ require_once "datasource_manager.php"; /** - * SQLite4 DatasourceManager + * SQLite3 DatasourceManager * * This is concrete class, implementing * the abstract class DatasourceManager @@ -54,8 +54,15 @@ require_once "datasource_manager.php"; */ class Sqlite3Manager extends DatasourceManager { + /** + * Stores the current Sqlite3 DB object + * @var 
object + */ var $dbhandle; - var $dbname; + /** + * Filename of the Sqlite3 Database + * @var string + */ /** {@inheritdoc} */ function __construct() @@ -68,8 +75,13 @@ class Sqlite3Manager extends DatasourceManager $this->dbname = NULL; } - /** For an Sqlite database no connection needs to be made so this method does nothing*/ - function connect($db_url = DB_URL, $db_user = DB_USER, $db_password = DB_PASSWORD) + /** + * For an Sqlite3 database no connection needs to be made so this + * method does nothing + * {@inheritdoc} + */ + function connect($db_url = DB_URL, $db_user = DB_USER, + $db_password = DB_PASSWORD) { return true; } @@ -82,7 +94,8 @@ class Sqlite3Manager extends DatasourceManager } $this->dbname = $db_name; - $this->dbhandle = new SQLite3(CRAWL_DIR."/data/$db_name.db", SQLITE3_OPEN_READWRITE |SQLITE3_OPEN_CREATE); + $this->dbhandle = new SQLite3(CRAWL_DIR."/data/$db_name.db", + SQLITE3_OPEN_READWRITE |SQLITE3_OPEN_CREATE); return $this->dbhandle; } diff --git a/models/datasources/sqlite_manager.php b/models/datasources/sqlite_manager.php index 088d83eb7..6d707daa3 100644 --- a/models/datasources/sqlite_manager.php +++ b/models/datasources/sqlite_manager.php @@ -54,8 +54,20 @@ require_once "datasource_manager.php"; */ class SqliteManager extends DatasourceManager { + /** + * Stores the current Sqlite DB resource + * @var resource + */ var $dbhandle; + /** + * Filename of the Sqlite Database + * @var string + */ var $dbname; + /** + * Stores the result resource of the last DB exec + * @var resource + */ var $result; /** {@inheritdoc} */ @@ -71,8 +83,13 @@ class SqliteManager extends DatasourceManager $this->result = NULL; } - /** For an Sqlite database no connection needs to be made so this method does nothing*/ - function connect($db_url = DB_URL, $db_user = DB_USER, $db_password = DB_PASSWORD) + /** + * For an Sqlite database no connection needs to be made so this + * method does nothing + * {@inheritdoc} + */ + function connect($db_url = DB_URL, 
$db_user = DB_USER, + $db_password = DB_PASSWORD) { return true; } diff --git a/models/locale_model.php b/models/locale_model.php index a27f3e5b8..adb1fff7c 100644 --- a/models/locale_model.php +++ b/models/locale_model.php @@ -63,8 +63,9 @@ class LocaleModel extends Model */ var $locale_name; /** - * Combination of text direction and block porgression as a string. Has one of four values: - * lr-tb, rl-tb, tb-lr, tb-rl. Other possible values for things like Arabic block quoted in Mongolian not supported + * Combination of text direction and block porgression as a string. Has one + * of four values: lr-tb, rl-tb, tb-lr, tb-rl. Other possible values for + * things like Arabic block quoted in Mongolian not supported * @var string */ var $writing_mode; @@ -90,16 +91,19 @@ class LocaleModel extends Model /** - * Loads the provided locale's configure file (containing transalation) and calls setlocale to - * set up locale specific string formatting (for to format numbers, etc.) + * Loads the provided locale's configure file (containing transalation) and + * calls setlocale to set up locale specific string formatting + * (for to format numbers, etc.) * - * @param string $locale_tag the tag of the locale to use as the current locale + * @param string $locale_tag the tag of the locale to use as the current + * locale */ function initialize($locale_tag) { $this->db->selectDB(DB_NAME); - $this->configure = parse_ini_file (LOCALE_DIR."/$locale_tag/configure.ini", true); + $this->configure = parse_ini_file( + LOCALE_DIR."/$locale_tag/configure.ini", true); $this->locale_tag = $locale_tag; $sql = "SELECT LOCALE_NAME, WRITING_MODE ". 
" FROM LOCALE WHERE LOCALE_TAG ='$locale_tag'"; @@ -111,8 +115,10 @@ class LocaleModel extends Model $locale_tag_parts = explode("_", $locale_tag); - setlocale(LC_ALL, $locale_tag, $locale_tag.'.UTF-8', $locale_tag.'.UTF8', $locale_tag.".TCVN", $locale_tag.".VISCII", - $locale_tag_parts[0], $locale_tag_parts[0].'.UTF-8', $locale_tag_parts[0].'.UTF8', $locale_tag_parts[0].".TCVN"); + setlocale(LC_ALL, $locale_tag, $locale_tag.'.UTF-8', + $locale_tag.'.UTF8', $locale_tag.".TCVN", $locale_tag.".VISCII", + $locale_tag_parts[0], $locale_tag_parts[0].'.UTF-8', + $locale_tag_parts[0].'.UTF8', $locale_tag_parts[0].".TCVN"); //hacks for things that didn't work from the above if($locale_tag == 'vi_VN') { @@ -140,12 +146,16 @@ class LocaleModel extends Model $i = 0; $locales = array(); while($locales[$i] = $this->db->fetchArray($result)) { - //the statistics text file contains info used to calculate what fraction of strings have been translated - if(!file_exists(LOCALE_DIR."/{$locales[$i]['LOCALE_TAG']}/statistics.txt") || - filemtime(LOCALE_DIR."/{$locales[$i]['LOCALE_TAG']}/statistics.txt") < - filemtime(LOCALE_DIR."/{$locales[$i]['LOCALE_TAG']}/configure.ini")) { - - $tmp = parse_ini_file (LOCALE_DIR."/{$locales[$i]['LOCALE_TAG']}/configure.ini", true); + /* + the statistics text file contains info used to calculate + what fraction of strings have been translated + */ + $tag_prefix = LOCALE_DIR."/".$locales[$i]['LOCALE_TAG']; + if(!file_exists("$tag_prefix/statistics.txt") || + filemtime("$tag_prefix/statistics.txt") < + filemtime("$tag_prefix/configure.ini")) { + + $tmp = parse_ini_file ("$tag_prefix/configure.ini", true); $num_ids = 0; $num_strings = 0; foreach ($tmp['strings'] as $msg_id => $msg_string) { @@ -154,12 +164,14 @@ class LocaleModel extends Model $num_strings++; } } - $locales[$i]['PERCENT_WITH_STRINGS'] = floor(100 * $num_strings/$num_ids); - file_put_contents(LOCALE_DIR."/{$locales[$i]['LOCALE_TAG']}/statistics.txt", + $locales[$i]['PERCENT_WITH_STRINGS'] 
= + floor(100 * $num_strings/$num_ids); + file_put_contents("$tag_prefix/statistics.txt", serialize($locales[$i]['PERCENT_WITH_STRINGS'])); } else { $locales[$i]['PERCENT_WITH_STRINGS'] = - unserialize(file_get_contents(LOCALE_DIR."/{$locales[$i]['LOCALE_TAG']}/statistics.txt")); + unserialize( + file_get_contents("$tag_prefix/statistics.txt")); } $i++; @@ -172,18 +184,22 @@ class LocaleModel extends Model /** - * Adds information concerning a new locale to the database + * Adds information concerning a new locale to the database * - * @param string $locale_name the name of the locale in the locale's language - * @param string $locale_tag the IANA langauge tag for the locale - * @param string $writing_mode a combination of the horizontal and vertical text direction used for writing in the locale + * @param string $locale_name the name of the locale in the locale's + * language + * @param string $locale_tag the IANA langauge tag for the locale + * @param string $writing_mode a combination of the horizontal and + * vertical text direction used for writing in the locale */ function addLocale($locale_name, $locale_tag, $writing_mode) { $this->db->selectDB(DB_NAME); - $sql = "INSERT INTO LOCALE(LOCALE_NAME, LOCALE_TAG, WRITING_MODE) VALUES". - "('".$this->db->escapeString($locale_name)."', '".$this->db->escapeString($locale_tag) . + $sql = "INSERT INTO LOCALE". + "(LOCALE_NAME, LOCALE_TAG, WRITING_MODE) VALUES". + "('".$this->db->escapeString($locale_name). + "', '".$this->db->escapeString($locale_tag) . 
"', '".$this->db->escapeString($writing_mode)."')"; $this->db->execute($sql); @@ -200,13 +216,14 @@ class LocaleModel extends Model /** * Remove a locale from the database * - * @param string $locale_tag the IANA language tag for the locale to remove + * @param string $locale_tag the IANA language tag for the locale to remove */ function deleteLocale($locale_tag) { $this->db->selectDB(DB_NAME); - $sql = "DELETE FROM LOCALE WHERE LOCALE_TAG = '".$this->db->escapeString($locale_tag)."'"; + $sql = "DELETE FROM LOCALE WHERE LOCALE_TAG = '". + $this->db->escapeString($locale_tag)."'"; $this->db->execute($sql); @@ -218,10 +235,12 @@ class LocaleModel extends Model /** - * For each translatable identifier string (either static from a configure ini file, or dynamic from the db) - * return its name together with its translation into the given locale if such a translation exists. + * For each translatable identifier string (either static from a + * configure ini file, or dynamic from the db) + * return its name together with its translation into the given locale + * if such a translation exists. * - * @param string $locale_tag the IANA language tag to translate string into + * @param string $locale_tag the IANA language tag to translate string into * @return array rows of identfier string - translation pairs */ function getStringData($locale_tag) @@ -233,14 +252,20 @@ class LocaleModel extends Model //hacky. Join syntax isn't quite the same between sqlite and mysql if(in_array(DBMS, array('sqlite', 'sqlite3'))) { - $sql = "SELECT T.IDENTIFIER_STRING AS MSG_ID, TLL.TRANSLATION AS MSG_STRING " . - "FROM TRANSLATION T LEFT JOIN ". //sqlite supports left but not right outer join - "(TRANSLATION_LOCALE TL JOIN LOCALE L ON L.LOCALE_TAG = '$locale_tag' AND L.LOCALE_ID = TL.LOCALE_ID) TLL " . + $sql = "SELECT T.IDENTIFIER_STRING AS MSG_ID, ". + "TLL.TRANSLATION AS MSG_STRING " . + "FROM TRANSLATION T LEFT JOIN ". 
+ //sqlite supports left but not right outer join + "(TRANSLATION_LOCALE TL JOIN LOCALE L ON ". + "L.LOCALE_TAG = '$locale_tag' AND ". + "L.LOCALE_ID = TL.LOCALE_ID) TLL " . "ON T.TRANSLATION_ID = TLL.TRANSLATION_ID"; } else { - $sql = "SELECT T.IDENTIFIER_STRING AS MSG_ID, TL.TRANSLATION AS MSG_STRING " . + $sql = "SELECT T.IDENTIFIER_STRING AS MSG_ID, ". + "TL.TRANSLATION AS MSG_STRING " . "FROM TRANSLATION T LEFT JOIN ". - "(TRANSLATION_LOCALE TL JOIN LOCALE L ON L.LOCALE_TAG = '$locale_tag' AND L.LOCALE_ID = TL.LOCALE_ID) " . + "(TRANSLATION_LOCALE TL JOIN LOCALE L ON ". + "L.LOCALE_TAG = '$locale_tag' AND L.LOCALE_ID = TL.LOCALE_ID) ". "ON T.TRANSLATION_ID = TL.TRANSLATION_ID"; } $result = $this->db->execute($sql); @@ -253,16 +278,18 @@ class LocaleModel extends Model /** - * Updates the identifier_string-translation pairs (both static and dynamic) for a given locale + * Updates the identifier_string-translation pairs + * (both static and dynamic) for a given locale * - * @param string $locale_tag the IANA language tag to update the strings of - * @param array $new_strings rows of identifier string - translation pairs + * @param string $locale_tag the IANA language tag to update the strings of + * @param array $new_strings rows of identifier string - translation pairs */ function updateStringData($locale_tag, $new_strings) { $this->db->selectDB(DB_NAME); - $sql = "SELECT LOCALE_ID FROM LOCALE WHERE LOCALE_TAG = '$locale_tag' LIMIT 1"; + $sql = "SELECT LOCALE_ID FROM LOCALE ". 
+ "WHERE LOCALE_TAG = '$locale_tag' LIMIT 1"; $result = $this->db->execute($sql); $row = $this->db->fetchArray($result); $locale_id = $row['LOCALE_ID']; @@ -270,16 +297,20 @@ class LocaleModel extends Model list($general_ini, $strings) = $this->extractMergeLocales(); foreach($new_strings as $msg_id => $msg_string) { if(strcmp($msg_id, strstr($msg_id, "db_")) == 0) { - $sql = "SELECT TRANSLATION_ID FROM TRANSLATION WHERE IDENTIFIER_STRING = '$msg_id' LIMIT 1"; + $sql = "SELECT TRANSLATION_ID FROM TRANSLATION ". + "WHERE IDENTIFIER_STRING = '$msg_id' LIMIT 1"; $result = $this->db->execute($sql); $row = $this->db->fetchArray($result); $translate_id = $row['TRANSLATION_ID']; - $sql = "DELETE FROM TRANSLATION_LOCALE WHERE TRANSLATION_ID ='$translate_id' AND LOCALE_ID = '$locale_id'"; + $sql = "DELETE FROM TRANSLATION_LOCALE ". + "WHERE TRANSLATION_ID ='$translate_id' AND ". + "LOCALE_ID = '$locale_id'"; $result = $this->db->execute($sql); - $sql = "INSERT INTO TRANSLATION_LOCALE VALUES ('$translate_id', '$locale_id', '$msg_string')"; + $sql = "INSERT INTO TRANSLATION_LOCALE VALUES ". + "('$translate_id', '$locale_id', '$msg_string')"; $result = $this->db->execute($sql); $new_strings[$msg_id] = false; @@ -288,19 +319,24 @@ class LocaleModel extends Model array_filter($new_strings); $data['strings'] = $new_strings; - $this->updateLocale($general_ini, $strings, LOCALE_DIR, $locale_tag, $data); + $this->updateLocale( + $general_ini, $strings, LOCALE_DIR, $locale_tag, $data); } /** - * Translate an array consisting of an identifier string together with additional variable parameters into the current locale. + * Translate an array consisting of an identifier string together with + * additional variable parameters into the current locale. * - * Suppose the identifier string was some_view_fraction_received and two additional arguments 5 and 10 were given. - * Suppose further that its translation into the current locale (say en_US) was "%s out of %s". 
Then the string returned by translate - * would be "5 out of 10". + * Suppose the identifier string was some_view_fraction_received and two + * additional arguments 5 and 10 were given. Suppose further that its + * translation into the current locale (say en_US) was "%s out of %s". + * Then the string returned by translate would be "5 out of 10". * - * @param array $arr an array consisting of an identifier string followed optionally by parameter values. - * @return string the translation of the identifier string into the current locale where all %s have been replaced by the corresponding + * @param array $arr an array consisting of an identifier string followed + * optionally by parameter values. + * @return string the translation of the identifier string into the + * current locale where all %s have been replaced by the corresponding * parameter values */ function translate($arr) { @@ -359,9 +395,11 @@ class LocaleModel extends Model /** - * The direction that blocks (such as p or div tags) should be drawn in the current locale + * The direction that blocks (such as p or div tags) should be drawn in + * the current locale * - * @return string a direction which is one of tb -- top-bottom, rl -- right-to-left, or lr -- left-to-right + * @return string a direction which is one of tb -- top-bottom, + * rl -- right-to-left, or lr -- left-to-right */ function getBlockProgression() { @@ -398,15 +436,18 @@ class LocaleModel extends Model } /** - * Used to extract identifier strings from files with correct extensions, then these - * strings are merged with existing extracted strings for each locale as well as their - * translations (if an extract string has a translation the translation is untouched by this process). 
+ * Used to extract identifier strings from files with correct extensions, + * then these strings are merged with existing extracted strings for each + * locale as well as their translations (if an extract string has a + * translation the translation is untouched by this process). * - * @return array a pair consisting of the data from the general.ini file together with an array of msg_ids msg_strings. + * @return array a pair consisting of the data from the general.ini file + * together with an array of msg_ids msg_strings. */ function extractMergeLocales() { - $strings = $this->getTranslateStrings($this->extract_dirs, $this->extensions) ; + $strings = + $this->getTranslateStrings($this->extract_dirs, $this->extensions); $general_ini = parse_ini_file(LOCALE_DIR."/general.ini", true); $this->updateLocales($general_ini, $strings); @@ -420,9 +461,11 @@ class LocaleModel extends Model * It deletes identifiers that are not in strings, it adds new identifiers * and it leaves existing identifier translation pairs untouched. * - * @param array $general_ini data that would typically come from the general.ini file - * @param array $string lines from what is equivalent to an ini file of msg_id msg_string pairs - * these lines also have comments on the file that strings were extracted from + * @param array $general_ini data that would typically come from the + * general.ini file + * @param array $string lines from what is equivalent to an ini file + * of msg_id msg_string pairs these lines also have comments on the + * file that strings were extracted from * */ function updateLocales($general_ini, $strings) @@ -444,22 +487,27 @@ class LocaleModel extends Model /** - * Updates the configure.ini file for a particular locale. - * - * The configure.ini has general information (at this point not really being used) about - * all locales together with specific msg_id (identifiers to be translated) and msg_string (translation) - * data. 
updateLocale takes line data coming from the general.ini file, strings extracted from - * documents that might need to be translation, the old configure.ini file (this might have existing translations), - * as well as new translation data that might come from a localizer via a web form and - * combines these to produce a new configure.ini file + * Updates the configure.ini file for a particular locale. + * + * The configure.ini has general information (at this point not really + * being used) about all locales together with specific msg_id (identifiers + * to be translated) and msg_string (translation) data. updateLocale takes + * line data coming from the general.ini file, strings extracted from + * documents that might need to be translation, the old configure.ini file + * (this might have existing translations), as well as new translation + * data that might come from a localizer via a web form and + * combines these to produce a new configure.ini file * - * @param array $general_ini data from the general.ini file - * @param array $strings line array data extracted from files in directories that have strings in need of translation - * @param string $dir the directory of all the locales - * @param string $locale the particular locale in $dir to update - * @param array $new_configure translations of identifier strings from another source such as a localizer using a web form + * @param array $general_ini data from the general.ini file + * @param array $strings line array data extracted from files in + * directories that have strings in need of translation + * @param string $dir the directory of all the locales + * @param string $locale the particular locale in $dir to update + * @param array $new_configure translations of identifier strings from + * another source such as a localizer using a web form */ - function updateLocale($general_ini, $strings, $dir, $locale, $new_configure = NULL) + function updateLocale($general_ini, $strings, + $dir, $locale, 
$new_configure = NULL) { $old_configure = array(); $cur_path = $dir . '/' . $locale; @@ -496,18 +544,24 @@ EOT; $n[] = "[$general_name]"; foreach($general_value as $name => $value) { if(isset($new_configure[$general_name][$name])) { - $n[] = $name.' = "'.addslashes($new_configure[$general_name][$name]).'"'; + $n[] = $name.' = "'. + addslashes($new_configure[$general_name][$name]). + '"'; } else if(isset($old_configure[$general_name][$name])) { - $n[] = $name.' = "'.addslashes($old_configure[$general_name][$name]).'"'; + $n[] = $name.' = "'. + addslashes($old_configure[$general_name][$name]). + '"'; } else { $n[] = $name.' = "'.$value.'"'; } } } else { if(isset($new_configure[$general_name])) { - $n[] = $general_name.' = "'.addslashes($new_configure[$general_name]).'"'; + $n[] = $general_name.' = "'. + addslashes($new_configure[$general_name]).'"'; } else if(isset($old_configure[$general_name])) { - $n[] = $general_name.' = "'.addslashes($old_configure[$general_name]).'"'; + $n[] = $general_name.' = "'. + addslashes($old_configure[$general_name]).'"'; } else { $n[] = $name.' = "'.$value.'"'; } @@ -521,9 +575,11 @@ EOT; $n[] = $string; } else { if(isset($new_configure['strings'][$string])) { - $n[] = $string.' = "'.addslashes($new_configure['strings'][$string]).'"'; + $n[] = $string.' = "'. + addslashes($new_configure['strings'][$string]).'"'; } else if(isset($old_configure['strings'][$string])) { - $n[] = $string.' = "'.addslashes($old_configure['strings'][$string]).'"'; + $n[] = $string.' = "'. + addslashes($old_configure['strings'][$string]).'"'; } else { $n[] = $string.' = ""'; } @@ -536,16 +592,20 @@ EOT; /** - * Searches the directories provided looking for files matching the extensions provided. When such - * a file is found it is loaded and scanned for tl() function calls. The identifier string in this - * function call is then extracted and added to a line array of strings to be translated. 
This line - * array is formatted so that each line looks like a line that might occur in an PHP ini file. - * To understand this format one can look at the parse_ini_string function in the PHP manual or - * look at the configure.ini files in the locale directory + * Searches the directories provided looking for files matching the + * extensions provided. When such a file is found it is loaded and scanned + * for tl() function calls. The identifier string in this function call is + * then extracted and added to a line array of strings to be translated. + * This line array is formatted so that each line looks like a line that + * might occur in an PHP ini file. To understand this format one can look at + * the parse_ini_string function in the PHP manual or look at the + * configure.ini files in the locale directory * - * @param array $extract_dirs directories to start looking for files with strings to be translated - * @param array $extensions file extensions of files which might contain such strings - * @return array of lines for any ini file of msg_id msg_string pairs + * @param array $extract_dirs directories to start looking for files with + * strings to be translated + * @param array $extensions file extensions of files which might contain + * such strings + * @return array of lines for any ini file of msg_id msg_string pairs */ function getTranslateStrings($extract_dirs, $extensions) { @@ -572,9 +632,11 @@ EOT; * the strings array. 
In addition, ini style comments are added givne the * line file and line number of the item to be translated * - * @param string $dir current directory to start looking for files with strings to be translated - * @param array $extensions file extensions of files which might contain such strings - * @return array of lines for any ini file of msg_id msg_string pairs + * @param string $dir current directory to start looking for files with + * strings to be translated + * @param array $extensions file extensions of files which might contain + * such strings + * @return array of lines for any ini file of msg_id msg_string pairs */ function traverseExtractRecursive($dir, $extensions) { @@ -591,7 +653,8 @@ EOT; $cur_path = $dir . '/' . $obj; if (is_dir($cur_path)) { - $dir_strings = $this->traverseExtractRecursive($cur_path, $extensions); + $dir_strings = + $this->traverseExtractRecursive($cur_path, $extensions); if(count($dir_strings) > 0) { $strings[] = ";"; $strings[] = "; $cur_path"; @@ -600,13 +663,16 @@ EOT; } if(is_file($cur_path)) { - $path_parts = pathinfo($cur_path); - $extension = (isset($path_parts['extension'])) ? $path_parts['extension'] : ""; + $path_parts = pathinfo($cur_path); + $extension = (isset($path_parts['extension'])) ? 
+ $path_parts['extension'] : ""; if(in_array($extension, $extensions)) { $lines = file($cur_path); $num_lines = count($lines); for($i = 0; $i < $num_lines; $i++) { - $num_matches = preg_match_all('/tl\([\'|\"]?([[:word:]]+?)[\'|\"]?[(\))|(\s+\,)]/', $lines[$i], $to_translates); + $num_matches = preg_match_all( + '/tl\([\'|\"]?([[:word:]]+?)[\'|\"]?[(\))|(\s+\,)]/', + $lines[$i], $to_translates); if($num_matches > 0) { $strings[] = ";"; $strings[] = "; $obj line: $i"; diff --git a/models/model.php b/models/model.php index bb8e64e0b..d82d5c31c 100755 --- a/models/model.php +++ b/models/model.php @@ -76,9 +76,10 @@ class Model implements CrawlConstants /** - * Sets up the database manager that will be used and name of the search engine database + * Sets up the database manager that will be used and name of the search + * engine database * - * @param string $db_name the name of the database for the search engine + * @param string $db_name the name of the database for the search engine */ function __construct($db_name = DB_NAME) { @@ -92,13 +93,14 @@ class Model implements CrawlConstants /** - * Given an array page summarries, for each summary extracts snippets which are related to a set of search words - * For each snippet, bold faces the search terms, and then creates a new summary array. + * Given an array page summarries, for each summary extracts snippets which + * are related to a set of search words. For each snippet, bold faces the + * search terms, and then creates a new summary array. * - * @param array $results an array of web pages summaries (these in turn are arrays!) - * @param array $words an array of keywords (typically what was searched on) - * - * @return array summaries which have been snippified and bold faced + * @param array $results web pages summaries (these in turn are + * arrays!) 
+ * @param array $words keywords (typically what was searched on) + * @return array summaries which have been snippified and bold faced */ function formatPageResults($results, $words = NULL) { @@ -117,7 +119,8 @@ class Model implements CrawlConstants $page[self::TITLE] = strip_tags($page[self::TITLE]); if(strlen($page[self::TITLE]) == 0 ) { - $offset = min(mb_strlen($page[self::DESCRIPTION]), TITLE_LENGTH); + $offset = + min(mb_strlen($page[self::DESCRIPTION]), TITLE_LENGTH); $end_title = mb_strpos($page[self::DESCRIPTION], " ", $offset); $ellipsis = ""; if($end_title > TITLE_LENGTH) { @@ -126,19 +129,28 @@ class Model implements CrawlConstants $end_title = MAX_TITLE_LENGTH; } } - $page[self::TITLE] = substr(strip_tags($page[self::DESCRIPTION]), 0, $end_title).$ellipsis; + $page[self::TITLE] = + substr(strip_tags($page[self::DESCRIPTION]), 0, $end_title). + $ellipsis; } if($words != NULL) { - $page[self::TITLE] = $this->boldKeywords($page[self::TITLE], $words); - $page[self::DESCRIPTION] = substr(strip_tags($page[self::DESCRIPTION]), 0, DESCRIPTION_LENGTH); + $page[self::TITLE] = + $this->boldKeywords($page[self::TITLE], $words); + $page[self::DESCRIPTION] = + substr(strip_tags( + $page[self::DESCRIPTION]), 0, DESCRIPTION_LENGTH); - $page[self::DESCRIPTION] = $this->getSnippets($page[self::DESCRIPTION], $words); - $page[self::DESCRIPTION] = $this->boldKeywords($page[self::DESCRIPTION], $words); + $page[self::DESCRIPTION] = + $this->getSnippets($page[self::DESCRIPTION], $words); + $page[self::DESCRIPTION] = + $this->boldKeywords($page[self::DESCRIPTION], $words); } else { - $page[self::DESCRIPTION] = substr(strip_tags($page[self::DESCRIPTION]), 0, DESCRIPTION_LENGTH); + $page[self::DESCRIPTION] = + substr(strip_tags( + $page[self::DESCRIPTION]), 0, DESCRIPTION_LENGTH); } $page[self::SCORE] = substr($page[self::SCORE], 0, SCORE_PRECISION); @@ -156,10 +168,11 @@ class Model implements CrawlConstants /** - * Given a string, extracts a snippets of text related to a 
given set of key words. - * For a given word a snippet is a window of characters to its left and right that - * is less than a maximum total number of characters. There is also a rule that - * a snippet should avoid ending in the middle of a word + * Given a string, extracts a snippets of text related to a given set of + * key words. For a given word a snippet is a window of characters to its + * left and right that is less than a maximum total number of characters. + * There is also a rule that a snippet should avoid ending in the middle of + * a word * * @param string $text haystack to extract snippet from * @param array $words keywords used to make look in haystack @@ -189,22 +202,25 @@ class Model implements CrawlConstants foreach($word_locations as $pos => $word) { - $pre_low = ($pos >= SNIPPET_LENGTH_LEFT) ? $pos - SNIPPET_LENGTH_LEFT: 0; + $pre_low = ($pos >= SNIPPET_LENGTH_LEFT) ? + $pos - SNIPPET_LENGTH_LEFT: 0; if(!($low = mb_strpos($text, " ", $pre_low))) { $low = $pre_low; } - $pre_high = ($pos + SNIPPET_LENGTH_RIGHT <= $len ) ? $pos + SNIPPET_LENGTH_RIGHT: $len; + $pre_high = ($pos + SNIPPET_LENGTH_RIGHT <= $len ) ? 
+ $pos + SNIPPET_LENGTH_RIGHT: $len; if(!($high = mb_strpos($text, " ", $pre_high))) { $high = $pre_high; } if( strlen($snippet_string) < DESCRIPTION_LENGTH) { - $snippet_string .= $ellipsis.mb_substr($text, $low, $high - $low); + $snippet_string .= + $ellipsis.mb_substr($text, $low, $high - $low); $ellipsis = "..."; } } - } while( strlen($snippet_string) < DESCRIPTION_LENGTH && $offset < $len) ; + } while(strlen($snippet_string) < DESCRIPTION_LENGTH && $offset < $len); if(strlen($snippet_string) < MIN_SNIPPET_LENGTH) { $snippet_string = $text; @@ -236,7 +252,7 @@ class Model implements CrawlConstants } /** - * Gets a list of all DBMS that work with the search engine + * Gets a list of all DBMS that work with the search engine * * @return array Names of availabledatasources */ @@ -246,7 +262,10 @@ class Model implements CrawlConstants $data_managers = glob(BASE_DIR.'/models/datasources/*_manager.php'); foreach($data_managers as $data_manager) { - $dbms = substr($data_manager, strlen(BASE_DIR.'/models/datasources/'), -strlen("_manager.php")); + $dbms = + substr($data_manager, + strlen(BASE_DIR.'/models/datasources/'), - + strlen("_manager.php")); if($dbms != 'datasource') { $list[] = $dbms; } @@ -256,10 +275,11 @@ class Model implements CrawlConstants } /** - * Returns whether the provided dbms needs a login and password or not (sqlite or sqlite3) + * Returns whether the provided dbms needs a login and password or not + * (sqlite or sqlite3) * - * @param string $dbms the name of a database management system - * @return bool true if needs a login and password; false otherwise + * @param string $dbms the name of a database management system + * @return bool true if needs a login and password; false otherwise */ function loginDbms($dbms) { diff --git a/models/phrase_model.php b/models/phrase_model.php index 581cf10c0..e2225d2f5 100755 --- a/models/phrase_model.php +++ b/models/phrase_model.php @@ -33,11 +33,17 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} 
-/** logging is done during crawl not through web, so will not be used in the phrase model */ +/** + * logging is done during crawl not through web, + * so it will not be used in the phrase model + */ define("LOG_TO_FILES", false); /** For crawlHash function */ require_once BASE_DIR."/lib/utility.php"; -/** Used to look up words and phrases in the inverted index associated with a given crawl*/ +/** + * Used to look up words and phrases in the inverted index + * associated with a given crawl + */ require_once BASE_DIR."/lib/index_archive_bundle.php"; /** @@ -68,21 +74,25 @@ class PhraseModel extends Model /** - * Given a query phrase, returns formatted document summaries of the documents that match the phrase. + * Given a query phrase, returns formatted document summaries of the + * documents that match the phrase. * - * @param string $phrase the phrase to try to match - * @param int $low return results beginning with the $low document - * @param int $results_per_page how many results to return - * @param bool $format whether to highlight in the returned summaries the matched text - * - * @return array an array of summary data + * @param string $phrase the phrase to try to match + * @param int $low return results beginning with the $low document + * @param int $results_per_page how many results to return + * @param bool $format whether to highlight in the returned summaries the + * matched text + * @return array an array of summary data */ - function getPhrasePageResults($phrase, $low = 0, $results_per_page = NUM_RESULTS_PER_PAGE, $format = true) + function getPhrasePageResults( + $phrase, $low = 0, $results_per_page = NUM_RESULTS_PER_PAGE, + $format = true) { $index_archive_name = self::index_data_base_name . 
$this->index_name; - $index_archive = new IndexArchiveBundle(CRAWL_DIR.'/cache/'.$index_archive_name); + $index_archive = new IndexArchiveBundle( + CRAWL_DIR.'/cache/'.$index_archive_name); $results = NULL; @@ -91,29 +101,38 @@ class PhraseModel extends Model $phrase_hash = crawlHash($phrase_string); - //we search using the stemmed words, but we format snippets in the results by bolding either + /* + we search using the stemmed words, but we format snippets in the + results by bolding either + */ $query_words = explode(" ", $phrase_string); //not stemmed - $words = array_keys(PhraseParser::extractPhrasesAndCount($phrase_string)); //stemmed + $words = + array_keys(PhraseParser::extractPhrasesAndCount($phrase_string)); + //stemmed if($index_archive->getPhraseIndexInfo($phrase_hash) != NULL) { - $results = $index_archive->getSummariesByHash($phrase_hash, $low, $results_per_page); + $results = $index_archive->getSummariesByHash( + $phrase_hash, $low, $results_per_page); if(count($results) == 0) { $results = NULL; } } else { - //handle strings in quotes (we want an exact match on such quoted strings) - + /* + handle strings in quotes + (we want an exact match on such quoted strings) + */ $quoteds =array(); $hash_quoteds = array(); - $num_quotes = preg_match_all('/\"((?:[^\"\\\]|\\\\.)*)\"/', $phrase, $quoteds); + $num_quotes = + preg_match_all('/\"((?:[^\"\\\]|\\\\.)*)\"/', $phrase,$quoteds); if(isset($quoteds[1])) { $quoteds = $quoteds[1]; foreach($quoteds as $quote_phrase) { $hash_quote = crawlHash($quote_phrase); - if($index_archive->getPhraseIndexInfo($hash_quote) != NULL) { + if($index_archive->getPhraseIndexInfo($hash_quote) != NULL){ $hash_quoteds[] = $hash_quote; } } @@ -139,7 +158,9 @@ class PhraseModel extends Model $word_key = $word_keys[0]; $count = $words_array[$word_key]; if($count > 0 ) { - $results = $index_archive->getSummariesByHash($word_key, $low, $results_per_page, $restrict_phrases, $phrase_hash); + $results = $index_archive->getSummariesByHash( + 
$word_key, $low, $results_per_page, + $restrict_phrases, $phrase_hash); } } @@ -162,24 +183,27 @@ class PhraseModel extends Model /** - * Given a page summary extract the words from it and try to find documents which - * match the most relevant words. The algorithm for "relevant" is pretty weak. For - * now we pick the $num many words which appear in the fewest documents. - * - * @param string $craw_item a page summary - * @param int $num number of key phrase to return + * Given a page summary extract the words from it and try to find documents + * which match the most relevant words. The algorithm for "relevant" is + * pretty weak. For now we pick the $num many words which appear in the + * fewest documents. * - * @return array an array of most selective key phrases + * @param string $craw_item a page summary + * @param int $num number of key phrase to return + * @return array an array of most selective key phrases */ function getTopPhrases($crawl_item, $num) { $index_archive_name = self::index_data_base_name . 
$this->index_name; - $index_archive = new IndexArchiveBundle(CRAWL_DIR.'/cache/'.$index_archive_name); + $index_archive = + new IndexArchiveBundle(CRAWL_DIR.'/cache/'.$index_archive_name); - $phrase_string = PhraseParser::extractWordStringPageSummary($crawl_item); + $phrase_string = + PhraseParser::extractWordStringPageSummary($crawl_item); - $words = array_keys(PhraseParser::extractPhrasesAndCount($phrase_string)); + $words = + array_keys(PhraseParser::extractPhrasesAndCount($phrase_string)); $hashes = array(); $lookup = array(); @@ -189,7 +213,8 @@ class PhraseModel extends Model $lookup[$tmp] = $word; } - $words_array = $index_archive->getSelectiveWords($hashes, $num, "greaterThan"); + $words_array = + $index_archive->getSelectiveWords($hashes, $num, "greaterThan"); $word_keys = array_keys($words_array); $phrases = array(); diff --git a/models/profile_model.php b/models/profile_model.php index 36a115464..b0ad6b50a 100644 --- a/models/profile_model.php +++ b/models/profile_model.php @@ -50,7 +50,8 @@ require_once(BASE_DIR.'/lib/url_parser.php'); class ProfileModel extends Model { var $profile_fields = array('USER_AGENT_SHORT', - 'DEFAULT_LOCALE', 'DEBUG_LEVEL', 'DBMS','DB_URL', 'DB_NAME', 'DB_USER', 'DB_PASSWORD', + 'DEFAULT_LOCALE', 'DEBUG_LEVEL', 'DBMS','DB_URL', + 'DB_NAME', 'DB_USER', 'DB_PASSWORD', 'QUEUE_SERVER', 'AUTH_KEY', "ROBOT_DESCRIPTION", 'WEB_URI'); /** * {@inheritdoc} @@ -61,9 +62,11 @@ class ProfileModel extends Model } /** - * Creates a folder to be used to maintain local information about this instance of the Yioop/SeekQuarry engin + * Creates a folder to be used to maintain local information about this + * instance of the Yioop/SeekQuarry engin * - * Creates the directory provides as well as subdirectories for crawls, locales, logging, and sqlite DBs. + * Creates the directory provides as well as subdirectories for crawls, + * locales, logging, and sqlite DBs. 
* * @param string $directory parth and name of directory to create */ @@ -71,7 +74,8 @@ class ProfileModel extends Model { $to_make_dirs = array($directory, "$directory/locale", - "$directory/cache", "$directory/schedules", "$directory/log", "$directory/data"); + "$directory/cache", "$directory/schedules", + "$directory/log", "$directory/data"); $dir_status = array(); foreach($to_make_dirs as $dir) { $dir_status[$dir] = $this->createIfNecessaryDirectory($dir); @@ -89,16 +93,23 @@ class ProfileModel extends Model } /** - * Outputs a profile.php file in the given directory containing profile data based on new and old data sources + * Outputs a profile.php file in the given directory containing profile + * data based on new and old data sources * - * This function creates a profile.php file if it doesn't exist. A given field is output in the profile - * according to the precedence that a new value is preferred to an old value is prefered to the value that - * comes from a currently defined constant. It might be the case that a new value for a given field doesn't exist, etc. + * This function creates a profile.php file if it doesn't exist. A given + * field is output in the profile + * according to the precedence that a new value is preferred to an old + * value is prefered to the value that comes from a currently defined + * constant. It might be the case that a new value for a given field + * doesn't exist, etc. * - * @param string $directory the work directory to output the profile.php file - * @param array $new_profile_data fields and values containing at least some profile information (only $this->profile_fields - * fields of $new_profile_data will be considered). 
- * @param array $old_profile_data fields and values that come from preseumably a previously existing profile + * @param string $directory the work directory to output the profile.php + * file + * @param array $new_profile_data fields and values containing at least + * some profile information (only $this->profile_fields + * fields of $new_profile_data will be considered). + * @param array $old_profile_data fields and values that come from + * presumably a previously existing profile */ function updateProfile($directory, $new_profile_data, $old_profile_data) { @@ -164,10 +175,12 @@ EOT; } $out = implode("\n", $n); if(file_put_contents("$directory/profile.php", $out) !== false) { - chmod("$directory/profile.php", 0777); + @chmod("$directory/profile.php", 0777); if(isset($new_profile_data['ROBOT_DESCRIPTION'])) { - file_put_contents("$directory/bot.txt", $new_profile_data['ROBOT_DESCRIPTION']); - chmod("$directory/bot.txt", 0777); + file_put_contents( + "$directory/bot.txt", + $new_profile_data['ROBOT_DESCRIPTION']); + @chmod("$directory/bot.txt", 0777); } return true; } @@ -176,10 +189,11 @@ EOT; } /** - * Creates a directory and sets it to owrld prermission if it doesn't aleady exist + * Creates a directory and sets it to owrld prermission if it doesn't + * aleady exist * - * @param string $directory name of directory to create - * @return int -1 on failure, 0 if already existed, 1 if created + * @param string $directory name of directory to create + * @return int -1 on failure, 0 if already existed, 1 if created */ function createIfNecessaryDirectory($directory) { @@ -207,20 +221,32 @@ EOT; $auto_increment = "AUTO_INCREMENT"; } if(in_array($dbinfo['DBMS'], array("sqlite"))) { - $auto_increment = ""; //in sqlite2 a primary key column will act as auto_increment if don't give value + $auto_increment = ""; + /* in sqlite2 a primary key column will act + as auto_increment if don't give value + */ } - $tables = array("USER", "TRANSLATION", "LOCALE", 
"TRANSLATION_LOCALE", "ROLE", + $tables = array("USER", "TRANSLATION", + "LOCALE", "TRANSLATION_LOCALE", "ROLE", "ROLE_ACTIVITY", "ACTIVITY", "USER_ROLE", "CURRENT_WEB_INDEX"); $create_statements = array( - "CREATE TABLE USER( USER_ID INTEGER PRIMARY KEY $auto_increment, USER_NAME VARCHAR(16) UNIQUE, PASSWORD VARCHAR(16))", - "CREATE TABLE TRANSLATION (TRANSLATION_ID INTEGER PRIMARY KEY $auto_increment, IDENTIFIER_STRING VARCHAR(512) UNIQUE)", - "CREATE TABLE LOCALE (LOCALE_ID INTEGER PRIMARY KEY $auto_increment, LOCALE_TAG VARCHAR(16), LOCALE_NAME VARCHAR(256)," . + "CREATE TABLE USER( USER_ID INTEGER PRIMARY KEY $auto_increment, ". + "USER_NAME VARCHAR(16) UNIQUE, PASSWORD VARCHAR(16))", + "CREATE TABLE TRANSLATION (TRANSLATION_ID INTEGER PRIMARY KEY ". + "$auto_increment, IDENTIFIER_STRING VARCHAR(512) UNIQUE)", + "CREATE TABLE LOCALE(LOCALE_ID INTEGER PRIMARY KEY ". + "$auto_increment, LOCALE_TAG VARCHAR(16), ". + "LOCALE_NAME VARCHAR(256)," . "WRITING_MODE CHAR(5))", - "CREATE TABLE TRANSLATION_LOCALE (TRANSLATION_ID INTEGER, LOCALE_ID INTEGER, TRANSLATION VARCHAR(4096) )", - "CREATE TABLE ROLE (ROLE_ID INTEGER PRIMARY KEY $auto_increment, NAME VARCHAR(512))", + "CREATE TABLE TRANSLATION_LOCALE (TRANSLATION_ID INTEGER, ". + "LOCALE_ID INTEGER, TRANSLATION VARCHAR(4096) )", + "CREATE TABLE ROLE (ROLE_ID INTEGER PRIMARY KEY $auto_increment, ". + "NAME VARCHAR(512))", "CREATE TABLE ROLE_ACTIVITY (ROLE_ID INTEGER, ACTIVITY_ID INTEGER)", - "CREATE TABLE ACTIVITY (ACTIVITY_ID INTEGER PRIMARY KEY $auto_increment, TRANSLATION_ID INTEGER, METHOD_NAME VARCHAR(256))", + "CREATE TABLE ACTIVITY (ACTIVITY_ID INTEGER PRIMARY KEY ". + "$auto_increment, TRANSLATION_ID INTEGER, ". 
+ "METHOD_NAME VARCHAR(256))", "CREATE TABLE USER_ROLE (USER_ID INTEGER, ROLE_ID INTEGER)", "CREATE TABLE CURRENT_WEB_INDEX (CRAWL_TIME INT(11) )"); foreach($create_statements as $statement) { @@ -230,10 +256,13 @@ EOT; require_once(BASE_DIR."/models/datasources/sqlite3_manager.php"); $default_dbm = new Sqlite3Manager(); - $default_dbm->dbhandle = new SQLite3(BASE_DIR."/data/default.db", SQLITE3_OPEN_READWRITE); // a little bit hacky + $default_dbm->dbhandle = new SQLite3( + BASE_DIR."/data/default.db", SQLITE3_OPEN_READWRITE); + // a little bit hacky if(!$default_dbm->dbhandle) {return false;} foreach($tables as $table) { - if(!$this->copyTable($table, $default_dbm, $test_dbm)) {return false;} + if(!$this->copyTable($table, $default_dbm, $test_dbm)) + {return false;} } return true; } @@ -246,15 +275,19 @@ EOT; if(!isset($dbinfo['DBMS'])) {return false;} // check if can establish a connect to dbms - require_once(BASE_DIR."/models/datasources/".$dbinfo['DBMS']."_manager.php"); + require_once( + BASE_DIR."/models/datasources/".$dbinfo['DBMS']."_manager.php"); $dbms_manager = ucfirst($dbinfo['DBMS'])."Manager"; $test_dbm = new $dbms_manager(); if(isset($dbinfo['DB_URL'])) { if(isset($dbinfo['DB_USER'])) { if(isset($dbinfo['DB_PASSWORD'])) { - $conn = @$test_dbm->connect($dbinfo['DB_URL'], $dbinfo['DB_USER'], $dbinfo['DB_PASSWORD']); + $conn = @$test_dbm->connect( + $dbinfo['DB_URL'], + $dbinfo['DB_USER'], $dbinfo['DB_PASSWORD']); } else { - $conn = @$test_dbm->connect($dbinfo['DB_URL'], $dbinfo['DB_USER']); + $conn = @$test_dbm->connect( + $dbinfo['DB_URL'], $dbinfo['DB_USER']); } } else { $conn = @$test_dbm->connect($dbinfo['DB_URL']); @@ -272,7 +305,9 @@ EOT; } } - //check if need to create db contents. We check if any locale exists if not create db + /* check if need to create db contents. 
+ We check if any locale exists if not create db + */ $sql = "SELECT LOCALE_ID FROM LOCALE"; $result = @$test_dbm->execute($sql); @@ -311,12 +346,13 @@ EOT; function setWorkDirectoryConfigFile($directory) { $config = file_get_contents(BASE_DIR."/configs/config.php"); - $start_machine_section = strpos($config, '/*+++ The next block of code is machine edited'); + $start_machine_section = strpos($config,'/*+++ The next block of code'); if($start_machine_section === false) return false; $end_machine_section = strpos($config, '/*++++++*/'); if($end_machine_section === false) return false; $out = substr($config, 0, $start_machine_section + 1); - $out .= "/*+++ The next block of code is machine edited, change at your own risk, please use configure web page instead +++*/\n"; + $out .= "/*+++ The next block of code is machine edited, change at ". + "your own risk, please use configure web page instead +++*/\n"; $out .= "define('WORK_DIRECTORY', '$directory');\n"; $out .= substr($config, $end_machine_section); if(file_put_contents(BASE_DIR."/configs/config.php", $out)) return true; @@ -338,7 +374,8 @@ EOT; } if(file_exists($work_directory."/bot.txt")) { - $profile['ROBOT_DESCRIPTION'] = file_get_contents($work_directory."/bot.txt"); + $profile['ROBOT_DESCRIPTION'] = + file_get_contents($work_directory."/bot.txt"); } return $profile; @@ -349,7 +386,8 @@ EOT; */ function matchDefine($defined, $string) { - preg_match("/define\((?:\"$defined\"|\'$defined\')\,([^\)]*)\)/", $string, $match); + preg_match("/define\((?:\"$defined\"|\'$defined\')\,([^\)]*)\)/", + $string, $match); $match = (isset($match[1])) ? 
trim($match[1]) : ""; $len = strlen($match); if( $len >=2 && ($match[0] == '"' || $match[0] == "'")) { diff --git a/models/role_model.php b/models/role_model.php index bcd496612..896a6f284 100644 --- a/models/role_model.php +++ b/models/role_model.php @@ -73,15 +73,20 @@ class RoleModel extends Model $activities = array(); $locale_tag = getLocaleTag(); - $sql = "SELECT LOCALE_ID FROM LOCALE WHERE LOCALE_TAG = '$locale_tag' LIMIT 1"; + $sql = "SELECT LOCALE_ID FROM LOCALE ". + "WHERE LOCALE_TAG = '$locale_tag' LIMIT 1"; $result = $this->db->execute($sql); $row = $this->db->fetchArray($result); $locale_id = $row['LOCALE_ID']; - $sql = "SELECT R.ROLE_ID AS ROLE_ID, RA.ACTIVITY_ID AS ACTIVITY_ID, A.METHOD_NAME AS METHOD_NAME, ". - " T.IDENTIFIER_STRING AS IDENTIFIER_STRING, T.TRANSLATION_ID AS TRANSLATION_ID FROM ". - " ROLE R, ROLE_ACTIVITY RA, ACTIVITY A, TRANSLATION T WHERE R.ROLE_ID = '$role_id' AND". - " R.ROLE_ID = RA.ROLE_ID AND T.TRANSLATION_ID = A.TRANSLATION_ID AND RA.ACTIVITY_ID = A.ACTIVITY_ID"; + $sql = "SELECT R.ROLE_ID AS ROLE_ID, RA.ACTIVITY_ID AS ACTIVITY_ID, ". + "A.METHOD_NAME AS METHOD_NAME, ". + "T.IDENTIFIER_STRING AS IDENTIFIER_STRING, ". + "T.TRANSLATION_ID AS TRANSLATION_ID FROM ". + "ROLE R, ROLE_ACTIVITY RA, ACTIVITY A, TRANSLATION T ". + "WHERE R.ROLE_ID = '$role_id' AND ". + "R.ROLE_ID = RA.ROLE_ID AND T.TRANSLATION_ID = A.TRANSLATION_ID ". + "AND RA.ACTIVITY_ID = A.ACTIVITY_ID"; $result = $this->db->execute($sql); @@ -89,8 +94,10 @@ class RoleModel extends Model while($activities[$i] = $this->db->fetchArray($result)) { $id = $activities[$i]['TRANSLATION_ID']; - $sub_sql = "SELECT TRANSLATION AS ACTIVITY_NAME FROM TRANSLATION_LOCALE ". - " WHERE TRANSLATION_ID=$id AND LOCALE_ID=$locale_id LIMIT 1"; // maybe do left join at some point + $sub_sql = "SELECT TRANSLATION AS ACTIVITY_NAME ". + "FROM TRANSLATION_LOCALE ". 
+ "WHERE TRANSLATION_ID=$id AND LOCALE_ID=$locale_id LIMIT 1"; + // maybe do left join at some point $result_sub = $this->db->execute($sub_sql); $translate = $this->db->fetchArray($result_sub); @@ -98,7 +105,8 @@ class RoleModel extends Model if($translate) { $activities[$i]['ACTIVITY_NAME'] = $translate['ACTIVITY_NAME']; } else { - $activities[$i]['ACTIVITY_NAME'] = $activities['IDENTIFIER_STRING']; + $activities[$i]['ACTIVITY_NAME'] = + $activities['IDENTIFIER_STRING']; } $i++; } @@ -139,10 +147,10 @@ class RoleModel extends Model /** - * Get the role id associated with a rolename (so rolenames better be unique) + * Get role id associated with rolename (so rolenames better be unique) * - * @param string $rolename to use to look up a role_id - * @return string role_id corresponding to the rolename. + * @param string $rolename to use to look up a role_id + * @return string role_id corresponding to the rolename. */ function getRoleId($rolename) { @@ -167,7 +175,8 @@ class RoleModel extends Model function addRole($rolename) { $this->db->selectDB(DB_NAME); - $sql = "INSERT INTO ROLE (NAME) VALUES ('".$this->db->escapeString($rolename)."')"; + $sql = "INSERT INTO ROLE (NAME) VALUES ('". + $this->db->escapeString($rolename)."')"; $this->db->execute($sql); } @@ -182,7 +191,9 @@ class RoleModel extends Model function addActivityRole($roleid, $activityid) { $this->db->selectDB(DB_NAME); - $sql = "INSERT INTO ROLE_ACTIVITY VALUES ('".$this->db->escapeString($roleid)."', '".$this->db->escapeString($activityid)."')"; + $sql = "INSERT INTO ROLE_ACTIVITY VALUES ('". + $this->db->escapeString($roleid)."', '". + $this->db->escapeString($activityid)."')"; $this->db->execute($sql); } @@ -199,7 +210,8 @@ class RoleModel extends Model $sql = "DELETE FROM ROLE_ACTIVITY WHERE ROLE_ID='$roleid'"; $this->db->execute($sql); - $sql = "DELETE FROM ROLE WHERE ROLE_ID='".$this->db->escapeString($roleid)."'"; + $sql = "DELETE FROM ROLE WHERE ROLE_ID='". 
+ $this->db->escapeString($roleid)."'"; $this->db->execute($sql); } @@ -213,7 +225,9 @@ class RoleModel extends Model function deleteActivityRole($roleid, $activityid) { $this->db->selectDB(DB_NAME); - $sql = "DELETE FROM ROLE_ACTIVITY WHERE ROLE_ID='".$this->db->escapeString($roleid)."' AND ACTIVITY_ID='".$this->db->escapeString($activityid)."'"; + $sql = "DELETE FROM ROLE_ACTIVITY WHERE ROLE_ID='". + $this->db->escapeString($roleid)."' AND ACTIVITY_ID='". + $this->db->escapeString($activityid)."'"; $this->db->execute($sql); } diff --git a/models/signin_model.php b/models/signin_model.php index 3e84cc89c..f0793f8d0 100755 --- a/models/signin_model.php +++ b/models/signin_model.php @@ -61,11 +61,12 @@ class SigninModel extends Model /** - * Checks that a username password pair is valid + * Checks that a username password pair is valid * - * @param string $username the username to check - * @param string $password the password to check - * @return bool where the password is that of the given user (or at least hashes to the same thing) + * @param string $username the username to check + * @param string $password the password to check + * @return bool where the password is that of the given user + * (or at least hashes to the same thing) */ function checkValidSignin($username, $password) { @@ -74,12 +75,14 @@ class SigninModel extends Model $username = $this->db->escapeString($username); $password = $this->db->escapeString($password); - $sql = "SELECT USER_NAME, PASSWORD FROM USER WHERE USER_NAME = '$username' LIMIT 1"; + $sql = "SELECT USER_NAME, PASSWORD FROM USER ". 
+ "WHERE USER_NAME = '$username' LIMIT 1"; $result = $this->db->execute($sql); $row = $this->db->fetchArray($result); - return ($username == $row['USER_NAME'] && crawlCrypt($password, $row['PASSWORD']) == $row['PASSWORD']) ; + return ($username == $row['USER_NAME'] && + crawlCrypt($password, $row['PASSWORD']) == $row['PASSWORD']) ; } @@ -136,7 +139,8 @@ class SigninModel extends Model $username = $this->db->escapeString($username); $password = $this->db->escapeString($password); - $sql = "UPDATE USER SET PASSWORD='".crawlCrypt($password)."' WHERE USER_NAME = '$username' "; + $sql = "UPDATE USER SET PASSWORD='". + crawlCrypt($password)."' WHERE USER_NAME = '$username' "; $result = $this->db->execute($sql); return $result != false; diff --git a/models/user_model.php b/models/user_model.php index a24d7915b..5c1b7e569 100755 --- a/models/user_model.php +++ b/models/user_model.php @@ -75,15 +75,19 @@ class UserModel extends Model $activities = array(); $locale_tag = getLocaleTag(); - $sql = "SELECT LOCALE_ID FROM LOCALE WHERE LOCALE_TAG = '$locale_tag' LIMIT 1"; + $sql = "SELECT LOCALE_ID FROM LOCALE ". + "WHERE LOCALE_TAG = '$locale_tag' LIMIT 1"; $result = $this->db->execute($sql); $row = $this->db->fetchArray($result); $locale_id = $row['LOCALE_ID']; - $sql = "SELECT UR.ROLE_ID AS ROLE_ID, RA.ACTIVITY_ID AS ACTIVITY_ID, T.TRANSLATION_ID AS TRANSLATION_ID, A.METHOD_NAME AS METHOD_NAME, ". + $sql = "SELECT UR.ROLE_ID AS ROLE_ID, RA.ACTIVITY_ID AS ACTIVITY_ID, ". + "T.TRANSLATION_ID AS TRANSLATION_ID, A.METHOD_NAME AS METHOD_NAME,". " T.IDENTIFIER_STRING AS IDENTIFIER_STRING FROM ACTIVITY A, ". - " USER_ROLE UR, ROLE_ACTIVITY RA, TRANSLATION T WHERE UR.USER_ID = '$user_id' ". - " AND UR.ROLE_ID = RA.ROLE_ID AND T.TRANSLATION_ID = A.TRANSLATION_ID AND RA.ACTIVITY_ID = A.ACTIVITY_ID"; + " USER_ROLE UR, ROLE_ACTIVITY RA, TRANSLATION T ". + "WHERE UR.USER_ID = '$user_id' ". + "AND UR.ROLE_ID=RA.ROLE_ID AND T.TRANSLATION_ID=A.TRANSLATION_ID ". 
+ "AND RA.ACTIVITY_ID = A.ACTIVITY_ID"; $result = $this->db->execute($sql); $i = 0; @@ -91,8 +95,11 @@ class UserModel extends Model $id = $activities[$i]['TRANSLATION_ID']; - $sub_sql = "SELECT TRANSLATION AS ACTIVITY_NAME FROM TRANSLATION_LOCALE ". - " WHERE TRANSLATION_ID=$id AND LOCALE_ID=$locale_id LIMIT 1"; // maybe do left join at some point + $sub_sql = "SELECT TRANSLATION AS ACTIVITY_NAME ". + "FROM TRANSLATION_LOCALE ". + "WHERE TRANSLATION_ID=$id AND ". + "LOCALE_ID=$locale_id LIMIT 1"; + // maybe do left join at some point $result_sub = $this->db->execute($sub_sql); $translate = $this->db->fetchArray($result_sub); @@ -100,7 +107,8 @@ class UserModel extends Model if($translate) { $activities[$i]['ACTIVITY_NAME'] = $translate['ACTIVITY_NAME']; } else { - $activities[$i]['ACTIVITY_NAME'] = $activities[$i]['IDENTIFIER_STRING']; + $activities[$i]['ACTIVITY_NAME'] = + $activities[$i]['IDENTIFIER_STRING']; } $i++; } @@ -126,7 +134,8 @@ class UserModel extends Model $roles = array(); $locale_tag = getLocaleTag(); - $sql = "SELECT LOCALE_ID FROM LOCALE WHERE LOCALE_TAG = '$locale_tag' LIMIT 1"; + $sql = "SELECT LOCALE_ID FROM LOCALE ". 
+ "WHERE LOCALE_TAG = '$locale_tag' LIMIT 1"; $result = $this->db->execute($sql); $row = $this->db->fetchArray($result); $locale_id = $row['LOCALE_ID']; @@ -168,56 +177,65 @@ class UserModel extends Model /** - * Add a user with a given username and password to the list of users that can login to the admin panel + * Add a user with a given username and password to the list of users + * that can login to the admin panel * - * @param string $username the username of the user to be added - * @param string $password the password of the user to be added + * @param string $username the username of the user to be added + * @param string $password the password of the user to be added */ function addUser($username, $password) { $this->db->selectDB(DB_NAME); - $sql = "INSERT INTO USER(USER_NAME, PASSWORD) VALUES ('".$this->db->escapeString($username)."', '".crawlCrypt($this->db->escapeString($password))."' ) "; + $sql = "INSERT INTO USER(USER_NAME, PASSWORD) VALUES ('". + $this->db->escapeString($username)."', '". + crawlCrypt($this->db->escapeString($password))."' ) "; $result = $this->db->execute($sql); } /** - * Deletes a user by username from the list of users that can login to the admin panel + * Deletes a user by username from the list of users that can login to + * the admin panel * - * @param string $username the login name of the user to delete + * @param string $username the login name of the user to delete */ function deleteUser($username) { $this->db->selectDB(DB_NAME); - $sql = "DELETE FROM USER WHERE USER_NAME='".$this->db->escapeString($username)."'"; + $sql = "DELETE FROM USER WHERE USER_NAME='". 
+ $this->db->escapeString($username)."'"; $result = $this->db->execute($sql); } /** - * Adds a role to a given user + * Adds a role to a given user * - * @param string $userid the id of the user to add the role to - * @param string $roleid the id of the role to add + * @param string $userid the id of the user to add the role to + * @param string $roleid the id of the role to add */ function addUserRole($userid, $roleid) { $this->db->selectDB(DB_NAME); - $sql = "INSERT INTO USER_ROLE VALUES ('".$this->db->escapeString($userid)."', '".$this->db->escapeString($roleid)."' ) "; + $sql = "INSERT INTO USER_ROLE VALUES ('". + $this->db->escapeString($userid)."', '". + $this->db->escapeString($roleid)."' ) "; $result = $this->db->execute($sql); } /** - * Deletes a role from a given user + * Deletes a role from a given user * - * @param string $userid the id of the user to delete the role from - * @param string $roleid the id of the role to delete + * @param string $userid the id of the user to delete the role from + * @param string $roleid the id of the role to delete */ function deleteUserRole($userid, $roleid) { $this->db->selectDB(DB_NAME); - $sql = "DELETE FROM USER_ROLE WHERE USER_ID='".$this->db->escapeString($userid)."' AND ROLE_ID='".$this->db->escapeString($roleid)."'"; + $sql = "DELETE FROM USER_ROLE WHERE USER_ID='". + $this->db->escapeString($userid)."' AND ROLE_ID='". 
+ $this->db->escapeString($roleid)."'"; $result = $this->db->execute($sql); } } diff --git a/scripts/basic.js b/scripts/basic.js index f098698b9..a90666c41 100755 --- a/scripts/basic.js +++ b/scripts/basic.js @@ -87,7 +87,7 @@ function makeRequest() } /* - Make an AJAX request for a url and put the results as the inner HTML of a tag + Make an AJAX request for a url and put the results as inner HTML of a tag Object tag a DOM element to put the results of the AJAX request String url web page to fetch using AJAX diff --git a/tests/bloom_filter_file_test.php b/tests/bloom_filter_file_test.php index 1ed5a9fd0..46b92dbd8 100755 --- a/tests/bloom_filter_file_test.php +++ b/tests/bloom_filter_file_test.php @@ -39,9 +39,9 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} require_once BASE_DIR."/lib/bloom_filter_file.php"; /** - * Used to test that the BloomFilterFile class provides the basic functionality of - * a persistent set. I.e., we can insert things into it, and we can do membership - * testing + * Used to test that the BloomFilterFile class provides the basic functionality + * of a persistent set. I.e., we can insert things into it, and we can do + * membership testing * * @author Chris Pollett * @package seek_quarry @@ -50,7 +50,8 @@ require_once BASE_DIR."/lib/bloom_filter_file.php"; class BloomFilterFileTest extends UnitTest { /** - * Set up a bloom filter that can store up to 10 items and that saves itself every 100 writes + * Set up a bloom filter that can store up to 10 items and that saves + * itself every 100 writes */ public function setUp() { @@ -58,8 +59,8 @@ class BloomFilterFileTest extends UnitTest } /** - * Since a BloomFilterFile is a PersistentStructure it periodically saves itself to a file. - * To clean up we delete the files that might be created + * Since a BloomFilterFile is a PersistentStructure it periodically saves + * itself to a file. 
To clean up we delete the files that might be created */ public function tearDown() { @@ -67,28 +68,33 @@ class BloomFilterFileTest extends UnitTest } /** - * Tests that if nothing is in the bloom filter yet, that if we do a lookup we don't find anything + * Tests that if nothing is in the bloom filter yet, that if we do a lookup + * we don't find anything */ public function notInTestCase() { - $this->assertFalse($this->test_objects['FILE1']->contains(66), "File 1 contains 66"); + $this->assertFalse( + $this->test_objects['FILE1']->contains(66), "File 1 contains 66"); } /** - * Tests if we insert something into the bloom filter, that when we look it up, we find it. On the other - * hand, if we look something else up that we didn't insert, we shouldn't find it + * Tests if we insert something into the bloom filter, that when we look it + * up, we find it. On the other hand, if we look something else up that we + * didn't insert, we shouldn't find it * */ public function inTestCase() { $this->test_objects['FILE1']->add(77); - $this->assertTrue($this->test_objects['FILE1']->contains(77), "File 1 contains 77"); - $this->assertFalse($this->test_objects['FILE1']->contains(66), "File 1 contains 66"); + $this->assertTrue( + $this->test_objects['FILE1']->contains(77), "File 1 contains 77"); + $this->assertFalse( + $this->test_objects['FILE1']->contains(66), "File 1 contains 66"); } /** - * Check that if we force save the bloom filter file and then we reload it back in that - * it has the same Contents + * Check that if we force save the bloom filter file and then we reload it + * back in that it has the same Contents * */ public function saveLoadTestCase() @@ -98,8 +104,10 @@ class BloomFilterFileTest extends UnitTest $this->test_objects['FILE1'] = NULL; $this->test_objects['FILE2'] = BloomFilterFile::load("test.ftr"); - $this->assertTrue($this->test_objects['FILE2']->contains(77), "File 2 contains 77"); - $this->assertFalse($this->test_objects['FILE2']->contains(66), "File 
2 contains 66"); + $this->assertTrue( + $this->test_objects['FILE2']->contains(77), "File 2 contains 77"); + $this->assertFalse( + $this->test_objects['FILE2']->contains(66), "File 2 contains 66"); } } diff --git a/tests/hash_table_test.php b/tests/hash_table_test.php index 14d36c79c..ef3845e79 100755 --- a/tests/hash_table_test.php +++ b/tests/hash_table_test.php @@ -54,9 +54,9 @@ require_once BASE_DIR.'/lib/utility.php'; class HashTableTest extends UnitTest { /** - * We'll use two different tables one more representative of how the table is going to - * be used by the web_queue_bundle, the other small enough that we can manually figure - * out what the result should be + * We'll use two different tables one more representative of how the table + * is going to be used by the web_queue_bundle, the other small enough that + * we can manually figure out what the result should be */ public function setUp() { @@ -65,8 +65,8 @@ class HashTableTest extends UnitTest } /** - * Since a HashTable is a PersistentStructure it periodically saves itself to a file. - * To clean up we delete the files that might be created + * Since a HashTable is a PersistentStructure it periodically saves + * itself to a file. To clean up we delete the files that might be created */ public function tearDown() { @@ -75,86 +75,134 @@ class HashTableTest extends UnitTest } /** - * Check if for the big hash table we insert something then later look it up, that we in fact find it. - * Moreover, the value we associated with the insert key is as expected + * Check if for the big hash table we insert something then later look it + * up, that we in fact find it. 
Moreover, the value we associated with the + * insert key is as expected */ public function insertLookupTestCase() { - $this->assertTrue($this->test_objects['FILE1']->insert(crawlHash("http://www.cs.sjsu.edu/",true), pack("H*","0000147700000000")), + $this->assertTrue( + $this->test_objects['FILE1']->insert( + crawlHash("http://www.cs.sjsu.edu/",true), + pack("H*","0000147700000000")), "Insert (hash(URL), value) succeeded"); - $this->assertEqual($this->test_objects['FILE1']->lookup(crawlHash("http://www.cs.sjsu.edu/",true)), pack("H*","0000147700000000"), + $this->assertEqual( + $this->test_objects['FILE1']->lookup( + crawlHash("http://www.cs.sjsu.edu/",true)), + pack("H*","0000147700000000"), "Lookup value equals insert value"); } /** - * Checks insert an item, delete that item, then look it up. Make sure we don't find it after deletion. + * Checks insert an item, delete that item, then look it up. Make sure we + * don't find it after deletion. */ public function insertDeleteLookupTestCase() { - $this->assertTrue($this->test_objects['FILE1']->insert(crawlHash("http://www.cs.sjsu.edu/",true), pack("H*","0000147700000000")), + $this->assertTrue( + $this->test_objects['FILE1']->insert( + crawlHash("http://www.cs.sjsu.edu/",true), + pack("H*","0000147700000000")), "Insert (crawlHash(URL), value) succeeded"); - $this->assertTrue($this->test_objects['FILE1']->delete(crawlHash("http://www.cs.sjsu.edu/", true)), + $this->assertTrue( + $this->test_objects['FILE1']->delete( + crawlHash("http://www.cs.sjsu.edu/", true)), "delete crawlHash(URL) succeeded"); - $this->assertFalse($this->test_objects['FILE1']->lookup(crawlHash("http://www.cs.sjsu.edu/", true)), + $this->assertFalse( + $this->test_objects['FILE1']->lookup( + crawlHash("http://www.cs.sjsu.edu/", true)), "delete crawlHash(URL) succeeded"); } /** - * Completety fill table. Next insert should fail. Then delete all the items. Then check that we can't find any of them + * Completety fill table. Next insert should fail. 
Then delete all the + * items. Then check that we can't find any of them */ public function completeFillTestCase() { for ($i = 0; $i < 10; $i++) { - $this->assertTrue($this->test_objects['FILE2']->insert(crawlHash("hi$i", true), "$i"), + $this->assertTrue( + $this->test_objects['FILE2']->insert( + crawlHash("hi$i", true), "$i"), "Insert item ".($i+1)." into table of size 10"); } - $this->assertFalse($this->test_objects['FILE2']->insert(crawlHash("hi11",true), "a"), "Insert item 11 into table of size 10"); + $this->assertFalse( + $this->test_objects['FILE2']->insert( + crawlHash("hi11",true), "a"), + "Insert item 11 into table of size 10"); for ($i = 0; $i < 10; $i++) { - $this->assertEqual($this->test_objects['FILE2']->lookup(crawlHash("hi$i",true)), "$i", + $this->assertEqual( + $this->test_objects['FILE2']->lookup( + crawlHash("hi$i",true)), "$i", "Inserted value ".($i+1)." equals lookup value"); } - $this->assertFalse($this->test_objects['FILE2']->lookup(crawlHash("hi11",true)), "a", "Item 11's value should not be in table"); + $this->assertFalse( + $this->test_objects['FILE2']->lookup( + crawlHash("hi11",true)), "a", + "Item 11's value should not be in table"); for ($i = 0; $i < 10; $i++) { - $this->assertTrue($this->test_objects['FILE2']->delete(crawlHash("hi$i",true)), + $this->assertTrue( + $this->test_objects['FILE2']->delete(crawlHash("hi$i",true)), "Item ".($i+1)." delete success"); } for ($i = 0; $i < 11; $i++) { - $this->assertFalse($this->test_objects['FILE2']->lookup(crawlHash("hi$i",true)), + $this->assertFalse( + $this->test_objects['FILE2']->lookup(crawlHash("hi$i",true)), "Should not find Item ".($i+1)." after delete"); } } /** - * First check that inserting an item twice does not change its index in the table. Then inserts an - * item which should hash to the same value. So there is a collision which is resolved by linear offset. - * Check lookup of new item succeeds.Then delete first insert, check lookup of second insert still works. 
- * Check delete of second item, reinsert of first item and lookup. Index should change + * First check that inserting an item twice does not change its index in + * the table. Then inserts an item which should hash to the same value. So + * there is a collision which is resolved by linear offset. Check lookup of + * new item succeeds.Then delete first insert, check lookup of second insert + * still works. Check delete of second item, reinsert of first item and + * lookup. Index should change */ public function reinsertCollisionAndIndexTestCase() { $this->test_objects['FILE2']->insert(crawlHash("hi7",true), "7"); - $index = $this->test_objects['FILE2']->lookup(crawlHash("hi7",true),true); + $index = + $this->test_objects['FILE2']->lookup(crawlHash("hi7",true),true); $this->test_objects['FILE2']->insert(crawlHash("hi7",true), "z"); - $this->assertTrue($this->test_objects['FILE2']->lookup(crawlHash("hi7",true)), "z", "Reinsert Item hi7 overwrites old value"); - - $index2 = $this->test_objects['FILE2']->lookup(crawlHash("hi7",true),true); - $this->assertEqual($index, $index2, "Index of reinserted should not change"); - - $this->assertTrue($this->test_objects['FILE2']->insert(crawlHash("hi4",true), "8"), "Item hi4 which collides with hi7 insert okay"); - $this->assertTrue($this->test_objects['FILE2']->lookup(crawlHash("hi4",true), true), $index2 + 1, "Item hi4 located one after hi7"); + $this->assertTrue( + $this->test_objects['FILE2']->lookup( + crawlHash("hi7",true)), + "z", "Reinsert Item hi7 overwrites old value"); + + $index2 = + $this->test_objects['FILE2']->lookup(crawlHash("hi7",true),true); + $this->assertEqual( + $index, $index2, "Index of reinserted should not change"); + + $this->assertTrue( + $this->test_objects['FILE2']->insert(crawlHash("hi4",true), "8"), + "Item hi4 which collides with hi7 insert okay"); + $this->assertTrue( + $this->test_objects['FILE2']->lookup( + crawlHash("hi4",true), true), $index2 + 1, + "Item hi4 located one after hi7"); 
$this->test_objects['FILE2']->delete(crawlHash("hi7",true), true); - $this->assertTrue($this->test_objects['FILE2']->lookup(crawlHash("hi4",true), true), $index2 + 1, + $this->assertTrue( + $this->test_objects['FILE2']->lookup( + crawlHash("hi4",true), true), $index2 + 1, "Item hi4 looked up succeed after hi7 deleted"); $this->test_objects['FILE2']->delete(crawlHash("hi4",true), true); $this->test_objects['FILE2']->insert(crawlHash("hi7",true), "7"); - $this->assertEqual($this->test_objects['FILE2']->lookup(crawlHash("hi7",true)), "7", "Reinserted Item hi7 lookup succeeds"); - $this->assertEqual($this->test_objects['FILE2']->lookup(crawlHash("hi7",true),true), $index2 + 2, + $this->assertEqual( + $this->test_objects['FILE2']->lookup( + crawlHash("hi7",true)), "7", + "Reinserted Item hi7 lookup succeeds"); + $this->assertEqual( + $this->test_objects['FILE2']->lookup( + crawlHash("hi7",true),true), $index2 + 2, "New Item hi7 location does not overwrite deleted itemss"); } diff --git a/tests/index.php b/tests/index.php index f49f40133..1ea3c119b 100644 --- a/tests/index.php +++ b/tests/index.php @@ -34,7 +34,8 @@ * @filesource */ -define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT'].$_SERVER['PWD'].$_SERVER["SCRIPT_NAME"], 0, +define("BASE_DIR", substr($_SERVER['DOCUMENT_ROOT'].$_SERVER['PWD']. 
+ $_SERVER["SCRIPT_NAME"], 0, -strlen("tests/index.php"))); /** Load search engine wide configuration file */ @@ -54,7 +55,8 @@ if(!PROFILE || !DISPLAY_TESTS) {echo "BAD REQUEST"; exit();} <meta name="Author" content="Christopher Pollett" /> - <meta name="description" content="Displays unit tests for search engine" /> + <meta name="description" + content="Displays unit tests for search engine" /> <meta charset="utf-8" /> <link rel="shortcut icon" href="../favicon.ico" /> <style type="text/css"> @@ -81,8 +83,10 @@ require_once BASE_DIR."/lib/unit_test.php"; define("LOG_TO_FILES", false); -$allowed_activities = array("listTests", "runAllTests", "runTestBasedOnRequest"); -if(isset($_REQUEST['activity']) && in_array($_REQUEST['activity'], $allowed_activities)) { +$allowed_activities = + array("listTests", "runAllTests", "runTestBasedOnRequest"); +if(isset($_REQUEST['activity']) && + in_array($_REQUEST['activity'], $allowed_activities)) { $activity = $_REQUEST['activity']; } else { $activity = "listTests"; @@ -94,7 +98,8 @@ $activity(); /** - * This function is responsible for listing out HTML links to the available unit tests a user can run + * This function is responsible for listing out HTML links to the available + * unit tests a user can run */ function listTests() { @@ -106,7 +111,8 @@ function listTests() <?php foreach($names as $name) { $stem = substr($name, 0, strlen($name) - strlen("_test.php")); - echo "<li><a href='?activity=runTestBasedOnRequest&test=$stem'>".getClassNameFromFileName($name)."</a></li>"; + echo "<li><a href='?activity=runTestBasedOnRequest&test=$stem'>". + getClassNameFromFileName($name)."</a></li>"; } ?> </ul> @@ -129,8 +135,9 @@ function runAllTests() } /** - * Run the single unit test whose name is given in $_REQUEST['test'] and display the results. - * If the unit test file was blah_test.php, then $_REQUEST['test'] should be blah. + * Run the single unit test whose name is given in $_REQUEST['test'] and + * display the results. 
If the unit test file was blah_test.php, then + * $_REQUEST['test'] should be blah. */ function runTestBasedOnRequest() { @@ -145,9 +152,10 @@ function runTestBasedOnRequest() } /** - * Uses $name to load a unit test class, run the tests in it and display the results + * Uses $name to load a unit test class, run the tests in it and display the + * results * - * @param string $name the name of a unit test file in the current directory + * @param string $name the name of a unit test file in the current directory */ function runTest($name) { @@ -194,11 +202,11 @@ function runTest($name) } /** - * Gets the names of all the unit test files in the current directory. - * Doesn't really check for this explicitly, just checks if the file - * end with _test.php + * Gets the names of all the unit test files in the current directory. + * Doesn't really check for this explicitly, just checks if the file + * end with _test.php * - * @return array an array of unit test files + * @return array an array of unit test files */ function getTestNames() { @@ -207,12 +215,13 @@ function getTestNames() } /** - * Convert the convention for unit test file names into our convention - * for unit test class names + * Convert the convention for unit test file names into our convention + * for unit test class names * - * @param string $name a file name with words separated by underscores, ending in .php + * @param string $name a file name with words separated by underscores, ending + * in .php * - * @return string a camel-cased name ending with Test + * @return string a camel-cased name ending with Test */ function getClassNameFromFileName($name) { diff --git a/tests/priority_queue_test.php b/tests/priority_queue_test.php index 321183a81..6f0d61898 100755 --- a/tests/priority_queue_test.php +++ b/tests/priority_queue_test.php @@ -40,25 +40,30 @@ require_once BASE_DIR.'/configs/config.php'; require_once BASE_DIR."/lib/priority_queue.php"; /** - * Used to test the PriorityQueue class that is used 
to figure out which URL to crawl next + * Used to test the PriorityQueue class that is used to figure out which URL + * to crawl next * - * @author Chris Pollett - * @package seek_quarry - * @subpackage test + * @author Chris Pollett + * @package seek_quarry + * @subpackage test */ class PriorityQueueTest extends UnitTest { /** - * We setup two queue one that always returns the max element, one that always returns the min element + * We setup two queue one that always returns the max element, one that + * always returns the min element */ public function setUp() { - $this->test_objects['FILE1'] = new PriorityQueue("queue1.txt", 100, 4, CrawlConstants::MAX); - $this->test_objects['FILE2'] = new PriorityQueue("queue2.txt", 100, 4, CrawlConstants::MIN); + $this->test_objects['FILE1'] = + new PriorityQueue("queue1.txt", 100, 4, CrawlConstants::MAX); + $this->test_objects['FILE2'] = + new PriorityQueue("queue2.txt", 100, 4, CrawlConstants::MIN); } /** - * Since our queues are persistent structures, we delete files that might be associated with them when we tear down + * Since our queues are persistent structures, we delete files that might be + * associated with them when we tear down */ public function tearDown() { @@ -67,9 +72,11 @@ class PriorityQueueTest extends UnitTest } /** - * Insert five items into a priority queue. Checks that the resulting heap array matches the expected array - * calculated by hand. Weights of some elements of the queue are adjusted and the resulting heap array checked - * again. The the results of polling the queue and normalizing the queue are tested + * Insert five items into a priority queue. Checks that the resulting heap + * array matches the expected array calculated by hand. Weights of some + * elements of the queue are adjusted and the resulting heap array checked + * again. 
The results of polling the queue and normalizing the queue are + * tested */ public function maxQueueTestCase() { @@ -78,25 +85,31 @@ class PriorityQueueTest extends UnitTest $this->test_objects['FILE1']->insert("caaa", 4.5); $this->test_objects['FILE1']->insert("daaa", 5.0); $this->test_objects['FILE1']->insert("eaaa", 7.5); - $expected_array = array(array("eaaa", 7.5), array("baaa", 6.5), array("caaa", 4.5), - array("daaa", 5.0), array("aaaa", 5.5)); - $this->assertEqual($this->test_objects['FILE1']->getContents(), $expected_array, "Insert into queue yields expected contents"); + $expected_array = array(array("eaaa", 7.5), array("baaa", 6.5), + array("caaa", 4.5), array("daaa", 5.0), array("aaaa", 5.5)); + $this->assertEqual( + $this->test_objects['FILE1']->getContents(), $expected_array, + "Insert into queue yields expected contents"); $this->test_objects['FILE1']->adjustWeight(3, 4.0); - $expected_array = array(array("caaa", 8.5), array("baaa", 6.5), array("eaaa", 7.5), - array("daaa", 5.0), array("aaaa", 5.5)); - $this->assertEqual($this->test_objects['FILE1']->getContents(), $expected_array, "Adjust elt weight yields expected contents"); + $expected_array = array(array("caaa", 8.5), array("baaa", 6.5), + array("eaaa", 7.5), array("daaa", 5.0), array("aaaa", 5.5)); + $this->assertEqual( + $this->test_objects['FILE1']->getContents(), $expected_array, + "Adjust elt weight yields expected contents"); $this->test_objects['FILE1']->normalize(); $queue_data = $this->test_objects['FILE1']->getContents(); $sum = 0; $count = count($queue_data); for($i = 0; $i < $count; $i++) { - $this->assertEqual($queue_data[$i][0], $expected_array[$i][0], "key of $i th elt of queue unchanged by normalize"); + $this->assertEqual($queue_data[$i][0], $expected_array[$i][0], + "key of $i th elt of queue unchanged by normalize"); $sum += $queue_data[$i][1]; } - $this->assertEqual(round($sum), NUM_URLS_QUEUE_RAM, "Normalizations yields correct sum"); + $this->assertEqual(round($sum), 
NUM_URLS_QUEUE_RAM, + "Normalizations yields correct sum"); $elt = $this->test_objects['FILE1']->poll(); @@ -111,12 +124,15 @@ class PriorityQueueTest extends UnitTest $elt = $this->test_objects['FILE1']->poll(); $this->test_objects['FILE1']->normalize(); $expected_array = array(array("daaa", NUM_URLS_QUEUE_RAM)); - $this->assertEqual($this->test_objects['FILE1']->getContents(), $expected_array, "Queue after deletes has expected content"); + $this->assertEqual( + $this->test_objects['FILE1']->getContents(), $expected_array, + "Queue after deletes has expected content"); } /** - * Inserts five elements inserted into a minimum priority queue. The resulting heap array is compared to expected. - * Then repeated polling is done to make sure the objects come out in the correct order. + * Inserts five elements inserted into a minimum priority queue. The + * resulting heap array is compared to expected. Then repeated polling is + * done to make sure the objects come out in the correct order. */ public function minQueueTestCase() { @@ -126,9 +142,11 @@ class PriorityQueueTest extends UnitTest $this->test_objects['FILE2']->insert("daaa", 5.0); $this->test_objects['FILE2']->insert("eaaa", 7.5); - $expected_array = array(array("caaa", 4.5), array("daaa", 5.0), array("aaaa", 5.5), - array("baaa", 6.5), array("eaaa", 7.5)); - $this->assertEqual($this->test_objects['FILE2']->getContents(), $expected_array, "Queue has expected order after initial inserts"); + $expected_array = array(array("caaa", 4.5), array("daaa", 5.0), + array("aaaa", 5.5), array("baaa", 6.5), array("eaaa", 7.5)); + $this->assertEqual( + $this->test_objects['FILE2']->getContents(), $expected_array, + "Queue has expected order after initial inserts"); $elt = $this->test_objects['FILE2']->poll(); $this->assertEqual($elt[0], "caaa", "Remove caaa from queue okay"); @@ -145,7 +163,9 @@ class PriorityQueueTest extends UnitTest $elt = $this->test_objects['FILE2']->poll(); $this->assertEqual($elt[0], "eaaa", "Remove 
eaaa from queue okay"); - $this->assertEqual($this->test_objects['FILE2']->getContents(), array(), "Queue should be empty after deletes"); + $this->assertEqual( + $this->test_objects['FILE2']->getContents(), + array(), "Queue should be empty after deletes"); } diff --git a/tests/socket_experiment.php b/tests/socket_experiment.php index ffbf243e7..14c89bc66 100755 --- a/tests/socket_experiment.php +++ b/tests/socket_experiment.php @@ -22,7 +22,9 @@ * * END LICENSE * - * Test to see how many sockets system can open. On *nix systems, by doing ulimit -n, you can find this out, + * Test to see how many sockets system can open. On *nix systems, by doing + * ulimit -n + * you can find this out, * but the number doesn't exactly agree. * * On Macs you can change this value by editing /etc/launchd.conf diff --git a/tests/web_archive_test.php b/tests/web_archive_test.php index 722e17e28..7500009b3 100755 --- a/tests/web_archive_test.php +++ b/tests/web_archive_test.php @@ -46,9 +46,9 @@ require_once BASE_DIR."/lib/web_archive.php"; require_once BASE_DIR."/lib/gzip_compressor.php"; /** - * UnitTest for the WebArchive class. A web archive is used to store array-based - * objects persistently to a file. This class tests storing and retreiving from - * such an archive. + * UnitTest for the WebArchive class. A web archive is used to store + * array-based objects persistently to a file. This class tests storing and + * retreiving from such an archive. * * @author Chris Pollett * @package seek_quarry @@ -61,7 +61,8 @@ class WebArchiveTest extends UnitTest */ public function setUp() { - $this->test_objects['FILE1'] = new WebArchive("ar1.sqwa", new GzipCompressor()); + $this->test_objects['FILE1'] = + new WebArchive("ar1.sqwa", new GzipCompressor()); } /** @@ -73,66 +74,86 @@ class WebArchiveTest extends UnitTest } /** - * Inserts three objects into a web archive. To look up an object in a web archive - * we need to know its byte offset into the archive file. 
This test looks that - * after the inserts we get back an array of byte offsets and that the byte offsets - * are of increasing size + * Inserts three objects into a web archive. To look up an object in a web + * archive we need to know its byte offset into the archive file. This test + * looks that after the inserts we get back an array of byte offsets and + * that the byte offsets are of increasing size */ public function addObjectTestCase() { - $items = array(array("hello"), array("how are you"), array("good thanks")); + $items = + array(array("hello"), array("how are you"), array("good thanks")); $objects = $this->test_objects['FILE1']->addObjects("offset", $items); - $this->assertTrue(isset($objects[0]['offset']), "First insert got offset into file"); - $this->assertTrue(isset($objects[1]['offset']), "Second insert got offset into file"); - $this->assertTrue(isset($objects[2]['offset']), "Third insert got offset into file"); - $offset_flag = $objects[0]['offset'] === 0 && $objects[0]['offset'] < $objects[1]['offset'] && + $this->assertTrue( + isset($objects[0]['offset']), "First insert got offset into file"); + $this->assertTrue( + isset($objects[1]['offset']), "Second insert got offset into file"); + $this->assertTrue( + isset($objects[2]['offset']), "Third insert got offset into file"); + $offset_flag = $objects[0]['offset'] === 0 && + $objects[0]['offset'] < $objects[1]['offset'] && $objects[1]['offset'] < $objects[2]['offset']; - $this->assertTrue($offset_flag, "First offset into archive is zero and later ones are strictly increasing"); + $this->assertTrue($offset_flag, + "First offset into archive is zero and ". + "later ones are strictly increasing"); } /** - * Does two addObjects of three objects each. Then does a getObjects to get six object using offset 0 - * into the web archive. This should return the six objects just inserted - * + * Does two addObjects of three objects each. 
Then does a getObjects to get + * six object using offset 0 into the web archive. This should return the + * six objects just inserted */ public function getObjectTestCase() { - $items = array(array("hello"), array("how are you"), array("good thanks")); - $more_items = array(array("he3llo"), array("how4 are you"), array("good5 thanks")); + $items = + array(array("hello"), array("how are you"), array("good thanks")); + $more_items = + array(array("he3llo"),array("how4 are you"), array("good5 thanks")); $objects = $this->test_objects['FILE1']->addObjects("offset", $items); - $new_objects = $this->test_objects['FILE1']->addObjects("offset", $more_items); + $new_objects = + $this->test_objects['FILE1']->addObjects("offset", $more_items); $all_items = array_merge($items, $more_items); $retrieved_items = $this->test_objects['FILE1']->getObjects(0,6); $retrieved_count = count($retrieved_items); - $this->assertEqual($retrieved_count, 6, "number of items retrieved is what asked for"); + $this->assertEqual( + $retrieved_count, 6, "number of items retrieved is what asked for"); for($i = 0; $i < $retrieved_count; $i++) { - $this->assertEqual($retrieved_items[$i][1][0], $all_items[$i][0], "object $i retrieved correctly"); + $this->assertEqual( + $retrieved_items[$i][1][0], $all_items[$i][0], + "object $i retrieved correctly"); } } /** - * If the file associated with a web archive already exists when the constructor is called, - * then the constructor will load the existing web archive. This test case checks this functionality - * by adding six items to a web archive, then constructing a new WebArchive object using the same - * file name and seeing if we can read the objects that were just inserted. + * If the file associated with a web archive already exists when the + * constructor is called, then the constructor will load the existing web + * archive. 
This test case checks this functionality by adding six items to + * a web archive, then constructing a new WebArchive object using the same + * file name and seeing if we can read the objects that were just inserted. * */ public function reloadArchiveTestCase() { - $items = array(array("hello"), array("how are you"), array("good thanks")); - $more_items = array(array("he3llo"), array("how4 are you"), array("good5 thanks")); + $items = + array(array("hello"), array("how are you"), array("good thanks")); + $more_items = + array(array("he3llo"),array("how4 are you"), array("good5 thanks")); $objects = $this->test_objects['FILE1']->addObjects("offset", $items); - $new_objects = $this->test_objects['FILE1']->addObjects("offset", $more_items); - - $this->test_objects['REF_FILE1'] = new WebArchive("ar1.sqwa", new GzipCompressor()); - $this->assertEqual($this->test_objects['REF_FILE1']->count, 6, "Archive count is equal to number of items inserted"); + $new_objects = + $this->test_objects['FILE1']->addObjects("offset", $more_items); + + $this->test_objects['REF_FILE1'] = + new WebArchive("ar1.sqwa", new GzipCompressor()); + $this->assertEqual( + $this->test_objects['REF_FILE1']->count, 6, + "Archive count is equal to number of items inserted"); } } ?> diff --git a/tests/web_queue_bundle_test.php b/tests/web_queue_bundle_test.php index 7a37daf6a..8d45cde9a 100644 --- a/tests/web_queue_bundle_test.php +++ b/tests/web_queue_bundle_test.php @@ -57,8 +57,8 @@ class WebQueueBundleTest extends UnitTest var $db; /** - * Sets up a miminal DBMS manager class so that we will be able to use unlinkRecursive to - * tear down own WebQueueBundle + * Sets up a miminal DBMS manager class so that we will be able to use + * unlinkRecursive to tear down own WebQueueBundle */ public function __construct() { @@ -66,12 +66,14 @@ class WebQueueBundleTest extends UnitTest $this->db = new $db_class(); } /** - * Set up a web queue bundle that can store 1000 urls in ram, has bloom filter space for 
1000 urls - * and which uses a maximum value returning priority queue. + * Set up a web queue bundle that can store 1000 urls in ram, has bloom + * filter space for 1000 urls and which uses a maximum value returning + * priority queue. */ public function setUp() { - $this->test_objects['FILE1'] = new WebQueueBundle("QueueTest", 1000, 1000, CrawlConstants::MAX); + $this->test_objects['FILE1'] = + new WebQueueBundle("QueueTest", 1000, 1000, CrawlConstants::MAX); } /** @@ -83,15 +85,18 @@ class WebQueueBundleTest extends UnitTest } /** - * Does two adds to the WebQueueBundle of urls and weight. Then checks the contents of the queue - * to see if as expected. Then does a rebuild on the hash table of the queue and checks that the contents - * have not changed. + * Does two adds to the WebQueueBundle of urls and weight. Then checks the + * contents of the queue to see if as expected. Then does a rebuild on the + * hash table of the queue and checks that the contents have not changed. */ public function addQueueTestCase() { - $urls1 = array(array("http://www.pollett.com/", 10), array("http://www.ucanbuyart.com/", 15)); + $urls1 = array(array("http://www.pollett.com/", 10), + array("http://www.ucanbuyart.com/", 15)); $this->test_objects['FILE1']->addUrlsQueue($urls1); - $urls2 = array(array("http://www.yahoo.com/", 2), array("http://www.google.com/", 20), array("http://www.slashdot.org/", 3)); + $urls2 = array(array("http://www.yahoo.com/", 2), + array("http://www.google.com/", 20), + array("http://www.slashdot.org/", 3)); $this->test_objects['FILE1']->addUrlsQueue($urls2); $expected_array = array(array('http://www.google.com/', 20), @@ -102,10 +107,14 @@ class WebQueueBundleTest extends UnitTest ); - $this->assertEqual($this->test_objects['FILE1']->getContents(), $expected_array, "Insert Queue matches predicted"); + $this->assertEqual( + $this->test_objects['FILE1']->getContents(), $expected_array, + "Insert Queue matches predicted"); 
$this->test_objects['FILE1']->rebuildUrlTable(); - $this->assertEqual($this->test_objects['FILE1']->getContents(), $expected_array, "Rebuild table should not affect contents"); + $this->assertEqual( + $this->test_objects['FILE1']->getContents(), $expected_array, + "Rebuild table should not affect contents"); } diff --git a/tutorials/seek_quarry/.svn/all-wcprops b/tutorials/seek_quarry/.svn/all-wcprops deleted file mode 100644 index 92630e3fe..000000000 --- a/tutorials/seek_quarry/.svn/all-wcprops +++ /dev/null @@ -1,17 +0,0 @@ -K 25 -svn:wc:ra_dav:version-url -V 49 -/svn/!svn/ver/1/yioop/trunk/tutorials/seek_quarry -END -seek_quarry.pkg.ini -K 25 -svn:wc:ra_dav:version-url -V 69 -/svn/!svn/ver/1/yioop/trunk/tutorials/seek_quarry/seek_quarry.pkg.ini -END -manual.pkg -K 25 -svn:wc:ra_dav:version-url -V 60 -/svn/!svn/ver/1/yioop/trunk/tutorials/seek_quarry/manual.pkg -END diff --git a/tutorials/seek_quarry/.svn/entries b/tutorials/seek_quarry/.svn/entries deleted file mode 100644 index 71b9d4070..000000000 --- a/tutorials/seek_quarry/.svn/entries +++ /dev/null @@ -1,96 +0,0 @@ -10 - -dir -1 -https://www.pollett.org/svn/yioop/trunk/tutorials/seek_quarry -https://www.pollett.org/svn - - - -2010-07-08T17:43:01.223212Z -1 - - - - - - - - - - - - - - - -66785b46-dfa8-4d64-b9ca-93ed5e9ae41d - -seek_quarry.pkg.ini -file - - - - -2010-06-17T00:34:20.000000Z -e487b0ef571c1086c3437c649cfe3ebb -2010-07-08T17:43:01.223212Z -1 - - - - - - - - - - - - - - - - - - - - - - -29 - -manual.pkg -file - - - - -2010-06-10T04:26:15.000000Z -d298df67bf1264979eeb328c2ca5f586 -2010-07-08T17:43:01.223212Z -1 - - - - - - - - - - - - - - - - - - - - - - -510 - diff --git a/tutorials/seek_quarry/.svn/text-base/manual.pkg.svn-base b/tutorials/seek_quarry/.svn/text-base/manual.pkg.svn-base deleted file mode 100644 index 4d79680b7..000000000 --- a/tutorials/seek_quarry/.svn/text-base/manual.pkg.svn-base +++ /dev/null @@ -1,29 +0,0 @@ - <refentry id="{@id}"> - - <refnamediv> - - 
<refname>Manual</refname> - - <refpurpose>Understanding The SeekQuarry Search Engine</refpurpose> - - </refnamediv> - - <refsynopsisdiv> - - <author> - - Chris Pollett - - <authorblurb> - - {@link mailto:chris@pollett.org} - - </authorblurb> - </author> - </refsynopsisdiv> - - <refsect1 id="{@id intro}"> - <para>Hello World</para> - </refsect1> -</refentry> - diff --git a/tutorials/seek_quarry/.svn/text-base/seek_quarry.pkg.ini.svn-base b/tutorials/seek_quarry/.svn/text-base/seek_quarry.pkg.ini.svn-base deleted file mode 100644 index 8c14725d4..000000000 --- a/tutorials/seek_quarry/.svn/text-base/seek_quarry.pkg.ini.svn-base +++ /dev/null @@ -1,2 +0,0 @@ - [Linked Tutorials] -manual diff --git a/tutorials/seek_quarry/manual.pkg b/tutorials/seek_quarry/manual.pkg deleted file mode 100644 index 4d79680b7..000000000 --- a/tutorials/seek_quarry/manual.pkg +++ /dev/null @@ -1,29 +0,0 @@ - <refentry id="{@id}"> - - <refnamediv> - - <refname>Manual</refname> - - <refpurpose>Understanding The SeekQuarry Search Engine</refpurpose> - - </refnamediv> - - <refsynopsisdiv> - - <author> - - Chris Pollett - - <authorblurb> - - {@link mailto:chris@pollett.org} - - </authorblurb> - </author> - </refsynopsisdiv> - - <refsect1 id="{@id intro}"> - <para>Hello World</para> - </refsect1> -</refentry> - diff --git a/tutorials/seek_quarry/seek_quarry.pkg.ini b/tutorials/seek_quarry/seek_quarry.pkg.ini deleted file mode 100644 index 8c14725d4..000000000 --- a/tutorials/seek_quarry/seek_quarry.pkg.ini +++ /dev/null @@ -1,2 +0,0 @@ - [Linked Tutorials] -manual diff --git a/views/admin_view.php b/views/admin_view.php index a2df5f096..c501a80eb 100755 --- a/views/admin_view.php +++ b/views/admin_view.php @@ -48,12 +48,13 @@ class AdminView extends View * @var string */ var $layout = "web"; - /** An array of names of element objects that the view uses to display itself + /** Names of element objects that the view uses to display itself * @var array */ - var $elements = array("language", 
"activity", "signin", "managecrawl", "manageaccount", - "manageusers", "manageroles", "managelocales", "editlocales", "crawloptions", "configure"); - /** An array of names of helper objects that the view uses to help draw itself + var $elements = array("language", "activity", "signin", + "managecrawl", "manageaccount", "manageusers", "manageroles", + "managelocales", "editlocales", "crawloptions", "configure"); + /** Names of helper objects that the view uses to help draw itself * @var array */ var $helpers = array('options'); @@ -62,7 +63,7 @@ class AdminView extends View * Renders the list of admin activities and draws the current activity * Renders the Javascript to autologout after an hour * - * @param array $data what is contained in this array depend on the current + * @param array $data what is contained in this array depend on the current * admin activity. The $data['ELEMENT'] says which activity to render */ public function renderView($data) { @@ -71,8 +72,10 @@ class AdminView extends View } ?> - <h1 class="logo"><a href="."><img src="resources/yioop.png" alt="Yioop!" /></a><span> - <?php - e(tl('admin_view_admin')); e(' ['.$data['CURRENT_ACTIVITY'].']')?></span></h1> + <h1 class="logo"><a href="."><img + src="resources/yioop.png" alt="Yioop!" 
/></a><span> - <?php + e(tl('admin_view_admin')); + e(' ['.$data['CURRENT_ACTIVITY'].']')?></span></h1> <?php $this->activityElement->render($data); @@ -89,7 +92,9 @@ class AdminView extends View */ function logoutWarn() { - doMessage("<h2 class='red'><?php e(tl('adminview_auto_logout_one_minute'))?></h2>"); + doMessage( + "<h2 class='red'><?php + e(tl('adminview_auto_logout_one_minute'))?></h2>"); } /* Javscript to perform autologout diff --git a/views/crawlstatus_view.php b/views/crawlstatus_view.php index dcca34fa7..b7a92e888 100755 --- a/views/crawlstatus_view.php +++ b/views/crawlstatus_view.php @@ -48,13 +48,14 @@ class CrawlstatusView extends View /** * An Ajax call from the Manage Crawl Element in Admin View triggers * this view to be instantiated. The renderView method then draws statistics - * about the currently active crawl. The $data is supplied by the crawlStatus + * about the currently active crawl.The $data is supplied by the crawlStatus * method of the AdminController. * * @param array $data info about the current crawl status */ public function renderView($data) { - $base_url = "?c=admin&a=manageCrawl&YIOOP_TOKEN=".$data['YIOOP_TOKEN']."&arg="; + $base_url = "?c=admin&a=manageCrawl&YIOOP_TOKEN=". 
+ $data['YIOOP_TOKEN']."&arg="; ?> <h2><?php e(tl('crawlstatus_view_currently_processing')); ?></h2> @@ -62,7 +63,10 @@ class CrawlstatusView extends View if(isset($data['DESCRIPTION'])) { e($data['DESCRIPTION']); ?> - <button class="buttonbox" type="button" onclick="javascript:document.location = '<?php e($base_url); ?>stop'" ><?php e(tl('managecrawl_element_stop_crawl'))?></button> + <button class="buttonbox" type="button" + onclick="javascript:document.location = '<?php + e($base_url); ?>stop'" ><?php + e(tl('managecrawl_element_stop_crawl'))?></button> <?php } else { e(tl('crawlstatus_view_no_description')); @@ -70,9 +74,12 @@ class CrawlstatusView extends View ?></p> <p><b><?php e(tl('crawlstatus_view_time_started')); ?></b> <?php - if(isset($data['CRAWL_TIME'])) { e(date("r",$data['CRAWL_TIME'])); } else {e(tl('crawlstatus_view_no_crawl_time'));} ?></p> + if(isset($data['CRAWL_TIME'])) { e(date("r",$data['CRAWL_TIME'])); } + else {e(tl('crawlstatus_view_no_crawl_time'));} ?></p> - <p><b><?php e(tl('crawlstatus_view_total_urls')); ?></b> <?php if(isset($data['COUNT'])) { e($data['COUNT']); } else {e("0");} ?></p> + <p><b><?php e(tl('crawlstatus_view_total_urls')); ?></b> <?php + if(isset($data['COUNT'])) { e($data['COUNT']); } else {e("0");} + ?></p> <p><b><?php e(tl('crawlstatus_view_most_recent_fetcher')); ?></b> <?php @@ -84,7 +91,8 @@ class CrawlstatusView extends View ?></p> <h2><?php e(tl('crawlstatus_view_most_recent_urls')); ?></h2> <?php - if(isset($data['MOST_RECENT_URLS_SEEN']) && count($data['MOST_RECENT_URLS_SEEN']) > 0) { + if(isset($data['MOST_RECENT_URLS_SEEN']) && + count($data['MOST_RECENT_URLS_SEEN']) > 0) { foreach($data['MOST_RECENT_URLS_SEEN'] as $url) { e("<p>$url</p>"); } @@ -99,19 +107,25 @@ class CrawlstatusView extends View ?> <table class="crawlstable"> - <tr><th><?php e(tl('crawlstatus_view_description')); ?></th><th><?php e(tl('crawlstatus_view_time_started')); ?></th> + <tr><th><?php 
e(tl('crawlstatus_view_description'));?></th><th><?php + e(tl('crawlstatus_view_time_started')); ?></th> <th><?php e(tl('crawlstatus_view_total_urls'));?></th> <th colspan="3"><?php e(tl('crawlstatus_view_actions'));?></th></tr> <?php foreach($data['RECENT_CRAWLS'] as $crawl) { ?> - <tr><td><b><?php e($crawl['DESCRIPTION']); ?></b></td><td> <?php e(date("r", $crawl['CRAWL_TIME'])); ?></td> + <tr><td><b><?php e($crawl['DESCRIPTION']); ?></b></td><td> <?php + e(date("r", $crawl['CRAWL_TIME'])); ?></td> <td> <?php e( $crawl['COUNT']); ?></td> - <td><a href="<?php e($base_url); ?>resume&timestamp=<?php e($crawl['CRAWL_TIME']); ?>"><?php e(tl('crawlstatus_view_resume'));?></a></td> + <td><a href="<?php e($base_url); ?>resume&timestamp=<?php + e($crawl['CRAWL_TIME']); ?>"><?php + e(tl('crawlstatus_view_resume'));?></a></td> <td> <?php if( $crawl['CRAWL_TIME'] != $data['CURRENT_INDEX']) { ?> - <a href="<?php e($base_url); ?>index&timestamp=<?php e($crawl['CRAWL_TIME']); ?>"><?php e(tl('crawlstatus_view_set_index')); ?></a> + <a href="<?php e($base_url); ?>index&timestamp=<?php + e($crawl['CRAWL_TIME']); ?>"><?php + e(tl('crawlstatus_view_set_index')); ?></a> <?php } else { ?> <?php e(tl('crawlstatus_view_search_index')); ?> @@ -119,7 +133,9 @@ class CrawlstatusView extends View } ?> </td> - <td><a href="<?php e($base_url); ?>delete&timestamp=<?php e($crawl['CRAWL_TIME']); ?>"><?php e(tl('crawlstatus_view_delete'))?></a></td> + <td><a href="<?php e($base_url); + ?>delete&timestamp=<?php e($crawl['CRAWL_TIME']); + ?>"><?php e(tl('crawlstatus_view_delete')); ?></a></td> </tr> <?php } diff --git a/views/elements/activity_element.php b/views/elements/activity_element.php index 81531cb8f..0b74e70fe 100755 --- a/views/elements/activity_element.php +++ b/views/elements/activity_element.php @@ -47,9 +47,9 @@ class ActivityElement extends Element { /** - * Displays a list of admin activities + * Displays a list of admin activities * - * @param array $data available activities and CSRF token + * @param array 
$data available activities and CSRF token */ public function render($data) { @@ -67,8 +67,10 @@ class ActivityElement extends Element } else { $class=""; } - e("<li $class><a href='?c=admin&YIOOP_TOKEN=".$data['YIOOP_TOKEN'] - ."&a=".$activity[$i]['METHOD_NAME']."'>".$activity[$i]['ACTIVITY_NAME']."</a></li>"); + e("<li $class><a href='?c=admin&YIOOP_TOKEN=". + $data['YIOOP_TOKEN']."&a=". + $activity[$i]['METHOD_NAME']."'>". + $activity[$i]['ACTIVITY_NAME']."</a></li>"); } } ?> diff --git a/views/elements/configure_element.php b/views/elements/configure_element.php index 0d38ef62f..87476e7c9 100644 --- a/views/elements/configure_element.php +++ b/views/elements/configure_element.php @@ -36,8 +36,9 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} /** * Element responsible for drawing the screen used to set up the search engine * - * This element has form fields to set up the work directory for crawls, the default language, the debug settings, - * the database, and the robot identifier information. + * This element has form fields to set up the work directory for crawls, + * the default language, the debug settings, the database, and the robot + * identifier information. * * @author Chris Pollett * @package seek_quarry @@ -48,20 +49,21 @@ class ConfigureElement extends Element { /** - * Draws the forms used to configure the search engine. + * Draws the forms used to configure the search engine. * - * This element has two forms on it: One for setting the working directory for - * crawls, the other to set-up profile information which is mainly stored in the - * profile.php file in the working directory. The exception is longer data concerning the - * crawl robot description which is stored in bot.txt. Some elements on forms are not - * displayed if they are not relevant (for instance, there is no notion of a username for - * a sqlite database system, but there is for other DBMSs). 
Also, if the work directory - * is not properly configured then only the language portion of the profile form is displayed - * since there is no real place to store data from the latter form until a proper working - * directory is established. + * This element has two forms on it: One for setting the working directory + * for crawls, the other to set-up profile information which is mainly + * stored in the profile.php file in the working directory. The exception + * is longer data concerning the crawl robot description which is stored + * in bot.txt. Some elements on forms are not displayed if they are not + * relevant (for instance, there is no notion of a username for a sqlite + * database system, but there is for other DBMSs). Also, if the work + * directory is not properly configured then only the language portion of + * the profile form is displayed since there is no real place to store data + * from the latter form until a proper working directory is established. * - * @param array $data holds data on the profile elements which have been filled in as well as - * data about which form fields to display + * @param array $data holds data on the profile elements which have been + * filled in as well as data about which form fields to display */ public function render($data) { @@ -69,84 +71,134 @@ class ConfigureElement extends Element <div class="currentactivity"> <form id="configureDirectoryForm" method="post" action=''> <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /> <input type="hidden" name="a" value="configure" /> <?php if(isset($data['lang'])) { ?> - <input type="hidden" name="lang" value="<?php e($data['lang']); ?>" /> + <input type="hidden" name="lang" value="<?php + e($data['lang']); ?>" /> <?php }?> <input type="hidden" name="arg" value="directory" /> - <h2><label 
for="directory-path"><?php e(tl('configure_element_work_directory'))?></label></h2> - <div class="topmargin"><input type="text" id="directory-path" name="WORK_DIRECTORY" class="extrawidefield" - value='<?php e($data["WORK_DIRECTORY"])?>' /><button class="buttonbox" - type="submit"><?php e(tl('configure_element_load_or_create')); ?></button> + <h2><label for="directory-path"><?php + e(tl('configure_element_work_directory'))?></label></h2> + <div class="topmargin"><input type="text" id="directory-path" + name="WORK_DIRECTORY" class="extrawidefield" value='<?php + e($data["WORK_DIRECTORY"]); ?>' /><button + class="buttonbox" + type="submit"><?php + e(tl('configure_element_load_or_create')); ?></button> </div> </form> <form id="configureProfileForm" method="post" action=''> <?php if(isset($data['WORK_DIRECTORY'])) { ?> - <input type="hidden" name="WORK_DIRECTORY" value="<?php e($data['WORK_DIRECTORY']); ?>" /> + <input type="hidden" name="WORK_DIRECTORY" value="<?php + e($data['WORK_DIRECTORY']); ?>" /> <?php }?> <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /> <input type="hidden" name="a" value="configure" /> <input type="hidden" name="arg" value="profile" /> <h2><?php e(tl('configure_element_profile_settings'))?></h2> - <div class="topmargin"><b><label for="locale"><b><?php e(tl('configure_element_default_language')); ?></b> + <div class="topmargin"><b><label for="locale"><b><?php + e(tl('configure_element_default_language')); ?></b> <?php $this->view->languageElement->render($data); ?> </div> <?php if($data['PROFILE']) { ?> <div class="topmargin"> - <fieldset class="extrawidefield"><legend><?php e(tl('configure_element_debug_display'))?></legend> - <label for="error-info"><input id='error-info' type="checkbox" name="ERROR_INFO" value="<?php e(ERROR_INFO);?>" - <?php if(($data['DEBUG_LEVEL'] & 
ERROR_INFO) == ERROR_INFO) {e("checked='checked'");}?> + <fieldset class="extrawidefield"><legend><?php + e(tl('configure_element_debug_display'))?></legend> + <label for="error-info"><input id='error-info' type="checkbox" + name="ERROR_INFO" value="<?php e(ERROR_INFO);?>" + <?php if(($data['DEBUG_LEVEL'] & ERROR_INFO)==ERROR_INFO){ + e("checked='checked'");}?> /><?php e(tl('configure_element_error_info')); ?></label> - <label for="query-info"><input id='query-info' type="checkbox" name="QUERY_INFO" value="<?php e(QUERY_INFO);?>" - <?php if(($data['DEBUG_LEVEL'] & QUERY_INFO) == QUERY_INFO) {e("checked='checked'");}?>/><?php e(tl('configure_element_query_info')); ?></label> - <label for="test-info"><input id='test-info' type="checkbox" name="TEST_INFO" value="<?php e(TEST_INFO);?>" - <?php if(($data['DEBUG_LEVEL'] & TEST_INFO) == TEST_INFO) {e("checked='checked'");}?>/><?php e(tl('configure_element_test_info')); ?></label> + <label for="query-info"><input id='query-info' type="checkbox" + name="QUERY_INFO" value="<?php e(QUERY_INFO);?>" + <?php if(($data['DEBUG_LEVEL'] & QUERY_INFO)==QUERY_INFO){ + e("checked='checked'");}?>/><?php + e(tl('configure_element_query_info')); ?></label> + <label for="test-info"><input id='test-info' type="checkbox" + name="TEST_INFO" value="<?php e(TEST_INFO);?>" + <?php if(($data['DEBUG_LEVEL'] & TEST_INFO) == TEST_INFO){ + e("checked='checked'");}?>/><?php + e(tl('configure_element_test_info')); ?></label> </fieldset> </div> <div class="topmargin"> - <fieldset class="extrawidefield"><legend><?php e(tl('configure_element_database_setup'))?></legend> - <div ><label for="database-system"><b><?php e(tl('configure_element_database_system'))?></b></label><?php - $this->view->optionsHelper->render("database-system", "DBMS", $data['DBMSS'], $data['DBMS']); + <fieldset class="extrawidefield"><legend><?php + e(tl('configure_element_database_setup'))?></legend> + <div ><label for="database-system"><b><?php + 
e(tl('configure_element_database_system')); ?></b></label> + <?php $this->view->optionsHelper->render( + "database-system", "DBMS", + $data['DBMSS'], $data['DBMS']); ?></div> - <div class="topmargin"><b><label for="database-name"><?php e(tl('configure_element_databasename'))?></label></b> <input - type="text" id="database-name" name="DB_NAME" value="<?php e($data['DB_NAME']); ?>" class="widefield" /> + <div class="topmargin"><b><label for="database-name"><?php + e(tl('configure_element_databasename'))?></label></b> + <input type="text" id="database-name" name="DB_NAME" + value="<?php e($data['DB_NAME']); ?>" + class="widefield" /> </div> <div id="login-dbms"> - <div class="topmargin"><b><label for="database-url"><?php e(tl('configure_element_databaseurl'))?></label></b> <input - type="text" id="database-user" name="DB_URL" value="<?php e($data['DB_URL']); ?>" class="widefield" /> + <div class="topmargin"><b><label for="database-url"><?php + e(tl('configure_element_databaseurl')); ?></label></b> + <input type="text" id="database-user" name="DB_URL" + value="<?php e($data['DB_URL']); ?>" + class="widefield" /> </div> - <div class="topmargin"><b><label for="database-user"><?php e(tl('configure_element_databaseuser'))?></label></b> <input - type="text" id="database-user" name="DB_USER" value="<?php e($data['DB_USER']); ?>" class="widefield" /> + <div class="topmargin"><b><label for="database-user"><?php + e(tl('configure_element_databaseuser'))?></label></b> + <input type="text" id="database-user" name="DB_USER" + value="<?php e($data['DB_USER']); ?>" + class="widefield" /> </div> - <div class="topmargin"><b><label for="database-password"><?php e(tl('configure_element_databasepassword'));?></label></b> <input - type="password" id="database-password" name="DB_PASSWORD" value="<?php e($data['DB_PASSWORD']); ?>" class="widefield" /> + <div class="topmargin"><b><label + for="database-password"><?php + e(tl('configure_element_databasepassword'));?></label> + </b> <input 
type="password" id="database-password" + name="DB_PASSWORD" value="<?php + e($data['DB_PASSWORD']); ?>" class="widefield" /> </div> </div> </fieldset> </div> - <div class="topmargin"><fieldset><legend><?php e(tl('configure_element_queue_server'))?></legend> - <div ><b><label for="queue-fetcher-salt"><?php e(tl('configure_element_queue_server_key'))?></label></b> <input - type="text" id="queue-fetcher-salt" name="AUTH_KEY" value="<?php e($data['AUTH_KEY']); ?>" class="widefield" /> + <div class="topmargin"><fieldset><legend><?php + e(tl('configure_element_queue_server'))?></legend> + <div ><b><label for="queue-fetcher-salt"><?php + e(tl('configure_element_queue_server_key'))?></label></b> + <input type="text" id="queue-fetcher-salt" name="AUTH_KEY" + value="<?php e($data['AUTH_KEY']); ?>" + class="widefield" /> </div> - <div class="topmargin"><b><label for="queue-server-url"><?php e(tl('configure_element_queue_server_url'))?></label></b> <input - type="text" id="queue-server-url" name="QUEUE_SERVER" value="<?php e($data['QUEUE_SERVER']); ?>" class="extrawidefield" /> + <div class="topmargin"><b><label for="queue-server-url"><?php + e(tl('configure_element_queue_server_url'))?></label></b> + <input type="text" id="queue-server-url" name="QUEUE_SERVER" + value="<?php e($data['QUEUE_SERVER']); ?>" + class="extrawidefield" /> </div> </fieldset> </div> - <div class="topmargin"><fieldset><legend><?php e(tl('configure_element_crawl_robot'))?></legend> - <div><b><label for="crawl-robot-name"><?php e(tl('configure_element_robot_name'))?></label></b> <input - type="text" id="crawl-robot-name" name="USER_AGENT_SHORT" value="<?php e($data['USER_AGENT_SHORT']); ?>" class="extrawidefield" /> + <div class="topmargin"><fieldset><legend><?php + e(tl('configure_element_crawl_robot'))?></legend> + <div><b><label for="crawl-robot-name"><?php + e(tl('configure_element_robot_name'))?></label></b> + <input type="text" id="crawl-robot-name" + name="USER_AGENT_SHORT" + value="<?php 
e($data['USER_AGENT_SHORT']); ?>" + class="extrawidefield" /> </div> - <div class="topmargin"><label for="robot-description"><b><?php e(tl('configure_element_robot_description'))?></b></label></div> + <div class="topmargin"><label for="robot-description"><b><?php + e(tl('configure_element_robot_description')); + ?></b></label></div> <textarea class="talltextarea" name="ROBOT_DESCRIPTION" ><?php e($data['ROBOT_DESCRIPTION']); ?></textarea> </fieldset> </div> <div class="topmargin center"> - <button class="buttonbox" type="submit"><?php e(tl('configure_element_submit')); ?></button> + <button class="buttonbox" type="submit"><?php + e(tl('configure_element_submit')); ?></button> </div> <?php } ?> </form> diff --git a/views/elements/crawloptions_element.php b/views/elements/crawloptions_element.php index f54c315f8..a33732846 100644 --- a/views/elements/crawloptions_element.php +++ b/views/elements/crawloptions_element.php @@ -34,9 +34,9 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} /** - * Element responsible for displaying options about how a crawl will be - * performed. For instance, what are the seed sites for the crawl, what - * sites are allowed to be crawl what sites must not be crawled, etc. + * Element responsible for displaying options about how a crawl will be + * performed. For instance, what are the seed sites for the crawl, what + * sites are allowed to be crawl what sites must not be crawled, etc. 
* * @author Chris Pollett * @@ -48,44 +48,60 @@ class CrawloptionsElement extends Element { /** - * Draws configurable options about how a web crawl should be conducted + * Draws configurable options about how a web crawl should be conducted * - * @param array $data keys are generally the different setting that can be set in the crawl.ini file + * @param array $data keys are generally the different setting that can + * be set in the crawl.ini file */ public function render($data) { ?> <div class="currentactivity"> <div class="<?php e($data['leftorright']);?>"> - <a href="?c=admin&a=manageCrawl&YIOOP_TOKEN=<?php e($data['YIOOP_TOKEN']) ?>" + <a href="?c=admin&a=manageCrawl&YIOOP_TOKEN=<?php + e($data['YIOOP_TOKEN']) ?>" ><?php e(tl('crawloptions_element_back_to_manage'))?></a> </div> <h2><?php e(tl('crawloptions_element_edit_crawl_options'))?></h2> <form id="crawloptionsForm" method="post" action=''> <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /> <input type="hidden" name="a" value="manageCrawl" /> <input type="hidden" name="arg" value="options" /> <input type="hidden" name="posted" value="posted" /> - <div class="topmargin"><label for="crawl-order"><b><?php e(tl('crawloptions_element_crawl_order'))?></b></label><?php - $this->view->optionsHelper->render("crawl-order", "crawl_order", $data['available_crawl_orders'], $data['crawl_order']); + <div class="topmargin"><label for="crawl-order"><b><?php + e(tl('crawloptions_element_crawl_order'))?></b></label><?php + $this->view->optionsHelper->render("crawl-order", "crawl_order", + $data['available_crawl_orders'], $data['crawl_order']); ?></div> - <div class="topmargin"><label for="restrict-sites-by-url"><b><?php e(tl('crawloptions_element_restrict_by_url'))?></b></label><input type="checkbox" id="restrict-sites-by-url" name="restrict_sites_by_url" 
value="true" - onchange="setDisplay('toggle', this.checked)" <?php e($data['TOGGLE_STATE']); ?>/></div> - <div id="toggle"><div class="topmargin"><label for="allowed-sites"><b><?php e(tl('crawloptions_element_allowed_to_crawl'))?></b></label></div> - <textarea class="shorttextarea" id="allowed-sites" name="allowed_sites" ><?php - e($data['allowed_sites']); + <div class="topmargin"><label for="restrict-sites-by-url"><b><?php + e(tl('crawloptions_element_restrict_by_url'))?></b></label> + <input type="checkbox" id="restrict-sites-by-url" + name="restrict_sites_by_url" value="true" + onchange="setDisplay('toggle', this.checked)" <?php + e($data['TOGGLE_STATE']); ?> /></div> + <div id="toggle"> + <div class="topmargin"><label for="allowed-sites"><b><?php + e(tl('crawloptions_element_allowed_to_crawl'))?></b></label></div> + <textarea class="shorttextarea" id="allowed-sites" + name="allowed_sites"><?php e($data['allowed_sites']); ?></textarea></div> - <div class="topmargin"><label for="disallowed-sites"><b><?php e(tl('crawloptions_element_disallowed_to_crawl'))?></b></label></div> - <textarea class="shorttextarea" id="disallowed-sites" name="disallowed_sites" ><?php - e($data['disallowed_sites']); + <div class="topmargin"><label for="disallowed-sites"><b><?php + e(tl('crawloptions_element_disallowed_to_crawl')); + ?></b></label></div> + <textarea class="shorttextarea" id="disallowed-sites" + name="disallowed_sites" ><?php e($data['disallowed_sites']); ?></textarea> - <div class="topmargin"><label for="seed-sites"><b><?php e(tl('crawloptions_element_seed_sites'))?></b></label></div> + <div class="topmargin"><label for="seed-sites"><b><?php + e(tl('crawloptions_element_seed_sites'))?></b></label></div> <textarea class="talltextarea" name="seed_sites" ><?php e($data['seed_sites']); ?></textarea> - <div class="center slightpad"><button class="buttonbox" type="submit"><?php e(tl('crawloptions_element_save_options')); ?></button></div> + <div class="center slightpad"><button 
class="buttonbox" + type="submit"><?php e(tl('crawloptions_element_save_options')); + ?></button></div> </form> </div> diff --git a/views/elements/editlocales_element.php b/views/elements/editlocales_element.php index 613d60171..70fed3930 100644 --- a/views/elements/editlocales_element.php +++ b/views/elements/editlocales_element.php @@ -34,8 +34,8 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} /** - * Element responsible for displaying the form where users can input string - * translations for a given locale + * Element responsible for displaying the form where users can input string + * translations for a given locale * * @author Chris Pollett * @@ -47,39 +47,53 @@ class EditlocalesElement extends Element { /** - * Draws a form with strings to translate and a text field for the translation into - * the given locale. Strings with no translations yet appear in red + * Draws a form with strings to translate and a text field for the + * translation into + * the given locale. Strings with no translations yet appear in red * - * @param array $data contains msgid and already translated msg_string info + * @param array $data contains msgid and already translated msg_string info */ public function render($data) { ?> <div class="currentactivity"> <div class="<?php e($data['leftorright']);?>"> - <a href="?c=admin&a=manageLocales&YIOOP_TOKEN=<?php e($data['YIOOP_TOKEN']) ?>" + <a href="?c=admin&a=manageLocales&YIOOP_TOKEN=<?php + e($data['YIOOP_TOKEN']) ?>" ><?php e(tl('editlocales_element_back_to_manage'))?></a> </div> - <h2><?php e(tl('editlocales_element_edit_locale', $data['CURRENT_LOCALE_NAME']))?></h2> + <h2><?php e(tl('editlocales_element_edit_locale', + $data['CURRENT_LOCALE_NAME']))?></h2> <form id="editLocaleForm" method="post" action=''> <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" 
/> <input type="hidden" name="a" value="manageLocales" /> <input type="hidden" name="arg" value="editlocale" /> - <input type="hidden" name="selectlocale" value="<?php e($data['CURRENT_LOCALE_TAG']); ?>" /> + <input type="hidden" name="selectlocale" value="<?php + e($data['CURRENT_LOCALE_TAG']); ?>" /> <table class="translatetable"> <?php foreach($data['STRINGS'] as $msg_id => $msg_string) { if(strlen($msg_string) > 0) { - e("<tr><td><label for='$msg_id'>$msg_id</label></td><td><input type='text' title='".@tl($msg_id,"%s", "%s", "%s")."' id='$msg_id' name='STRINGS[$msg_id]' value='$msg_string' /></td></tr>"); + e("<tr><td><label for='$msg_id'>$msg_id</label>". + "</td><td><input type='text' title='". + @tl($msg_id,"%s", "%s", "%s"). + "' id='$msg_id' name='STRINGS[$msg_id]' ". + "value='$msg_string' /></td></tr>"); } else { - e("<tr><td><label for='$msg_id'>$msg_id</label></td><td><input class='highlight' type='text' title='".@tl($msg_id,"%s", "%s", "%s")."' id='$msg_id' name='STRINGS[$msg_id]' value='$msg_string' /></td></tr>"); + e("<tr><td><label for='$msg_id'>$msg_id</label></td><td><input". + " class='highlight' type='text' title='". + @tl($msg_id,"%s", "%s", "%s")."' id='$msg_id' ". 
+ "name='STRINGS[$msg_id]' value='$msg_string' /></td></tr>"); } } ?> </table> - <div class="center slightpad"><button class="buttonbox" type="submit"><?php e(tl('editlocales_element_submit')); ?></button></div> + <div class="center slightpad"><button class="buttonbox" + type="submit"><?php + e(tl('editlocales_element_submit')); ?></button></div> </form> </div> <?php diff --git a/views/elements/language_element.php b/views/elements/language_element.php index ae2151036..27fa70078 100755 --- a/views/elements/language_element.php +++ b/views/elements/language_element.php @@ -1,26 +1,26 @@ <?php /** - * SeekQuarry/Yioop -- - * Open Source Pure PHP Search Engine, Crawler, and Indexer + * SeekQuarry/Yioop -- + * Open Source Pure PHP Search Engine, Crawler, and Indexer * - * Copyright (C) 2009, 2010 Chris Pollett chris@pollett.org + * Copyright (C) 2009, 2010 Chris Pollett chris@pollett.org * - * LICENSE: + * LICENSE: * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
* - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. * - * END LICENSE + * END LICENSE * * @author Chris Pollett chris@pollett.org * @package seek_quarry @@ -46,9 +46,11 @@ class LanguageElement extends Element { /** - * Draws a selects tag with a list of available languages + * Draws a selects tag with a list of available languages * - * @param array $data this variables LANGUAGES elt contains pairs of IANA tag and language names; its LOCALE_TAG is the current IANA locale tag + * @param array $data this variables LANGUAGES elt contains pairs of + * IANA tag and language names; its LOCALE_TAG is the current + * IANA locale tag */ public function render($data) { @@ -58,7 +60,8 @@ class LanguageElement extends Element <?php foreach ($data['LANGUAGES'] as $locale_tag => $locale_name) { if($data['LOCALE_TAG'] == $locale_tag) { - e('<option value="'.$locale_tag.'" selected="selected">'.$locale_name.'</option>'); + e('<option value="'.$locale_tag.'" selected="selected">'. 
+ $locale_name.'</option>'); } else { e('<option value="'.$locale_tag.'">'.$locale_name.'</option>'); } diff --git a/views/elements/manageaccount_element.php b/views/elements/manageaccount_element.php index 9a9be576b..c10aa1298 100644 --- a/views/elements/manageaccount_element.php +++ b/views/elements/manageaccount_element.php @@ -57,19 +57,30 @@ class ManageaccountElement extends Element <h2><?php e(tl('manageaccount_element_change_password'))?></h2> <form id="changePasswordForm" method="post" action=''> <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /> <input type="hidden" name="a" value="manageAccount" /> <input type="hidden" name="arg" value="changepassword" /> <table class="nametable"> - <tr><td><label for="old-password"><?php e(tl('manageaccount_element_old_password'))?></label></td> - <td><input type="password" id="old-password" name="oldpassword" maxlength="80" class="narrowfield"/></td></tr> - <tr><td><label for="new-password"><?php e(tl('manageaccount_element_new_password'))?></label></td> - <td><input type="password" id="new-password" name="newpassword" maxlength="80" class="narrowfield"/></td></tr> - <tr><td><label for="retype-password"><?php e(tl('manageaccount_element_retype_password'))?></label></td> - <td><input type="password" id="retype-password" name="retypepassword" maxlength="80" class="narrowfield" /></td></tr> + <tr><td><label for="old-password"><?php + e(tl('manageaccount_element_old_password'))?></label></td> + <td><input type="password" id="old-password" + name="oldpassword" maxlength="80" class="narrowfield"/> + </td></tr> + <tr><td><label for="new-password"><?php + e(tl('manageaccount_element_new_password'))?></label></td> + <td><input type="password" id="new-password" + name="newpassword" maxlength="80" class="narrowfield"/> + </td></tr> + <tr><td><label 
for="retype-password"><?php + e(tl('manageaccount_element_retype_password'))?></label></td> + <td><input type="password" id="retype-password" + name="retypepassword" maxlength="80" class="narrowfield" /> + </td></tr> <tr><td></td> - <td class="center"><button class="buttonbox" type="submit"><?php e(tl('manageaccount_element_save')); ?></button></td></tr> + <td class="center"><button class="buttonbox" type="submit"><?php + e(tl('manageaccount_element_save')); ?></button></td></tr> </table> </form> </div> diff --git a/views/elements/managecrawl_element.php b/views/elements/managecrawl_element.php index 23fa1f464..28e45918f 100755 --- a/views/elements/managecrawl_element.php +++ b/views/elements/managecrawl_element.php @@ -34,8 +34,8 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} /** - * Element responsible for displaying info about starting, stopping, deleting, and using - * a crawl. It makes use of the CrawlStatusView + * Element responsible for displaying info about starting, stopping, deleting, + * and using a crawl. 
It makes use of the CrawlStatusView * * @author Chris Pollett * @package seek_quarry @@ -46,9 +46,10 @@ class ManagecrawlElement { /** - * Draw form to start a new crawl, has div place holder and ajax code to get info about current crawl - * - * @param array $data form about about a crawl such as its description + * Draw form to start a new crawl, has div place holder and ajax code to + * get info about current crawl + * + * @param array $data form about about a crawl such as its description */ public function render($data) {?> @@ -56,12 +57,22 @@ class ManagecrawlElement <h2><?php e(tl('managecrawl_element_create_crawl'))?></h2> <form id="crawlStartForm" method="get" action=''> <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /> <input type="hidden" name="a" value="manageCrawl" /> <input type="hidden" name="arg" value="start" /> - <p><label for="description-name"><?php e(tl('managecrawl_element_description'))?></label>: <input type="text" id="description-name" name="description" value="<?php if(isset($data['DESCRIPTION'])) {e($data['DESCRIPTION']);} ?>" maxlength="80" class="widefield"/> - <button class="buttonbox" type="submit"><?php e(tl('managecrawl_element_start_new_crawl')); ?></button> <a href="?c=admin&a=manageCrawl&arg=options&YIOOP_TOKEN=<?php e($data['YIOOP_TOKEN']) ?>"><?php e(tl('managecrawl_element_options')); ?></a></p> + <p><label for="description-name"><?php + e(tl('managecrawl_element_description')); ?></label>: + <input type="text" id="description-name" name="description" + value="<?php if(isset($data['DESCRIPTION'])) { + e($data['DESCRIPTION']); } ?>" maxlength="80" + class="widefield"/> + <button class="buttonbox" type="submit"><?php + e(tl('managecrawl_element_start_new_crawl')); ?></button> + <a href="?c=admin&a=manageCrawl&arg=options&YIOOP_TOKEN=<?php + 
e($data['YIOOP_TOKEN']) ?>"><?php + e(tl('managecrawl_element_options')); ?></a></p> </form> <div id="crawlstatus" > <h2><?php e(tl('managecrawl_element_awaiting_status'))?></h2> @@ -70,7 +81,8 @@ class ManagecrawlElement var updateId; function crawlStatusUpdate() { - var startUrl = "?c=admin&YIOOP_TOKEN=<?php e($data['YIOOP_TOKEN']); ?>&a=crawlStatus"; + var startUrl = "?c=admin&YIOOP_TOKEN=<?php + e($data['YIOOP_TOKEN']); ?>&a=crawlStatus"; var crawlTag = elt('crawlstatus'); getPage(crawlTag, startUrl); } @@ -79,7 +91,8 @@ class ManagecrawlElement { clearInterval(updateId ); var crawlTag = elt('crawlstatus'); - crawlTag.innerHTML= "<h2 class='red'><?php e(tl('managecrawl_element_up_longer_update'))?></h2>"; + crawlTag.innerHTML= "<h2 class='red'><?php + e(tl('managecrawl_element_up_longer_update'))?></h2>"; } function doUpdate() { diff --git a/views/elements/managelocales_element.php b/views/elements/managelocales_element.php index 9d423ddcb..2de3dd04a 100644 --- a/views/elements/managelocales_element.php +++ b/views/elements/managelocales_element.php @@ -47,10 +47,11 @@ class ManagelocalesElement extends Element { /** - * Responsible for drawing the ceate, delete set writing mode screen for locales - * as well ass the screen for adding modifying translations + * Responsible for drawing the ceate, delete set writing mode screen for + * locales as well ass the screen for adding modifying translations * - * @param array $data contains info about the available locales and what has been translated + * @param array $data contains info about the available locales and what + * has been translated */ public function render($data) { @@ -59,25 +60,38 @@ class ManagelocalesElement extends Element <h2><?php e(tl('managelocales_element_add_locale'))?></h2> <form id="addLocaleForm" method="post" action=''> <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" 
name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /> <input type="hidden" name="a" value="manageLocales" /> <input type="hidden" name="arg" value="addlocale" /> <table class="nametable"> - <tr><td><label for="locale-name"><?php e(tl('managelocales_element_localenamelabel'))?></label></td> - <td><input type="text" id="locale-name" name="localename" maxlength="80" class="narrowfield"/></td><td></td> + <tr><td><label for="locale-name"><?php + e(tl('managelocales_element_localenamelabel'))?></label></td> + <td><input type="text" id="locale-name" + name="localename" maxlength="80" class="narrowfield"/> + </td><td></td> </tr> - <tr><td><label for="locale-tag"><?php e(tl('managelocales_element_localetaglabel'))?></label></td> - <td><input type="text" id="locale-tag" name="localetag" maxlength="80" class="narrowfield"/></td> + <tr><td><label for="locale-tag"><?php + e(tl('managelocales_element_localetaglabel'))?></label></td> + <td><input type="text" id="locale-tag" + name="localetag" maxlength="80" class="narrowfield"/></td> </tr> <tr><td><?php e(tl('managelocales_element_writingmodelabel'))?></td> - <td><label for="locale-lr-tb">lr-tb</label><input type="radio" id="locale-lr-tb" name="writingmode" value="lr-tb" checked="checked" /> - <label for="locale-rl-tb">rl-tb</label><input type="radio" id="locale-rl-tb" name="writingmode" value="rl-tb" /> - <label for="locale-tb-rl">tb-rl</label><input type="radio" id="locale-tb-rl" name="writingmode" value="tb-rl" /> - <label for="locale-tb-lr">tb-lr</label><input type="radio" id="locale-tb-lr" name="writingmode" value="tb-lr" /> + <td><label for="locale-lr-tb">lr-tb</label><input type="radio" + id="locale-lr-tb" name="writingmode" + value="lr-tb" checked="checked" /> + <label for="locale-rl-tb">rl-tb</label><input type="radio" + id="locale-rl-tb" name="writingmode" value="rl-tb" /> + <label for="locale-tb-rl">tb-rl</label><input type="radio" + id="locale-tb-rl" name="writingmode" value="tb-rl" /> + <label 
for="locale-tb-lr">tb-lr</label><input type="radio" + id="locale-tb-lr" name="writingmode" value="tb-lr" /> </td> </tr> - <tr><td></td><td class="center"><button class="buttonbox" type="submit"><?php e(tl('managelocales_element_submit')); ?></button></td> + <tr><td></td><td class="center"><button class="buttonbox" + type="submit"><?php e(tl('managelocales_element_submit')); + ?></button></td> </tr> </table> </form> @@ -85,27 +99,40 @@ class ManagelocalesElement extends Element <h2><?php e(tl('managelocales_element_delete_locale'))?></h2> <form id="deleteLocaleForm" method="post" action=''> <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /> <input type="hidden" name="a" value="manageLocales" /> <input type="hidden" name="arg" value="deletelocale" /> <table class="nametable" > - <tr><td><label for="delete-localename"><?php e(tl('managelocales_element_delete_localelabel'))?></label></td> - <td><?php $this->view->optionsHelper->render("delete-localename", "selectlocale", $data['LOCALE_NAMES'], "-1"); ?></td><td><button class="buttonbox" type="submit"><?php e(tl('managelocales_element_submit')); ?></button></td> + <tr><td><label for="delete-localename"><?php + e(tl('managelocales_element_delete_localelabel'))?></label></td> + <td><?php $this->view->optionsHelper->render("delete-localename", + "selectlocale", $data['LOCALE_NAMES'], "-1"); ?></td> + <td><button class="buttonbox" type="submit"><?php + e(tl('managelocales_element_submit')); ?></button></td> </tr> </table> </form> <h2><?php e(tl('managelocales_element_locale_list'))?></h2> <table class="localetable"> - <tr><th><?php e(tl('managelocales_element_localename')); ?></th><th><?php e(tl('managelocales_element_localetag'));?></th> - <th><?php e(tl('managelocales_element_writingmode'));?></th><th><?php 
e(tl('managelocales_element_percenttranslated'));?></th> + <tr> + <th><?php e(tl('managelocales_element_localename')); ?></th> + <th><?php e(tl('managelocales_element_localetag'));?></th> + <th><?php e(tl('managelocales_element_writingmode')); + ?></th> + <th><?php e(tl('managelocales_element_percenttranslated'));?></th> </tr> <?php foreach($data['LOCALES'] as $locale) { - e("<tr><td><a href='?c=admin&a=manageLocales&arg=editlocale&selectlocale=".$locale['LOCALE_TAG']. - "&YIOOP_TOKEN=".$data['YIOOP_TOKEN']."'>".$locale['LOCALE_NAME']."</a></td><td>".$locale['LOCALE_TAG']."</td>"); - e("<td>".$locale['WRITING_MODE']."</td><td class='alignRight' >".$locale['PERCENT_WITH_STRINGS']."</td></tr>"); + e("<tr><td><a href='?c=admin&a=manageLocales". + "&arg=editlocale&selectlocale=".$locale['LOCALE_TAG']. + "&YIOOP_TOKEN=".$data['YIOOP_TOKEN']."'>". + $locale['LOCALE_NAME']."</a></td><td>". + $locale['LOCALE_TAG']."</td>"); + e("<td>".$locale['WRITING_MODE']."</td><td class='alignRight' >". + $locale['PERCENT_WITH_STRINGS']."</td></tr>"); } ?> </table> diff --git a/views/elements/manageroles_element.php b/views/elements/manageroles_element.php index 898be3ad0..9c96a29d2 100644 --- a/views/elements/manageroles_element.php +++ b/views/elements/manageroles_element.php @@ -34,8 +34,8 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} /** - * Used to draw the admin screen on which admin users can create roles, delete roles - * and add and delete roles from users + * Used to draw the admin screen on which admin users can create roles, delete + * roles and add and delete roles from users * * @author Chris Pollett * @package seek_quarry @@ -46,9 +46,11 @@ class ManagerolesElement extends Element { /** - * reders the screen in which roles can be created, deleted, and added or deleted from a user + * renders the screen in which roles can be created, deleted, and added or + * deleted from a user * - * @param array $data contains antiCSRF token, as well as data on available 
roles or which user has what role + * @param array $data contains antiCSRF token, as well as data on + * available roles or which user has what role */ public function render($data) {?> @@ -56,13 +58,18 @@ class ManagerolesElement extends Element <h2><?php e(tl('manageroles_element_add_role'))?></h2> <form id="addRoleForm" method="post" action=''> <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /> <input type="hidden" name="a" value="manageRoles" /> <input type="hidden" name="arg" value="addrole" /> <table class="nametable"> - <tr><td><label for="role-name"><?php e(tl('manageroles_element_rolename'))?></label></td> - <td><input type="text" id="role-name" name="rolename" maxlength="80" class="narrowfield" /></td><td class="center"><button class="buttonbox" type="submit"><?php e(tl('manageroles_element_submit')); ?></button></td> + <tr><td><label for="role-name"><?php + e(tl('manageroles_element_rolename'))?></label></td> + <td><input type="text" id="role-name" name="rolename" + maxlength="80" class="narrowfield" /></td><td + class="center"><button class="buttonbox" type="submit"><?php + e(tl('manageroles_element_submit')); ?></button></td> </tr> </table> </form> @@ -70,41 +77,56 @@ class ManagerolesElement extends Element <h2><?php e(tl('manageroles_element_delete_role'))?></h2> <form id="deleteRoleForm" method="post" action=''> <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /> <input type="hidden" name="a" value="manageRoles" /> <input type="hidden" name="arg" value="deleterole" /> <table class="nametable"> - <tr><td><label for="delete-rolename"><?php e(tl('manageusers_element_delete_rolename'))?></label></td> - <td><?php 
$this->view->optionsHelper->render("delete-rolename", "selectrole", $data['ROLE_NAMES'], "-1"); ?></td><td><button class="buttonbox" type="submit"><?php e(tl('manageroles_element_submit')); ?></button></td> + <tr><td><label for="delete-rolename"><?php + e(tl('manageusers_element_delete_rolename'))?></label></td> + <td><?php $this->view->optionsHelper->render( + "delete-rolename", "selectrole", $data['ROLE_NAMES'], "-1"); + ?></td><td><button class="buttonbox" type="submit"><?php + e(tl('manageroles_element_submit')); ?></button></td> </tr> </table> </form> <h2><?php e(tl('manageroles_element_view_role_activities'))?></h2> <form id="viewRoleActivityForm" method="get" action='' > <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /> <input type="hidden" name="a" value="manageRoles" /> <input type="hidden" name="arg" value="viewroleactivities" /> <table class="nametable"> - <tr><td><label for="select-role"><?php e(tl('manageusers_element_select_role'))?></label></td> - <td><?php $this->view->optionsHelper->render("select-role", "selectrole", $data['ROLE_NAMES'], $data['SELECT_ROLE']); ?></td></tr> + <tr><td><label for="select-role"><?php + e(tl('manageusers_element_select_role'))?></label></td> + <td><?php $this->view->optionsHelper->render("select-role", + "selectrole", $data['ROLE_NAMES'], $data['SELECT_ROLE']); + ?></td></tr> </table> </form> <?php if(isset($data['ROLE_ACTIVITIES'])) { - if(count($data['AVAILABLE_ACTIVITIES']) > 0 && $data['SELECT_ROLE'] != -1) { - ?> + if(count($data['AVAILABLE_ACTIVITIES']) > 0 && + $data['SELECT_ROLE'] != -1) { ?> <form id="addRoleActivityForm" method="get" action='' > <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" name="YIOOP_TOKEN" value="<?php + 
e($data['YIOOP_TOKEN']); ?>" /> <input type="hidden" name="a" value="manageRoles" /> <input type="hidden" name="arg" value="addactivity" /> - <input type="hidden" name="selectrole" value="<?php e($data['SELECT_ROLE']);?>" /> + <input type="hidden" name="selectrole" value="<?php + e($data['SELECT_ROLE']);?>" /> <table class="nametable"> - <tr><td><label for="add-activity"><?php e(tl('manageusers_element_add_activity'))?></label></td> - <td><?php $this->view->optionsHelper->render("add-activity", "selectactivity", $data['AVAILABLE_ACTIVITIES'], $data['SELECT_ACTIVITY']); ?></td> - <td><button class="buttonbox" type="submit"><?php e(tl('manageroles_element_submit')); ?></button></td></tr> + <tr><td><label for="add-activity"><?php + e(tl('manageusers_element_add_activity'))?></label></td> + <td><?php $this->view->optionsHelper->render("add-activity", + "selectactivity", $data['AVAILABLE_ACTIVITIES'], + $data['SELECT_ACTIVITY']); ?></td> + <td><button class="buttonbox" type="submit"><?php + e(tl('manageroles_element_submit')); ?></button></td></tr> </table> </form> <?php @@ -112,8 +134,13 @@ class ManagerolesElement extends Element ?> <table class="roletable"><?php foreach($data['ROLE_ACTIVITIES'] as $role_activity) { - e("<tr><td>".$role_activity['ACTIVITY_NAME']."</td><td><a href='?c=admin&a=manageRoles&arg=deleteactivity&selectrole=".$role_activity['ROLE_ID']. - "&selectactivity=".$role_activity['ACTIVITY_ID']."&YIOOP_TOKEN=".$data['YIOOP_TOKEN']."'>Delete</a></td>"); + e("<tr><td>".$role_activity['ACTIVITY_NAME']. + "</td><td><a href='?c=admin&a=manageRoles". + "&arg=deleteactivity&selectrole=". + $role_activity['ROLE_ID']. + "&selectactivity=".$role_activity['ACTIVITY_ID']. + "&YIOOP_TOKEN=".$data['YIOOP_TOKEN']. 
+ "'>Delete</a></td>"); } ?> </table> diff --git a/views/elements/manageusers_element.php b/views/elements/manageusers_element.php index f471e1bb3..2eaed4207 100644 --- a/views/elements/manageusers_element.php +++ b/views/elements/manageusers_element.php @@ -46,10 +46,10 @@ class ManageusersElement extends Element { /** - * render draws a screen in which an admin can add users, delete users, - * and manipulate user roles. + * draws a screen in which an admin can add users, delete users, + * and manipulate user roles. * - * @param array $data info about current users and current roles, CSRF token + * @param array $data info about current users and current roles, CSRF token */ public function render($data) { @@ -58,18 +58,28 @@ class ManageusersElement extends Element <h2><?php e(tl('manageusers_element_add_user'))?></h2> <form id="addUserForm" method="post" action=''> <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /> <input type="hidden" name="a" value="manageUsers" /> <input type="hidden" name="arg" value="adduser" /> <table class="nametable"> - <tr><td><label for="user-name"><?php e(tl('manageusers_element_username'))?></label></td> - <td><input type="text" id="user-name" name="username" maxlength="80" class="narrowfield"/></td></tr> - <tr><td><label for="pass-word"><?php e(tl('manageusers_element_password'))?></label></td> - <td><input type="password" id="pass-word" name="password" maxlength="80" class="narrowfield"/></td></tr> - <td><label for="retype-password"><?php e(tl('manageusers_element_retype_password'))?></label></td> - <td><input type="password" id="retype-password" name="retypepassword" maxlength="80" class="narrowfield"/></td></tr> - <tr><td></td><td class="center"><button class="buttonbox" type="submit"><?php e(tl('manageusers_element_submit')); ?></button></td> + <tr><td><label 
for="user-name"><?php + e(tl('manageusers_element_username'))?></label></td> + <td><input type="text" id="user-name" + name="username" maxlength="80" class="narrowfield"/></td></tr> + <tr><td><label for="pass-word"><?php + e(tl('manageusers_element_password'))?></label></td> + <td><input type="password" id="pass-word" + name="password" maxlength="80" class="narrowfield"/></td></tr> + <td><label for="retype-password"><?php + e(tl('manageusers_element_retype_password'))?></label></td> + <td><input type="password" id="retype-password" + name="retypepassword" maxlength="80" + class="narrowfield"/></td></tr> + <tr><td></td><td class="center"><button class="buttonbox" + type="submit"><?php e(tl('manageusers_element_submit')); + ?></button></td> </tr> </table> </form> @@ -77,13 +87,18 @@ class ManageusersElement extends Element <h2><?php e(tl('manageusers_element_delete_user'))?></h2> <form id="deleteUserForm" method="post" action=''> <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /> <input type="hidden" name="a" value="manageUsers" /> <input type="hidden" name="arg" value="deleteuser" /> <table class="nametable"> - <tr><td><label for="delete-username"><?php e(tl('manageusers_element_delete_username'))?></label></td> - <td><?php $this->view->optionsHelper->render("delete-username", "username", $data['USER_NAMES'], ""); ?></td><td><button class="buttonbox" type="submit"><?php e(tl('manageusers_element_submit')); ?></button></td> + <tr><td><label for="delete-username"><?php + e(tl('manageusers_element_delete_username'))?></label></td> + <td><?php $this->view->optionsHelper->render( + "delete-username", "username", $data['USER_NAMES'], ""); + ?></td><td><button class="buttonbox" type="submit"><?php + e(tl('manageusers_element_submit')); ?></button></td> </tr> </table> </form> @@ -91,12 +106,16 @@ class 
ManageusersElement extends Element <h2><?php e(tl('manageusers_element_view_user_roles'))?></h2> <form id="viewUserRoleForm" method="get" action='' > <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /> <input type="hidden" name="a" value="manageUsers" /> <input type="hidden" name="arg" value="viewuserroles" /> <table class="nametable"> - <tr><td><label for="select-user"><?php e(tl('manageusers_element_select_user'))?></label></td> - <td><?php $this->view->optionsHelper->render("select-user", "selectuser", $data['USER_NAMES'], $data['SELECT_USER']); ?></td></tr> + <tr><td><label for="select-user"><?php + e(tl('manageusers_element_select_user'))?></label></td> + <td><?php $this->view->optionsHelper->render("select-user", + "selectuser", $data['USER_NAMES'], $data['SELECT_USER']); + ?></td></tr> </table> </form> <?php @@ -105,14 +124,21 @@ class ManageusersElement extends Element ?> <form id="addUserRoleForm" method="get" action='' > <input type="hidden" name="c" value="admin" /> - <input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /> <input type="hidden" name="a" value="manageUsers" /> <input type="hidden" name="arg" value="adduserrole" /> - <input type="hidden" name="selectuser" value="<?php e($data['SELECT_USER']); ?>" /> - <table summary="organizes the fields and columns of the view user role form" cellpadding="5px"> - <tr><td><label for="add-role"><?php e(tl('manageusers_element_add_role'))?></label></td> - <td><?php $this->view->optionsHelper->render("add-userrole", "selectrole", $data['AVAILABLE_ROLES'], $data['SELECT_ROLE']); ?></td> - <td><button class="buttonbox" type="submit"><?php e(tl('manageusers_element_submit')); ?></button></td></tr> + <input type="hidden" 
name="selectuser" value="<?php + e($data['SELECT_USER']); ?>" /> + <table summary="organizes the fields and columns of the + view user role form" cellpadding="5px"> + <tr><td><label for="add-role"><?php + e(tl('manageusers_element_add_role'))?></label></td> + <td><?php $this->view->optionsHelper->render("add-userrole", + "selectrole", $data['AVAILABLE_ROLES'], + $data['SELECT_ROLE']); ?></td> + <td><button class="buttonbox" type="submit"><?php + e(tl('manageusers_element_submit')); ?></button></td></tr> </table> </form> <?php @@ -120,8 +146,11 @@ class ManageusersElement extends Element ?> <table class="roletable" ><?php foreach($data['SELECT_ROLES'] as $role_array) { - echo "<tr><td>".$role_array['ROLE_NAME']."</td><td><a href='?c=admin&a=manageUsers&arg=deleteuserrole&selectrole=".$role_array['ROLE_ID']; - echo "&selectuser=".$data['SELECT_USER']."&YIOOP_TOKEN=".$data['YIOOP_TOKEN']."'>Delete</a></td>"; + echo "<tr><td>".$role_array['ROLE_NAME']. + "</td><td><a href='?c=admin&a=manageUsers". + "&arg=deleteuserrole&selectrole=".$role_array['ROLE_ID']; + echo "&selectuser=".$data['SELECT_USER']. 
+ "&YIOOP_TOKEN=".$data['YIOOP_TOKEN']."'>Delete</a></td>"; } ?> </table> diff --git a/views/elements/signin_element.php b/views/elements/signin_element.php index 55c1b26dd..18419cfbe 100755 --- a/views/elements/signin_element.php +++ b/views/elements/signin_element.php @@ -54,15 +54,20 @@ class SigninElement extends Element ?> <div class="userNav" > <ul> - <li><a href="?c=settings"><?php e(tl('signin_element_settings')); ?></a></li> + <li><a href="?c=settings"><?php + e(tl('signin_element_settings')); ?></a></li> <?php if(!isset($_SESSION['USER_ID'])) { ?> - <li><a href="?c=admin"><?php e(tl('signin_element_signin')); ?></a></li> + <li><a href="?c=admin"><?php + e(tl('signin_element_signin')); ?></a></li> <?php } else { ?> - <li><a href="?c=admin&YIOOP_TOKEN=<?php e($data['YIOOP_TOKEN'])?>"><?php e(tl('signin_element_admin')); ?></a></li> - <li><a href="?c=search&a=signout"><?php e(tl('signin_element_signout')); ?></a></li> + <li><a href="?c=admin&YIOOP_TOKEN=<?php + e($data['YIOOP_TOKEN'])?>"><?php + e(tl('signin_element_admin')); ?></a></li> + <li><a href="?c=search&a=signout"><?php + e(tl('signin_element_signout')); ?></a></li> <?php } ?> diff --git a/views/helpers/options_helper.php b/views/helpers/options_helper.php index 37036a8bc..9a6a34a2a 100755 --- a/views/helpers/options_helper.php +++ b/views/helpers/options_helper.php @@ -68,8 +68,9 @@ class OptionsHelper extends Helper <?php foreach($options as $value => $text) { ?> - <option value="<?php e($value); ?>" - <?php if($value==$selected) { e('selected="selected"'); } ?>><?php e($text); ?></option> + <option value="<?php e($value); ?>" <?php + if($value==$selected) { e('selected="selected"'); } + ?>><?php e($text); ?></option> <?php } ?> diff --git a/views/helpers/pagination_helper.php b/views/helpers/pagination_helper.php index e209727de..1ab3224d7 100755 --- a/views/helpers/pagination_helper.php +++ b/views/helpers/pagination_helper.php @@ -48,21 +48,26 @@ require_once 
BASE_DIR."/views/helpers/helper.php"; class PaginationHelper extends Helper { /** - * The maixmum numbered links to pages to show besides the next and previous links + * The maixmum numbered links to pages to show besides the next and + * previous links * @var int */ const MAX_PAGES_TO_SHOW = 11; /** - * Draws a strip of links which begins with a previous - * link (if their are previous pages of links) followed by up to - * ten links to more search result page (if available) followed - * by a next set of pages link. + * Draws a strip of links which begins with a previous + * link (if their are previous pages of links) followed by up to + * ten links to more search result page (if available) followed + * by a next set of pages link. * - * @param string $base_url the url together with base query that the search was done on - * @param int $limit the number of the first link to display in the set of search results. - * @param int $results_per_page how many links are displayed on a given page of search results - * @param int $total_results the total number of search results for the current search term + * @param string $base_url the url together with base query that the + * search was done on + * @param int $limit the number of the first link to display in the + * set of search results. + * @param int $results_per_page how many links are displayed on a given + * page of search results + * @param int $total_results the total number of search results for the + * current search term */ public function render($base_url, $limit, $results_per_page, $total_results) { @@ -84,7 +89,9 @@ class PaginationHelper extends Helper echo "<div class='pagination'><ul>"; if(0 < $num_earlier_pages) { $prev_limit = ($num_earlier_pages - 1)*$results_per_page; - echo "<li><span class='end'>«<a href='$base_url&limit=$prev_limit'>".tl('pagination_helper_previous')."</a></span></li>"; + echo "<li><span class='end'>«". + "<a href='$base_url&limit=$prev_limit'>". 
+ tl('pagination_helper_previous')."</a></span></li>"; } for($i=$first_page; $i < $last_page; $i++) { @@ -92,12 +99,15 @@ class PaginationHelper extends Helper echo "<li><span class='item'>$i</span></li>"; } else { $cur_limit = $i * $results_per_page; - echo "<li><a class='item' href='$base_url&limit=$cur_limit'>$i</a></li>"; + echo "<li><a class='item' href='$base_url". + "&limit=$cur_limit'>$i</a></li>"; } } if($num_earlier_pages < $total_pages - 1) { $next_limit = ($num_earlier_pages + 1)*$results_per_page; - echo "<li><span class='end'><a href='$base_url&limit=$next_limit'>".tl('pagination_helper_next')."</a>»</span></li>"; + echo "<li><span class='end'><a href='$base_url". + "&limit=$next_limit'>". + tl('pagination_helper_next')."</a>»</span></li>"; } echo "</ul></div>"; diff --git a/views/layouts/layout.php b/views/layouts/layout.php index efa3d07ff..af770b9d1 100755 --- a/views/layouts/layout.php +++ b/views/layouts/layout.php @@ -45,14 +45,14 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} class Layout { /** - * The view that is to be drawn on this layout - * @var object + * The view that is to be drawn on this layout + * @var object */ var $view; /** - * The constructor sets the view that will be drawn inside the - * Layout. + * The constructor sets the view that will be drawn inside the + * Layout. * */ public function __construct($v) @@ -61,13 +61,13 @@ class Layout } /** - * The render method of Layout and its subclasses is responsible for drawing the - * header of the document, calling the renderView method of the - * View that lives on the layout and then drawing the footer of - * the document. + * The render method of Layout and its subclasses is responsible for drawing + * the header of the document, calling the renderView method of the + * View that lives on the layout and then drawing the footer of + * the document. * - * @param array $data an array of data set up by the controller to be - * be used in drawing the Layout and its View. 
+ * @param array $data an array of data set up by the controller to be + * be used in drawing the Layout and its View. */ public function render($data) { $this->view->renderView($data); diff --git a/views/layouts/web_layout.php b/views/layouts/web_layout.php index 8796c9af9..675b28a47 100755 --- a/views/layouts/web_layout.php +++ b/views/layouts/web_layout.php @@ -46,11 +46,11 @@ class WebLayout extends Layout { /** - * Responsible for drawing the header of the document containing - * Yioop! title and including basic.js. It calls the renderView method of the - * View that lives on the layout. If the QUERY_STATISTIC config setting is set, - * it output statistics about each query run on the database. Finally, it draws the footer of - * the document. + * Responsible for drawing the header of the document containing + * Yioop! title and including basic.js. It calls the renderView method of + * the View that lives on the layout. If the QUERY_STATISTIC config setting + * is set, it output statistics about each query run on the database. + * Finally, it draws the footer of the document. * * @param array $data an array of data set up by the controller to be * be used in drawing the WebLayout and its View. 
@@ -59,19 +59,22 @@ class WebLayout extends Layout ?> <!DOCTYPE html> - <html lang="<?php e($data['LOCALE_TAG']);?>" dir="<?php e($data['LOCALE_DIR']);?>"> + <html lang="<?php e($data['LOCALE_TAG']); + ?>" dir="<?php e($data['LOCALE_DIR']);?>"> <head> <title>Yioop!</title> <meta name="Author" content="Christopher Pollett" /> - <meta name="description" content="<?php e(tl('web_layout_description')); ?>" /> + <meta name="description" content="<?php + e(tl('web_layout_description')); ?>" /> <meta charset="utf-8" /> <link rel="shortcut icon" href="favicon.ico" /> <link rel="stylesheet" type="text/css" href="css/search.css" /> </head> - <body class="html-<?php e($data['BLOCK_PROGRESSION']);?> html-<?php e($data['LOCALE_DIR']);?> html-<?php e($data['WRITING_MODE']);?>"> + <body class="html-<?php e($data['BLOCK_PROGRESSION']);?> html-<?php + e($data['LOCALE_DIR']);?> html-<?php e($data['WRITING_MODE']);?>"> <div id="message" ></div> <?php $this->view->renderView($data); @@ -79,10 +82,14 @@ class WebLayout extends Layout <div id="query-statistics"> <?php e("<h1>".tl('web_layout_query_statistics')."</h1>"); - e("<b>".tl('web_layout_total_elapsed_time', $data['TOTAL_ELAPSED_TIME'])."</b>"); + e("<b>".tl('web_layout_total_elapsed_time', + $data['TOTAL_ELAPSED_TIME'])."</b>"); foreach($data['QUERY_STATISTICS'] as $query_info) { - e("<div class='query'><div>".$query_info['QUERY']."</div><div><b>". - tl('web_layout_query_time', $query_info['ELASPED_TIME'])."</b></div></div>"); + e("<div class='query'><div>".$query_info['QUERY']. + "</div><div><b>". + tl('web_layout_query_time', + $query_info['ELASPED_TIME']). 
+ "</b></div></div>"); } ?> </div> diff --git a/views/nocache_view.php b/views/nocache_view.php index 9a5fdc93f..bf73c4593 100755 --- a/views/nocache_view.php +++ b/views/nocache_view.php @@ -49,7 +49,7 @@ class NocacheView extends View * @var string */ var $layout = "web"; - /** An array of names of element objects that the view uses to display itself + /** Names of element objects that the view uses to display itself * @var array */ var $elements = array("language"); diff --git a/views/search_view.php b/views/search_view.php index c93d7cd0c..7da32826c 100755 --- a/views/search_view.php +++ b/views/search_view.php @@ -37,7 +37,6 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} require_once BASE_DIR."/lib/crawl_constants.php"; /** - * * Web page used to present search results * It is also contains the search box for * people to types searches into @@ -49,11 +48,11 @@ require_once BASE_DIR."/lib/crawl_constants.php"; class SearchView extends View implements CrawlConstants { - /** An array of names of helper objects that the view uses to help draw itself + /** Names of helper objects that the view uses to help draw itself * @var array */ var $helpers = array("pagination", "filetype"); - /** An array of names of element objects that the view uses to display itself + /** Names of element objects that the view uses to display itself * @var array */ var $elements = array("signin"); @@ -79,25 +78,35 @@ class SearchView extends View implements CrawlConstants e('<div class="landing">'); } ?> - <h1 class="logo"><a href="."><img src="resources/yioop.png" alt="Yioop!" /></a></h1> + <h1 class="logo"><a href="."><img + src="resources/yioop.png" alt="Yioop!" 
/></a></h1> <div class="searchbox"> <form id="searchForm" method="get" action=''> <p> - <input type="text" title="<?php e(tl('search_view_input_label')); ?>" id="search-name" name="q" value="<?php if(isset($data['QUERY'])) {e(urldecode($data['QUERY']));} ?>" - placeholder="<?php e(tl('search_view_input_placeholder')); ?>" /> - <button class="buttonbox" type="submit"><?php e(tl('search_view_search')); ?></button> + <input type="text" title="<?php e(tl('search_view_input_label')); ?>" + id="search-name" name="q" value="<?php if(isset($data['QUERY'])) { + e(urldecode($data['QUERY']));} ?>" + placeholder="<?php e(tl('search_view_input_placeholder')); ?>" /> + <button class="buttonbox" type="submit"><?php + e(tl('search_view_search')); ?></button> </p> </form> </div> <?php if(!isset($data['PAGES'])) { ?> - <div class="landing-footer"><a href="http://www.seekquarry.com"><?php e(tl('search_view_developed_seek_quarry')); ?></a></div> + <div class="landing-footer"> + <a href="http://www.seekquarry.com/"><?php + e(tl('search_view_developed_seek_quarry')); ?></a></div> </div><?php } else { ?> - <h2><?php e(tl('search_view_query_results')); ?> (<?php e(tl('search_view_calculated', $data['ELAPSED_TIME']));?> <?php - e(tl('search_view_results', $data['LIMIT'], min($data['TOTAL_ROWS'], $data['LIMIT'] + $data['RESULTS_PER_PAGE']), $data['TOTAL_ROWS'])); + <h2><?php e(tl('search_view_query_results')); ?> (<?php + e(tl('search_view_calculated', $data['ELAPSED_TIME']));?> <?php + e(tl('search_view_results', $data['LIMIT'], + min($data['TOTAL_ROWS'], + $data['LIMIT'] + $data['RESULTS_PER_PAGE']), + $data['TOTAL_ROWS'])); ?> )</h2> <?php foreach($data['PAGES'] as $page) {?> @@ -105,7 +114,8 @@ class SearchView extends View implements CrawlConstants <h2> <a href="<?php e($page[self::URL]); ?>" ><?php if(isset($page[self::THUMB]) && $page[self::THUMB] != 'NULL') { - ?><img src="<?php e($page[self::THUMB]); ?>" alt="<?php e($page[self::TITLE]); ?>" /> <?php + ?><img src="<?php 
e($page[self::THUMB]); ?>" alt="<?php + e($page[self::TITLE]); ?>" /> <?php } else { echo $page[self::TITLE]; $this->filetypeHelper->render($page[self::TYPE]); @@ -113,24 +123,37 @@ class SearchView extends View implements CrawlConstants ?></a></h2> <p><?php echo $page[self::DESCRIPTION]; ?></p> - <p class="echolink" ><?php e($page[self::URL]." "); e(tl('search_view_rank', number_format($page[self::DOC_RANK], 2))); - e(tl('search_view_relevancy',number_format(1.25*floatval($page[self::SCORE]) - floatval($page[self::DOC_RANK]), 2) )); - e(tl('search_view_score', 1.25* $page[self::SCORE]));?> - <a href="?c=search&a=cache&q=<?php e($data['QUERY']); ?>&arg=<?php e(urlencode($page[self::URL])); - ?>&so=<?php e(urlencode($page[self::SUMMARY_OFFSET])); ?>" > + <p class="echolink" ><?php e($page[self::URL]." "); + e(tl('search_view_rank', + number_format($page[self::DOC_RANK], 2))); + e(tl('search_view_relevancy', + number_format(1.25*floatval($page[self::SCORE]) + - floatval($page[self::DOC_RANK]), 2) )); + e(tl('search_view_score', 1.25* $page[self::SCORE]));?> + <a href="?c=search&a=cache&q=<?php + e($data['QUERY']); ?>&arg=<?php + e(urlencode($page[self::URL])); + ?>&so=<?php + e(urlencode($page[self::SUMMARY_OFFSET])); ?>" > <?php - if($page[self::TYPE] == "text/html" || stristr($page[self::TYPE], "image")) { - e(tl('search_view_cache')); + if($page[self::TYPE] == "text/html" || + stristr($page[self::TYPE], "image")) { + e(tl('search_view_cache')); } else { e(tl('search_view_as_text')); } - ?></a>. <a href="?c=search&a=related&arg=<?php e(urlencode($page[self::URL])); ?>&so=<?php e(urlencode($page[self::SUMMARY_OFFSET])); ?>" ><?php e(tl('search_view_similar')); ?></a>.</p> + ?></a>. 
<a href="?c=search&a=related&arg=<?php + e(urlencode($page[self::URL])); ?>&so=<?php + e(urlencode($page[self::SUMMARY_OFFSET])); ?>" ><?php + e(tl('search_view_similar')); ?></a>.</p> </div> <?php } //end foreach - $this->paginationHelper->render($data['PAGING_QUERY'], $data['LIMIT'], $data['RESULTS_PER_PAGE'], $data['TOTAL_ROWS']); + $this->paginationHelper->render( + $data['PAGING_QUERY'], $data['LIMIT'], + $data['RESULTS_PER_PAGE'], $data['TOTAL_ROWS']); } } } diff --git a/views/settings_view.php b/views/settings_view.php index 6c2c48f33..08940ed91 100755 --- a/views/settings_view.php +++ b/views/settings_view.php @@ -50,42 +50,56 @@ class SettingsView extends View * @var string */ var $layout = "web"; - /** An array of names of element objects that the view uses to display itself - * @var array + /** + * Names of element objects that the view uses to display itself + * @var array */ var $elements = array("language"); - /** An array of names of helper objects that the view uses to help draw itself - * @var array + /** + * Names of helper objects that the view uses to help draw itself + * @var array */ var $helpers = array('options'); /** - * Draws the web page on which users can control their search settings. + * sDraws the web page on which users can control their search settings. * - * @param array $data contains anti CSRF token YIOOP_TOKEN, as well - * the language info and the current and possible per page settings + * @param array $data contains anti CSRF token YIOOP_TOKEN, as well + * the language info and the current and possible per page settings */ public function renderView($data) { ?> <div class="landing"> -<h1 class="logo"><a href="." ><img src="resources/yioop.png" alt="Yioop!" /></a><span> - <?php e(tl('settings_view_settings')); ?></span></h1> +<h1 class="logo"><a href="." ><img + src="resources/yioop.png" alt="Yioop!" 
/></a><span> - <?php + e(tl('settings_view_settings')); ?></span></h1> <div class="settings"> <form class="user_settings" method="get" action=""> <table> <tr> -<td class="table-label"><label for="per-page"><b><?php e(tl('settings_view_results_per_page')); ?></b></label></td><td class="table-input"> -<?php $this->optionsHelper->render("per-page", "perpage", $data['PER_PAGE'], $data['PER_PAGE_SELECTED']); ?> +<td class="table-label"><label for="per-page"><b><?php + e(tl('settings_view_results_per_page')); ?></b></label></td><td + class="table-input"><?php $this->optionsHelper->render( + "per-page", "perpage", $data['PER_PAGE'], $data['PER_PAGE_SELECTED']); ?> </td></tr> -<tr><td class="table-label"><label for="locale"><b><?php e(tl('settings_view_language_label')); ?></b></label></td><td class="table-input"> -<?php $this->languageElement->render($data); ?></td></tr> -<tr><td><input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /><button class="topmargin" type="submit" name="c" value="search"><?php e(tl('settings_view_return_yioop')); ?></button></td><td class="table-input"> -<button class="topmargin" type="submit" name="c" value="settings"><?php e(tl('settings_view_save')); ?></button> +<tr><td class="table-label"><label for="locale"><b><?php + e(tl('settings_view_language_label')); ?></b></label></td><td + class="table-input"><?php $this->languageElement->render($data); ?> +</td></tr> +<tr><td><input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /><button + class="topmargin" type="submit" name="c" value="search"><?php + e(tl('settings_view_return_yioop')); + ?></button></td><td class="table-input"> +<button class="topmargin" type="submit" name="c" value="settings"><?php + e(tl('settings_view_save')); ?></button> </td></tr> </table> </form> </div> <div class="setting-footer"><a - href="javascript:window.external.AddSearchProvider('<?php e(QUEUE_SERVER."yioopbar.xml");?>')"><?php + 
href="javascript:window.external.AddSearchProvider('<?php + e(QUEUE_SERVER."yioopbar.xml");?>')"><?php e(tl('setting_install_search_plugin')); ?></a>.</div> </div> diff --git a/views/signin_view.php b/views/signin_view.php index d414380f6..ca2af5e3e 100755 --- a/views/signin_view.php +++ b/views/signin_view.php @@ -59,28 +59,36 @@ class SigninView extends View public function renderView($data) { ?> <div class="landing"> - <h1 class="logo"><a href="."><img src="resources/yioop.png" alt="Yioop!" /></a><span> - <?php e(tl('signin_view_signin')); ?></span></h1> + <h1 class="logo"><a href="."><img src="resources/yioop.png" alt="Yioop!" + /></a><span> - <?php e(tl('signin_view_signin')); ?></span></h1> <form class="user_settings" method="post" action=""> <div class="login"> <table> <tr> - <td class="table-label" ><b><label for="username"><?php e(tl('signin_view_username')); ?></label>:</b></td><td class="table-input"><input - id="username" type="text" class="narrowfield" maxlength="80" name="u"/> + <td class="table-label" ><b><label for="username"><?php + e(tl('signin_view_username')); ?></label>:</b></td><td + class="table-input"><input id="username" type="text" + class="narrowfield" maxlength="80" name="u"/> </td><td></td></tr> <tr> - <td class="table-label" ><b><label for="password"><?php e(tl('signin_view_password')); ?></label>:</b></td><td class="table-input"><input - id="password" type="password" class="narrowfield" maxlength="80" name="p" /></td> - <td><input type="hidden" name="YIOOP_TOKEN" value="<?php e($data['YIOOP_TOKEN']); ?>" /> + <td class="table-label" ><b><label for="password"><?php + e(tl('signin_view_password')); ?></label>:</b></td><td + class="table-input"><input id="password" type="password" + class="narrowfield" maxlength="80" name="p" /></td> + <td><input type="hidden" name="YIOOP_TOKEN" value="<?php + e($data['YIOOP_TOKEN']); ?>" /> </td> </tr> <tr><td> </td><td class="center"> - <button type="submit" name="c" value="admin"><?php 
e(tl('signin_view_login')); ?></button> + <button type="submit" name="c" value="admin"><?php + e(tl('signin_view_login')); ?></button> </td><td> </td></tr> </table> </div> </form> -<div class="signin-exit"><a href="."><?php e(tl('signin_view_return_yioop')); ?></a></div> +<div class="signin-exit"><a href="."><?php + e(tl('signin_view_return_yioop')); ?></a></div> </div> <?php } diff --git a/views/view.php b/views/view.php index f41ba155f..c6550770a 100755 --- a/views/view.php +++ b/views/view.php @@ -43,18 +43,21 @@ if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();} */ abstract class View { - /** An array of names of element objects that the view uses to display itself - * @var array + /** + * Names of element objects that the view uses to display itself + * @var array */ var $elements = array(); - /** An array of names of helper objects that the view uses to help draw itself - * @var array + /** + * Names of helper objects that the view uses to help draw itself + * @var array */ var $helpers = array(); - /** An array of localized static page elements used by this view - * @var array + /** + * Localized static page elements used by this view + * @var array */ var $pages = array(); @@ -120,22 +123,24 @@ abstract class View } /** - * This method is responsible for drawing both the layout and the view. It should - * not be modified to change the display of then view. Instead, implement renderView. + * This method is responsible for drawing both the layout and the view. It + * should not be modified to change the display of then view. Instead, + * implement renderView. * - * @param array $data an array of values set up by a controller to be used in rendering - * the view + * @param array $data an array of values set up by a controller to be used + * in rendering the view */ function render($data) { $this->layout_object->render($data); } /** - * This abstract method is implemented in sub classes with code which actually draws - * the view. 
The current layouts render method calls this function. + * This abstract method is implemented in sub classes with code which + * actually draws the view. The current layouts render method calls this + * function. * - * @param array $data an array of values set up by a controller to be used in rendering - * the view + * @param array $data an array of values set up by a controller to be used + * in rendering the view */ abstract function renderView($data); }