diff --git a/src/controllers/Controller.php b/src/controllers/Controller.php index 4e8cb5b5c..e2377a523 100755 --- a/src/controllers/Controller.php +++ b/src/controllers/Controller.php @@ -925,12 +925,14 @@ abstract class Controller } } /** - * Fuzzifies the data to achieve differential privacy + * Adds to an integer, $actual_value, epsilon-noise taken from an + * L_1 gaussian source centered at $actual_value to get an epsilon + * private, integer value. * - * @param int $actual_value - * @return int $level_privacy + * @param int $actual_value number we want to make private + * @return int $fuzzy_value number after noise added */ - public function addDifferentialPrivacy($actual_value, $level_privacy = 1) + public function addDifferentialPrivacy($actual_value) { $sigma = 1 / C\PRIVACY_EPSILON; $max_value = (2 * $actual_value) + 1; diff --git a/src/controllers/MachineController.php b/src/controllers/MachineController.php index 6803a4875..3a8aabf28 100644 --- a/src/controllers/MachineController.php +++ b/src/controllers/MachineController.php @@ -148,8 +148,7 @@ class MachineController extends Controller implements CrawlConstants case 'RestartFetcher': $error_log = C\CRASH_LOG_NAME; $id = $_REQUEST['id']; - $msg = "Restarting Machine " . $machine["NAME"] . 
- "Fetcher $id"; + $msg = "Restarting Fetcher $id"; $time_string = date("r", time()); $out_msg = "[$time_string] $msg\n"; $lines = L\tail(C\LOG_DIR."/$id-Fetcher.log", 10); diff --git a/src/library/PhraseParser.php b/src/library/PhraseParser.php index 8eb4f8c72..12ddb6c21 100755 --- a/src/library/PhraseParser.php +++ b/src/library/PhraseParser.php @@ -327,6 +327,8 @@ class PhraseParser * * @param string $string to extract terms from * @param string $lang IANA tag to look up stemmer under + * @param boolean $extract_sentences whether to extract sentences to + * be used by question answering system * @return array of terms and n word grams in the order they appeared in * string */ diff --git a/src/library/summarizers/CentroidWeightedSummarizer.php b/src/library/summarizers/CentroidWeightedSummarizer.php index dd9b7bdbd..4de3fd4d5 100644 --- a/src/library/summarizers/CentroidWeightedSummarizer.php +++ b/src/library/summarizers/CentroidWeightedSummarizer.php @@ -79,6 +79,8 @@ class CentroidWeightedSummarizer extends Summarizer /** * Generate a summary based on it closeness to the average sentence. * It also weights sentences based on the CMS that produced it. + * + * @param object $dom document object model of page to summarize * @param string $page complete raw page to generate the summary from. * @param string $lang language of the page to decide which stop words to * call proper tokenizer.php of the specified language. @@ -87,7 +89,6 @@ class CentroidWeightedSummarizer extends Summarizer */ public static function getSummary($dom, $page, $lang) { - $raw_doc = $page; $page = self::pageProcessing($page); /* Format the document to remove characters other than periods and alphanumerics. 
@@ -97,7 +98,7 @@ class CentroidWeightedSummarizer extends Summarizer /* Splitting into sentences */ $out_sentences = self::getSentences($page); $sentences = self::removeStopWords($out_sentences, $stop_obj); - $sentence_array = self::splitSentences($sentences, $lang, $raw_doc); + $sentence_array = self::splitSentences($sentences, $lang); $terms = $sentence_array[0]; $tf_per_sentence = $sentence_array[1]; $tf_per_sentence_normalized = $sentence_array[2]; @@ -294,7 +295,6 @@ class CentroidWeightedSummarizer extends Summarizer $page = preg_replace("/\&\#\d{3}(\d?)\;|\&\w+\;/", " ", $page); $page = preg_replace("/\</", " <", $page); $page = strip_tags($page); - if ($changed) { $page = preg_replace("/(\r?\n[\t| ]*){2}/", "\n", $page); } @@ -303,14 +303,15 @@ class CentroidWeightedSummarizer extends Summarizer return $page; } /** - * Calculate the term frequencies. + * Calculates an array with key terms and values their frequencies + * based on a supplied sentence + * * @param array $terms the list of all terms in the doc - * @param array $sentences the sentences in the doc - * @param string $doc complete raw page to generate the summary from. 
+ * @param array $sentence the sentences in the doc * @return array a two dimensional array where the word is the key and * the frequency is the value */ - public static function getTermFrequencies($terms, $sentence, $doc) + public static function getTermFrequencies($terms, $sentence) { $t = count($terms); $nk = []; @@ -322,8 +323,7 @@ class CentroidWeightedSummarizer extends Summarizer } $term_frequencies = []; for ($i = 0; $i < count($nk); $i++ ) { - //$additional_weight = self::getAdditionalWeight($terms[$i], $doc); - $term_frequencies[$terms[$i]] = $nk[$i];// + $additional_weight; + $term_frequencies[$terms[$i]] = $nk[$i]; } return $term_frequencies; } @@ -448,13 +448,11 @@ class CentroidWeightedSummarizer extends Summarizer * Split up the sentences and return an array with all of the needed parts * @param array $sentences the array of sentences to process * @param string $lang the current locale - * @param string $doc complete raw page to generate the summary from. * @return array an array with all of the needed parts */ - public static function splitSentences($sentences, $lang, $doc) + public static function splitSentences($sentences, $lang) { $result = []; - $terms = []; $tf_index = 0; $tf_per_sentence = []; @@ -463,7 +461,7 @@ class CentroidWeightedSummarizer extends Summarizer $temp_terms = PhraseParser::segmentSegment($sentence, $lang); $terms = array_merge($terms, $temp_terms); $tf_per_sentence[$tf_index] = - self::getTermFrequencies($temp_terms, $sentence, $doc); + self::getTermFrequencies($temp_terms, $sentence); $tf_per_sentence_normalized[$tf_index] = self::normalizeTermFrequencies($tf_per_sentence[$tf_index]); $tf_index++; diff --git a/src/library/summarizers/GraphBasedSummarizer.php b/src/library/summarizers/GraphBasedSummarizer.php index c90ed6751..22543817f 100644 --- a/src/library/summarizers/GraphBasedSummarizer.php +++ b/src/library/summarizers/GraphBasedSummarizer.php @@ -56,8 +56,13 @@ class GraphBasedSummarizer extends Summarizer */ const 
OUTPUT_FILE_PATH = "/temp/graph_summarizer_result.txt"; /** - * This is a graph based summarizer + * This summarizer uses a page rank-like algorithm to find the + * important sentences in a document. It then takes those sentences and + * compresses them to make a summary. The adjacency matrix used at the + * start of the algorithm determines how close the ith sentence is to the + * jth sentence using a distortion measure * + * @param object $dom document object model of page to summarize * @param string $page complete raw page to generate the summary from. * @param string $lang language of the page to decide which stop words to * call proper tokenizer.php of the specified language. @@ -76,8 +81,7 @@ class GraphBasedSummarizer extends Summarizer $sentences = self::removePunctuation($sentences); $sentences = PhraseParser::stemTermsK($sentences, $lang, true); $terms = self::getTerms($sentences, $lang); - $term_frequencies = self::getTermFrequencies($terms, $sentences, - $unmodified_doc); + $term_frequencies = self::getTermFrequencies($terms, $sentences); $term_frequencies_normalized = self::normalizeTermFrequencies($term_frequencies); $adjacency = self::computeAdjacency($term_frequencies_normalized, @@ -89,7 +93,10 @@ class GraphBasedSummarizer extends Summarizer return [$summary, []]; } /** - * Get the summary from the sentences + * Given an array of sentences and an array of their importance between 0 + * and 1, computes a summary based on compressing the most important + * sentences + * * @param array $sentences the sentences in the doc * @param array $p the sentence probabilities * @param string $lang language of the page to decide which stop words to @@ -134,6 +141,7 @@ class GraphBasedSummarizer extends Summarizer } /** * Find the largest value in the array and return it + * * @param array $v the array to search for the largest value * @return double the largest value found in the array */ @@ -152,7 +160,8 @@ class GraphBasedSummarizer extends Summarizer } /** * 
Compute the sentence ranks using a version of the famous - * page ranking algorithm developed by the founder of Google. + * page ranking algorithm. + * * @param array $adjacency the adjacency matrix generated for the * sentences * @return array the sentence ranks @@ -173,7 +182,7 @@ class GraphBasedSummarizer extends Summarizer /** * Compute the difference of squares * @param array $v the minuend vector - * @param array $m the subtrahend vector + * @param array $w the subtrahend vector * @result double the difference of the squares of vectors */ public static function squareDiff($v, $w) @@ -220,9 +229,10 @@ class GraphBasedSummarizer extends Summarizer for ($i = 0; $i < $n; $i++ ) { $result[$i][$i] = 0; for ($j = $i + 1; $j < $n; $j++ ) { - $result[$i][$j] = $result[$j][$i] = + $result[$i][$j] = self::findDistortion($sentences[$i], $sentences[$j], $term_frequencies_normalized, $lang, $doc); + $result[$j][$i] = $result[$i][$j]; } } return $result; @@ -263,11 +273,10 @@ class GraphBasedSummarizer extends Summarizer * Calculate the term frequencies. * @param array $terms the list of all terms in the doc * @param array $sentences the sentences in the doc - * @param string $doc complete raw page to generate the summary from. * @return array a two dimensional array where the word is the key and * the frequency is the value */ - public static function getTermFrequencies($terms, $sentences, $doc) + public static function getTermFrequencies($terms, $sentences) { $t = count($terms); $n = count($sentences); @@ -360,7 +369,7 @@ class GraphBasedSummarizer extends Summarizer return $result; } /** - * Calcluate the distortion measure. + * Calculate the distortion measure. * 1. Check each word in sentence1 to see if it exists in sentence2. 
* If the word X of sentence1 does not exist in sentence2, * square the score of word X and add to the sum @@ -373,7 +382,7 @@ class GraphBasedSummarizer extends Summarizer * with sentence1, in case the word Y is not in sentence1, * square the score of word Y and add to sum and increase * the number of not-common words by one. - * 4. At the end, calcualte the distortion between sentence1 and + * 4. At the end, calculate the distortion between sentence1 and * sentence2 by dividing sum by the number of not-common * words. * @param string $first_sentence the first sentence to compare @@ -490,7 +499,6 @@ class GraphBasedSummarizer extends Summarizer $page = preg_replace("/\&\#\d{3}(\d?)\;|\&\w+\;/u", " ", $page); $page = preg_replace("/\</u", " <", $page); $page = strip_tags($page); - if ($changed) { $page = preg_replace("/(\r?\n[\t| ]*){2}/u", "\n", $page); } diff --git a/src/library/summarizers/Summarizer.php b/src/library/summarizers/Summarizer.php index 258599cd9..e1adec1dd 100644 --- a/src/library/summarizers/Summarizer.php +++ b/src/library/summarizers/Summarizer.php @@ -61,7 +61,12 @@ class Summarizer */ public static $CLASS_FIVE_WEIGHT = 0; /** + * Compute a summary of a document in a given language * + * @param object $dom document object model used to locate items for + * summary + * @param string $page raw document sentences should be extracted from + * @param string $lang locale tag for language the summary is in */ public static function getSummary($dom, $page, $lang) { throw \Exception("Not defined"); diff --git a/src/locale/hi/resources/Tokenizer.php b/src/locale/hi/resources/Tokenizer.php index e18d285f5..4d90565ac 100755 --- a/src/locale/hi/resources/Tokenizer.php +++ b/src/locale/hi/resources/Tokenizer.php @@ -143,10 +143,12 @@ class Tokenizer return $word; } /** - * The method ttakes as input a phrase and returns a string with each + * The method takes as input a phrase and returns a string with each * term tagged with a part of speech. 
* - * @param string $phrase which is the input string to be tagged. + * @param string $phrase text to add parts of speech tags to + * @param bool $with_tokens whether to include the terms and the tags + * in the output string or just the part of speech tags * @return string $tagged_phrase which is a string of format term~pos */ public static function tagPartsOfSpeechPhrase($phrase, $with_tokens = true) @@ -282,10 +284,12 @@ class Tokenizer return $result; } /** - * This menthod is used to simplify the different tags of speech to a + * This method is used to simplify the different tags of speech to a * common form * * @param array $tagged_tokens which is an array of tokens assigned tags. + * @param bool $with_tokens whether to include the terms and the tags + * in the output string or just the part of speech tags * @return string $tagged_phrase which is a string fo form token~pos */ public static function taggedPartOfSpeechTokensToString($tagged_tokens, diff --git a/src/locale/nl/resources/Tokenizer.php b/src/locale/nl/resources/Tokenizer.php index e64ccc78c..83f65132c 100755 --- a/src/locale/nl/resources/Tokenizer.php +++ b/src/locale/nl/resources/Tokenizer.php @@ -372,7 +372,7 @@ class Tokenizer * Replace a string based on a regex expression * * @param string $word the string to search for regex replacement - * @param string $reges the regex to use to find and replacement + * @param string $regex the regex to use to find and replace * @param string $replace the string to replace if the pattern is matched * @param int $offset the int to start to look for the regex replacement * @return string the string with the characters replaced if the regex diff --git a/src/models/AdvertisementModel.php b/src/models/AdvertisementModel.php index 50a56591b..d2c0a9644 100644 --- a/src/models/AdvertisementModel.php +++ b/src/models/AdvertisementModel.php @@ -170,7 +170,7 @@ class AdvertisementModel extends Model /** * Update an existing advertisement in the database * - * @param 
object $advertisement_advertisement to be updated + * @param object $advertisement advertisement to be updated * @param string $id an advertisement id */ public function updateAdvertisement($advertisement, $id) diff --git a/src/models/ImpressionModel.php b/src/models/ImpressionModel.php index e38cdba7e..94d26edea 100644 --- a/src/models/ImpressionModel.php +++ b/src/models/ImpressionModel.php @@ -85,10 +85,7 @@ class ImpressionModel extends Model * model for this function, but didn't want to create a new model for * just this one method. * - * @param int $item_id id of particular item we are adding analytic - * information of - * @param int $type_id type of particular item we are adding analytic - * information of (group, wiki, thread, etc) + * @param string $query search query we are adding an impression for */ public function addQueryImpression($query) { @@ -98,7 +95,7 @@ class ImpressionModel extends Model $result = $db->execute($sql, [$query_hash]); $row = $db->fetchArray($result); if (empty($row['ID'])) { - $sql = "INSERT INTO QUERY_ITEM(QUERY_HASH, QUERY, CREATION) + $sql = "INSERT INTO QUERY_ITEM (QUERY_HASH, QUERY, CREATION) VALUES (?, ?, ?)"; $result = $db->execute($sql, [$query_hash, $query, time()]); $this->initWithDb(C\PUBLIC_USER_ID, $db->insertID("QUERY_ITEM"), diff --git a/src/views/elements/WikiElement.php b/src/views/elements/WikiElement.php index 4fafd07f6..9d5c8942d 100644 --- a/src/views/elements/WikiElement.php +++ b/src/views/elements/WikiElement.php @@ -1077,15 +1077,21 @@ class WikiElement extends Element implements CrawlConstants } } /** - * Used to render the dropdown for paths within media lists folders, - * recent wiki pages and groups a user has been to + * Used to render the dropdown that lists paths within media lists folders, + * recent wiki pages, and groups a user has been to * + * @param string $dropdown_id element id of select tag to be used for + * dropdown * @param array $data set up in controller and SocialComponent with * data 
fields view and this element are supposed to render - * @param string $folder_prefix url for root media list folder. - * @param string $root_name name of root media list folder (defaults + * @param array $options if nonempty, then this should be items, key-values + * in the form (url => label), to list first in dropdown + * @param string $selected_url url which is selected by default in dropdown. + * @param string $top_name name of root media list folder (defaults * to something like "Root Folder" in the language of current locale) - * @param string $render_type + * @param string $render_type can be: "paths" if just listing folder path + * in wiki page resource folder, "just_groups_and_pages" if want a list + * of recent groups and wiki pages viewed, or "all" if want both */ public function renderPath($dropdown_id, $data, $options, $selected_url = "", $top_name = "", $render_type = "paths") diff --git a/src/views/helpers/HelpbuttonHelper.php b/src/views/helpers/HelpbuttonHelper.php index 05d3812d9..bef540680 100644 --- a/src/views/helpers/HelpbuttonHelper.php +++ b/src/views/helpers/HelpbuttonHelper.php @@ -77,9 +77,8 @@ class HelpbuttonHelper extends Helper * This method is used to render the help button, * given a help point CSRF token and target controller name. * - * @param $help_point_id - used to set as help button id - * @param $csrf_token_value - CSRF token to make api call/open edit link - * @param $target_controller - target controller to remember the view. + * @param $help_point_id used to set as help button id + * @param $csrf_token_value CSRF token to make api call/open edit link * @return String button html. */ public function render($help_point_id, $csrf_token_value)