Fixes a NOTICE that was preventing fetcher restarts, continuing to add more documentation, a=chris

Chris Pollett [2017-01-13 01:Jan:th]

Fixes a NOTICE that was preventing fetcher restarts, continuing to add more documentation, a=chris

Filename
src/controllers/Controller.php
src/controllers/MachineController.php
src/library/PhraseParser.php
src/library/summarizers/CentroidWeightedSummarizer.php
src/library/summarizers/GraphBasedSummarizer.php
src/library/summarizers/Summarizer.php
src/locale/hi/resources/Tokenizer.php
src/locale/nl/resources/Tokenizer.php
src/models/AdvertisementModel.php
src/models/ImpressionModel.php
src/views/elements/WikiElement.php
src/views/helpers/HelpbuttonHelper.php

diff --git a/src/controllers/Controller.php b/src/controllers/Controller.php
index 4e8cb5b5c..e2377a523 100755
--- a/src/controllers/Controller.php
+++ b/src/controllers/Controller.php
@@ -925,12 +925,14 @@ abstract class Controller
         }
      }
     /**
-     * Fuzzifies the data to achieve differential privacy
+     * Adds to an integer, $actual_value, epsilon-noise taken from an
+     * L_1 gaussian source centered at $actual_value to get an epsilon
+     * private, integer value.
      *
-     * @param int $actual_value
-     * @return int $level_privacy
+     * @param int $actual_value number we want to make private
+     * @return int $fuzzy_value number after noise added
      */
-    public function addDifferentialPrivacy($actual_value, $level_privacy = 1)
+    public function addDifferentialPrivacy($actual_value)
     {
         $sigma = 1 / C\PRIVACY_EPSILON;
         $max_value = (2 * $actual_value) + 1;
diff --git a/src/controllers/MachineController.php b/src/controllers/MachineController.php
index 6803a4875..3a8aabf28 100644
--- a/src/controllers/MachineController.php
+++ b/src/controllers/MachineController.php
@@ -148,8 +148,7 @@ class MachineController extends Controller implements CrawlConstants
             case 'RestartFetcher':
                 $error_log = C\CRASH_LOG_NAME;
                 $id = $_REQUEST['id'];
-                $msg = "Restarting Machine " . $machine["NAME"] .
-                    "Fetcher $id";
+                $msg = "Restarting Fetcher $id";
                 $time_string = date("r", time());
                 $out_msg = "[$time_string] $msg\n";
                 $lines = L\tail(C\LOG_DIR."/$id-Fetcher.log", 10);
diff --git a/src/library/PhraseParser.php b/src/library/PhraseParser.php
index 8eb4f8c72..12ddb6c21 100755
--- a/src/library/PhraseParser.php
+++ b/src/library/PhraseParser.php
@@ -327,6 +327,8 @@ class PhraseParser
      *
      * @param string $string to extract terms from
      * @param string $lang IANA tag to look up stemmer under
+     * @param boolean $extract_sentences whether to extract sentences to
+     *  be used by question answering system
      * @return array of terms and n word grams in the order they appeared in
      *     string
      */
diff --git a/src/library/summarizers/CentroidWeightedSummarizer.php b/src/library/summarizers/CentroidWeightedSummarizer.php
index dd9b7bdbd..4de3fd4d5 100644
--- a/src/library/summarizers/CentroidWeightedSummarizer.php
+++ b/src/library/summarizers/CentroidWeightedSummarizer.php
@@ -79,6 +79,8 @@ class CentroidWeightedSummarizer extends Summarizer
     /**
      * Generate a summary based on it closeness to the average sentence.
      * It also weights sentences based on the CMS that produced it.
+     *
+     * @param object $dom document object model of page to summarize
      * @param string $page complete raw page to generate the summary from.
      * @param string $lang language of the page to decide which stop words to
      *     call proper tokenizer.php of the specified language.
@@ -87,7 +89,6 @@ class CentroidWeightedSummarizer extends Summarizer
      */
     public static function getSummary($dom, $page, $lang)
     {
-        $raw_doc = $page;
         $page = self::pageProcessing($page);
         /* Format the document to remove characters other than periods and
            alphanumerics.
@@ -97,7 +98,7 @@ class CentroidWeightedSummarizer extends Summarizer
         /* Splitting into sentences */
         $out_sentences = self::getSentences($page);
         $sentences = self::removeStopWords($out_sentences, $stop_obj);
-        $sentence_array = self::splitSentences($sentences, $lang, $raw_doc);
+        $sentence_array = self::splitSentences($sentences, $lang);
         $terms = $sentence_array[0];
         $tf_per_sentence = $sentence_array[1];
         $tf_per_sentence_normalized = $sentence_array[2];
@@ -294,7 +295,6 @@ class CentroidWeightedSummarizer extends Summarizer
         $page = preg_replace("/\&\#\d{3}(\d?)\;|\&\w+\;/", " ", $page);
         $page = preg_replace("/\</", " <", $page);
         $page = strip_tags($page);
-
         if ($changed) {
             $page = preg_replace("/(\r?\n[\t| ]*){2}/", "\n", $page);
         }
@@ -303,14 +303,15 @@ class CentroidWeightedSummarizer extends Summarizer
         return $page;
     }
     /**
-     * Calculate the term frequencies.
+     * Calculates an array with key terms and values their frequencies
+     * based on a supplied sentence
+     *
      * @param array $terms the list of all terms in the doc
-     * @param array $sentences the sentences in the doc
-     * @param string $doc complete raw page to generate the summary from.
+     * @param array $sentence the sentence to compute term frequencies for
      * @return array a two dimensional array where the word is the key and
      *      the frequency is the value
      */
-    public static function getTermFrequencies($terms, $sentence, $doc)
+    public static function getTermFrequencies($terms, $sentence)
     {
         $t = count($terms);
         $nk = [];
@@ -322,8 +323,7 @@ class CentroidWeightedSummarizer extends Summarizer
         }
         $term_frequencies = [];
         for ($i = 0; $i <  count($nk); $i++ ) {
-            //$additional_weight = self::getAdditionalWeight($terms[$i], $doc);
-            $term_frequencies[$terms[$i]] = $nk[$i];// + $additional_weight;
+            $term_frequencies[$terms[$i]] = $nk[$i];
         }
         return $term_frequencies;
     }
@@ -448,13 +448,11 @@ class CentroidWeightedSummarizer extends Summarizer
      * Split up the sentences and return an array with all of the needed parts
      * @param array $sentences the array of sentences to process
      * @param string $lang the current locale
-     * @param string $doc complete raw page to generate the summary from.
      * @return array an array with all of the needed parts
      */
-    public static function splitSentences($sentences, $lang, $doc)
+    public static function splitSentences($sentences, $lang)
     {
         $result = [];
-
         $terms = [];
         $tf_index = 0;
         $tf_per_sentence = [];
@@ -463,7 +461,7 @@ class CentroidWeightedSummarizer extends Summarizer
             $temp_terms = PhraseParser::segmentSegment($sentence, $lang);
             $terms = array_merge($terms, $temp_terms);
             $tf_per_sentence[$tf_index] =
-                self::getTermFrequencies($temp_terms, $sentence, $doc);
+                self::getTermFrequencies($temp_terms, $sentence);
             $tf_per_sentence_normalized[$tf_index] =
                 self::normalizeTermFrequencies($tf_per_sentence[$tf_index]);
             $tf_index++;
diff --git a/src/library/summarizers/GraphBasedSummarizer.php b/src/library/summarizers/GraphBasedSummarizer.php
index c90ed6751..22543817f 100644
--- a/src/library/summarizers/GraphBasedSummarizer.php
+++ b/src/library/summarizers/GraphBasedSummarizer.php
@@ -56,8 +56,13 @@ class GraphBasedSummarizer extends Summarizer
      */
     const OUTPUT_FILE_PATH = "/temp/graph_summarizer_result.txt";
     /**
-     * This is a graph based summarizer
+     * This summarizer uses a page rank-like algorithm to find the
+     * important sentences in a document. It then takes those sentences and
+     * compresses them to make a summary. The adjacency matrix used at the
+     * start of the algorithm determines how close the ith sentence is to the
+     * jth sentence using a distortion measure.
      *
+     * @param object $dom document object model of page to summarize
      * @param string $page complete raw page to generate the summary from.
      * @param string $lang language of the page to decide which stop words to
      *     call proper tokenizer.php of the specified language.
@@ -76,8 +81,7 @@ class GraphBasedSummarizer extends Summarizer
         $sentences = self::removePunctuation($sentences);
         $sentences = PhraseParser::stemTermsK($sentences, $lang, true);
         $terms = self::getTerms($sentences, $lang);
-        $term_frequencies = self::getTermFrequencies($terms, $sentences,
-            $unmodified_doc);
+        $term_frequencies = self::getTermFrequencies($terms, $sentences);
         $term_frequencies_normalized =
             self::normalizeTermFrequencies($term_frequencies);
         $adjacency = self::computeAdjacency($term_frequencies_normalized,
@@ -89,7 +93,10 @@ class GraphBasedSummarizer extends Summarizer
         return [$summary, []];
     }
     /**
-     * Get the summary from the sentences
+     * Given an array of sentences and an array of their importance between 0
+     * and 1, computes a summary based on compressing the most important
+     * sentences
+     *
      * @param array $sentences the sentences in the doc
      * @param array $p the sentence probabilities
      * @param string $lang language of the page to decide which stop words to
@@ -134,6 +141,7 @@ class GraphBasedSummarizer extends Summarizer
     }
     /**
      * Find the largest value in the array and return it
+     *
      * @param array $v the array to search for the largest value
      * @return double the largest value found in the array
      */
@@ -152,7 +160,8 @@ class GraphBasedSummarizer extends Summarizer
     }
     /**
      * Compute the sentence ranks using a version of the famous
-     * page ranking algorithm developed by the founder of Google.
+     * page ranking algorithm.
+     *
      * @param array $adjacency the adjacency matrix generated for the
      *      sentences
      * @return array the sentence ranks
@@ -173,7 +182,7 @@ class GraphBasedSummarizer extends Summarizer
     /**
      * Compute the difference of squares
      * @param array $v the  minuend vector
-     * @param array $m the subtrahend vector
+     * @param array $w the subtrahend vector
      * @result double the difference of the squares of vectors
      */
     public static function squareDiff($v, $w)
@@ -220,9 +229,10 @@ class GraphBasedSummarizer extends Summarizer
         for ($i = 0; $i < $n; $i++ ) {
             $result[$i][$i] = 0;
             for ($j = $i + 1; $j < $n; $j++ ) {
-                $result[$i][$j] = $result[$j][$i] =
+                $result[$i][$j] =
                     self::findDistortion($sentences[$i], $sentences[$j],
                     $term_frequencies_normalized, $lang, $doc);
+                $result[$j][$i] = $result[$i][$j];
             }
         }
         return $result;
@@ -263,11 +273,10 @@ class GraphBasedSummarizer extends Summarizer
      * Calculate the term frequencies.
      * @param array $terms the list of all terms in the doc
      * @param array $sentences the sentences in the doc
-     * @param string $doc complete raw page to generate the summary from.
      * @return array a two dimensional array where the word is the key and
      *      the frequency is the value
      */
-    public static function getTermFrequencies($terms, $sentences, $doc)
+    public static function getTermFrequencies($terms, $sentences)
     {
         $t = count($terms);
         $n = count($sentences);
@@ -360,7 +369,7 @@ class GraphBasedSummarizer extends Summarizer
         return $result;
     }
     /**
-     * Calcluate the distortion measure.
+     * Calculate the distortion measure.
      * 1. Check each word in sentence1 to see if it exists in sentence2.
      * If the word X of sentence1 does not exist in sentence2,
      * square the score of word X and add to the sum
@@ -373,7 +382,7 @@ class GraphBasedSummarizer extends Summarizer
      * with sentence1, in case the word Y is not in sentence1,
      * square the score of word Y and add to sum and increase
      * the number of not-common words by one.
-     * 4. At the end, calcualte the distortion between sentence1 and
+     * 4. At the end, calculate the distortion between sentence1 and
      * sentence2 by dividing sum by the number of not-common
      * words.
      * @param string $first_sentence the first sentence to compare
@@ -490,7 +499,6 @@ class GraphBasedSummarizer extends Summarizer
         $page = preg_replace("/\&\#\d{3}(\d?)\;|\&\w+\;/u", " ", $page);
         $page = preg_replace("/\</u", " <", $page);
         $page = strip_tags($page);
-
         if ($changed) {
             $page = preg_replace("/(\r?\n[\t| ]*){2}/u", "\n", $page);
         }
diff --git a/src/library/summarizers/Summarizer.php b/src/library/summarizers/Summarizer.php
index 258599cd9..e1adec1dd 100644
--- a/src/library/summarizers/Summarizer.php
+++ b/src/library/summarizers/Summarizer.php
@@ -61,7 +61,12 @@ class Summarizer
      */
     public static $CLASS_FIVE_WEIGHT = 0;
     /**
+     * Compute a summary of a document in a given language
      *
+     * @param object $dom document object model used to locate items for
+     *      summary
+     * @param string $page raw document sentences should be extracted from
+     * @param string $lang locale tag for language the summary is in
      */
     public static function getSummary($dom, $page, $lang) {
         throw \Exception("Not defined");
diff --git a/src/locale/hi/resources/Tokenizer.php b/src/locale/hi/resources/Tokenizer.php
index e18d285f5..4d90565ac 100755
--- a/src/locale/hi/resources/Tokenizer.php
+++ b/src/locale/hi/resources/Tokenizer.php
@@ -143,10 +143,12 @@ class Tokenizer
         return $word;
     }
     /**
-     * The method ttakes as input a phrase and returns a string with each
+     * The method takes as input a phrase and returns a string with each
      * term tagged with a part of speech.
      *
-     * @param string $phrase which is the input string to be tagged.
+     * @param string $phrase text to add parts of speech tags to
+     * @param bool $with_tokens whether to include the terms and the tags
+     *      in the output string or just the part of speech tags
      * @return string $tagged_phrase which is a string of format term~pos
      */
     public static function tagPartsOfSpeechPhrase($phrase, $with_tokens = true)
@@ -282,10 +284,12 @@ class Tokenizer
         return $result;
     }
     /**
-     * This menthod is used to simplify the different tags of speech to a
+     * This method is used to simplify the different tags of speech to a
      * common form
      *
      * @param array $tagged_tokens which is an array of tokens assigned tags.
+     * @param bool $with_tokens whether to include the terms and the tags
+     *      in the output string or just the part of speech tags
      * @return string $tagged_phrase which is a string fo form token~pos
      */
     public static function taggedPartOfSpeechTokensToString($tagged_tokens,
diff --git a/src/locale/nl/resources/Tokenizer.php b/src/locale/nl/resources/Tokenizer.php
index e64ccc78c..83f65132c 100755
--- a/src/locale/nl/resources/Tokenizer.php
+++ b/src/locale/nl/resources/Tokenizer.php
@@ -372,7 +372,7 @@ class Tokenizer
      * Replace a string based on a regex expression
      *
      * @param string $word the string to search for regex replacement
-     * @param string $reges the regex to use to find and replacement
+     * @param string $regex the regex to use for find and replace
      * @param string $replace the string to replace if the pattern is matched
      * @param int $offset the int to start to look for the regex replacement
      * @return string the string with the characters replaced if the regex
diff --git a/src/models/AdvertisementModel.php b/src/models/AdvertisementModel.php
index 50a56591b..d2c0a9644 100644
--- a/src/models/AdvertisementModel.php
+++ b/src/models/AdvertisementModel.php
@@ -170,7 +170,7 @@ class AdvertisementModel extends Model
     /**
      * Update an existing advertisement in the database
      *
-     * @param object $advertisement_advertisement to be updated
+     * @param object $advertisement advertisement to be updated
      * @param string $id an advertisement id
      */
     public function updateAdvertisement($advertisement, $id)
diff --git a/src/models/ImpressionModel.php b/src/models/ImpressionModel.php
index e38cdba7e..94d26edea 100644
--- a/src/models/ImpressionModel.php
+++ b/src/models/ImpressionModel.php
@@ -85,10 +85,7 @@ class ImpressionModel extends Model
      * model for this function, but didn't want to create a new model for
      * just this one method.
      *
-     * @param int $item_id id of particular item we are adding analytic
-     *      information of
-     * @param int $type_id type of particular item we are adding analytic
-     *      information of (group, wiki, thread, etc)
+     * @param string $query search query we are adding an impression for
      */
     public function addQueryImpression($query)
     {
@@ -98,7 +95,7 @@ class ImpressionModel extends Model
         $result = $db->execute($sql, [$query_hash]);
         $row = $db->fetchArray($result);
         if (empty($row['ID'])) {
-            $sql = "INSERT INTO QUERY_ITEM(QUERY_HASH, QUERY, CREATION)
+            $sql = "INSERT INTO QUERY_ITEM (QUERY_HASH, QUERY, CREATION)
                 VALUES (?, ?, ?)";
             $result = $db->execute($sql, [$query_hash, $query, time()]);
             $this->initWithDb(C\PUBLIC_USER_ID, $db->insertID("QUERY_ITEM"),
diff --git a/src/views/elements/WikiElement.php b/src/views/elements/WikiElement.php
index 4fafd07f6..9d5c8942d 100644
--- a/src/views/elements/WikiElement.php
+++ b/src/views/elements/WikiElement.php
@@ -1077,15 +1077,21 @@ class WikiElement extends Element implements CrawlConstants
         }
     }
     /**
-     * Used to render the dropdown for paths within media lists folders,
-     * recent wiki pages and groups a user has been to
+     * Used to render the dropdown that lists paths within media lists folders,
+     * recent wiki pages, and groups a user has been to
      *
+     * @param string $dropdown_id element id of select tag to be used for
+     *      dropdown
      * @param array $data set up in controller and SocialComponent with
      *      data fields view and this element are supposed to render
-     * @param string $folder_prefix url for root media list folder.
-     * @param string $root_name name of root media list folder (defaults
+     * @param array $options if nonempty, then this should be items, key-values
+     *      in the form (url => label), to list first in dropdown
+     * @param string $selected_url url which is selected by default in dropdown.
+     * @param string $top_name name of root media list folder (defaults
      *      to something like "Root Folder" in the language of current locale)
-     * @param string $render_type
+     * @param string $render_type can be: "paths" if just listing folder path
+     *      in wiki page resource folder, "just_groups_and_pages" if want a list
+     *      of recent groups and wiki pages viewed, or "all" if want both
      */
     public function renderPath($dropdown_id, $data, $options,
         $selected_url = "", $top_name = "", $render_type = "paths")
diff --git a/src/views/helpers/HelpbuttonHelper.php b/src/views/helpers/HelpbuttonHelper.php
index 05d3812d9..bef540680 100644
--- a/src/views/helpers/HelpbuttonHelper.php
+++ b/src/views/helpers/HelpbuttonHelper.php
@@ -77,9 +77,8 @@ class HelpbuttonHelper extends Helper
      * This method is used to render the help button,
      * given a help point  CSRF token and target controller name.
      *
-     * @param  $help_point_id - used to set as help button id
-     * @param  $csrf_token_value - CSRF token to make api call/open edit link
-     * @param  $target_controller - target controller to remember the view.
+     * @param  $help_point_id used to set as help button id
+     * @param  $csrf_token_value  CSRF token to make api call/open edit link
      * @return String button html.
      */
     public function render($help_point_id, $csrf_token_value)

ViewGit