diff --git a/src/configs/Config.php b/src/configs/Config.php index 49979cfad..08b9df413 100755 --- a/src/configs/Config.php +++ b/src/configs/Config.php @@ -367,7 +367,10 @@ if (MAINTENANCE_MODE && !empty($_SERVER["SERVER_ADDR"]) && exit(); } -/** */ +/** + * Work Directory (where Yioop stores stuff as it is running) to use during + * initial configuration and if the user doesn't opt for a custom location. + */ nsdefine('DEFAULT_WORK_DIRECTORY', PARENT_DIR . "/work_directory"); if (!nsdefined('WORK_DIRECTORY')) { @@ -394,11 +397,17 @@ nsdefine('FALLBACK_LOCALE_DIR', BASE_DIR . "/locale"); nsdefine('HASH_CAPTCHA', 2); /** Captcha mode indicating to use a classic image based captcha*/ nsdefine('IMAGE_CAPTCHA', 3); -/** */ +/** Account RECOVERY_MODE value to indicate no non admin account recovery */ nsdefine('NO_RECOVERY', 0); -/** */ +/** + * Account RECOVERY_MODE value to indicate recovery via email address on file + * with Yioop + */ nsdefine('EMAIL_RECOVERY', 1); -/** */ +/** + * Account RECOVERY_MODE value to indicate recovery via email address on file + * with Yioop as well as correct answer to a set of previously chosen questions + */ nsdefine('EMAIL_AND_QUESTIONS_RECOVERY', 2); if (file_exists(WORK_DIRECTORY . PROFILE_FILE_NAME)) { if ((file_exists(WORK_DIRECTORY . "/locale/en-US") && @@ -583,8 +592,9 @@ nsdefine('PUBLIC_GROUP_ID', 2); nsdefine('PUBLIC_USER_ID', 2); /** ID of the group to which all Yioop Help Wiki articles belong */ nsdefine('HELP_GROUP_ID', 3); -/** ID of the group to search sidebar wiki pages and edited search results - belong. +/** + * ID of the group to which search sidebar wiki pages and edited search results + * belong.
*/ nsconddefine('SEARCH_GROUP_ID', 4); /** Length of advertisement name string */ @@ -651,18 +661,21 @@ nsconddefine('CACHE_DNS_TIME', ONE_DAY); */ nsconddefine('SIZE_ROBOT_TXT_CACHE', 1000); /** - * + * crawl.ini robots_txt behavior value used to indicate that robots.txt + * files should always be followed */ nsdefine('ALWAYS_FOLLOW_ROBOTS', 1); /** - * + * crawl.ini robots_txt behavior value used to indicate that robots.txt + * files should always be followed except if the page is a landing page, in + * which case it is okay to crawl it. */ nsdefine('ALLOW_LANDING_ROBOTS', 2); /** - * + * crawl.ini robots_txt behavior value used to indicate that robots.txt files + * can be ignored and not followed. */ nsdefine('IGNORE_ROBOTS', 3); - /** * Whether the scheduler should track ETag and Expires headers. * If you want to turn this off set the variable to false in @@ -849,7 +862,7 @@ nsconddefine('CRASH_LOG_NAME', LOG_DIR . "/YioopCrashes.log"); nsconddefine('PROCESS_TIMEOUT', 15 * ONE_MINUTE); /** Number of seconds of no fetcher contact before crawl is deemed dead * The files C\CRAWL_DIR . "/schedules/{$this->channel}-crawl_status.txt" - is used to determine if CRAWL_TIMEOUT reached. + * is used to determine if CRAWL_TIMEOUT reached. * This is modified by QueueServer::writeAdminMessages only when * the crawl state (waiting/start crawl/ shutdown, etc) changes. * It is also updated when a fetcher sends an update command to @@ -907,11 +920,11 @@ nsconddefine('PUNCT', "\.|\,|\:|\;|\"|\'|\[|\/|\%|\?|-|\^" .
/** Number of total description deemed title */ nsconddefine('AD_HOC_TITLE_LENGTH', 50); /** Maximum number of simultaneous crawls (each concurrent crawl gets one - channel -*/ + * channel) + */ nsconddefine('MAX_CHANNELS', 10); /** Used to say number of bytes in histogram bar (stats page) for file - download sizes + * download sizes */ nsconddefine('DOWNLOAD_SIZE_INTERVAL', 5000); /** Used to say number of secs in histogram bar for file download times*/ @@ -923,8 +936,10 @@ nsconddefine('DOWNLOAD_TIME_INTERVAL', 0.5); nsconddefine('SEEN_URLS_BEFORE_UPDATE_SCHEDULER', MEMORY_PROFILE * 95); /** maximum number of urls to schedule to a given fetcher in one go */ nsconddefine('MAX_FETCH_SIZE', MEMORY_PROFILE * 1000); -/** maximum number url queue files to process in trying to create a - fetch batch from a tier queue*/ +/** + * maximum number of url queue files to process in trying to create a + * fetch batch from a tier queue + */ nsconddefine('MAX_FILES_PROCESS_BATCH', 5); /** fetcher must wait at least this long between multi-curl requests */ nsconddefine('MINIMUM_FETCH_LOOP_TIME', 5); @@ -934,8 +949,10 @@ nsconddefine('FETCH_SLEEP_TIME', 10); nsconddefine('QUEUE_SLEEP_TIME', 5); /** How often mirror script tries to synchronize with machine it is mirroring*/ nsconddefine('MIRROR_SYNC_FREQUENCY', ONE_HOUR); -/** How often mirror script tries to notify machine it is mirroring that it -is still alive*/ +/** + * How often mirror script tries to notify machine it is mirroring that it + * is still alive + */ nsconddefine('MIRROR_NOTIFY_FREQUENCY', ONE_MINUTE); /** Max time before current index shard is rebuilt (queue_server) */ nsconddefine('FORCE_SAVE_TIME', 10 * ONE_MINUTE); @@ -1010,11 +1027,18 @@ nsdefine('SUSPENDED_STATUS', 3); */ nsdefine('INVITED_STATUS', 4); /** - * + * Yioop's messaging system is implemented using a Yioop group for each + * user called the user's personal group.
Messages between users + * correspond to a thread in this personal group's feed with the title + * PERSONAL_GROUP_PREFIX and then the user the chat is with. */ nsdefine('PERSONAL_GROUP_PREFIX', 'Personal$'); /** - * + * In Yioop each Wiki page has its own set of file/image/etc resources/assets. + * To move resources between pages, one can cut/copy them to a clipboard, + * then paste to the new location from the clipboard. The clipboard is + * implemented as a wiki page in the user's personal group. The constant + * CLIPBOARD_PAGE_NAME is used to indicate how that page should be named. */ nsdefine('CLIPBOARD_PAGE_NAME', 'Clipboard'); /** diff --git a/src/controllers/components/SocialComponent.php b/src/controllers/components/SocialComponent.php index 23099bd43..defae9bcc 100644 --- a/src/controllers/components/SocialComponent.php +++ b/src/controllers/components/SocialComponent.php @@ -732,10 +732,13 @@ class SocialComponent extends Component implements CrawlConstants /** * Used to handle request related to usage statistics for groups * - * @param array &$data - * @param int $impression_type - * @param int $period - * @param int $item_id + * @param array &$data fields to be sent to the view with chart data + * @param int $impression_type what type $item_id is. Could have values: + * C\WIKI_IMPRESSION, C\THREAD_IMPRESSION, C\GROUP_IMPRESSION, + * C\QUERY_IMPRESSION + * @param int $period C\ONE_HOUR, C\ONE_DAY, etc that the chart drawn + * should be for + * @param int $item_id id of group, wiki page, thread, chart should be for */ public function makeImpressionChart(&$data, $impression_type, $period, $item_id, $chart_name = "chart", $chart_id = "chart") @@ -2159,9 +2162,13 @@ class SocialComponent extends Component implements CrawlConstants return $data; } /** + * Contains the logic needed to add a contact $data['CONTACT_ID'] to + * the list of $user_id's contacts for messaging.
* * @param int $user_id - * @param array $data + * @param int $user_id id of user adding contact to + * @param array $data current data to be sent to view after processing + * needs to contain a field $data['CONTACT_ID'] with the contact_id + * of user to add to contacts */ private function addContact($user_id, $data) { @@ -2228,7 +2235,18 @@ class SocialComponent extends Component implements CrawlConstants } } /** + * When a contact request is made the receiving user can either accept, + * ignore, or block. Accept means connection is made, ignore means + * the connection request is removed from the list of requests, but the + * requestor could send a new request, and block means that a new request + * will automatically be discarded. This method implements the ignore + * connection request for a user with id $user_id from a user + * $data['CONTACT_ID']. * + * @param int $user_id id of user who is doing the ignoring + * @param array $data current data to be sent to view after processing + * needs to contain a field $data['CONTACT_ID'] with the contact_id + * of user to ignore */ private function ignoreContact($user_id, $data) { @@ -2262,7 +2280,18 @@ class SocialComponent extends Component implements CrawlConstants tl('social_component_connection_request_ignored')); } /** + * When a contact request is made the receiving user can either accept, + * ignore, or block. Accept means connection is made, ignore means + * the connection request is removed from the list of requests, but the + * requestor could send a new request, and block means that a new request + * will automatically be discarded. This method implements the block + * connection request for a user with id $user_id from a user + * $data['CONTACT_ID'].
* + * @param int $user_id id of user who is doing the blocking + * @param array $data current data to be sent to view after processing + * needs to contain a field $data['CONTACT_ID'] with the contact_id + * of user to block */ private function blockContact($user_id, $data) { @@ -2302,7 +2331,16 @@ class SocialComponent extends Component implements CrawlConstants tl('social_component_connection_request_blocked')); } /** + * Sends the cleaned message $_REQUEST["description"] from user with id + * $user_id to user with $data['CONTACT_ID'] + * + * Expects a message $_REQUEST["description"] coming from message form + * to send. * + * @param int $user_id id of user who is sending the message + * @param array $data current data to be sent to view after processing + * needs to contain a field $data['CONTACT_ID'] with the contact_id + * of user to send to */ private function newMessage($user_id, $data) { diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php index db01daee9..bf8e25e85 100644 --- a/src/library/IndexDocumentBundle.php +++ b/src/library/IndexDocumentBundle.php @@ -224,10 +224,32 @@ class IndexDocumentBundle implements CrawlConstants if (!empty($description)) { $this->archive_info["DESCRIPTION"] = $description; } + $just_got_info = false; if(file_exists($archive_info_path)) { $this->archive_info = unserialize(file_get_contents( $archive_info_path)); - } else if (!$read_only_archive) { + $just_got_info = true; + } + if ($this->archive_info['VERSION'] < 3.1) { + $this->archive_info['RECORD_COMPRESSOR'] = + C\NS_COMPRESSORS . "GzipCompressor"; + } + $this->archive_info['RECORD_COMPRESSOR'] ??= + C\NS_COMPRESSORS . "NonCompressor"; + $record_compressor = $this->archive_info['RECORD_COMPRESSOR']; + $this->archive_info['BLOB_COMPRESSOR'] ??= + C\NS_COMPRESSORS .
"GzipCompressor"; + $blob_compressor = $this->archive_info['BLOB_COMPRESSOR']; + if ($just_got_info && + empty($this->archive_info['BPLUS_BLOB_COMPRESSOR'])) { + $this->archive_info['BPLUS_BLOB_COMPRESSOR'] = + C\NS_COMPRESSORS . "GzipCompressor"; + } + $this->archive_info['BPLUS_BLOB_COMPRESSOR'] ??= + C\NS_COMPRESSORS . "NonCompressor"; + $bplus_blob_compressor = $this->archive_info['BPLUS_BLOB_COMPRESSOR']; + $blob_compressor = C\NS_COMPRESSORS . "GzipCompressor"; + if (!$read_only_archive && !$just_got_info) { file_put_contents($archive_info_path, serialize($this->archive_info)); } @@ -241,11 +263,6 @@ class IndexDocumentBundle implements CrawlConstants file_put_contents($next_partition_path, $this->next_partition_to_add); } - $record_compressor = C\NS_COMPRESSORS . "NonCompressor"; - if ($this->archive_info['VERSION'] < 3.1) { - $record_compressor = C\NS_COMPRESSORS . "GzipCompressor"; - } - $blob_compressor = C\NS_COMPRESSORS . "GzipCompressor"; $this->documents = new PartitionDocumentBundle($dir_name . "/" . 
self::DOCUMENTS_FOLDER, ["PRIMARY KEY" => [self::DOC_ID, self::DOCID_LEN], @@ -274,7 +291,7 @@ class IndexDocumentBundle implements CrawlConstants self::DICTIONARY_FOLDER, ["PRIMARY KEY" => ["TERM", 16], "PARTITION" => "INT", "NUM_DOCS" => "INT", "NUM_OCCURRENCES" => "INT", "POSTINGS" => "BLOB"], $max_keys, - $record_compressor, $blob_compressor); + $record_compressor, $bplus_blob_compressor); } /** * Add the array of $pages to the documents PartitionDocumentBundle diff --git a/src/library/index_bundle_iterators/DocIterator.php b/src/library/index_bundle_iterators/DocIterator.php index e4fe7eb02..7f0384c45 100755 --- a/src/library/index_bundle_iterators/DocIterator.php +++ b/src/library/index_bundle_iterators/DocIterator.php @@ -55,7 +55,10 @@ class DocIterator extends IndexBundleIterator */ public $index_name; /** - * + * The index version affects how the iterator cycles through documents. + * There was a big change in index format between version 3 and prior + * formats + * @var int */ public $index_version; /** diff --git a/src/library/index_bundle_iterators/WordIterator.php b/src/library/index_bundle_iterators/WordIterator.php index 42b41a964..3b373e67f 100644 --- a/src/library/index_bundle_iterators/WordIterator.php +++ b/src/library/index_bundle_iterators/WordIterator.php @@ -51,6 +51,10 @@ use seekquarry\yioop\library\PartitionDocumentBundle; class WordIterator extends IndexBundleIterator { /** + * Weighting factor to multiply to make a doc-rank (approximate score of + * document based on its position in the index (when crawled)). + * This weight affects the amount doc_rank determines the overall score of + * a document.
*/ const DOC_RANK_WEIGHT = 50; /** @@ -109,7 +113,10 @@ class WordIterator extends IndexBundleIterator */ public $index_name; /** - * + * The index version affects how the iterator cycles through documents. + * There was a big change in index format between version 3 and prior + * formats + * @var int */ public $index_version; /** @@ -346,7 +353,10 @@ class WordIterator extends IndexBundleIterator return $results; } /** - * + * Given the current_offset, result_per_block, and index used, get the + * result_per_block postings starting from current_offset in the current + * direction (ascending or descending) for the term the word iterator + * iterates over from the index. */ public function getPostingsSliceResults() { @@ -393,9 +403,13 @@ class WordIterator extends IndexBundleIterator return $key_postings; } /** + * Add to a set of postings from a partition scoring information, position + * list information and info about the relative weights of given position + * based on the position list file and doc_map file. * - * @param array $postings - * @param int $partition + * @param array $postings posting data to add scoring information to + * @param int $partition which partition from the PartitionDocumentBundle + * postings are related to */ public function getDocKeyPositionsScoringInfo($postings, $partition) { @@ -640,7 +654,13 @@ class WordIterator extends IndexBundleIterator $this->current_block_fresh = false; } /** - * + * Computes a pair [posting_slice_offset, $doc_index], such that + * the $doc_index when shifted to make a doc_offset is greater than + * $doc_offset and posting_slice_offset is the offset of the first + * posting with this property.
+ * @param int $doc_offset that we are trying to find a posting whose + * doc_index has a bigger doc_offset + * @return array [posting_slice_offset, $doc_index] */ public function nextDocIndexOffsetPair($doc_offset) { diff --git a/src/models/GroupModel.php b/src/models/GroupModel.php index 2954b80de..1e18267cc 100644 --- a/src/models/GroupModel.php +++ b/src/models/GroupModel.php @@ -3638,7 +3638,10 @@ EOD; return false; } /** + * Get the names of the resources in the clipboard of $user_id * + * @param int $user_id of user we want to get clipboard resource names for + * @return array names of resources in clipboard */ public function getClipboardResourceNames($user_id) { @@ -3671,7 +3674,10 @@ EOD; return $resource_names; } /** + * Deletes the resources in the clipboard of user with id $user_id * + * @param int $user_id that we want to delete the contents of the clipboard + * for */ public function emptyClipFolder($user_id) { diff --git a/src/views/elements/ManageaccountElement.php b/src/views/elements/ManageaccountElement.php index 1ca1f2d21..3cbbe84e0 100755 --- a/src/views/elements/ManageaccountElement.php +++ b/src/views/elements/ManageaccountElement.php @@ -176,7 +176,11 @@ class ManageaccountElement extends Element <?php } /** + * Renders the form used to control account settings such as + * icon photo, email, password, etc. for a user * + * @param array $data has fields with current values for these settings + * (except password) so that forms can be pre-populated. */ public function renderEditUser($data) { diff --git a/src/views/elements/ManagegroupsElement.php b/src/views/elements/ManagegroupsElement.php index cff3c1332..c6b45e868 100755 --- a/src/views/elements/ManagegroupsElement.php +++ b/src/views/elements/ManagegroupsElement.php @@ -116,7 +116,9 @@ class ManagegroupsElement extends Element } } /** + * Render the table of groups that a user could join.
* + * @param array $data data from the controller used to draw this table */ public function renderJoinTable($data) { diff --git a/tests/BPlusTreeTest.php b/tests/BPlusTreeTest.php index d236311f7..d8e00a959 100644 --- a/tests/BPlusTreeTest.php +++ b/tests/BPlusTreeTest.php @@ -83,7 +83,9 @@ use seekquarry\yioop\library\UnitTest; { $table_dir = __DIR__ . self::TEST_DIR . $max_degree; $this->table_dirs[] = $table_dir; - return new L\BPlusTree($table_dir, $format, $max_degree); + return new L\BPlusTree($table_dir, $format, $max_degree, + C\NS_COMPRESSORS . "NonCompressor", + C\NS_COMPRESSORS . "NonCompressor"); } /** * Test putting items in bplustrees of odd sized nodes between 3 and 13 and