Add a return value for branch that didn't have one but needed one for crawl restarts, finished adding documentation for VersionManager, a=chris
Add a return value for branch that didn't have one but needed one for crawl restarts, finished adding documentation for VersionManager, a=chris
diff --git a/src/library/VersionManager.php b/src/library/VersionManager.php
index 7e1f9d435..4fcbf4752 100644
--- a/src/library/VersionManager.php
+++ b/src/library/VersionManager.php
@@ -487,6 +487,11 @@ class VersionManager
}
}
/**
+ * Restores the version of the repository that existed at a timestamp to
+ * the managed folder. Files present in the managed folder before the
+ * restore that also exist in the HEAD version of the repository are
+ * removed from the managed folder (they remain in the repository).
+ *
* @param int $timestamp of version what to restore to
* @param bool $lock whether or not a lock should be obtained before
* carrying out the operation
@@ -532,6 +537,8 @@ class VersionManager
return self::SUCCESS;
}
/**
+ * Gets the most recent version timestamp of a version in the repository
+ * that is less than or equal to the searched for timestamp.
* @param int $timestamp want to find the version in the repository
* closest to, but not exceeding this value.
* @param bool $lock whether or not a lock should be obtained before
@@ -622,8 +629,13 @@ class VersionManager
return $max_timestamp;
}
/**
- * @param int $start_time
- * @param int $end_time
+ * Gets all the version times that exist in the repository and which lie
+ * between two given time values.
+ *
+ * @param int $start_time look for timestamps in repository at or above
+ * this value
+ * @param int $end_time look for timestamps in repository at or below
+ * this value
* @param bool $lock whether or not a lock should be obtained before
* carrying out the operation
*/
@@ -668,7 +680,14 @@ class VersionManager
return $filtered_versions;
}
/**
- * @param string $hash_name
+ * Get the path in the repository archive that corresponds to the given
+ * hash name of an object that might be in the repository.
+ * Currently, the archive consists of two nested folders based on prefixes
+ * of objects stored in the repository, so this method calculates those
+ * prefixes and tacks them on to the archive path.
+ *
+ * @param string $hash_name to make an archive path for
+ * @return string path to $hash_name object in repository archive
*/
protected function getArchivePathHashName($hash_name)
{
@@ -678,10 +697,17 @@ class VersionManager
return $archive_path;
}
/**
+ * Gets the hash file name and path within the archive repository for
+ * a file name from the managed folder that existed at timestamp
+ *
* @param string $file name of file want to get the archive name and
* archive path for
- * @param int $timestamp
- * @param bool $make_path
+ * @param int $timestamp of the version to compute the archive filename
+ * and path for
+ * @param bool $make_path whether to make folders (if they don't exist
+ * already) in the archive repository for the path calculated
+ * @return array [hash_name, archive_path] to use for file in the
+ * repository for object given that version timestamp
*/
protected function getArchiveFileAndPath($file, $timestamp,
$make_path = false)
@@ -702,8 +728,14 @@ class VersionManager
return [$hash_name, $archive_path];
}
/**
- * @param int $timestamp
- * @param bool $make_path
+ * Versions are stored in the version subfolder of the archive repository
+ * within a year folder within a day folder. Given a timestamp this
+ * function returns the path of the version folder it would correspond to
+ *
+ * @param int $timestamp to find version folder for
+ * @param bool $make_path whether to make folders (if they don't exist
+ * already) in the archive repository for the path calculated
+ * @return string path to version folder
*/
protected function getVersionPath($timestamp, $make_path = false)
{
@@ -722,11 +754,24 @@ class VersionManager
return $version_path;
}
/**
- * @param string $file name of file
- * @param int $timestamp
- * @param bool $get_nearest_version
- * @param string $path_so_far
- * @param string $hash_path_so_far
+ * Given a file or directory and a timestamp finds the path to that
+ * file in the repository by traversing the repository and looking up
+ * the hash names of folders and subfolders in the repository.
+ * The timestamp that is looked up might not be the timestamp of the file
+ * because when a version was created that file might not have changed, so
+ * its old info is copied into the new version. This is why a traversal
+ * might be needed.
+ *
+ * @param string $file name of file to get path for
+ * @param int $timestamp which version in repository want to get file
+ * for
+ * @param bool $get_nearest_version if true then if $timestamp doesn't
+ * exist as a version get the nearest version after $timestamp
+ * @param string $path_so_far path in managed folder that this recursive
+ * procedure has already traversed
+ * @param string $hash_path_so_far corresponding path to path_so_far but
+ * in the archive repository
+ * @return string path to file in the archive repository
*/
protected function getHashNamePath($file, $timestamp,
$get_nearest_version = false, $path_so_far = "", $hash_path_so_far = "")
@@ -780,7 +825,8 @@ class VersionManager
return $path_info;
}
/**
- *
+ * Delete all the files from the managed folder which exist in the HEAD
+ * version in the archive repository
*
* @param bool $lock whether or not a lock should be obtained before
* carrying out the operation
@@ -801,10 +847,15 @@ class VersionManager
return self::SUCCESS;
}
/**
- * @param string $file
- * @param string $target
- * @param int $timestamp
- * @param string $hash_name_type
+ * Copies to the target folder in the managed folder a file or
+ * directory that existed at a given timestamp in the archive repository
+ * @param string $file file name of file or directory to copy to managed
+ * folder
+ * @param string $target folder to copy to
+ * @param int $timestamp which archive version of the file to copy
+ * @param string $hash_name_type either f, l, or d depending on whether
+ * the object in the repository is known to be a file, link, or folder.
+ * If left empty then this is looked up in the repository
* @param bool $lock whether or not a lock should be obtained before
* carrying out the operation
*/
@@ -906,8 +957,10 @@ class VersionManager
/**
* Recursively traverse a directory structure and call a callback function
*
- * @param string $dir directory name
- * @param int $timestamp
+ * @param string $dir name of folder to delete
+ * @param int $timestamp only deletes if the file existed in the version
+ * given by the timestamp in the repository (by default this is
+ * the timestamp associated with the HEAD version)
*/
protected function traverseUnlinkHead($dir, $timestamp = 0)
{
@@ -941,11 +994,12 @@ class VersionManager
return self::SUCCESS;
}
/**
- * This is a function used in the process of recursively deleting a
+ * This function is used in the process of recursively deleting a
* directory
*
* @param string $file_or_dir the filename or directory name to be deleted
- * @param int $timestamp
+ * @param int $timestamp only deletes if the file existed in the version
+ * given by the timestamp in the repository
*/
protected function deleteVersionFileOrFolder($file_or_dir, $timestamp = 0)
{
diff --git a/src/models/CrawlModel.php b/src/models/CrawlModel.php
index b37bb1655..4a40a2ee1 100755
--- a/src/models/CrawlModel.php
+++ b/src/models/CrawlModel.php
@@ -925,7 +925,7 @@ EOT;
serialize($crawl_time));
$this->execMachines("sendStartCrawlMessage",
$machine_urls, serialize($params), 0, true);
- return;
+ return true;
}
$statuses = CrawlDaemon::statuses();
if ($statuses == [] && $has_queue_server != -1) {
@@ -961,7 +961,7 @@ EOT;
}
/**
* Used to start QueueServers and Fetchers on current machine when
- * it is detected that someone tried to start a crawl bbut hadn't
+ * it is detected that someone tried to start a crawl but hadn't
* started any queue servers or fetchers.
*
* @param bool $has_queue_server whether current machine has a queue server