tweaks to try to speed up BPlusTree find operation, a=chris
tweaks to try to speed up BPlusTree find operation, a=chris
diff --git a/src/library/BPlusTree.php b/src/library/BPlusTree.php
index d5baddf52..682d3f78d 100644
--- a/src/library/BPlusTree.php
+++ b/src/library/BPlusTree.php
@@ -136,6 +136,7 @@ class BPlusTree
* Used to keep track of when this instance was created, as part of managing
* file handles expiration (could be set/updated externally to reflect
* some other instance using the BPlusTree)
+ * @var int
*/
public $instance_time;
/**
@@ -143,6 +144,23 @@ class BPlusTree
* @var string
*/
public $key_field;
+ /**
+ * Last folder path of a find operation, provided this was cacheable
+ * @var string
+ */
+ public $last_find_folder = null;
+ /**
+ * Last encoded key used for a find operation, provided this was cacheable
+ * Used to avoid recomputing the path down the tree when it would be the same.
+ * @var string
+ */
+ public $last_find_key = null;
+ /**
+ * First key of next node after returned node for the last find operation,
+ * provided this was cacheable
+ * @var string
+ */
+ public $last_find_next_key = null;
/**
* Storage for root node of the B-Tree
* @var object
@@ -426,6 +444,9 @@ class BPlusTree
*/
public function splitRootNode()
{
+ $this->last_find_folder = null;
+ $this->last_find_key = null;
+ $this->last_find_next_key = null;
$folder = $this->folder;
$this->add_archive_cache = [null, "", -1];
$this->get_archive_cache = [null, "", -1];
@@ -479,6 +500,9 @@ class BPlusTree
*/
public function splitRecordsInLeaf($node_path, $node)
{
+ $this->last_find_folder = null;
+ $this->last_find_key = null;
+ $this->last_find_next_key = null;
$parent_folder = $this->getParentFolder($node_path);
$archive_prefix = self::ARCHIVE_PREFIX;
$temp_node_name = self::TEMP_NODE_NAME;
@@ -651,14 +675,22 @@ class BPlusTree
public function find($key, $is_encoded_key = false)
{
$encode_key = ($is_encoded_key) ? $key : rawurlencode($key);
+ if (!empty($this->last_find_folder) && !empty($this->last_find_key) &&
+ $this->last_find_key <= $encode_key
+ && !empty($this->last_find_next_key) &&
+ $encode_key < $this->last_find_next_key) {
+ return $this->last_find_folder;
+ }
$current_folder = $this->folder;
$cache = & $this->tree_path_cache;
$node_prefix = self::NODE_PREFIX;
$least_node_name = self::LEAST_NODE_NAME;
$node_prefix_and_key = self::NODE_PREFIX . $encode_key;
while (isset($cache[$current_folder]) || is_dir($current_folder)) {
+ $current_prefix = "$current_folder/$node_prefix";
+ $len_current_prefix = strlen($current_prefix);
if (!isset($cache[$current_folder])) {
- $cache[$current_folder] = glob("$current_folder/$node_prefix*");
+ $cache[$current_folder] = glob("$current_prefix*");
}
$nodes = $cache[$current_folder];
if (empty($nodes)) {
@@ -668,16 +700,45 @@ class BPlusTree
break;
}
$exact_node = "$current_folder/$node_prefix_and_key";
- $first = true;
- $next_node = "$current_folder/$least_node_name";
- foreach ($nodes as $node) {
- if (($first || $next_node < $node) &&
- $node <= $exact_node) {
- $first = false;
- $next_node = $node;
+ $least_node = "$current_folder/$least_node_name";
+ $first_index = 0;
+ $last_index = count($nodes) - 1;
+ $this->last_find_folder = null;
+ $this->last_find_key = null;
+ $this->last_find_next_key = null;
+ if ($exact_node < $nodes[$first_index]) {
+ $this->last_find_folder = $least_node;
+ $this->last_find_key = $encode_key;
+ $this->last_find_next_key = substr($nodes[$first_index],
+ $len_current_prefix);
+ $current_folder = $least_node;
+ } else if ($exact_node == $nodes[$first_index]) {
+ if (!empty($nodes[$first_index + 1])) {
+ $this->last_find_folder = $nodes[$first_index];
+ $this->last_find_key = $encode_key;
+ $this->last_find_next_key = substr($nodes[$first_index + 1],
+ $len_current_prefix);
}
+ $current_folder = $nodes[$first_index];
+ } else if ($nodes[$last_index] <= $exact_node) {
+ $current_folder = $nodes[$last_index];
+ } else {
+ while ($first_index < $last_index) {
+ $mid_index = (int)ceil(($first_index + $last_index) / 2);
+ if ($nodes[$mid_index] > $exact_node) {
+ $last_index = $mid_index - 1;
+ } else {
+ $first_index = $mid_index;
+ }
+ }
+ if (!empty($nodes[$first_index + 1])) {
+ $this->last_find_folder = $nodes[$first_index];
+ $this->last_find_key = $encode_key;
+ $this->last_find_next_key = substr($nodes[$first_index + 1],
+ $len_current_prefix);
+ }
+ $current_folder = $nodes[$first_index];
}
- $current_folder = $next_node;
}
$return_folder = null;
if ($nodes == $current_folder) {
@@ -688,6 +749,10 @@ class BPlusTree
}
$cache[$current_folder] = $current_folder;
$return_folder = $current_folder;
+ } else {
+ $this->last_find_folder = null;
+ $this->last_find_key = null;
+ $this->last_find_next_key = null;
}
return $return_folder;
}
@@ -702,7 +767,7 @@ class BPlusTree
{
$parent_folder = $this->getParentFolder($node_filename);
$len = strlen($parent_folder);
- $node_name = substr($node_filename, $len +1);
+ $node_name = substr($node_filename, $len + 1);
if ($node_name == self::LEAST_NODE_NAME) {
return $parent_folder. "/" . self::ARCHIVE_PREFIX .
self::LEAST_NODE_NAME;
diff --git a/src/locale/en_US/resources/Tokenizer.php b/src/locale/en_US/resources/Tokenizer.php
index f1e1acfb3..3c104d8b6 100755
--- a/src/locale/en_US/resources/Tokenizer.php
+++ b/src/locale/en_US/resources/Tokenizer.php
@@ -1293,6 +1293,9 @@ class Tokenizer
private static function stemPhrase($phrase)
{
$terms = mb_split("[[:space:]]", $phrase);
+ if (empty($terms)) {
+ return "";
+ }
$stemmed_phrase = "";
$space = "";
foreach ($terms as $term) {