Fix recommendation job memory issue using LRUCache, r=chris

Parth Patel [2022-12-10 21:Dec:th]
Fix recommendation job memory issue using LRUCache, r=chris

Signed-off-by: Chris Pollett <chris@pollett.org>
Filename
src/configs/Config.php
src/library/LRUCache.php
src/library/VersionFunctions.php
src/library/media_jobs/RecommendationJob.php
src/models/ProfileModel.php
diff --git a/src/configs/Config.php b/src/configs/Config.php
index b3d784ca8..306330d75 100755
--- a/src/configs/Config.php
+++ b/src/configs/Config.php
@@ -162,7 +162,7 @@ nsconddefine('GENERATOR_STRING', "Yioop");
  * Version number for upgrade database function
  * @var int
  */
-nsdefine('DATABASE_VERSION', 74);
+nsdefine('DATABASE_VERSION', 75);
 /**
  * Minimum Version fo Yioop for which keyword ad script
  * still works with this version
diff --git a/src/library/LRUCache.php b/src/library/LRUCache.php
new file mode 100644
index 000000000..befac4d9c
--- /dev/null
+++ b/src/library/LRUCache.php
@@ -0,0 +1,98 @@
+<?php
+/**
+ * SeekQuarry/Yioop --
+ * Open Source Pure PHP Search Engine, Crawler, and Indexer
+ *
+ * Copyright (C) 2009 - 2022  Chris Pollett chris@pollett.org
+ *
+ * LICENSE:
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * END LICENSE
+ *
+ * @author Parth Patel
+ * @license https://www.gnu.org/licenses/ GPL3
+ * @link https://www.seekquarry.com/
+ * @copyright 2009 - 2022
+ * @filesource
+ */
+namespace seekquarry\yioop\library;
+
+use seekquarry\yioop\library as L;
+
+/**
+ * Implements a least recently used cache
+ *
+ * @author Parth Patel
+ */
+class LRUCache
+{
+    /**
+     * Associative array holding the cached key => value pairs, newest first
+     * @var array
+     */
+    private $cache;
+    /**
+     * Maximum number of entries the cache may hold
+     * @var int
+     */
+    private $size;
+    /**
+     * Creates an empty cache and sets the size
+     *
+     * @param int $size maximum number of entries to keep in the cache
+     */
+    public function __construct($size = 100)
+    {
+        $this->cache = [];
+        $this->size = $size;
+    }
+    /**
+     * Adds or updates a key with the given value, moving it to the front
+     *
+     * @param mixed $key key to insert or refresh
+     * @param mixed $value value to store for $key
+     * @return array|null [key, value] evicted from the end when full, or null
+     */
+    public function put($key, $value)
+    {
+        if (array_key_exists($key, $this->cache)) {
+            unset($this->cache[$key]);
+            $this->cache = [$key => $value] + $this->cache;
+        } else {
+            if (count($this->cache) < $this->size) {
+                $this->cache = [$key => $value] + $this->cache;
+            } else {
+                $evicted_key = array_key_last($this->cache);
+                $evicted_value = $this->cache[$evicted_key];
+                unset($this->cache[$evicted_key]);
+                $this->cache = [$key => $value] + $this->cache;
+                return [$evicted_key, $evicted_value];
+            }
+        }
+    }
+    /**
+     * Returns the value for a given key; the cache order is left unchanged
+     *
+     * @param mixed $key key to look up
+     * @return mixed value stored for $key, or null if $key is not cached
+     */
+    public function get($key)
+    {
+        if (array_key_exists($key, $this->cache)) {
+            return $this->cache[$key];
+        }
+    }
+}
diff --git a/src/library/VersionFunctions.php b/src/library/VersionFunctions.php
index aa982ced7..763a33d73 100644
--- a/src/library/VersionFunctions.php
+++ b/src/library/VersionFunctions.php
@@ -2059,3 +2059,16 @@ function upgradeDatabaseVersion74(&$db)
         "PAGE_ID $integer NOT NULL, RESOURCE_PATH VARCHAR(255), SCORE FLOAT, " .
         "TIMESTAMP NUMERIC(" . C\TIMESTAMP_LEN . "), RESOURCE_ID $integer)");
 }
+/**
+ * Upgrades a Version 74 version of the Yioop database to a Version 75 version
+ * @param object $db datasource to use to upgrade
+ */
+function upgradeDatabaseVersion75(&$db)
+{
+    $dbinfo = ["DBMS" => C\DBMS, "DB_HOST" => C\DB_HOST,
+        "DB_NAME" => C\DB_NAME, "DB_PASSWORD" => C\DB_PASSWORD];
+    $db->execute("CREATE INDEX GI_RECOMMENDATION_INDEX ON " .
+        "GROUP_ITEM_RECOMMENDATION (USER_ID, ITEM_TYPE)");
+    $db->execute("CREATE INDEX GR_RECOMMENDATION_INDEX ON " .
+        "GROUP_RESOURCE_RECOMMENDATION (USER_ID)");
+}
diff --git a/src/library/media_jobs/RecommendationJob.php b/src/library/media_jobs/RecommendationJob.php
index 8ff295a92..f772f88ac 100644
--- a/src/library/media_jobs/RecommendationJob.php
+++ b/src/library/media_jobs/RecommendationJob.php
@@ -34,6 +34,7 @@ namespace seekquarry\yioop\library\media_jobs;
 use seekquarry\yioop\configs as C;
 use seekquarry\yioop\library as L;
 use seekquarry\yioop\library\LinearAlgebra as LinearAlgebra;
+use seekquarry\yioop\library\LRUCache as LRUCache;
 use seekquarry\yioop\library\PhraseParser as PhraseParser;
 use seekquarry\yioop\models\CronModel;

@@ -64,6 +65,10 @@ class RecommendationJob extends MediaJob
      * @var array
      */
     public $user_idf;
+    /**
+     * LRUCache for term embeddings
+     */
+    public $lru_cache;
     /**
      * Number of inserts to try to group into a single insert statement
      * before execution
@@ -102,6 +107,14 @@ class RecommendationJob extends MediaJob
     const DESCRIPTION_STOP_WORDS = ["author", "authors", "plot", "genre",
         "genres", "star", "stars", "credits", "rating", "ratings", "year",
         "director", "cast", "runtime"];
+    /**
+     * Hash algorithm to be used for calculating sign in Hash2Vec term embedding
+     */
+    const SIGN_HASH_ALGORITHM = "crc32";
+    /**
+     * MAX term embeddings fetched from database to initialize LRUCache
+     */
+    const MAX_TERM_EMBEDDINGS = 1000;
     /**
      * Sets up the database connection so can access tables related
      * to recommendations. Initialize timing info related to job.
@@ -116,6 +129,7 @@ class RecommendationJob extends MediaJob
         $db_class = C\NS_DATASOURCES . ucfirst(C\DBMS). "Manager";
         $this->db = new $db_class();
         $this->db->connect();
+        $this->size = self::EMBEDDING_VECTOR_SIZE;
     }
     /**
      * Only update if its been more than an hour since the last update
@@ -263,12 +277,14 @@ class RecommendationJob extends MediaJob
     public function computeThreadGroupRecommendations()
     {
         L\crawlLog("...Start computing Item Term Embeddings...");
-        [$term_embeddings, $item_terms] = $this->computeItemTermEmbeddings();
+        $item_terms = $this->computeItemTermEmbeddings();
         L\crawlLog("...Finished computing Item Term Embeddings...");
         L\crawlLog("...Start computing Item Embeddings...");
-        $item_embeddings = $this->computeItemEmbeddings(
-            $term_embeddings, $item_terms);
+        $item_embeddings = $this->computeItemEmbeddings($item_terms);
         L\crawlLog("...Finished computing Item Embeddings...");
+        L\crawlLog("...Start write back term embeddings from cache to db");
+        $this->saveTermEmbeddingsCacheToDb(C\THREAD_RECOMMENDATION);
+        L\crawlLog("...Finished write back term embeddings from cache to db");
         L\crawlLog("...Start computing Item User Embeddings...");
         [$item_user_embeddings, $user_items] = $this->
             computeItemUserEmbeddings($item_embeddings);
@@ -277,9 +293,12 @@ class RecommendationJob extends MediaJob
         $user_groups = $this->computeItemUserRecommendations($item_embeddings,
             $item_user_embeddings, $user_items);
         L\crawlLog("...Finished computing Item User Recommendations...");
+        unset($item_user_embeddings);
+        unset($user_items);
         L\crawlLog("...Start computing Group Embeddings...");
         $group_embeddings = $this->computeGroupEmbeddings($item_embeddings);
         L\crawlLog("...Finished computing Group Embeddings...");
+        unset($item_embeddings);
         L\crawlLog("...Start computing Group User Embeddings...");
         [$group_user_embeddings, $user_group_impression] =
             $this->computeGroupUserEmbeddings($group_embeddings);
@@ -288,6 +307,10 @@ class RecommendationJob extends MediaJob
         $this->computeGroupUserRecommendations($group_embeddings,
             $group_user_embeddings, $user_groups, $user_group_impression);
         L\crawlLog("...Finished computing Group User Recommendations...");
+        unset($group_embeddings);
+        unset($group_user_embeddings);
+        unset($user_group_impression);
+        unset($user_groups);
     }
     /**
      * Computes the term embeddings for individual items (main thread only and
@@ -295,19 +318,19 @@ class RecommendationJob extends MediaJob
      * description text. Processes only MAX_GROUP_ITEMS which are either newly
      * created or recently edited
      *
-     * @return array [$term_embeddings, $item_terms] containing embeddings for
-     * terms in the items and terms in each item
+     * @return array $item_terms terms in each item
      */
     public function computeItemTermEmbeddings()
     {
         $db = $this->db;
+        $this->lru_cache = new LRUCache(self::MAX_TERM_EMBEDDINGS);
         $select_sql = "SELECT * FROM RECOMMENDATION_TERM_EMBEDDING WHERE" .
-            " ITEM_TYPE = ?";
+            " ITEM_TYPE = ?" . $db->limitOffset(self::MAX_TERM_EMBEDDINGS);
         $results = $db->execute($select_sql, [C\THREAD_RECOMMENDATION]);
         $term_embeddings = [];
         $item_terms = [];
         while ($row = $db->fetchArray($results)) {
-            $term_embeddings[$row['ID']] = unserialize($row['VECTOR']);
+            $this->lru_cache->put($row['ID'], unserialize($row['VECTOR']));
         }
         $context_distance_sum = (self::CONTEXT_WINDOW_LENGTH *
             (self::CONTEXT_WINDOW_LENGTH + 1)) / 2.0;
@@ -322,7 +345,6 @@ class RecommendationJob extends MediaJob
             " AND TITLE NOT LIKE '%Page%' ORDER BY EDIT_DATE DESC " .
             $db->limitOffset(self::MAX_GROUP_ITEMS);
         $results = $db->execute($group_item_sql);
-        $update_term_embeddings = [];
         while ($row = $db->fetchArray($results)) {
             $item_id = $row['ID'];
             $text_corpus = $row['TITLE'] . " " . $row['DESCRIPTION'];
@@ -332,55 +354,39 @@ class RecommendationJob extends MediaJob
             for ($i = 0; $i < count($terms); $i++) {
                 [$term_id, $term] = $terms[$i];
                 $term_hash = $term_id % self::EMBEDDING_VECTOR_SIZE;
-                if (!array_key_exists($term_id, $term_embeddings)) {
-                    $term_embeddings[$term_id] = array_fill(0,
-                        self::EMBEDDING_VECTOR_SIZE, 0);
-                }
+                $term_sign_hash = hash(self::SIGN_HASH_ALGORITHM, $term, true);
+                $term_sign = unpack('n', $term_sign_hash)[1] % 2 == 0 ? -1 : 1;
+                $term_embedding = $this->getTermEmbedding($term_id,
+                    C\THREAD_RECOMMENDATION);
+                $term_embedding = unpack("d$this->size", $term_embedding);
                 for ($j = $i - 1; $j >= 0 &&
                     $j >= $i - self::CONTEXT_WINDOW_LENGTH; $j--) {
                     [$context_term_id, $context_term] = $terms[$j];
+                    $context_term_embedding = $this->getTermEmbedding(
+                        $context_term_id, C\THREAD_RECOMMENDATION);
+                    $context_term_embedding = unpack("d$this->size",
+                        $context_term_embedding);
                     $weight = exp(-1 * pow(($i - $j) / $std_deviation, 2));
                     $context_term_hash = $context_term_id %
                         self::EMBEDDING_VECTOR_SIZE;
-                    $term_embeddings[$term_id][$context_term_hash] +=
-                        $weight;
-                    $term_embeddings[$context_term_id][$term_hash] +=
-                        $weight;
+                    $context_term_sign_hash = hash(self::SIGN_HASH_ALGORITHM,
+                        $context_term, true);
+                    $context_term_sign = unpack('n', $context_term_sign_hash)[1]
+                        % 2 == 0 ? -1 : 1;
+                    $term_embedding[$context_term_hash] +=
+                        $context_term_sign * $weight;
+                    $context_term_embedding[$term_hash] += $term_sign * $weight;
+                    $context_term_embedding = pack("d$this->size",
+                        ...$context_term_embedding);
+                    $this->updateTermEmbeddingCache($context_term_id,
+                        $context_term_embedding, C\THREAD_RECOMMENDATION);
                 }
+                $term_embedding = pack("d$this->size", ...$term_embedding);
+                $this->updateTermEmbeddingCache($term_id, $term_embedding,
+                    C\THREAD_RECOMMENDATION);
             }
         }
-        $normalized_term_embeddings = [];
-        foreach ($term_embeddings as $term_id => $embedding) {
-            $normalized_term_embeddings[$term_id] =
-                LinearAlgebra::normalize($embedding);
-        }
-        $delete_sql = "DELETE FROM RECOMMENDATION_TERM_EMBEDDING" .
-            " WHERE ITEM_TYPE = ?";
-        $db->execute($delete_sql, [C\THREAD_RECOMMENDATION]);
-        $base_insert_sql = "INSERT INTO RECOMMENDATION_TERM_EMBEDDING VALUES ";
-        $insert_sql = $base_insert_sql;
-        $comma = "";
-        $insert_count = 0;
-        $item_type = C\THREAD_RECOMMENDATION;
-        foreach ($normalized_term_embeddings as $term_id => $embedding) {
-            $serialized_embedding = serialize($embedding);
-            $insert_sql .= "$comma($term_id, $item_type," .
-                " '$serialized_embedding')";
-            $comma = ",";
-            $insert_count++;
-            if ($insert_count == self::BATCH_SQL_INSERT_NUM) {
-                $insert_sql = $db->insertIgnore($insert_sql);
-                $db->execute($insert_sql);
-                $insert_count = 0;
-                $comma = "";
-                $insert_sql = $base_insert_sql;
-            }
-        }
-        if ($insert_count > 0) {
-            $insert_sql = $db->insertIgnore($insert_sql);
-            $db->execute($insert_sql);
-        }
-        return [$term_embeddings, $item_terms];
+        return $item_terms;
     }
     /**
      * Computes the item embeddings for individual items (main thread only and
@@ -388,23 +394,24 @@ class RecommendationJob extends MediaJob
      * Additionally fetches the existing item embeddings from database and
      * updates them if the term embeddings are updated for their terms
      *
-     * @param array $term_embeddings embedding for the terms
      * @param array $item_terms terms in each item
      * @return array $updated_item_embeddings containing embeddings for items
      */
-    public function computeItemEmbeddings($term_embeddings, $item_terms)
+    public function computeItemEmbeddings($item_terms)
     {
         $db = $this->db;
         $updated_item_embeddings = [];
         foreach ($item_terms as $item_id => [$terms, $group_id]) {
             $item_embedding = array_fill(0, self::EMBEDDING_VECTOR_SIZE, 0);
             foreach ($terms as [$term_id, $term]) {
-                if (array_key_exists($term_id, $term_embeddings)) {
-                    $item_embedding = LinearAlgebra::add($item_embedding,
-                        $term_embeddings[$term_id]);
-                }
+                $term_embedding = $this->getTermEmbedding($term_id,
+                    C\THREAD_RECOMMENDATION, true);
+                $term_embedding = unpack("d$this->size", $term_embedding);
+                $item_embedding = LinearAlgebra::add($item_embedding,
+                    $term_embedding);
             }
             $item_embedding = LinearAlgebra::normalize($item_embedding);
+            $item_embedding = pack("d$this->size", ...$item_embedding);
             $updated_item_embeddings[$item_id] = [$item_embedding, $group_id];
         }
         $base_delete_sql = "DELETE FROM RECOMMENDATION_ITEM_EMBEDDING" .
@@ -417,9 +424,9 @@ class RecommendationJob extends MediaJob
         $item_type = C\THREAD_RECOMMENDATION;
         foreach ($updated_item_embeddings as
             $item_id => [$embedding, $parent_id]) {
-            $serialized_embedding = serialize($embedding);
-            $insert_sql .= "$comma($item_id, $item_type," .
-                " '$serialized_embedding', $parent_id)";
+            $embedding = serialize(unpack("d$this->size", $embedding));
+            $insert_sql .= "$comma($item_id, $item_type, " .
+                "'$embedding', $parent_id)";
             $delete_sql .= "$comma $item_id";
             $comma = ",";
             $insert_count++;
@@ -476,14 +483,17 @@ class RecommendationJob extends MediaJob
             $user_items[$user_id] = [];
             foreach ($item_ids as $item_id) {
                 if (array_key_exists($item_id, $item_embeddings)) {
-                    $item_user_embeddings[$user_id] = LinearAlgebra::add(
-                        $item_user_embeddings[$user_id],
+                    $item_embedding = unpack("d$this->size",
                         $item_embeddings[$item_id][0]);
+                    $item_user_embeddings[$user_id] = LinearAlgebra::add(
+                        $item_user_embeddings[$user_id], $item_embedding);
                     $user_items[$user_id][] = $item_id;
                 }
             }
             $item_user_embeddings[$user_id] = LinearAlgebra::normalize(
                 $item_user_embeddings[$user_id]);
+            $item_user_embeddings[$user_id] = pack("d$this->size",
+                ...$item_user_embeddings[$user_id]);
         }
         return [$item_user_embeddings, $user_items];
     }
@@ -516,21 +526,15 @@ class RecommendationJob extends MediaJob
         }
         $item_user_recommendations = [];
         foreach ($item_user_embeddings as $user_id => $embedding) {
+            $embedding = unpack("d$this->size", $embedding);
             if (array_key_exists($user_id, $user_groups)) {
-                $user_item = [];
-                if (array_key_exists($user_id, $user_items)) {
-                    $user_item = $user_items[$user_id];
-                }
-                $user_group = [];
-                if (array_key_exists($user_id, $user_groups)) {
-                    $user_group = $user_groups[$user_id];
-                }
                 foreach ($item_embeddings as
                     $item_id => [$item_embedding, $parent_id]) {
-                    if (in_array($item_id, $user_item) ||
-                        !in_array($parent_id, $user_group)) {
+                    if (in_array($item_id, $user_items[$user_id]) ||
+                        !in_array($parent_id, $user_groups[$user_id])) {
                         continue;
                     }
+                    $item_embedding = unpack("d$this->size", $item_embedding);
                     $similarity = LinearAlgebra::similarity(
                         $item_embedding, $embedding);
                     $item_user_recommendations[] = [$user_id,
@@ -547,8 +551,9 @@ class RecommendationJob extends MediaJob
         $insert_count = 0;
         $item_type = C\THREAD_RECOMMENDATION;
         foreach ($item_user_recommendations as $recommendation) {
-            $insert_sql .= "$comma({$recommendation[0]}, {$recommendation[1]}" .
-                ", $item_type, {$recommendation[2]}, {$this->update_time})";
+            [$user_id, $item_id, $similarity] = $recommendation;
+            $insert_sql .= "$comma($user_id, $item_id" .
+                ", $item_type, $similarity, {$this->update_time})";
             $comma = ",";
             $insert_count++;
             if ($insert_count == self::BATCH_SQL_INSERT_NUM) {
@@ -579,15 +584,20 @@ class RecommendationJob extends MediaJob
         $updated_group_embeddings = [];
         foreach ($item_embeddings as $item_id => [$embedding, $parent_id]) {
             if (array_key_exists($parent_id, $updated_group_embeddings)) {
-                $updated_group_embeddings[$parent_id] = LinearAlgebra::add(
-                    $embedding, $updated_group_embeddings[$parent_id]);
+                $embedding = unpack("d$this->size", $embedding);
+                $group_embedding = unpack("d$this->size",
+                    $updated_group_embeddings[$parent_id]);
+                $updated_group_embeddings[$parent_id] = pack("d$this->size",
+                    ...LinearAlgebra::add($embedding, $group_embedding));
             } else {
                 $updated_group_embeddings[$parent_id] = $embedding;
             }
         }
         foreach ($updated_group_embeddings as $group_id => $embedding) {
+            $embedding = unpack("d$this->size", $embedding);
             $embedding = LinearAlgebra::normalize($embedding);
-            $updated_group_embeddings[$group_id] = $embedding;
+            $updated_group_embeddings[$group_id] = pack("d$this->size",
+                ...$embedding);
         }
         $base_delete_sql = "DELETE FROM RECOMMENDATION_ITEM_EMBEDDING" .
             " WHERE ITEM_TYPE = ? AND ID IN (";
@@ -598,9 +608,9 @@ class RecommendationJob extends MediaJob
         $insert_count = 0;
         $item_type = C\GROUP_RECOMMENDATION;
         foreach ($updated_group_embeddings as $group_id => $embedding) {
-            $serialized_embedding = serialize($embedding);
-            $insert_sql .= "$comma($group_id, $item_type," .
-                " '$serialized_embedding', $group_id)";
+            $embedding = serialize(unpack("d$this->size", $embedding));
+            $insert_sql .= "$comma($group_id, $item_type, " .
+                "'$embedding', $group_id)";
             $delete_sql .= "$comma $group_id";
             $comma = ",";
             $insert_count++;
@@ -658,14 +668,15 @@ class RecommendationJob extends MediaJob
             $user_groups[$user_id] = [];
             foreach ($group_ids as $group_id) {
                 if (array_key_exists($group_id, $group_embeddings)) {
-                    $group_user_embeddings[$user_id] = LinearAlgebra::add(
-                        $group_user_embeddings[$user_id],
+                    $embedding = unpack("d$this->size",
                         $group_embeddings[$group_id]);
+                    $group_user_embeddings[$user_id] = LinearAlgebra::add(
+                        $group_user_embeddings[$user_id], $embedding);
                     $user_groups[$user_id][] = $group_id;
                 }
             }
-            $group_user_embeddings[$user_id] = LinearAlgebra::normalize(
-                $group_user_embeddings[$user_id]);
+            $group_user_embeddings[$user_id] = pack("d$this->size",
+                ...LinearAlgebra::normalize($group_user_embeddings[$user_id]));
         }
         return [$group_user_embeddings, $user_groups];
     }
@@ -693,14 +704,14 @@ class RecommendationJob extends MediaJob
         }
         $group_user_recommendations = [];
         foreach ($group_user_embeddings as $user_id => $embedding) {
-            $user_group = $user_groups[$user_id];
-            $impression_group = $user_group_impression[$user_id];
             foreach ($group_embeddings as $group_id => $group_embedding) {
                 if (in_array($group_id, $exclude_group_ids) ||
-                    in_array($group_id, $user_group) ||
-                    in_array($group_id, $impression_group)) {
+                    in_array($group_id, $user_groups[$user_id]) ||
+                    in_array($group_id, $user_group_impression[$user_id])) {
                     continue;
                 }
+                $embedding = unpack("d$this->size", $embedding);
+                $group_embedding = unpack("d$this->size", $group_embedding);
                 $similarity = LinearAlgebra::similarity($embedding,
                     $group_embedding);
                 $group_user_recommendations[] = [$user_id, $group_id,
@@ -716,8 +727,9 @@ class RecommendationJob extends MediaJob
         $insert_count = 0;
         $item_type = C\GROUP_RECOMMENDATION;
         foreach ($group_user_recommendations as $recommendation) {
-            $insert_sql .= "$comma({$recommendation[0]}, {$recommendation[1]}" .
-                ", $item_type, {$recommendation[2]}, {$this->update_time})";
+            [$user_id, $group_id, $similarity] = $recommendation;
+            $insert_sql .= "$comma($user_id, $group_id" .
+                ", $item_type, $similarity, {$this->update_time})";
             $comma = ",";
             $insert_count++;
             if ($insert_count == self::BATCH_SQL_INSERT_NUM) {
@@ -747,13 +759,18 @@ class RecommendationJob extends MediaJob
         L\crawlLog("...Finished fetching descriptions for the wiki page " .
             "resources...");
         L\crawlLog("...Start computing wiki term embeddings...");
-        [$term_embeddings, $resource_terms, $meta_details_terms] =
+        [$resource_terms, $meta_details_terms] =
             $this->computeWikiTermEmbeddings($descriptions);
         L\crawlLog("...Finished computing wiki term embeddings...");
         L\crawlLog("...Start computing wiki resource embeddings...");
         $item_embeddings = $this->computeWikiResourceEmbeddings($resource_terms,
-            $meta_details_terms, $term_embeddings);
+            $meta_details_terms);
         L\crawlLog("...Finished computing wiki resource embeddings...");
+        unset($resource_terms);
+        unset($meta_details_terms);
+        L\crawlLog("...Start write back term embeddings from cache to db");
+        $this->saveTermEmbeddingsCacheToDb(C\RESOURCE_RECOMMENDATION);
+        L\crawlLog("...Finished write back term embeddings from cache to db");
         L\crawlLog("...Start computing wiki user embeddings...");
         [$user_embeddings, $user_items] = $this->computeWikiUserEmbeddings(
             $item_embeddings);
@@ -762,6 +779,10 @@ class RecommendationJob extends MediaJob
         $this->computeWikiUserRecommendations($item_embeddings,
             $user_embeddings, $user_items, $resource_metadata);
         L\crawlLog("...Done computing wiki resource recommendations...");
+        unset($user_embeddings);
+        unset($user_items);
+        unset($item_embeddings);
+        unset($resource_metadata);
     }
     /**
      * Fetches the description for the eligible wiki resources having the root
@@ -839,22 +860,19 @@ class RecommendationJob extends MediaJob
      * approach
      *
      * @param array $descriptions of resources
-     * @return array [$term_embeddings, $resource_terms, $meta_details_term]
-     * first with key being term id and value is the embedding vector for that
-     * term, second with key being resource id and value being array of clean
-     * terms in that resource description
+     * @return array [$resource_terms, $meta_details_term]
      */
     public function computeWikiTermEmbeddings($descriptions)
     {
         $db = $this->db;
+        $this->lru_cache = new LRUCache(self::MAX_TERM_EMBEDDINGS);
         $select_sql = "SELECT * FROM RECOMMENDATION_TERM_EMBEDDING WHERE" .
-            " ITEM_TYPE = ?";
+            " ITEM_TYPE = ?" . $db->limitOffset(self::MAX_TERM_EMBEDDINGS);
         $results = $db->execute($select_sql, [C\RESOURCE_RECOMMENDATION]);
-        $term_embeddings = [];
         $resource_terms = [];
         $meta_details_terms = [];
         while ($row = $db->fetchArray($results)) {
-            $term_embeddings[$row['ID']] = unserialize($row['VECTOR']);
+            $this->lru_cache->put($row['ID'], unserialize($row['VECTOR']));
         }
         $context_distance_sum = (self::CONTEXT_WINDOW_LENGTH *
             (self::CONTEXT_WINDOW_LENGTH + 1)) / 2.0;
@@ -885,51 +903,43 @@ class RecommendationJob extends MediaJob
                 for ($i = 0; $i < count($terms); $i++) {
                     [$term_id, $term] = $terms[$i];
                     $term_hash = $term_id % self::EMBEDDING_VECTOR_SIZE;
-                    if (!array_key_exists($term_id, $term_embeddings)) {
-                        $term_embeddings[$term_id] = array_fill(0,
-                            self::EMBEDDING_VECTOR_SIZE, 0);
-                    }
+                    $term_sign_hash = hash(self::SIGN_HASH_ALGORITHM,
+                        $term, true);
+                    $term_sign = unpack('n', $term_sign_hash)[1]
+                        % 2 == 0 ? -1 : 1;
+                    $term_embedding = $this->getTermEmbedding($term_id,
+                        C\RESOURCE_RECOMMENDATION);
+                    $term_embedding = unpack("d$this->size", $term_embedding);
                     for ($j = $i - 1; $j >= 0 &&
                         $j >= $i - self::CONTEXT_WINDOW_LENGTH; $j--) {
                         [$context_term_id, $context_term] = $terms[$j];
+                        $context_term_embedding = $this->getTermEmbedding(
+                            $context_term_id, C\RESOURCE_RECOMMENDATION);
+                        $context_term_embedding = unpack("d$this->size",
+                            $context_term_embedding);
                         $weight = exp(-1 * pow(($i - $j) / $std_deviation, 2));
                         $context_term_hash = $context_term_id %
                             self::EMBEDDING_VECTOR_SIZE;
-                        $term_embeddings[$term_id][$context_term_hash] +=
-                            $weight;
-                        $term_embeddings[$context_term_id][$term_hash] +=
-                            $weight;
+                        $context_term_sign_hash = hash(
+                            self::SIGN_HASH_ALGORITHM, $context_term, true);
+                        $context_term_sign = unpack('n',
+                            $context_term_sign_hash)[1] % 2 == 0 ? -1 : 1;
+                        $term_embedding[$context_term_hash] +=
+                            $context_term_sign * $weight;
+                        $context_term_embedding[$term_hash] +=
+                            $term_sign * $weight;
+                        $context_term_embedding = pack("d$this->size",
+                            ...$context_term_embedding);
+                        $this->updateTermEmbeddingCache($context_term_id,
+                            $context_term_embedding, C\RESOURCE_RECOMMENDATION);
                     }
+                    $term_embedding = pack("d$this->size", ...$term_embedding);
+                    $this->updateTermEmbeddingCache($term_id, $term_embedding,
+                        C\RESOURCE_RECOMMENDATION);
                 }
             }
         }
-        $delete_sql = "DELETE FROM RECOMMENDATION_TERM_EMBEDDING WHERE" .
-            " ITEM_TYPE = ?";
-        $db->execute($delete_sql, [C\RESOURCE_RECOMMENDATION]);
-        $base_insert_sql = "INSERT INTO RECOMMENDATION_TERM_EMBEDDING VALUES ";
-        $insert_sql = $base_insert_sql;
-        $comma = "";
-        $insert_count = 0;
-        $item_type = C\RESOURCE_RECOMMENDATION;
-        foreach ($term_embeddings as $term_id => $embedding) {
-            $serialized_embedding = serialize($embedding);
-            $insert_sql .= "$comma($term_id, $item_type, " .
-                "'$serialized_embedding')";
-            $comma = ",";
-            $insert_count++;
-            if ($insert_count == self::BATCH_SQL_INSERT_NUM) {
-                $insert_sql = $db->insertIgnore($insert_sql);
-                $db->execute($insert_sql);
-                $insert_count = 0;
-                $comma = "";
-                $insert_sql = $base_insert_sql;
-            }
-        }
-        if ($insert_count > 0) {
-            $insert_sql = $db->insertIgnore($insert_sql);
-            $db->execute($insert_sql);
-        }
-        return [$term_embeddings, $resource_terms, $meta_details_terms];
+        return [$resource_terms, $meta_details_terms];
     }
     /**
      * Split the given text into terms, clean the terms by removing non
@@ -977,40 +985,49 @@ class RecommendationJob extends MediaJob
      *
      * @param array $resource_terms of processed terms from resource description
      * @param array $meta_details_terms of raw resource descriptions
-     * @param array $term_embeddings of term embeddings
      * @return array $updated_item_embeddings array of updated wiki resource
      * embeddings
      */
     public function computeWikiResourceEmbeddings($resource_terms,
-        $meta_details_terms, $term_embeddings)
+        $meta_details_terms)
     {
         $db = $this->db;
         $updated_item_embeddings = [];
         foreach ($resource_terms as $resource_id => $terms) {
             $item_embedding = array_fill(0, self::EMBEDDING_VECTOR_SIZE, 0);
             foreach ($terms as [$term_id, $term]) {
-                if (array_key_exists($term_id, $term_embeddings)) {
-                    $item_embedding = LinearAlgebra::add($item_embedding,
-                        $term_embeddings[$term_id]);
-                }
+                $term_embedding = $this->getTermEmbedding($term_id,
+                    C\RESOURCE_RECOMMENDATION, true);
+                $term_embedding = unpack("d$this->size", $term_embedding);
+                $item_embedding = LinearAlgebra::add($item_embedding,
+                    $term_embedding);
             }
-            $updated_item_embeddings[$resource_id] = $item_embedding;
+            $updated_item_embeddings[$resource_id] = pack("d$this->size",
+                ...$item_embedding);
         }
         foreach ($meta_details_terms as $resource_id => $meta_terms) {
             if (!array_key_exists($resource_id, $updated_item_embeddings)) {
-                $updated_item_embeddings[$resource_id] = array_fill(0,
-                    self::EMBEDDING_VECTOR_SIZE, 0);
+                $item_embedding = array_fill(0, self::EMBEDDING_VECTOR_SIZE, 0);
+            } else {
+                $item_embedding = unpack("d$this->size",
+                    $updated_item_embeddings[$resource_id]);
             }
             foreach ($meta_terms as [$meta_term_id, $meta_term]) {
                 if (strlen($meta_term) <= 1) {
                     continue;
                 }
-                $updated_item_embeddings[$resource_id][$meta_term_id] += 1.;
+                $sign_hash = hash(self::SIGN_HASH_ALGORITHM, $meta_term, true);
+                $sign = unpack('n', $sign_hash)[1] % 2 == 0 ? -1 : 1;
+                $item_embedding[$meta_term_id%self::EMBEDDING_VECTOR_SIZE] +=
+                    $sign * 1.0;
             }
+            $updated_item_embeddings[$resource_id] = pack("d$this->size",
+                ...$item_embedding);
         }
         foreach ($updated_item_embeddings as $item_id => $embedding) {
-            $updated_item_embeddings[$item_id] = LinearAlgebra::normalize(
-                $embedding);
+            $embedding = unpack("d$this->size", $embedding);
+            $updated_item_embeddings[$item_id] = pack("d$this->size",
+                ...LinearAlgebra::normalize($embedding));
         }
         $delete_sql = "DELETE FROM RECOMMENDATION_ITEM_EMBEDDING WHERE" .
             " ITEM_TYPE = ?";
@@ -1021,9 +1038,9 @@ class RecommendationJob extends MediaJob
         $insert_count = 0;
         $item_type = C\RESOURCE_RECOMMENDATION;
         foreach ($updated_item_embeddings as $resource_id => $embedding) {
-            $serialized_embedding = serialize($embedding);
+            $embedding = serialize(unpack("d$this->size", $embedding));
             $insert_sql .= "$comma($resource_id, $item_type," .
-                " '$serialized_embedding', $resource_id)";
+                " '$embedding', $resource_id)";
             $comma = ",";
             $insert_count++;
             if ($insert_count == self::BATCH_SQL_INSERT_NUM) {
@@ -1075,13 +1092,16 @@ class RecommendationJob extends MediaJob
             $user_items[$user_id] = [];
             foreach ($item_ids as $item_id) {
                 if (array_key_exists($item_id, $item_embeddings)) {
+                    $embedding = unpack("d$this->size",
+                        $item_embeddings[$item_id]);
                     $user_embeddings[$user_id] = LinearAlgebra::add(
-                        $user_embeddings[$user_id], $item_embeddings[$item_id]);
+                        $user_embeddings[$user_id], $embedding);
                     $user_items[$user_id][] = $item_id;
                 }
             }
-            $user_embeddings[$user_id] = LinearAlgebra::normalize(
-                $user_embeddings[$user_id]);
+            $user_embeddings[$user_id] = pack("d$this->size",
+                ...LinearAlgebra::normalize(
+                $user_embeddings[$user_id]));
         }
         return [$user_embeddings, $user_items];
     }
@@ -1100,20 +1120,18 @@ class RecommendationJob extends MediaJob
         $db = $this->db;
         $recommendations = [];
         foreach ($user_embeddings as $user_id => $user_embedding) {
-            if (array_key_exists($user_id, $user_items)) {
-                $user_item = $user_items[$user_id];
-            } else {
-                $user_item = [];
-            }
+            $user_embedding = unpack("d$this->size", $user_embedding);
             foreach ($item_embeddings as $item_id => $item_embedding) {
-                if (in_array($item_id, $user_item) ||
+                if (in_array($item_id, $user_items[$user_id]) ||
                     !array_key_exists($item_id, $resource_metadata)) {
                     continue;
                 }
+                $item_embedding = unpack("d$this->size", $item_embedding);
                 $similarity = LinearAlgebra::similarity($user_embedding,
                     $item_embedding);
                 list($group_id, $page_id, $resource_path) =
                     $resource_metadata[$item_id];
+                unset($resource_metadata[$item_id]);
                 $recommendations[] = [$user_id, $group_id, $page_id,
                     $resource_path, $similarity, $item_id];
             }
@@ -1146,4 +1164,95 @@ class RecommendationJob extends MediaJob
             $db->execute($insert_sql);
         }
     }
+    /**
+     * Returns the term embedding from the LRU cache, else from the database,
+     * else (no row, or one that is not a serialized string) all zeros
+     *
+     * @param int $term_id id of the term to look up
+     * @param int $item_type value of the ITEM_TYPE column to match
+     * @param boolean $update indicates whether to update the cache
+     * @return string $term_embedding
+     */
+    public function getTermEmbedding($term_id, $item_type, $update = false)
+    {
+        $term_embedding = $this->lru_cache->get($term_id);
+        if (!isset($term_embedding)) {
+            $db = $this->db;
+            $sql = "SELECT VECTOR FROM RECOMMENDATION_TERM_EMBEDDING " .
+                "WHERE ITEM_TYPE = ? AND ID = ?";
+            $row = $db->fetchArray($db->execute($sql, [$item_type, $term_id]));
+            $term_embedding = $row ? unserialize($row['VECTOR'],
+                ['allowed_classes' => false]) : false;
+            if (!is_string($term_embedding)) {
+                $term_embedding = pack("d$this->size", ...array_fill(0,
+                    self::EMBEDDING_VECTOR_SIZE, 0.0));
+            }
+        }
+        if ($update) {
+            $this->updateTermEmbeddingCache($term_id, $term_embedding,
+                $item_type);
+        }
+        return $term_embedding;
+    }
+    /**
+     * Puts a term embedding into the LRU cache; when that put evicts another
+     * entry, the evicted embedding is written to the database in its place
+     *
+     * @param int $term_id
+     * @param string $term_embedding
+     * @param int $item_type
+     */
+    public function updateTermEmbeddingCache($term_id, $term_embedding,
+        $item_type)
+    {
+        $db = $this->db;
+        $evicted = $this->lru_cache->put($term_id, $term_embedding);
+        if (!isset($evicted)) {
+            return;
+        }
+        $db->execute("DELETE FROM RECOMMENDATION_TERM_EMBEDDING " .
+            "WHERE ITEM_TYPE = ? AND ID = ?", [$item_type, $evicted[0]]);
+        $db->execute(
+            "INSERT INTO RECOMMENDATION_TERM_EMBEDDING VALUES (?, ?, ?)",
+            [$evicted[0], $item_type, serialize($evicted[1])]);
+    }
+    /**
+     * Writes the term embeddings still in the cache back to the database and
+     * frees the cache. Old rows are deleted a batch at a time; each vector is
+     * then inserted through bound parameters, since a serialized packed
+     * embedding is binary data that cannot be safely inlined into SQL text
+     *
+     * @param int $item_type value for ITEM_TYPE column
+     */
+    public function saveTermEmbeddingsCacheToDb($item_type)
+    {
+        $db = $this->db;
+        $insert_sql = $db->insertIgnore(
+            "INSERT INTO RECOMMENDATION_TERM_EMBEDDING VALUES (?, ?, ?)");
+        // deletes the stale rows of one batch, then inserts its fresh vectors
+        $save_batch = function ($vectors) use ($db, $item_type,
+            $insert_sql) {
+            if (empty($vectors)) {
+                return;
+            }
+            $delete_sql = "DELETE FROM RECOMMENDATION_TERM_EMBEDDING" .
+                " WHERE ITEM_TYPE = ? AND ID IN (" .
+                implode(", ", array_keys($vectors)) . ")";
+            $db->execute($delete_sql, [$item_type]);
+            foreach ($vectors as $id => $vector) {
+                $db->execute($insert_sql, [$id, $item_type, $vector]);
+            }
+        };
+        $vectors = [];
+        foreach ($this->lru_cache as $id => $embedding) {
+            // intval() keeps the ids safe to inline in the IN (...) list
+            $vectors[intval($id)] = serialize($embedding);
+            if (count($vectors) == self::BATCH_SQL_INSERT_NUM) {
+                $save_batch($vectors);
+                $vectors = [];
+            }
+        }
+        $save_batch($vectors);
+        unset($this->lru_cache);
+    }
 }
diff --git a/src/models/ProfileModel.php b/src/models/ProfileModel.php
index 2b0f0d3ce..55a320906 100755
--- a/src/models/ProfileModel.php
+++ b/src/models/ProfileModel.php
@@ -215,6 +215,8 @@ class ProfileModel extends Model
                 GROUP_ITEM_RECOMMENDATION (USER_ID $integer NOT NULL,
                 ITEM_ID $integer NOT NULL, ITEM_TYPE $integer NOT NULL,
                 SCORE FLOAT, TIMESTAMP NUMERIC(" . C\TIMESTAMP_LEN . "))",
+            "GI_RECOMMENDATION_INDEX" => "CREATE INDEX GI_RECOMMENDATION_INDEX
+                 ON GROUP_ITEM_RECOMMENDATION (USER_ID, ITEM_TYPE)",
             "GROUP_PAGE" => "CREATE TABLE GROUP_PAGE (
                 ID $serial PRIMARY KEY $auto_increment, GROUP_ID $integer,
                 DISCUSS_THREAD $integer, TITLE VARCHAR(" . C\TITLE_LEN . "),
@@ -247,6 +249,8 @@ class ProfileModel extends Model
                 GROUP_ID $integer NOT NULL, PAGE_ID $integer NOT NULL,
                 RESOURCE_PATH VARCHAR(255), SCORE FLOAT, TIMESTAMP NUMERIC(" .
                 C\TIMESTAMP_LEN . "), RESOURCE_ID $integer)",
+            "GR_RECOMMENDATION_INDEX" => "CREATE INDEX GR_RECOMMENDATION_INDEX
+                ON GROUP_RESOURCE_RECOMMENDATION (USER_ID)",
             "SOCIAL_GROUPS" => "CREATE TABLE SOCIAL_GROUPS (
                 GROUP_ID $serial PRIMARY KEY $auto_increment,
                 GROUP_NAME VARCHAR(" . C\SHORT_TITLE_LEN
ViewGit