Update RecommendationJob media job to use Hash2Vec approach for generating recommendations for groups, threads and wiki resources, r=chris

Parth Patel [2022-11-30 02:Nov:th]
Update RecommendationJob media job to use Hash2Vec approach for generating recommendations for groups, threads and wiki resources, r=chris

Signed-off-by: Chris Pollett <chris@pollett.org>
Filename
src/configs/Config.php
src/controllers/components/AccountaccessComponent.php
src/controllers/components/SocialComponent.php
src/library/LinearAlgebra.php
src/library/VersionFunctions.php
src/library/media_jobs/RecommendationJob.php
src/locale/en_US/configure.ini
src/models/ProfileModel.php
src/models/UserModel.php
src/views/elements/ManageaccountElement.php
diff --git a/src/configs/Config.php b/src/configs/Config.php
index e3602044d..b3d784ca8 100755
--- a/src/configs/Config.php
+++ b/src/configs/Config.php
@@ -162,7 +162,7 @@ nsconddefine('GENERATOR_STRING', "Yioop");
  * Version number for upgrade database function
  * @var int
  */
-nsdefine('DATABASE_VERSION', 73);
+nsdefine('DATABASE_VERSION', 74);
 /**
  * Minimum Version fo Yioop for which keyword ad script
  * still works with this version
@@ -1223,17 +1223,21 @@ nsdefine('RESOURCE_IMPRESSION', 6);
  */
 nsdefine('MAX_RECOMMENDATIONS', 3);
 /**
- * Type used to indicate ITEM_RECOMMENDATION score is about a trending thread
+ * Type used to indicate ITEM_TYPE is about a trending thread
  */
 nsdefine('TRENDING_RECOMMENDATION', 1);
 /**
- * Type used to indicate ITEM_RECOMMENDATION score is about a thread
+ * Type used to indicate ITEM_TYPE is about a thread
  */
 nsdefine('THREAD_RECOMMENDATION', 2);
 /**
- * Type used to indicate ITEM_RECOMMENDATION score is about a group
+ * Type used to indicate ITEM_TYPE is about a group
  */
 nsdefine('GROUP_RECOMMENDATION', 3);
+/**
+ * Type used to indicate ITEM_TYPE is about a wiki resource
+ */
+nsdefine('RESOURCE_RECOMMENDATION', 4);
 /**
  * Used to control update frequency of impression analytic data when
  * media updater in use
diff --git a/src/controllers/components/AccountaccessComponent.php b/src/controllers/components/AccountaccessComponent.php
index e7c6ebffc..d19834b7e 100644
--- a/src/controllers/components/AccountaccessComponent.php
+++ b/src/controllers/components/AccountaccessComponent.php
@@ -87,6 +87,8 @@ class AccountaccessComponent extends Component
         $data['GROUP_RECOMMENDATIONS'] =
             $user_model->getRecommendations($cron_timestamp,
             $user_id, C\GROUP_RECOMMENDATION);
+        $data['RESOURCE_RECOMMENDATIONS'] =
+            $user_model->getResourceRecommendations($user_id);
         $username = $signin_model->getUserName($user_id);
         $data["USER"] = $user_model->getUser($username);
         $data["CRAWL_MANAGER"] = false;
diff --git a/src/controllers/components/SocialComponent.php b/src/controllers/components/SocialComponent.php
index 073d27670..8c14a4986 100644
--- a/src/controllers/components/SocialComponent.php
+++ b/src/controllers/components/SocialComponent.php
@@ -62,6 +62,11 @@ class SocialComponent extends Component implements CrawlConstants
      *  successfully uploaded
      */
     const UPLOAD_SUCCESS = 1;
+    /**
+     * File to tell RecommendationJob the paths of eligible wiki resources
+     * description files
+     */
+    const RECOMMENDATION_FILE = C\APP_DIR . "/resources/recommendation.txt";
     /**
      * Used to handle the manage group activity.
      *
@@ -3917,6 +3922,12 @@ EOD;
                     needs_descriptions_format:
                     $data["HEAD"]['update_description']);
             $thumb_folder = $data['RESOURCES_INFO']['thumb_folder'];
+            if (!empty($thumb_folder)) {
+                $fp = fopen(self::RECOMMENDATION_FILE, "a");
+                fwrite($fp, $group_id . "###" . $data['PAGE_ID'] . "###" .
+                    $thumb_folder . "\n");
+                fclose($fp);
+            }
             $this->initUserResourcePreferences($data);
             $scroll_id = "scroll-container-" .
                 L\crawlHash($data['PAGE_ID'] . $sub_path);
diff --git a/src/library/LinearAlgebra.php b/src/library/LinearAlgebra.php
index 31d620882..6aafddb33 100644
--- a/src/library/LinearAlgebra.php
+++ b/src/library/LinearAlgebra.php
@@ -56,7 +56,7 @@ class LinearAlgebra
     public static function add($vector1, $vector2)
     {
         if (is_array($vector1) && is_array($vector2)) {
-            foreach($vector2 as $coord2 => $value2) {
+            foreach ($vector2 as $coord2 => $value2) {
                 $vector1[$coord2] = (empty($vector1[$coord2])) ? $value2 :
                     $vector1[$coord2] + $value2;
             }
@@ -68,7 +68,7 @@ class LinearAlgebra
                 $scalar = $vector1;
                 $vector1 = $vector2;
             }
-            foreach($vector1 as $coord => $value) {
+            foreach ($vector1 as $coord => $value) {
                 $vector1[$coord] = $value + $scalar;
             }
         }
@@ -101,7 +101,7 @@ class LinearAlgebra
         $sum = 0;
         $not_in_common = 0;
         $distortion = 0;
-        foreach($vector1 as $key => $weight) {
+        foreach ($vector1 as $key => $weight) {
             if (empty($vector2[$key])) {
                 $sum += $weight * $weight;
                 $not_in_common++;
@@ -110,7 +110,7 @@ class LinearAlgebra
                 $sum += $diff * $diff;
             }
         }
-        foreach($vector2 as $key => $weight) {
+        foreach ($vector2 as $key => $weight) {
             if (empty($vector1[$key])) {
                 $sum += $weight * $weight;
                 $not_in_common++;
@@ -134,7 +134,7 @@ class LinearAlgebra
         $v1 = (count($vector1) < count($vector2)) ? $vector1 : $vector2;
         $v2 = (count($vector1) < count($vector2)) ? $vector2 : $vector1;
         $sum = 0.;
-        foreach($v1 as $coordinate => $value) {
+        foreach ($v1 as $coordinate => $value) {
             if (!empty($v2[$coordinate])) {
                 $sum += $value * $v2[$coordinate];
             }
@@ -168,7 +168,7 @@ class LinearAlgebra
     public static function length($vector, $norm_power = 2)
     {
         $norm = 0.;
-        foreach($vector as $weight) {
+        foreach ($vector as $weight) {
             $norm += pow(abs($weight), $norm_power);
         }
         $norm = pow($norm, 1./$norm_power);
@@ -185,7 +185,7 @@ class LinearAlgebra
     public static function multiply($scalar_vec_mat, $vector)
     {
         if (is_numeric($scalar_vec_mat)) {
-            foreach($vector as $coordinate => $value) {
+            foreach ($vector as $coordinate => $value) {
                 $vector[$coordinate] *= $scalar_vec_mat;
             }
             return $vector;
@@ -238,9 +238,12 @@ class LinearAlgebra
      */
     public static function similarity($vector1, $vector2)
     {
-        $similarity = self::dot($vector1, $vector2) /
-            (self::length($vector1) * self::length($vector2));
-        return $similarity;
+        $dot_product = self::dot($vector1, $vector2);
+        $length = self::length($vector1) * self::length($vector2);
+        if ($length == 0) {
+            return 0.;
+        }
+        return $dot_product / $length;
     }
     /**
      * Subtracts two vectors component-wise. Treat empty components in either
@@ -256,7 +259,7 @@ class LinearAlgebra
     public static function subtract($vector1, $vector2)
     {
         if (is_array($vector1) && is_array($vector2)) {
-            foreach($vector2 as $coord2 => $value2) {
+            foreach ($vector2 as $coord2 => $value2) {
                 $vector1[$coord2] = (empty($vector1[$coord2])) ? -$value2 :
                     $vector1[$coord2] - $value2;
             }
@@ -268,10 +271,10 @@ class LinearAlgebra
                 $scalar = $vector1;
                 $vector1 = $vector2;
             }
-            foreach($vector1 as $coord => $value) {
+            foreach ($vector1 as $coord => $value) {
                 $vector1[$coord] = $value - $scalar;
             }
         }
         return $vector1;
     }
-}
+}
\ No newline at end of file
diff --git a/src/library/VersionFunctions.php b/src/library/VersionFunctions.php
index cf7f2c0d1..9a09191e9 100644
--- a/src/library/VersionFunctions.php
+++ b/src/library/VersionFunctions.php
@@ -2028,3 +2028,33 @@ function upgradeDatabaseVersion73(&$db)
     $db->execute("ALTER TABLE GROUP_PAGE ADD COLUMN LAST_MODIFIED
         NUMERIC(".C\TIMESTAMP_LEN.")");
 }
+/**
+ * Upgrades a Version 73 version of the Yioop database to a Version 74 version
+ * @param object $db datasource to use to upgrade
+ */
+function upgradeDatabaseVersion74(&$db)
+{
+    $dbinfo = ["DBMS" => C\DBMS, "DB_HOST" => C\DB_HOST,
+        "DB_NAME" => C\DB_NAME, "DB_PASSWORD" => C\DB_PASSWORD];
+    $integer = $db->integerType($dbinfo);
+    $db->execute("DROP TABLE IF EXISTS ITEM_RECOMMENDATION");
+    $db->execute("DROP TABLE IF EXISTS ITEM_TERM_FREQUENCY");
+    $db->execute("DROP TABLE IF EXISTS ITEM_TERM_WEIGHTS");
+    $db->execute("DROP TABLE IF EXISTS USER_ITEM_SIMILARITY");
+    $db->execute("DROP TABLE IF EXISTS USER_TERM_FREQUENCY");
+    $db->execute("DROP TABLE IF EXISTS USER_TERM_WEIGHTS");
+    $db->execute("CREATE TABLE IF NOT EXISTS RECOMMENDATION_TERM_EMBEDDING " .
+        "(ID $integer NOT NULL, ITEM_TYPE $integer NOT NULL, VECTOR BLOB, " .
+        "PRIMARY KEY(ID, ITEM_TYPE))");
+    $db->execute("CREATE TABLE IF NOT EXISTS RECOMMENDATION_ITEM_EMBEDDING " .
+        "(ID $integer NOT NULL, ITEM_TYPE $integer NOT NULL, VECTOR BLOB, " .
+        "PARENT_ID $integer, PRIMARY KEY(ID, ITEM_TYPE))");
+    $db->execute("CREATE TABLE IF NOT EXISTS GROUP_ITEM_RECOMMENDATION " .
+        "(USER_ID $integer NOT NULL, ITEM_ID $integer NOT NULL, " .
+        "ITEM_TYPE $integer NOT NULL, SCORE FLOAT, TIMESTAMP " .
+        "NUMERIC(" . C\TIMESTAMP_LEN . "))");
+    $db->execute("CREATE TABLE IF NOT EXISTS GROUP_RESOURCE_RECOMMENDATION " .
+        "(USER_ID $integer NOT NULL, GROUP_ID $integer NOT NULL, " .
+        "PAGE_ID $integer NOT NULL, RESOURCE_PATH VARCHAR(255), SCORE FLOAT, " .
+        "TIMESTAMP NUMERIC(" . C\TIMESTAMP_LEN . "), RESOURCE_ID $integer)");
+}
diff --git a/src/library/media_jobs/RecommendationJob.php b/src/library/media_jobs/RecommendationJob.php
index d451c610b..f18332dac 100644
--- a/src/library/media_jobs/RecommendationJob.php
+++ b/src/library/media_jobs/RecommendationJob.php
@@ -34,7 +34,7 @@ namespace seekquarry\yioop\library\media_jobs;
 use seekquarry\yioop\configs as C;
 use seekquarry\yioop\library as L;
 use seekquarry\yioop\library\LinearAlgebra as LinearAlgebra;
-use seekquarry\yioop\library\CrawlConstants;
+use seekquarry\yioop\library\PhraseParser as PhraseParser;
 use seekquarry\yioop\models\CronModel;

 /**
@@ -77,6 +77,31 @@ class RecommendationJob extends MediaJob
      * Maximum number of terms used in making recommendations
      */
     const MAX_TERMS = 20000;
+    /**
+     * File containing paths to description folders of wiki page resources
+     * that should be used to create data corpus for computing recommendations
+     */
+    const RECOMMENDATION_FILE = C\APP_DIR . "/resources/recommendation.txt";
+    /**
+     * Length of context window for calculating term embeddings
+     */
+    const CONTEXT_WINDOW_LENGTH = 5;
+    /**
+     * Size of term and item embedding vector
+     */
+    const EMBEDDING_VECTOR_SIZE = 200;
+    /**
+     * Update period to consider for fetching the records from
+     * ITEM_IMPRESSION_SUMMARY table
+     */
+    const UPDATE_PERIOD = C\ONE_MONTH;
+    /**
+     * Stop words to exclude from the descriptions fetched by DescriptionUpdate
+     * media job
+     */
+    const DESCRIPTION_STOP_WORDS = ["author", "authors", "plot", "genre",
+        "genres", "star", "stars", "credits", "rating", "ratings", "year",
+        "director", "cast", "runtime"];
     /**
      * Sets up the database connection so can access tables related
      * to recommendations. Initialize timing info related to job.
@@ -121,15 +146,19 @@ class RecommendationJob extends MediaJob
             "item_group_recommendations");
         L\crawlLog("Current Active Recommendation Timestamp: ".
             $this->active_time);
-        L\crawlLog("...Clearing last run's intermediate results together ".
-            "with any old data");
-        $this->clearIntermediateRecommendationData();
         L\crawlLog("...Start computing similarity-based group and item ".
             "recommendations...");
         $this->computeThreadGroupRecommendations();
         L\crawlLog("...Finished computing similarity-based group and item ".
             "recommendations.");
+        L\crawlLog("...Start computing similarity-based wiki resource " .
+            "recommendations...");
+        $this->computeWikiResourceRecommendations();
+        L\crawlLog("...Finished computing similarity-based wiki" .
+            "resource recommendations...");
+        L\crawlLog("...Start computing new user recommendations...");
         $this->initializeNewUserRecommendations();
+        L\crawlLog("...Finished computing new user recommendations...");
         $this->cron_model->updateCronTime(
             "item_group_recommendations", $this->update_time);
     }
@@ -144,8 +173,8 @@ class RecommendationJob extends MediaJob
         $popular_recommendations = [
             C\THREAD_RECOMMENDATION  => [], C\GROUP_RECOMMENDATION  => []];
         $sql = "SELECT ITEM_ID, SUM(SCORE) AS TOTAL_SCORE FROM " .
-            "ITEM_RECOMMENDATION WHERE ITEM_TYPE = ? AND TIMESTAMP = " .
-            $this->active_time . " GROUP BY ITEM_ID ORDER BY TOTAL_SCORE DESC ".
+            "GROUP_ITEM_RECOMMENDATION WHERE ITEM_TYPE = ? " .
+            "GROUP BY ITEM_ID ORDER BY TOTAL_SCORE DESC ".
             $db->limitOffset(C\MAX_RECOMMENDATIONS);
         foreach ($popular_recommendations as $type => $recommendation) {
             $results = $db->execute($sql, [$type]);
@@ -155,9 +184,9 @@ class RecommendationJob extends MediaJob
         }
         $new_user_sql = "SELECT USER_ID AS USER_ID ".
             "FROM USERS WHERE USER_ID NOT IN ".
-            "(SELECT USER_ID FROM ITEM_RECOMMENDATION)";
+            "(SELECT USER_ID FROM GROUP_ITEM_RECOMMENDATION)";
         $new_user_results = $db->execute($new_user_sql);
-        $base_recommend_sql = "INSERT INTO ITEM_RECOMMENDATION VALUES ";
+        $base_recommend_sql = "INSERT INTO GROUP_ITEM_RECOMMENDATION VALUES ";
         $insert_recommend_sql = $base_recommend_sql;
         $comma = "";
         $insert_count = 0;
@@ -167,13 +196,11 @@ class RecommendationJob extends MediaJob
             foreach ($popular_recommendations as $type => $recommendations) {
                 foreach ($recommendations as $recommendation) {
                     $insert_recommend_sql .=
-                        "$comma ({$recommendation['ITEM_ID']}, $user_id, ".
+                        "$comma ($user_id, {$recommendation['ITEM_ID']}, ".
                         "$type, {$recommendation['TOTAL_SCORE']}," .
                         $this->update_time . ")";
                     $comma = ",";
                     $insert_count++;
-                    L\crawlTimeoutLog("..initialized new %s users so far",
-                        $i++);
                 }
                 if ($insert_count > self::BATCH_SQL_INSERT_NUM) {
                     $db->execute($insert_recommend_sql);
@@ -186,6 +213,46 @@ class RecommendationJob extends MediaJob
         if ($insert_count > 0) {
             $db->execute($insert_recommend_sql);
         }
+        $sql = "SELECT GROUP_ID, PAGE_ID, RESOURCE_PATH, RESOURCE_ID," .
+            " SUM(SCORE) AS TOTAL_SCORE FROM" .
+            " GROUP_RESOURCE_RECOMMENDATION GROUP BY GROUP_ID," .
+            " PAGE_ID, RESOURCE_PATH, RESOURCE_ID ORDER BY TOTAL_SCORE DESC";
+        $results = $db->execute($sql);
+        while ($row = $db->fetchArray($results)) {
+            $popular_recommendations[C\RESOURCE_RECOMMENDATION][] = $row;
+        }
+        $base_recommend_sql = "INSERT INTO GROUP_RESOURCE_RECOMMENDATION" .
+            " VALUES ";
+        $insert_recommend_sql = $base_recommend_sql;
+        $comma = "";
+        $insert_count = 0;
+        $new_user_sql = "SELECT USER_ID FROM USERS WHERE USER_ID NOT IN" .
+            "(SELECT USER_ID FROM GROUP_RESOURCE_RECOMMENDATION)";
+        $new_user_results = $db->execute($new_user_sql);
+        while ($row = $db->fetchArray($new_user_results)) {
+            $user_id = $row['USER_ID'];
+            $timestamp = time();
+            foreach ($popular_recommendations[C\RESOURCE_RECOMMENDATION] as
+                $recommendation) {
+                $insert_recommend_sql .=
+                    "$comma ($user_id, {$recommendation['GROUP_ID']}, ".
+                    "{$recommendation['PAGE_ID']}, " .
+                    "\"{$recommendation['RESOURCE_PATH']}\", ".
+                    "{$recommendation['TOTAL_SCORE']}, {$this->update_time}, ".
+                    "{$recommendation['RESOURCE_ID']})";
+                $comma = ",";
+                $insert_count++;
+                if ($insert_count > self::BATCH_SQL_INSERT_NUM) {
+                    $db->execute($insert_recommend_sql);
+                    $insert_recommend_sql = $base_recommend_sql;
+                    $insert_count = 0;
+                    $comma = "";
+                }
+            }
+        }
+        if ($insert_count > 0) {
+            $db->execute($insert_recommend_sql);
+        }
     }
     /**
      * Manages the whole process of computing thread and group recommendations
@@ -194,429 +261,932 @@ class RecommendationJob extends MediaJob
      */
     public function computeThreadGroupRecommendations()
     {
-        $this->computeItemTermFrequencies();
-        $this->computeUserTermFrequencies();
-        $number_items = $this->numberItems();
-        $number_users = $this->numberUsers();
-        $this->computeUserItemIdf($number_items, $number_users);
-        $this->tfIdfUsers();
-        $this->tfIdfItems();
-        $this->computeUserItemSimilarity();
-        $not_belongs_subselect =  "NOT EXISTS (SELECT * FROM ".
-            "GROUP_ITEM B WHERE S.USER_ID=B.USER_ID ".
-            "AND S.THREAD_ID=B.PARENT_ID )";
-        $this->calculateSimilarityRecommendations(C\THREAD_RECOMMENDATION,
-            "SELECT S.USER_ID, S.THREAD_ID, S.SIMILARITY FROM ".
-            "USER_ITEM_SIMILARITY S WHERE $not_belongs_subselect AND ".
-            "S.GROUP_MEMBER=1 ORDER BY S.USER_ID ASC, ".
-            "S.SIMILARITY DESC", C\MAX_RECOMMENDATIONS);
-        $this->calculateSimilarityRecommendations(C\GROUP_RECOMMENDATION,
-            "SELECT S.USER_ID AS USER_ID, GI.GROUP_ID AS GROUP_ID," .
-            "SUM(S.SIMILARITY) AS RATING FROM ".
-            "GROUP_ITEM GI, USER_ITEM_SIMILARITY S ".
-            "WHERE GI.ID = S.THREAD_ID AND S.GROUP_MEMBER=0 ".
-            "GROUP BY GI.GROUP_ID, S.USER_ID ORDER BY S.USER_ID, RATING DESC",
-            C\MAX_RECOMMENDATIONS);
+        L\crawlLog("...Start computing Item Term Embeddings...");
+        [$term_embeddings, $item_terms] = $this->computeItemTermEmbeddings();
+        L\crawlLog("...Finished computing Item Term Embeddings...");
+        L\crawlLog("...Start computing Item Embeddings...");
+        $item_embeddings = $this->computeItemEmbeddings(
+            $term_embeddings, $item_terms);
+        L\crawlLog("...Finished computing Item Embeddings...");
+        L\crawlLog("...Start computing Item User Embeddings...");
+        [$item_user_embeddings, $user_items] = $this->
+            computeItemUserEmbeddings($item_embeddings);
+        L\crawlLog("...Finshed computing Item User Embeddings...");
+        L\crawlLog("...Start computing Item User Recommendations...");
+        $user_groups = $this->computeItemUserRecommendations($item_embeddings,
+            $item_user_embeddings, $user_items);
+        L\crawlLog("...Finished computing Item User Recommendations...");
+        L\crawlLog("...Start computing Group Embeddings...");
+        $group_embeddings = $this->computeGroupEmbeddings($item_embeddings);
+        L\crawlLog("...Finished computing Group Embeddings...");
+        L\crawlLog("...Start computing Group User Embeddings...");
+        [$group_user_embeddings, $user_group_impression] =
+            $this->computeGroupUserEmbeddings($group_embeddings);
+        L\crawlLog("...Finished computing Group User Embeddings...");
+        L\crawlLog("...Start computing Group User Recommendations...");
+        $this->computeGroupUserRecommendations($group_embeddings,
+            $group_user_embeddings, $user_groups, $user_group_impression);
+        L\crawlLog("...Finished computing Group User Recommendations...");
     }
     /**
-     * Delete all rows from intermediate tables used in the calculation
-     * of group and thread recommendations. Also clears any non-active item
-     * recommendations
+     * Computes the term embeddings for individual items (main thread only and
+     * not comments) in groups feeds for the terms in their title and
+     * description text. Processes only MAX_GROUP_ITEMS which are either newly
+     * created or recently edited
+     *
+     * @return array [$term_embeddings, $item_terms] containing embeddings for
+     * terms in the items and terms in each item
      */
-    public function clearIntermediateRecommendationData()
+    public function computeItemTermEmbeddings()
     {
-        $this->db->execute("DELETE FROM ITEM_RECOMMENDATION
-            WHERE TIMESTAMP <> '" . $this->active_time . "'");
+        $db = $this->db;
+        $select_sql = "SELECT * FROM RECOMMENDATION_TERM_EMBEDDING WHERE" .
+            " ITEM_TYPE = ?";
+        $results = $db->execute($select_sql, [C\THREAD_RECOMMENDATION]);
+        $term_embeddings = [];
+        $item_terms = [];
+        while ($row = $db->fetchArray($results)) {
+            $term_embeddings[$row['ID']] = unserialize($row['VECTOR']);
+        }
+        $context_distance_sum = (self::CONTEXT_WINDOW_LENGTH *
+            (self::CONTEXT_WINDOW_LENGTH + 1)) / 2.0;
+        $mean = $context_distance_sum / self::CONTEXT_WINDOW_LENGTH;
+        $carry = 0.0;
+        for ($i = 1; $i <= self::CONTEXT_WINDOW_LENGTH; $i++) {
+            $difference = $i - $mean;
+            $carry += $difference * $difference;
+        }
+        $std_deviation = sqrt($carry / self::CONTEXT_WINDOW_LENGTH);
+        $group_item_sql = "SELECT * FROM GROUP_ITEM WHERE ID = PARENT_ID" .
+            " AND TITLE NOT LIKE '%Page%' ORDER BY EDIT_DATE DESC " .
+            $db->limitOffset(self::MAX_GROUP_ITEMS);
+        $results = $db->execute($group_item_sql);
+        $update_term_embeddings = [];
+        while ($row = $db->fetchArray($results)) {
+            $item_id = $row['ID'];
+            $text_corpus = $row['TITLE'] . " " . $row['DESCRIPTION'];
+            $text_corpus = mb_strtolower($text_corpus);
+            $terms = $this->cleanRemoveStopWords($text_corpus);
+            $item_terms[$item_id] = [$terms, $row['GROUP_ID']];
+            for ($i = 0; $i < count($terms); $i++) {
+                [$term_id, $term] = $terms[$i];
+                $term_hash = $term_id % self::EMBEDDING_VECTOR_SIZE;
+                if (!array_key_exists($term_id, $term_embeddings)) {
+                    $term_embeddings[$term_id] = array_fill(0,
+                        self::EMBEDDING_VECTOR_SIZE, 0);
+                }
+                for ($j = $i - 1; $j >= 0 &&
+                    $j >= $i - self::CONTEXT_WINDOW_LENGTH; $j--) {
+                    [$context_term_id, $context_term] = $terms[$j];
+                    $weight = exp(-1 * pow(($i - $j) / $std_deviation, 2));
+                    $context_term_hash = $context_term_id %
+                        self::EMBEDDING_VECTOR_SIZE;
+                    $term_embeddings[$term_id][$context_term_hash] +=
+                        $weight;
+                    $term_embeddings[$context_term_id][$term_hash] +=
+                        $weight;
+                }
+            }
+        }
+        $normalized_term_embeddings = [];
+        foreach ($term_embeddings as $term_id => $embedding) {
+            $normalized_term_embeddings[$term_id] =
+                LinearAlgebra::normalize($embedding);
+        }
+        $delete_sql = "DELETE FROM RECOMMENDATION_TERM_EMBEDDING" .
+            " WHERE ITEM_TYPE = ?";
+        $db->execute($delete_sql, [C\THREAD_RECOMMENDATION]);
+        $base_insert_sql = "INSERT INTO RECOMMENDATION_TERM_EMBEDDING VALUES ";
+        $insert_sql = $base_insert_sql;
+        $comma = "";
+        $insert_count = 0;
+        $item_type = C\THREAD_RECOMMENDATION;
+        foreach ($normalized_term_embeddings as $term_id => $embedding) {
+            $serialized_embedding = serialize($embedding);
+            $insert_sql .= "$comma($term_id, $item_type," .
+                " '$serialized_embedding')";
+            $comma = ",";
+            $insert_count++;
+            if ($insert_count == self::BATCH_SQL_INSERT_NUM) {
+                $insert_sql = $db->insertIgnore($insert_sql);
+                $db->execute($insert_sql);
+                $insert_count = 0;
+                $comma = "";
+                $insert_sql = $base_insert_sql;
+            }
+        }
+        if ($insert_count > 0) {
+            $insert_sql = $db->insertIgnore($insert_sql);
+            $db->execute($insert_sql);
+        }
+        return [$term_embeddings, $item_terms];
     }
     /**
-     * Computes the number of group items
-     * @return int number of items
+     * Computes the item embeddings for individual items (main thread only and
+     * not comments) in groups feeds using the term embeddings for their terms.
+     * Additionally fetches the existing item embeddings from database and
+     * updates them if the term embeddings are updated for their terms
+     *
+     * @param array $term_embeddings embedding for the terms
+     * @param array $item_terms terms in each item
+     * @return array $updated_item_embeddings containing embeddings for items
      */
-    public function numberItems()
+    public function computeItemEmbeddings($term_embeddings, $item_terms)
     {
-        $results = $this->db->execute("SELECT COUNT(*) AS NUM_ITEMS FROM ".
-            "GROUP_ITEM WHERE LOWER(TITLE) NOT LIKE '%page%'");
-        $num_items = 0;
-        if ($row = $this->db->fetchArray($results)) {
-            $num_items = $row['NUM_ITEMS'];
+        $db = $this->db;
+        $sql = "SELECT * FROM RECOMMENDATION_ITEM_EMBEDDING" .
+            " WHERE ITEM_TYPE = ?";
+        $results = $db->execute($sql, [C\THREAD_RECOMMENDATION]);
+        $item_embeddings = [];
+        while ($row = $db->fetchArray($results)) {
+            $item_embeddings[$row['ID']] = [unserialize($row['VECTOR']),
+                $row['PARENT_ID']];
+        }
+        $updated_item_embeddings = [];
+        foreach ($item_terms as $item_id => [$terms, $group_id]) {
+            $item_embedding = array_fill(0, self::EMBEDDING_VECTOR_SIZE, 0);
+            foreach ($terms as [$term_id, $term]) {
+                if (array_key_exists($term_id, $term_embeddings)) {
+                    $item_embedding = LinearAlgebra::add($item_embedding,
+                        $term_embeddings[$term_id]);
+                }
+            }
+            $updated_item_embeddings[$item_id] = [$item_embedding, $group_id];
+            if (array_key_exists($item_id, $item_embeddings)) {
+                unset($item_embeddings[$item_id]);
+            }
+        }
+        foreach ($item_embeddings as $item_id => [$embedding, $parent_id]) {
+            $updated_item_embeddings[$item_id] = [$embedding, $parent_id];
+        }
+        foreach ($updated_item_embeddings as $item_id => $embedding) {
+            $updated_item_embeddings[$item_id][0] = LinearAlgebra::normalize(
+                $updated_item_embeddings[$item_id][0]);
         }
-        return $num_items;
+        $delete_sql = "DELETE FROM RECOMMENDATION_ITEM_EMBEDDING" .
+            " WHERE ITEM_TYPE = ?";
+        $db->execute($delete_sql, [C\THREAD_RECOMMENDATION]);
+        $base_insert_sql = "INSERT INTO RECOMMENDATION_ITEM_EMBEDDING VALUES ";
+        $insert_sql = $base_insert_sql;
+        $comma = "";
+        $insert_count = 0;
+        $item_type = C\THREAD_RECOMMENDATION;
+        foreach ($updated_item_embeddings as
+            $item_id => [$embedding, $parent_id]) {
+            $serialized_embedding = serialize($embedding);
+            $insert_sql .= "$comma($item_id, $item_type," .
+                " '$serialized_embedding', $parent_id)";
+            $comma = ",";
+            $insert_count++;
+            if ($insert_count == self::BATCH_SQL_INSERT_NUM) {
+                $insert_sql = $db->insertIgnore($insert_sql);
+                $db->execute($insert_sql);
+                $insert_count = 0;
+                $comma = "";
+                $insert_sql = $base_insert_sql;
+            }
+        }
+        if ($insert_count > 0) {
+            $insert_sql = $db->insertIgnore($insert_sql);
+            $db->execute($insert_sql);
+        }
+        return $updated_item_embeddings;
     }
     /**
-     * Computes the number of users
-     * @return int number of users
+     * Computes user embeddings from the embeddings of the items for which a
+     * user has impressions in ITEM_IMPRESSION_SUMMARY for the UPDATE_PERIOD
+     *
+     * @param array $item_embeddings embedding vectors of items
+     * @return array [$item_user_embeddings, $user_items] user embeddings for
+     * items and the ids of the items each user has impressions for
      */
-    public function numberUsers()
+    public function computeItemUserEmbeddings($item_embeddings)
     {
-        $results =
-            $this->db->execute("SELECT COUNT(*) AS NUM_USERS FROM USERS");
-        $num_users = 0;
-        if ($row = $this->db->fetchArray($results)) {
-            $num_users = $row['NUM_USERS'];
+        $db = $this->db;
+        $db_list_function = in_array($db->to_upper_dbms, ["SQLITE3", "MYSQL"]) ?
+            "GROUP_CONCAT" : "STRING_AGG";
+        $timestamp = floor(time() / self::UPDATE_PERIOD ) * self::UPDATE_PERIOD;
+        $condition = "ITEM_TYPE = ? AND USER_ID <> 2 AND" .
+            " ((UPDATE_PERIOD = ? AND UPDATE_TIMESTAMP = ?) OR" .
+            " (UPDATE_PERIOD = ?))";
+        $impression_sql = "SELECT USER_ID, $db_list_function(ITEM_ID, ',') AS" .
+            " ITEM_IDS FROM ITEM_IMPRESSION_SUMMARY WHERE $condition" .
+            " GROUP BY USER_ID";
+        $results = $db->execute($impression_sql,
+            [C\THREAD_IMPRESSION, self::UPDATE_PERIOD, $timestamp,
+                C\MOST_RECENT_VIEW]);
+        $item_user_embeddings = [];
+        $user_items = [];
+        while ($row = $db->fetchArray($results)) {
+            $user_id = $row['USER_ID'];
+            $item_ids = explode(",", $row['ITEM_IDS']);
+            $item_ids = array_unique($item_ids);
+            $item_user_embeddings[$user_id] = array_fill(0,
+                self::EMBEDDING_VECTOR_SIZE, 0);
+            $user_items[$user_id] = [];
+            foreach ($item_ids as $item_id) {
+                if (array_key_exists($item_id, $item_embeddings)) {
+                    $item_user_embeddings[$user_id] = LinearAlgebra::add(
+                        $item_user_embeddings[$user_id],
+                        $item_embeddings[$item_id][0]);
+                    $user_items[$user_id][] = $item_id;
+                }
+            }
+            $item_user_embeddings[$user_id] = LinearAlgebra::normalize(
+                $item_user_embeddings[$user_id]);
         }
-        return $num_users;
+        return [$item_user_embeddings, $user_items];
     }
     /**
-     * Computes the term frequencies for individual items (posts) in groups
-     * feeds. That is, for each item in each group for each term in that
-     * item compute the number of times it appears in that item.
+     * Computes item recommendations for users based on the cosine similarity
+     * between user embeddings and item embeddings. Recommendations are only
+     * calculated for items the user has not interacted with yet and that
+     * belong to groups of which the user is already a member
+     *
+     * @param array $item_embeddings embeddings vectors for items
+     * @param array $item_user_embeddings embeddings vectors for user
+     * @param array $user_items items id for user in impression table
+     * @return array $user_groups group ids where the user is a member
      */
-    public function computeItemTermFrequencies()
+    public function computeItemUserRecommendations($item_embeddings,
+        $item_user_embeddings, $user_items)
     {
+        L\crawlLog("...Computing User Item Similarity Scores.");
         $db = $this->db;
-        $group_item_sql = "SELECT ID AS ITEM_ID, TITLE, DESCRIPTION ".
-            "FROM GROUP_ITEM ".
-            "WHERE LOWER(TITLE) NOT LIKE '%page%' " .
-            "ORDER BY PUBDATE DESC " . $db->limitOffset(self::MAX_GROUP_ITEMS);
-        $results = $db->execute($group_item_sql);
-        $base_sql = "INSERT INTO ITEM_TERM_FREQUENCY VALUES";
-        $insert_sql = $base_sql;
+        $db_list_function = in_array($db->to_upper_dbms, ["SQLITE3", "MYSQL"]) ?
+            "GROUP_CONCAT" : "STRING_AGG";
+        $user_group_sql = "SELECT USER_ID, $db_list_function(GROUP_ID, ',')" .
+            " AS GROUP_IDS FROM USER_GROUP GROUP BY USER_ID";
+        $results = $db->execute($user_group_sql);
+        $user_groups = [];
+        while ($row = $db->fetchArray($results)) {
+            $user_id = $row['USER_ID'];
+            $group_ids = explode(",", $row['GROUP_IDS']);
+            $user_groups[$user_id] = $group_ids;
+        }
+        $item_user_recommendations = [];
+        foreach ($item_user_embeddings as $user_id => $embedding) {
+            if (array_key_exists($user_id, $user_groups)) {
+                $user_item = [];
+                if (array_key_exists($user_id, $user_items)) {
+                    $user_item = $user_items[$user_id];
+                }
+                $user_group = [];
+                if (array_key_exists($user_id, $user_groups)) {
+                    $user_group = $user_groups[$user_id];
+                }
+                foreach ($item_embeddings as
+                    $item_id => [$item_embedding, $parent_id]) {
+                    if (in_array($item_id, $user_item) ||
+                        !in_array($parent_id, $user_group)) {
+                        continue;
+                    }
+                    $similarity = LinearAlgebra::similarity(
+                        $item_embedding, $embedding);
+                    $item_user_recommendations[] = [$user_id,
+                        $item_id, $similarity];
+                }
+            }
+        }
+        $delete_sql = "DELETE FROM GROUP_ITEM_RECOMMENDATION WHERE" .
+            " ITEM_TYPE = ?";
+        $db->execute($delete_sql, [C\THREAD_RECOMMENDATION]);
+        $base_insert_sql = "INSERT INTO GROUP_ITEM_RECOMMENDATION VALUES ";
+        $insert_sql = $base_insert_sql;
         $comma = "";
         $insert_count = 0;
-        L\crawlLog("...Computing Item Term Frequencies");
-        $i = 0;
-        while ($item = $db->fetchArray($results)) {
-            $term_frequencies = $this->termCount(
-                $item['TITLE'] . " " . $item['DESCRIPTION']);
-            foreach ($term_frequencies as $term => $frequency) {
-                $log_freq = log($frequency, 10) + 1;
-                $insert_sql .= "$comma ({$item['ITEM_ID']}, '" .
-                     floor(bindec(str_replace(" ", "", L\toBinString(
-                        hash("crc32b", $term, true))))/2) .
-                        "', $frequency, $log_freq)";
-                $comma = ",";
-                $insert_count++;
-                L\crawlTimeoutLog("...%s item term frequencies so far",
-                    $i++);
-                if ($insert_count > self::BATCH_SQL_INSERT_NUM) {
-                    $insert_ignore_sql = $db->insertIgnore($insert_sql);
-                    $db->execute($insert_ignore_sql);
-                    $insert_sql = $base_sql;
-                    $insert_count = 0;
-                    $comma = "";
-                }
+        $item_type = C\THREAD_RECOMMENDATION;
+        foreach ($item_user_recommendations as $recommendation) {
+            $insert_sql .= "$comma({$recommendation[0]}, {$recommendation[1]}" .
+                ", $item_type, {$recommendation[2]}, {$this->update_time})";
+            $comma = ",";
+            $insert_count++;
+            if ($insert_count == self::BATCH_SQL_INSERT_NUM) {
+                $insert_sql = $db->insertIgnore($insert_sql);
+                $db->execute($insert_sql);
+                $insert_count = 0;
+                $comma = "";
+                $insert_sql = $base_insert_sql;
             }
         }
         if ($insert_count > 0) {
-            $insert_ignore_sql = $db->insertIgnore($insert_sql);
-            $db->execute($insert_ignore_sql);
+            $insert_sql = $db->insertIgnore($insert_sql);
+            $db->execute($insert_sql);
         }
+        return $user_groups;
     }
     /**
-     * Calculates term => frequency pairs for all terms in a supplied string
-     * @param string $record string of terms
-     * @return array $term_frequencies associative array term => count
-     */
-    public static function termCount($record)
-    {
-        $terms = explode(" ", $record);
-        $term_frequencies = array_count_values($terms);
-        return $term_frequencies;
-    }
-    /**
-     * Calculates the term frequencies for users. For each post of the user,
-     * how often the user has seen a post with that term
+     * Computes the group embeddings using the item embeddings for the items in
+     * a group. Additionally fetches the existing group embeddings from database
+     * and updates them if the item embeddings are updated
+     *
+     * @param array $item_embeddings embedding for the items
+     * @return array $updated_group_embeddings containing embeddings for groups
      */
-    public function computeUserTermFrequencies()
+    public function computeGroupEmbeddings($item_embeddings)
     {
         $db = $this->db;
-        $sql = "SELECT II.USER_ID AS UID," .
-            "COUNT(*) AS FREQUENCY, IWF.TERM_ID AS TID ".
-            "FROM ITEM_TERM_FREQUENCY IWF, ITEM_IMPRESSION II ".
-            "WHERE IWF.ITEM_ID = II.ITEM_ID ".
-            "GROUP BY II.USER_ID, IWF.TERM_ID";
-        $results = $db->execute($sql);
-        $base_insert_sql = "INSERT INTO USER_TERM_FREQUENCY VALUES ";
+        $sql = "SELECT * FROM RECOMMENDATION_ITEM_EMBEDDING" .
+            " WHERE ITEM_TYPE = ?";
+        $results = $db->execute($sql, [C\GROUP_RECOMMENDATION]);
+        $group_embeddings = [];
+        while ($row = $db->fetchArray($results)) {
+            $group_embeddings[$row['ID']] = unserialize($row['VECTOR']);
+        }
+        $updated_group_embeddings = [];
+        foreach ($item_embeddings as $item_id => [$embedding, $parent_id]) {
+            if (array_key_exists($parent_id, $updated_group_embeddings)) {
+                $updated_group_embeddings[$parent_id] = LinearAlgebra::add(
+                    $embedding, $updated_group_embeddings[$parent_id]);
+            } else {
+                $updated_group_embeddings[$parent_id] = $embedding;
+            }
+        }
+        foreach ($updated_group_embeddings as $group_id => $embedding) {
+            $embedding = LinearAlgebra::normalize($embedding);
+            if (array_key_exists($group_id, $group_embeddings)) {
+                $embedding = LinearAlgebra::add($embedding,
+                    $group_embeddings[$group_id]);
+                $embedding = LinearAlgebra::normalize($embedding);
+                unset($group_embeddings[$group_id]);
+            }
+            $updated_group_embeddings[$group_id] = $embedding;
+        }
+        foreach ($group_embeddings as $group_id => $embedding) {
+            $updated_group_embeddings[$group_id] = $embedding;
+        }
+        $base_insert_sql = "INSERT INTO RECOMMENDATION_ITEM_EMBEDDING VALUES ";
         $insert_sql = $base_insert_sql;
-        $insert_count = 0;
-        L\crawlLog("...Computing User Term Frequencies");
-        $i = 0;
         $comma = "";
-        while($row = $db->fetchArray($results)) {
-            $uid = $row['UID'];
-            $wid = $row['TID'];
-            $log_freq = log($row['FREQUENCY'], 10) + 1.0;
-            $insert_sql .= "$comma ({$row['UID']}, {$row['TID']},".
-                "{$row['FREQUENCY']}, $log_freq)";
+        $insert_count = 0;
+        $item_type = C\GROUP_RECOMMENDATION;
+        foreach ($updated_group_embeddings as $group_id => $embedding) {
+            $serialized_embedding = serialize($embedding);
+            $insert_sql .= "$comma($group_id, $item_type," .
+                " '$serialized_embedding', $group_id)";
             $comma = ",";
             $insert_count++;
-            L\crawlTimeoutLog("...%s user term frequencies so far",
-                $i++);
-            if ($insert_count > self::BATCH_SQL_INSERT_NUM) {
-                $insert_ignore_sql = $db->insertIgnore($insert_sql);
-                $db->execute($insert_ignore_sql);
-                $insert_sql = $base_insert_sql;
+            if ($insert_count == self::BATCH_SQL_INSERT_NUM) {
+                $insert_sql = $db->insertIgnore($insert_sql);
+                $db->execute($insert_sql);
                 $insert_count = 0;
                 $comma = "";
+                $insert_sql = $base_insert_sql;
             }
         }
         if ($insert_count > 0) {
-            $insert_ignore_sql = $db->insertIgnore($insert_sql);
-            $db->execute($insert_ignore_sql);
+            $insert_sql = $db->insertIgnore($insert_sql);
+            $db->execute($insert_sql);
         }
+        return $updated_group_embeddings;
     }
     /**
-     * Computes inverse document frequencies for each term for each user and
-     * for each item. That is, for a particular term, it will compute
-     * the number of times a user used that term in a post/the number of
-     * posts by that user and take the log of the result. For items, the
-     * idea is similar, for each thread, one calculates the number of posts
-     * that the term appeared in/the total number of posts in the thread and
-     * take the log of the result.
+     * Computes user embeddings from the embeddings of the groups for which
+     * the user has an impression in the ITEM_IMPRESSION_SUMMARY table, either
+     * for the current UPDATE_PERIOD or as a most recent view
      *
-     * @param int $number_items number of items
-     * @param int $number_users number of  users
+     * @param array $group_embeddings embedding vectors of groups
+     * @return array [$group_user_embeddings, $user_groups] user embeddings
+     * for groups and, per user, ids of viewed groups that have an embedding
      */
-    public function computeUserItemIdf($number_items, $number_users)
+    public function computeGroupUserEmbeddings($group_embeddings)
     {
         $db = $this->db;
-        $terms_sql = "SELECT DISTINCT TERM_ID, SUM(FREQUENCY) AS FREQ ".
-            "FROM ITEM_TERM_FREQUENCY GROUP BY TERM_ID ".
-            "ORDER BY FREQ DESC " . $db->limitOffset(self::MAX_TERMS);
-        $results = $db->execute($terms_sql);
-        $num_items_term_sql = "SELECT COUNT(DISTINCT ITEM_ID)".
-            " AS NUM_ITEMS_TERM FROM ITEM_TERM_FREQUENCY ".
-            "WHERE TERM_ID = ? ";
-        $num_users_term_sql ="SELECT COUNT(DISTINCT USER_ID) ".
-            "AS NUM_USERS_TERM FROM USER_TERM_FREQUENCY ".
-            "WHERE TERM_ID = ? ";
-        $i = 0;
-        $item_idf =[];
-        $user_idf = [];
-        L\crawlLog("...Computing User Item IDF values.");
-        while($row = $db->fetchArray($results)) {
-            $term_id = $row['TERM_ID'];
-            /*
-                Number of groups having the required term
-            */
-            $num_items_results = $db->execute($num_items_term_sql, [$term_id]);
-            $row = $db->fetchArray($num_items_results);
-            $item_idf[$term_id] =
-                max(log($number_items/($row['NUM_ITEMS_TERM'] + 1), 10), 0);
-            /*
-                Number of users having the required term
-            */
-            $num_users_results = $db->execute($num_users_term_sql, [$term_id]);
-            $row = $db->fetchArray($num_users_results);
-            $user_idf[$term_id] =
-                max(log($number_users/($row['NUM_USERS_TERM'] + 1), 10), 0);
-            L\crawlTimeoutLog("...%s user item IDFs so far",
-                $i++);
-        }
-        $this->item_idf = $item_idf;
-        $this->user_idf = $user_idf;
+        $db_list_function = in_array($db->to_upper_dbms, ["SQLITE3", "MYSQL"]) ?
+            "GROUP_CONCAT" : "STRING_AGG";
+        $timestamp = floor(time() / self::UPDATE_PERIOD ) * self::UPDATE_PERIOD;
+        $condition = "ITEM_TYPE = ? AND USER_ID <> 2 AND" .
+            " ((UPDATE_PERIOD = ? AND UPDATE_TIMESTAMP = ?) OR" .
+            " (UPDATE_PERIOD = ?))";
+        $impression_sql = "SELECT USER_ID, $db_list_function(ITEM_ID, ',') AS" .
+            " ITEM_IDS FROM ITEM_IMPRESSION_SUMMARY WHERE $condition" .
+            " GROUP BY USER_ID";
+        $results = $db->execute($impression_sql,
+            [C\GROUP_IMPRESSION, self::UPDATE_PERIOD, $timestamp,
+                C\MOST_RECENT_VIEW]);
+        $group_user_embeddings = [];
+        $user_groups = [];
+        while ($row = $db->fetchArray($results)) {
+            $user_id = $row['USER_ID'];
+            $group_ids = explode(",", $row['ITEM_IDS']);
+            $group_ids = array_unique($group_ids);
+            $group_user_embeddings[$user_id] = array_fill(0,
+                self::EMBEDDING_VECTOR_SIZE, 0);
+            $user_groups[$user_id] = [];
+            foreach ($group_ids as $group_id) {
+                if (array_key_exists($group_id, $group_embeddings)) {
+                    $group_user_embeddings[$user_id] = LinearAlgebra::add(
+                        $group_user_embeddings[$user_id],
+                        $group_embeddings[$group_id]);
+                    $user_groups[$user_id][] = $group_id;
+                }
+            }
+            $group_user_embeddings[$user_id] = LinearAlgebra::normalize(
+                $group_user_embeddings[$user_id]);
+        }
+        return [$group_user_embeddings, $user_groups];
     }
     /**
-     * Calculates the product  TF * IDF for users based on the
-     * results of @see computeUserItemIdf and @see computeUserTermFrequencies
+     * Computes group recommendations for users based on the cosine similarity
+     * between user embeddings and group embeddings. Recommendations are
+     * calculated for groups which the user has not interacted with yet and
+     * of which they are not a member; invite-only groups are excluded
+     *
+     * @param array $group_embeddings embeddings vector for groups
+     * @param array $group_user_embeddings embeddings vector for users
+     * @param array $user_groups groups id for user having membership
+     * @param array $user_group_impression group ids which user has seen
      */
-    public function tfIdfUsers()
+    public function computeGroupUserRecommendations($group_embeddings,
+        $group_user_embeddings, $user_groups, $user_group_impression)
     {
-        L\crawlLog("...Computing TF*IDF scores for users.");
         $db = $this->db;
-        $user_idf = $this->user_idf;
-        $user_terms_sql = "SELECT TERM_ID, USER_ID, LOG_FREQUENCY ".
-            "FROM USER_TERM_FREQUENCY";
-        $base_insert_sql = "INSERT INTO USER_TERM_WEIGHTS VALUES ";
+        $invite_groups_sql = "SELECT GROUP_ID FROM SOCIAL_GROUPS" .
+            " WHERE REGISTER_TYPE = ?";
+        $results = $db->execute($invite_groups_sql, [C\INVITE_ONLY_JOIN]);
+        $exclude_group_ids = [];
+        while ($row = $db->fetchArray($results)) {
+            $exclude_group_ids[] = $row['GROUP_ID'];
+        }
+        $group_user_recommendations = [];
+        foreach ($group_user_embeddings as $user_id => $embedding) {
+            $user_group = $user_groups[$user_id];
+            $impression_group = $user_group_impression[$user_id];
+            foreach ($group_embeddings as $group_id => $group_embedding) {
+                if (in_array($group_id, $exclude_group_ids) ||
+                    in_array($group_id, $user_group) ||
+                    in_array($group_id, $impression_group)) {
+                    continue;
+                }
+                $similarity = LinearAlgebra::similarity($embedding,
+                    $group_embedding);
+                $group_user_recommendations[] = [$user_id, $group_id,
+                    $similarity];
+            }
+        }
+        $delete_sql = "DELETE FROM GROUP_ITEM_RECOMMENDATION WHERE" .
+            " ITEM_TYPE = ?";
+        $db->execute($delete_sql, [C\GROUP_RECOMMENDATION]);
+        $base_insert_sql = "INSERT INTO GROUP_ITEM_RECOMMENDATION VALUES ";
         $insert_sql = $base_insert_sql;
-        $results = $db->execute($user_terms_sql);
-        $insert_count = 0;
-        $i = 0;
         $comma = "";
-        while($row = $db->fetchArray($results)) {
-            L\crawlTimeoutLog("...%s user tf-idfs so far",
-                $i++);
-            if (!empty($user_idf[$row['TERM_ID']])) {
-                $insert_sql .= "$comma ({$row['TERM_ID']}, {$row['USER_ID']}, ".
-                    ($row["LOG_FREQUENCY"] * $user_idf[$row['TERM_ID']]) . ")";
-                $insert_count++;
-                $comma = ",";
-            }
-            if ($insert_count > self::BATCH_SQL_INSERT_NUM) {
-                $insert_ignore_sql = $db->insertIgnore($insert_sql);
-                $db->execute($insert_ignore_sql);
-                $insert_sql = $base_insert_sql;
+        $insert_count = 0;
+        $item_type = C\GROUP_RECOMMENDATION;
+        foreach ($group_user_recommendations as $recommendation) {
+            $insert_sql .= "$comma({$recommendation[0]}, {$recommendation[1]}" .
+                ", $item_type, {$recommendation[2]}, {$this->update_time})";
+            $comma = ",";
+            $insert_count++;
+            if ($insert_count == self::BATCH_SQL_INSERT_NUM) {
+                $insert_sql = $db->insertIgnore($insert_sql);
+                $db->execute($insert_sql);
                 $insert_count = 0;
                 $comma = "";
+                $insert_sql = $base_insert_sql;
             }
         }
         if ($insert_count > 0) {
-            $insert_ignore_sql = $db->insertIgnore($insert_sql);
-            $db->execute($insert_ignore_sql);
+            $insert_sql = $db->insertIgnore($insert_sql);
+            $db->execute($insert_sql);
+        }
+    }
+    /**
+     * Manages the whole process of computing wiki resource recommendations
+     * for users. Makes a series of calls to handle parts of this computation
+     * before synthesizing the result
+     */
+    public function computeWikiResourceRecommendations()
+    {
+        L\crawlLog("...Start fetching descriptions for the wiki page " .
+            "resources...");
+        [$descriptions, $resource_metadata] = $this->
+            getWikiResourceDescriptions();
+        L\crawlLog("...Finished fetching descriptions for the wiki page " .
+            "resources...");
+        $item_embeddings = $this->getWikiResourceEmbeddings();
+        L\crawlLog("...Start computing wiki term embeddings...");
+        [$term_embeddings, $resource_terms, $meta_details_terms] =
+            $this->computeWikiTermEmbeddings($descriptions, $item_embeddings);
+        L\crawlLog("...Finished computing wiki term embeddings...");
+        L\crawlLog("...Start computing wiki resource embeddings...");
+        $item_embeddings = $this->computeWikiResourceEmbeddings($resource_terms,
+            $meta_details_terms, $term_embeddings, $item_embeddings);
+        L\crawlLog("...Finished computing wiki resource embeddings...");
+        L\crawlLog("...Start computing wiki user embeddings...");
+        [$user_embeddings, $user_items] = $this->computeWikiUserEmbeddings(
+            $item_embeddings);
+        L\crawlLog("...Finished computing wiki user embeddings...");
+        L\crawlLog("...Start computing wiki resource recommendations...");
+        $this->computeWikiUserRecommendations($item_embeddings,
+            $user_embeddings, $user_items, $resource_metadata);
+        L\crawlLog("...Done computing wiki resource recommendations...");
+    }
+    /**
+     * Fetches the description for the eligible wiki resources having the root
+     * folder path captured in RECOMMENDATION_FILE
+     *
+     * @return array [$descriptions, $resource_metadata] of resources
+     */
+    public function getWikiResourceDescriptions()
+    {
+        $thumb_folders = explode("\n",
+            file_get_contents(self::RECOMMENDATION_FILE));
+        $thumb_folders = array_unique($thumb_folders);
+        $descriptions = [];
+        $resource_metadata = [];
+        foreach ($thumb_folders as $thumb_folder) {
+            list($group_id, $page_id, $folder) = explode("###", $thumb_folder);
+            $folder = trim($folder, " \n\r\t\v\x00");
+            $files = $this->getDescriptionFiles($folder);
+            foreach ($files as $file) {
+                $resource_file = substr($file, 0, strlen($file) - 4);
+                $resource_id = unpack('n', md5($group_id . $page_id .
+                    $resource_file, true))[1];
+                if (array_key_exists($resource_id, $descriptions)) {
+                    continue;
+                }
+                $description = file_get_contents($file);
+                if (strcmp($description, "Description search sources".
+                    " failed to find description.") == 0) {
+                        continue;
+                }
+                $descriptions[$resource_id] = $description;
+                $resource_metadata[$resource_id] = [$group_id,
+                    $page_id, $resource_file];
+            }
+        }
+        return [$descriptions, $resource_metadata];
+    }
+    /**
+     * Returns all the resource description files in a given thumb folder and
+     * also recursively scan through subfolders if any
+     *
+     * @param string $thumb_folder path of a thumb folder
+     * @return array $files list of description files path in given folder
+     */
+    public function getDescriptionFiles($thumb_folder)
+    {
+        if (!is_dir($thumb_folder)) {
+            return [];
+        }
+        $exclude_files = [".", "..", "needs_description.txt",
+            "subfolder_counts.txt", ".DS_Store"];
+        $files = scandir($thumb_folder);
+        $file_paths = [];
+        foreach ($files as $file) {
+            if (in_array($file, $exclude_files)) {
+                continue;
+            }
+            if (is_dir($thumb_folder . "/" . $file)) {
+                L\crawlLog("...This is a folder, looking files inside it...");
+                $sub_file_paths = $this->getDescriptionFiles(
+                    $thumb_folder . "/" . $file);
+                $file_paths = array_merge($file_paths, $sub_file_paths);
+            } else {
+                $file_paths[] = $thumb_folder . "/$file";
+            }
+        }
+        return $file_paths;
+    }
+    /**
+     * Retrieves wiki resources embeddings from the database
+     *
+     * @return array $item_embeddings embedding vector for resources
+     */
+    public function getWikiResourceEmbeddings()
+    {
+        $db = $this->db;
+        $sql = "SELECT * FROM RECOMMENDATION_ITEM_EMBEDDING WHERE" .
+            " ITEM_TYPE = ?";
+        $results = $db->execute($sql, [C\RESOURCE_RECOMMENDATION]);
+        $item_embeddings = [];
+        while ($row = $db->fetchArray($results)) {
+            $item_embeddings[$row['ID']] = unserialize($row['VECTOR']);
         }
+        return $item_embeddings;
     }
     /**
-    * Calculates the product  TF * IDF for users based on the
-    * results of @see computeUserItemIdf and @see computeItemTermFrequencies
+     * Computes the embedding for new terms in the description of wiki
+     * resources and updates the embedding of existing terms using Hash2Vec
+     * approach
+     *
+     * @param array $descriptions of resources
+     * @param array $item_embeddings embedding vector for resources
+     * @return array [$term_embeddings, $resource_terms, $meta_details_terms]
+     * first maps term id to its embedding vector, second maps resource id to
+     * clean terms of its longer description lines, third maps resource id to
+     * clean terms of lines with fewer than CONTEXT_WINDOW_LENGTH terms
      */
-    public function tfIdfItems()
+    public function computeWikiTermEmbeddings($descriptions, $item_embeddings)
     {
-        L\crawlLog("...Computing TF*IDF scores for items.");
         $db = $this->db;
-        $item_idf = $this->item_idf;
-        $item_terms_sql = "SELECT TERM_ID, ITEM_ID, LOG_FREQUENCY ".
-            "FROM ITEM_TERM_FREQUENCY";
-        $base_insert_sql = "INSERT INTO ITEM_TERM_WEIGHTS VALUES ";
+        $select_sql = "SELECT * FROM RECOMMENDATION_TERM_EMBEDDING WHERE" .
+            " ITEM_TYPE = ?";
+        $results = $db->execute($select_sql, [C\RESOURCE_RECOMMENDATION]);
+        $term_embeddings = [];
+        $resource_terms = [];
+        $meta_details_terms = [];
+        while ($row = $db->fetchArray($results)) {
+            $term_embeddings[$row['ID']] = unserialize($row['VECTOR']);
+        }
+        $context_distance_sum = (self::CONTEXT_WINDOW_LENGTH *
+            (self::CONTEXT_WINDOW_LENGTH + 1)) / 2.0;
+        $mean = $context_distance_sum / self::CONTEXT_WINDOW_LENGTH;
+        $carry = 0.0;
+        for ($i = 1; $i <= self::CONTEXT_WINDOW_LENGTH; $i++) {
+            $difference = $i - $mean;
+            $carry += $difference * $difference;
+        }
+        $std_deviation = sqrt($carry / self::CONTEXT_WINDOW_LENGTH);
+        foreach ($descriptions as $resource_id => $description) {
+            $resource_terms[$resource_id] = [];
+            $meta_details_terms[$resource_id] = [];
+            $description_parts = explode("\n", $description);
+            foreach ($description_parts as $description_part) {
+                $description_part = mb_strtolower($description_part);
+                $terms = $this->cleanRemoveStopWords($description_part, true);
+                if (count($terms) < self::CONTEXT_WINDOW_LENGTH) {
+                    $meta_details_terms[$resource_id] = array_merge($terms,
+                        $meta_details_terms[$resource_id]);
+                } else {
+                    $resource_terms[$resource_id] = array_merge($terms,
+                        $resource_terms[$resource_id]);
+                }
+            }
+            if (array_key_exists($resource_id, $item_embeddings)) {
+                continue;
+            }
+            if (count($resource_terms[$resource_id]) > 0) {
+                $terms = $resource_terms[$resource_id];
+                for ($i = 0; $i < count($terms); $i++) {
+                    [$term_id, $term] = $terms[$i];
+                    $term_hash = $term_id % self::EMBEDDING_VECTOR_SIZE;
+                    if (!array_key_exists($term_id, $term_embeddings)) {
+                        $term_embeddings[$term_id] = array_fill(0,
+                            self::EMBEDDING_VECTOR_SIZE, 0);
+                    }
+                    for ($j = $i - 1; $j >= 0 &&
+                        $j >= $i - self::CONTEXT_WINDOW_LENGTH; $j--) {
+                        [$context_term_id, $context_term] = $terms[$j];
+                        $weight = exp(-1 * pow(($i - $j) / $std_deviation, 2));
+                        $context_term_hash = $context_term_id %
+                            self::EMBEDDING_VECTOR_SIZE;
+                        $term_embeddings[$term_id][$context_term_hash] +=
+                            $weight;
+                        $term_embeddings[$context_term_id][$term_hash] +=
+                            $weight;
+                    }
+                }
+            }
+        }
+        $delete_sql = "DELETE FROM RECOMMENDATION_TERM_EMBEDDING WHERE" .
+            " ITEM_TYPE = ?";
+        $db->execute($delete_sql, [C\RESOURCE_RECOMMENDATION]);
+        $base_insert_sql = "INSERT INTO RECOMMENDATION_TERM_EMBEDDING VALUES ";
         $insert_sql = $base_insert_sql;
-        $results = $db->execute($item_terms_sql);
-        $insert_count = 0;
-        $i = 0;
         $comma = "";
-        while($row = $db->fetchArray($results)) {
-            L\crawlTimeoutLog("...%s term tf-idfs so far",
-                $i++);
-            if (!empty($item_idf[$row['TERM_ID']])) {
-                $insert_sql .= "$comma ({$row['TERM_ID']}, {$row['ITEM_ID']}, ".
-                    ($row["LOG_FREQUENCY"] * $item_idf[$row['TERM_ID']]) . ")";
-                $insert_count++;
-                $comma = ",";
-            }
-            if ($insert_count > self::BATCH_SQL_INSERT_NUM) {
-                $insert_ignore_sql = $db->insertIgnore($insert_sql);
-                $db->execute($insert_ignore_sql);
-                $insert_sql = $base_insert_sql;
+        $insert_count = 0;
+        $item_type = C\RESOURCE_RECOMMENDATION;
+        foreach ($term_embeddings as $term_id => $embedding) {
+            $serialized_embedding = serialize($embedding);
+            $insert_sql .= "$comma($term_id, $item_type, " .
+                "'$serialized_embedding')";
+            $comma = ",";
+            $insert_count++;
+            if ($insert_count == self::BATCH_SQL_INSERT_NUM) {
+                $insert_sql = $db->insertIgnore($insert_sql);
+                $db->execute($insert_sql);
                 $insert_count = 0;
                 $comma = "";
+                $insert_sql = $base_insert_sql;
             }
         }
         if ($insert_count > 0) {
-            $insert_ignore_sql = $db->insertIgnore($insert_sql);
-            $db->execute($insert_ignore_sql);
+            $insert_sql = $db->insertIgnore($insert_sql);
+            $db->execute($insert_sql);
+        }
+        return [$term_embeddings, $resource_terms, $meta_details_terms];
+    }
+    /**
+     * Splits the given text into terms, cleans each term by stripping its
+     * non-word characters, and removes stop terms in order to reduce the
+     * noise while calculating the embeddings
+     *
+     * Text is split on runs of whitespace, commas, slashes, periods,
+     * underscores and hyphens. An &rsquo; entity is handled before the
+     * non-word characters are stripped so that its name does not leak into
+     * the term (don&rsquo;t becomes dont rather than donrsquot).
+     *
+     * @param string $text which needs to be processed
+     * @param boolean $description_stop_word_flag whether to also remove
+     * words present in DESCRIPTION_STOP_WORDS
+     * @return array $term_ids list of [term_id, term] pairs where term_id is
+     * the first 16 bits of the raw md5 hash of the term read as an unsigned
+     * big-endian integer
+     */
+    public function cleanRemoveStopWords($text,
+        $description_stop_word_flag = false)
+    {
+        $raw_terms = preg_split("/[\s,\/\._-]+/", $text);
+        $terms = [];
+        foreach ($raw_terms as $term) {
+            // the entity must be replaced before \W is stripped: stripping
+            // first deletes the & and leaves "rsquo" glued into the term
+            $term = preg_replace("/&rsquo;?/", "'", $term);
+            // \W also matches single and double quote characters, so no
+            // separate quote removal is needed afterwards
+            $term = preg_replace("/\W/", "", $term);
+            if (strlen($term) > 0) {
+                $terms[] = $term;
+            }
+        }
+        $text_locale = L\guessLocaleFromString($text);
+        $stop_obj = PhraseParser::getTokenizer($text_locale);
+        // not every tokenizer provides a stop term remover
+        if ($stop_obj && method_exists($stop_obj, "stoptermsRemover")) {
+            $terms = $stop_obj->stoptermsRemover($terms);
+        }
+        $term_ids = [];
+        foreach ($terms as $term) {
+            if ($description_stop_word_flag &&
+                in_array($term, self::DESCRIPTION_STOP_WORDS, true)) {
+                continue;
+            }
+            // 'n' unpacks the first two bytes of the raw digest
+            $term_id = unpack('n', md5($term, true))[1];
+            $term_ids[] = [$term_id, $term];
         }
+        return $term_ids;
     }
     /**
-     * Computes the cosine similarity between users and particular threads
-     * based on TF*IDF scores and inserts the result into USER_ITEM_SIMILARITY
+     * Computes the embeddings for wiki page resources using the calculated
+     * term embeddings and add the metadata details separately to the embeddings
+     *
+     * @param array $resource_terms of processed terms from resource description
+     * @param array $meta_details_terms of raw resource descriptions
+     * @param array $term_embeddings of term embeddings
+     * @param array $item_embeddings of existing wiki resource embeddings
+     * @return array $updated_item_embeddings array of updated wiki resource
+     * embeddings
      */
-    public function computeUserItemSimilarity()
+    public function computeWikiResourceEmbeddings($resource_terms,
+        $meta_details_terms, $term_embeddings, $item_embeddings)
     {
-        L\crawlLog("...Computing User Item Similarity Scores.");
         $db = $this->db;
-        $similarity_parts_sql =
-            "SELECT SUM(UTW.WEIGHT * ITW.WEIGHT) AS THREAD_DOT_USER, ".
-            "SUM(UTW.WEIGHT * UTW.WEIGHT) AS USER_MAG," .
-            "SUM(ITW.WEIGHT * ITW.WEIGHT) AS ITEM_MAG," .
-            "GI.PARENT_ID AS THREAD_ID, UTW.USER_ID AS USER_ID ".
-            "FROM ITEM_TERM_WEIGHTS ITW, USER_TERM_WEIGHTS UTW, GROUP_ITEM GI ".
-            "WHERE GI.ID = ITW.ITEM_ID AND UTW.TERM_ID=ITW.TERM_ID " .
-            "GROUP BY UTW.USER_ID, GI.PARENT_ID";
-        $similarity_parts_result = $db->execute($similarity_parts_sql);
-        //used to check if belong to group
-        $member_info_sql = "SELECT GI.GROUP_ID FROM ".
-            "USER_GROUP UG, GROUP_ITEM GI WHERE ".
-            "UG.GROUP_ID = GI.GROUP_ID AND LOWER(GI.TITLE) ".
-            "NOT LIKE '%page%' AND UG.USER_ID = ? AND  GI.ID = ?";
-        //used to check if can join group easily
-        $register_info_sql = "SELECT G.GROUP_ID, G.REGISTER_TYPE AS REGISTER ".
-            "FROM SOCIAL_GROUPS G, GROUP_ITEM GI WHERE ".
-            "G.GROUP_ID = GI.GROUP_ID AND GI.ID = ? ";
-        $insert_count = 0;
-        $base_sql = "INSERT INTO USER_ITEM_SIMILARITY VALUES ";
-        $insert_sql = $base_sql;
-        $comma = "";
-        $i = 0;
-        while($row = $db->fetchArray($similarity_parts_result)) {
-            list($item_dot_user, $user_magnitude,
-                $item_magnitude, $thread_id, $user_id,) = array_values($row);
-            $user_magnitude = sqrt($user_magnitude);
-            $item_magnitude = sqrt($item_magnitude);
-            $add_record = false;
-            if ($result = $db->execute($member_info_sql, [$user_id,
-                $thread_id])){
-                $info_row = $db->fetchArray($result);
-                if (!empty($info_row) && $item_dot_user > 0) {
-                    $add_record = true;
-                    $group_member = 1;
-                } else {
-                    $access_results =
-                        $db->execute($register_info_sql, [$thread_id]);
-                    if ($access_results &&
-                        $access_row = $db->fetchArray($access_results)) {
-                        if (in_array($access_row['REGISTER'],
-                            [C\PUBLIC_BROWSE_REQUEST_JOIN, C\PUBLIC_JOIN])) {
-                            $add_record = true;
-                            $group_member = 0;
-                        }
-                    }
+        // sum the embeddings of the known terms of each described resource;
+        // terms without an entry in $term_embeddings contribute nothing
+        $updated_item_embeddings = [];
+        foreach ($resource_terms as $resource_id => $terms) {
+            $item_embedding = array_fill(0, self::EMBEDDING_VECTOR_SIZE, 0);
+            foreach ($terms as [$term_id, $term]) {
+                if (array_key_exists($term_id, $term_embeddings)) {
+                    $item_embedding = LinearAlgebra::add($item_embedding,
+                        $term_embeddings[$term_id]);
                 }
             }
-            L\crawlTimeoutLog("...%s similarity scores so far", $i++);
-            if ($add_record) {
-                $cos_sim = floatval($item_dot_user)
-                    /floatval($user_magnitude * $item_magnitude);
-                $insert_count++;
-                $insert_sql .= "$comma ($user_id, $thread_id, $cos_sim,
-                    $group_member)";
-                $comma = ",";
-                if ($insert_count > self::BATCH_SQL_INSERT_NUM) {
-                    $insert_ignore_sql = $db->insertIgnore($insert_sql);
-                    $db->execute($insert_ignore_sql);
-                    $insert_sql = $base_sql;
-                    $insert_count = 0;
-                    $comma = "";
+            $updated_item_embeddings[$resource_id] = $item_embedding;
+            // a recomputed embedding supersedes the one passed in for the
+            // same resource
+            if (array_key_exists($resource_id, $item_embeddings)) {
+                unset($item_embeddings[$resource_id]);
+            }
+        }
+        // carry over the passed in embeddings of resources that were not
+        // recomputed above
+        foreach ($item_embeddings as $resource_id => $embedding) {
+            $updated_item_embeddings[$resource_id] = $embedding;
+        }
+        // metadata terms are not looked up in $term_embeddings: each
+        // occurrence adds one to the component indexed by the term's id
+        foreach ($meta_details_terms as $resource_id => $meta_terms) {
+            if (!array_key_exists($resource_id, $updated_item_embeddings)) {
+                $updated_item_embeddings[$resource_id] = array_fill(0,
+                    self::EMBEDDING_VECTOR_SIZE, 0);
+            }
+            foreach ($meta_terms as [$meta_term_id, $meta_term]) {
+                // single character metadata terms are ignored
+                if (strlen($meta_term) <= 1) {
+                    continue;
                 }
+                // NOTE(review): assumes $meta_term_id is an existing index
+                // of the embedding vector (< EMBEDDING_VECTOR_SIZE) --
+                // confirm against the code that builds $meta_details_terms
+                $updated_item_embeddings[$resource_id][$meta_term_id] += 1.;
+            }
+        }
+        // every embedding is passed through LinearAlgebra::normalize before
+        // being stored and returned
+        foreach ($updated_item_embeddings as $item_id => $embedding) {
+            $updated_item_embeddings[$item_id] = LinearAlgebra::normalize(
+                $embedding);
+        }
+        // the stored resource embeddings are replaced wholesale by the new
+        // set rather than updated row by row
+        $delete_sql = "DELETE FROM RECOMMENDATION_ITEM_EMBEDDING WHERE" .
+            " ITEM_TYPE = ?";
+        $db->execute($delete_sql, [C\RESOURCE_RECOMMENDATION]);
+        $base_insert_sql = "INSERT INTO RECOMMENDATION_ITEM_EMBEDDING VALUES ";
+        $insert_sql = $base_insert_sql;
+        $comma = "";
+        $insert_count = 0;
+        $item_type = C\RESOURCE_RECOMMENDATION;
+        foreach ($updated_item_embeddings as $resource_id => $embedding) {
+            $serialized_embedding = serialize($embedding);
+            // the resource id is written as both the first and the last
+            // value of the row
+            $insert_sql .= "$comma($resource_id, $item_type," .
+                " '$serialized_embedding', $resource_id)";
+            $comma = ",";
+            $insert_count++;
+            // flush a full batch and start building the next statement
+            if ($insert_count == self::BATCH_SQL_INSERT_NUM) {
+                $insert_sql = $db->insertIgnore($insert_sql);
+                $db->execute($insert_sql);
+                $insert_count = 0;
+                $comma = "";
+                $insert_sql = $base_insert_sql;
             }
         }
         if ($insert_count > 0) {
-            $insert_ignore_sql = $db->insertIgnore($insert_sql);
-            $db->execute($insert_ignore_sql);
+            // write out the final, partially filled batch
+            $insert_sql = $db->insertIgnore($insert_sql);
+            $db->execute($insert_sql);
+        }
+        return $updated_item_embeddings;
+    }
+    /**
+     * Builds one embedding per user from the wiki resources that user has
+     * viewed, as recorded in the ITEM_IMPRESSION_SUMMARY table for either
+     * the current update period or the most recent view period
+     *
+     * @param array $item_embeddings wiki page resource embeddings keyed by
+     * resource id
+     * @return array [$user_embeddings, $user_items] where $user_embeddings
+     * maps a user id to the result of LinearAlgebra::normalize on the sum
+     * of the embeddings of the resources the user viewed, and $user_items
+     * maps a user id to the ids of those resources
+     */
+    public function computeWikiUserEmbeddings($item_embeddings)
+    {
+        $db = $this->db;
+        // name of the aggregate used to join the item ids of a user
+        $aggregate = "STRING_AGG";
+        if (in_array($db->to_upper_dbms, ["SQLITE3", "MYSQL"])) {
+            $aggregate = "GROUP_CONCAT";
+        }
+        // start of the update period the current time falls in
+        $timestamp = floor(time() / self::UPDATE_PERIOD ) * self::UPDATE_PERIOD;
+        $impression_sql =
+            "SELECT USER_ID, {$aggregate}(ITEM_ID, ',') AS ITEM_IDS " .
+            "FROM ITEM_IMPRESSION_SUMMARY WHERE ITEM_TYPE = ? AND " .
+            "USER_ID <> 2 AND ((UPDATE_PERIOD = ? AND UPDATE_TIMESTAMP = ?) " .
+            "OR (UPDATE_PERIOD = ?)) GROUP BY USER_ID";
+        $bindings = [C\RESOURCE_IMPRESSION, self::UPDATE_PERIOD, $timestamp,
+            C\MOST_RECENT_VIEW];
+        $results = $db->execute($impression_sql, $bindings);
+        $user_embeddings = [];
+        $user_items = [];
+        while ($row = $db->fetchArray($results)) {
+            $user_id = $row['USER_ID'];
+            $embedding_sum = array_fill(0, self::EMBEDDING_VECTOR_SIZE, 0);
+            $viewed_items = [];
+            $candidate_ids = array_unique(explode(",", $row['ITEM_IDS']));
+            foreach ($candidate_ids as $candidate_id) {
+                // only resources that have an embedding can contribute
+                if (!array_key_exists($candidate_id, $item_embeddings)) {
+                    continue;
+                }
+                $embedding_sum = LinearAlgebra::add($embedding_sum,
+                    $item_embeddings[$candidate_id]);
+                $viewed_items[] = $candidate_id;
+            }
+            $user_embeddings[$user_id] = LinearAlgebra::normalize(
+                $embedding_sum);
+            $user_items[$user_id] = $viewed_items;
         }
+        return [$user_embeddings, $user_items];
     }
     /**
-     * Computes up to $max_recommendations item recommendations of the given
-     * type (thread or group) based on query which computes similarity score
-     * between a user and a given type.
-     * @param int $recommendation_type a config.php constant indicating the type
-     *      of recommendation to compute
-     * @param $similarity_sql query used to determine user similarity scores
-     *      should output triples: (user_id item_id rating)
-     * @param int $max_recommendations maximum number of recommendations to
-     *      compute per user
-     */
-    public function calculateSimilarityRecommendations($recommendation_type,
-        $similarity_sql, $max_recommendations)
+     * Computes the wiki resource recommendations based on cosine similarity
+     * between resource embeddings and user embeddings, and replaces the
+     * contents of the GROUP_RESOURCE_RECOMMENDATION table with them
+     *
+     * Resources a user has already viewed, and resources without an entry
+     * in $resource_metadata, are not recommended to that user.
+     *
+     * @param array $item_embeddings of wiki resources embeddings
+     * @param array $user_embeddings of users consumed wiki resources
+     * embeddings
+     * @param array $user_items of users consumed wiki resources
+     * @param array $resource_metadata maps a resource id to its
+     * [group_id, page_id, resource_path] triple
+     */
+    public function computeWikiUserRecommendations($item_embeddings,
+        $user_embeddings, $user_items, $resource_metadata)
     {
         $db = $this->db;
-        $base_sql = "INSERT INTO ITEM_RECOMMENDATION VALUES";
-        $insert_sql = $base_sql;
-        $similarity_results = $db->execute($similarity_sql);
-        if (!$similarity_results) {
-            return;
+        $recommendations = [];
+        foreach ($user_embeddings as $user_id => $user_embedding) {
+            // flipped so the already viewed test in the inner loop is a key
+            // lookup rather than a scan of the user's whole view list
+            $viewed = array_flip($user_items[$user_id] ?? []);
+            foreach ($item_embeddings as $item_id => $item_embedding) {
+                if (isset($viewed[$item_id]) ||
+                    !array_key_exists($item_id, $resource_metadata)) {
+                    continue;
+                }
+                $similarity = LinearAlgebra::similarity($user_embedding,
+                    $item_embedding);
+                list($group_id, $page_id, $resource_path) =
+                    $resource_metadata[$item_id];
+                $recommendations[] = [$user_id, $group_id, $page_id,
+                    $resource_path, $similarity, $item_id];
+            }
         }
-        $old_user_id = -1; // assume no one has this id
+        // previous recommendations are discarded before the new ones are
+        // written
+        $delete_sql = "DELETE FROM GROUP_RESOURCE_RECOMMENDATION";
+        $db->execute($delete_sql);
+        $base_insert_sql = "INSERT INTO GROUP_RESOURCE_RECOMMENDATION " .
+            "VALUES ";
+        $insert_sql = $base_insert_sql;
         $comma = "";
         $insert_count = 0;
-        $i = 0;
-        L\crawlLog("...Computing type: $recommendation_type ".
-            "recommendations");
-        while($row = $db->fetchArray($similarity_results)) {
-            list($user_id, $item_id, $similarity, ) = array_values($row);
-            if ($user_id != $old_user_id) {
-                $old_user_id = $user_id;
-                $num_recommended = 1;
-            }
-            if ($num_recommended <= $max_recommendations
-                && $old_user_id == $user_id) {
-                $insert_sql .= "$comma ($item_id, $user_id, " .
-                    $recommendation_type .
-                    ", $similarity, {$this->update_time})";
-                $comma = ",";
-                $insert_count++;
-                if ($insert_count > self::BATCH_SQL_INSERT_NUM) {
-                    $insert_ignore_sql = $db->insertIgnore($insert_sql);
-                    $db->execute($insert_ignore_sql);
-                    $insert_sql = $base_sql;
-                    $insert_count = 0;
-                    $comma = "";
-                }
-                $num_recommended++;
-                $old_user_id = $user_id;
+        // resource paths are file names and may contain quote characters,
+        // so they are bound as statement parameters (one per row) rather
+        // than spliced into the SQL text
+        $resource_paths = [];
+        $time = $this->update_time;
+        foreach ($recommendations as $recommendation) {
+            list($user_id, $group_id, $page_id, $resource_path,
+                $score, $item_id) = $recommendation;
+            $insert_sql .= "$comma($user_id, $group_id, $page_id, " .
+                "?, $score, $time, $item_id)";
+            $resource_paths[] = $resource_path;
+            $comma = ",";
+            $insert_count++;
+            // flush a full batch and start building the next statement
+            if ($insert_count == self::BATCH_SQL_INSERT_NUM) {
+                $insert_sql = $db->insertIgnore($insert_sql);
+                $db->execute($insert_sql, $resource_paths);
+                $insert_count = 0;
+                $comma = "";
+                $insert_sql = $base_insert_sql;
+                $resource_paths = [];
             }
-            L\crawlTimeoutLog("...%s recommendations so far", $i++);
         }
         if ($insert_count > 0) {
-            $insert_ignore_sql = $db->insertIgnore($insert_sql);
-            $db->execute($insert_ignore_sql);
+            // write out the final, partially filled batch
+            $insert_sql = $db->insertIgnore($insert_sql);
+            $db->execute($insert_sql, $resource_paths);
         }
     }
 }
diff --git a/src/locale/en_US/configure.ini b/src/locale/en_US/configure.ini
index 1da5ff1cd..4392a44bd 100644
--- a/src/locale/en_US/configure.ini
+++ b/src/locale/en_US/configure.ini
@@ -1073,6 +1073,7 @@ manageaccount_element_more_groups = "More groups..."
 manageaccount_element_recommendations = "Recommendations"
 manageaccount_element_rec_threads = "Threads:"
 manageaccount_element_rec_groups = "Groups:"
+manageaccount_element_rec_resources = "Page Resources:"
 manageaccount_element_account_details = "Account Details"
 manageaccount_element_username = "Username"
 manageaccount_element_firstname = "First Name"
diff --git a/src/models/ProfileModel.php b/src/models/ProfileModel.php
index e301f3be3..a82579b93 100755
--- a/src/models/ProfileModel.php
+++ b/src/models/ProfileModel.php
@@ -210,6 +210,10 @@ class ProfileModel extends Model
                 GI_PARENT_ID_PUBDATE_INDEX ON GROUP_ITEM (PARENT_ID, PUBDATE)",
             "GROUP_ITEM_VOTE" => "CREATE TABLE GROUP_ITEM_VOTE(
                 USER_ID $integer, ITEM_ID $integer)",
+            "GROUP_ITEM_RECOMMENDATION" => "CREATE TABLE
+                GROUP_ITEM_RECOMMENDATION (USER_ID $integer NOT NULL,
+                ITEM_ID $integer NOT NULL, ITEM_TYPE $integer NOT NULL,
+                SCORE FLOAT, TIMESTAMP NUMERIC(" . C\TIMESTAMP_LEN . "))",
             "GROUP_PAGE" => "CREATE TABLE GROUP_PAGE (
                 ID $serial PRIMARY KEY $auto_increment, GROUP_ID $integer,
                 DISCUSS_THREAD $integer, TITLE VARCHAR(" . C\TITLE_LEN . "),
@@ -237,6 +241,11 @@ class ProfileModel extends Model
                 TO_PAGE_NAME))",
             "GPP_ID_INDEX" => "CREATE INDEX GP_PRE_INDEX ON GROUP_PAGE_PRE_LINK
                  (TO_GROUP_ID, TO_PAGE_NAME)",
+            "GROUP_RESOURCE_RECOMMENDATION" => "CREATE TABLE
+                GROUP_RESOURCE_RECOMMENDATION (USER_ID $integer NOT NULL,
+                GROUP_ID $integer NOT NULL, PAGE_ID $integer NOT NULL,
+                RESOURCE_PATH VARCHAR(255), SCORE FLOAT, TIMESTAMP NUMERIC(" .
+                C\TIMESTAMP_LEN . "), RESOURCE_ID $integer)",
             "SOCIAL_GROUPS" => "CREATE TABLE SOCIAL_GROUPS (
                 GROUP_ID $serial PRIMARY KEY $auto_increment,
                 GROUP_NAME VARCHAR(" . C\SHORT_TITLE_LEN
@@ -270,19 +279,6 @@ class ProfileModel extends Model
                 TMP_NUM_VIEWS $integer DEFAULT -1,
                 PRIMARY KEY(USER_ID, ITEM_ID, ITEM_TYPE,
                 UPDATE_PERIOD, UPDATE_TIMESTAMP))",
-            "ITEM_RECOMMENDATION" => "CREATE TABLE ITEM_RECOMMENDATION (
-                ITEM_ID $integer, USER_ID $integer, ITEM_TYPE $integer,
-                SCORE FLOAT, TIMESTAMP NUMERIC(" . C\TIMESTAMP_LEN . "))",
-            "IR_USER_ID_INDEX" => "CREATE INDEX IR_USER_ID_INDEX ON
-                ITEM_RECOMMENDATION(USER_ID)",
-            "ITEM_TERM_FREQUENCY"=> "CREATE TABLE ITEM_TERM_FREQUENCY
-                (ITEM_ID $integer, TERM_ID $integer, FREQUENCY $integer,
-                LOG_FREQUENCY FLOAT, PRIMARY KEY(ITEM_ID, TERM_ID))",
-            "ITF_TERM_ID_INDEX" => "CREATE INDEX ITF_TERM_ID_INDEX ON
-                ITEM_TERM_FREQUENCY(TERM_ID)",
-            "ITEM_TERM_WEIGHTS"=> "CREATE TABLE ITEM_TERM_WEIGHTS (
-                TERM_ID $integer, ITEM_ID $integer, WEIGHT FLOAT,
-                PRIMARY KEY(TERM_ID, ITEM_ID))",
             "LOCALE" => "CREATE TABLE LOCALE(LOCALE_ID $serial PRIMARY KEY
                 $auto_increment, LOCALE_TAG VARCHAR(" . C\NAME_LEN . "),
                 LOCALE_NAME VARCHAR(" . C\LONG_NAME_LEN .
@@ -329,6 +325,14 @@ class ProfileModel extends Model
                 ACTIVITY_ID $integer,
                 ALLOWED_ARGUMENTS VARCHAR(" . C\MAX_URL_LEN . ") DEFAULT 'all',
                 PRIMARY KEY(ROLE_ID, ACTIVITY_ID))",
+            "RECOMMENDATION_ITEM_EMBEDDING" => "CREATE TABLE
+                RECOMMENDATION_ITEM_EMBEDDING (ID $integer NOT NULL,
+                ITEM_TYPE $integer NOT NULL, VECTOR BLOB, PARENT_ID $integer,
+                PRIMARY KEY(ID, ITEM_TYPE))",
+            "RECOMMENDATION_TERM_EMBEDDING" => "CREATE TABLE
+                RECOMMENDATION_TERM_EMBEDDING (ID $integer NOT NULL,
+                ITEM_TYPE $integer NOT NULL, VECTOR BLOB,
+                PRIMARY KEY(ID, ITEM_TYPE))",
             "SCRAPER" =>
                 "CREATE TABLE SCRAPER (ID $serial PRIMARY KEY
                 $auto_increment, NAME VARCHAR(" . C\TITLE_LEN . "),
@@ -382,18 +386,6 @@ class ProfileModel extends Model
                 ROLE_ID $integer, PRIMARY KEY (ROLE_ID, USER_ID))",
             "USER_SESSION" => "CREATE TABLE USER_SESSION (
                 USER_ID $integer PRIMARY KEY, SESSION $user_session_text)",
-            "USER_ITEM_SIMILARITY" => "CREATE TABLE USER_ITEM_SIMILARITY
-                (USER_ID $integer, THREAD_ID $integer, SIMILARITY FLOAT,
-                 GROUP_MEMBER $integer,
-                PRIMARY KEY(USER_ID, THREAD_ID))",
-            "USER_TERM_FREQUENCY"=>"CREATE TABLE USER_TERM_FREQUENCY
-                (USER_ID $integer, TERM_ID $integer, FREQUENCY $integer,
-                 LOG_FREQUENCY FLOAT, PRIMARY KEY(USER_ID, TERM_ID))",
-            "UTF_TERM_ID_INDEX" => "CREATE INDEX UTF_TERM_ID_INDEX ON
-                 USER_TERM_FREQUENCY(TERM_ID)",
-            "USER_TERM_WEIGHTS"=>"CREATE TABLE USER_TERM_WEIGHTS
-                (TERM_ID $integer, USER_ID $integer, WEIGHT FLOAT,
-                PRIMARY KEY(TERM_ID, USER_ID))",
             "VISITOR" => "CREATE TABLE VISITOR(ADDRESS VARCHAR(".
                 C\MAX_IP_ADDRESS_AS_STRING_LEN . "),
                 PAGE_NAME VARCHAR(" . C\NAME_LEN . "),
diff --git a/src/models/UserModel.php b/src/models/UserModel.php
index 3574128aa..934b4a0b6 100755
--- a/src/models/UserModel.php
+++ b/src/models/UserModel.php
@@ -618,7 +618,7 @@ class UserModel extends Model
             $name_id = "GROUP_ID";
         }
         $sql = "SELECT IR.ITEM_ID AS ID, NT.$name_column AS NAME ".
-            "FROM ITEM_RECOMMENDATION IR, $name_table NT ".
+            "FROM GROUP_ITEM_RECOMMENDATION IR, $name_table NT ".
             "WHERE IR.ITEM_ID = NT.$name_id AND IR.USER_ID = ? AND " .
             "ITEM_TYPE = ? AND TIMESTAMP = $timestamp " .
             "ORDER BY SCORE DESC " . $db->limitOffset($num);
@@ -629,4 +629,29 @@ class UserModel extends Model
         }
         return $recommendations;
     }
+    /**
+     * Gets the highest scoring wiki page resource recommendations stored
+     * for a user in the GROUP_RESOURCE_RECOMMENDATION table
+     *
+     * @param int $user_id id of the user to get recommendations for
+     * @param int $num maximum number of recommendations to return
+     * @return array list of [group_id, page_id, page_title, resource_name,
+     *      sub_path] arrays ordered by decreasing score; page_title is the
+     *      empty string if the page is not found in GROUP_PAGE, and
+     *      sub_path is the empty string when the stored path has no folder
+     *      component between the sub-path offset and the resource name
+     */
+    public function getResourceRecommendations($user_id, $num = 3)
+    {
+        // number of characters from the start of "/resources/" in a stored
+        // path to the first character of the resource's sub-folder path.
+        // NOTE(review): presumably the length of "/resources/" plus the
+        // page's resource folder names -- confirm against the code that
+        // builds resource paths
+        $sub_path_offset = 28;
+        $db = $this->db;
+        $sql = "SELECT * FROM GROUP_RESOURCE_RECOMMENDATION WHERE" .
+            " USER_ID = ? ORDER BY SCORE DESC " . $db->limitOffset($num);
+        $results = $db->execute($sql, [$user_id]);
+        $page_sql = "SELECT TITLE FROM GROUP_PAGE WHERE ID = ?";
+        $recommendations = [];
+        while($row = $db->fetchArray($results)) {
+            $group_id = $row['GROUP_ID'];
+            $page_id = $row['PAGE_ID'];
+            // reset per row so a missing page gives an empty title rather
+            // than an undefined variable or the previous row's title
+            $page_title = "";
+            $result = $db->execute($page_sql, [$page_id]);
+            while ($sub_row = $db->fetchArray($result)) {
+                $page_title = $sub_row['TITLE'];
+            }
+            $path = $row['RESOURCE_PATH'];
+            $index = strrpos($path, "/");
+            // a path without any slash is itself the resource name
+            $name = ($index === false) ? $path : substr($path, $index + 1);
+            $resource_index = strrpos($path, "/resources/");
+            $sub_path = "";
+            if ($index !== false && $resource_index !== false) {
+                $sub_start = $resource_index + $sub_path_offset;
+                // when the last slash comes before $sub_start there is no
+                // sub-folder; substr would read the resulting negative
+                // length as "drop this many characters from the end"
+                if ($index > $sub_start) {
+                    $sub_path = substr($path, $sub_start,
+                        $index - $sub_start);
+                }
+            }
+            $recommendations[] = [$group_id, $page_id, $page_title,
+                $name, $sub_path];
+        }
+        return $recommendations;
+    }
 }
diff --git a/src/views/elements/ManageaccountElement.php b/src/views/elements/ManageaccountElement.php
index a94b518b9..4a95f6382 100755
--- a/src/views/elements/ManageaccountElement.php
+++ b/src/views/elements/ManageaccountElement.php
@@ -95,7 +95,8 @@ class ManageaccountElement extends Element
                     tl('manageaccount_element_more_groups')?></a></b></div><?php
             }
             if (!empty($data['THREAD_RECOMMENDATIONS']) ||
-                !empty($data['GROUP_RECOMMENDATIONS'])) {
+                !empty($data['GROUP_RECOMMENDATIONS']) ||
+                !empty($data['RESOURCE_RECOMMENDATIONS'])) {
                 ?>
                 <h2><?=tl('manageaccount_element_recommendations')?></h2>
                 <div class="access-result">
@@ -123,7 +124,22 @@ class ManageaccountElement extends Element
                         <?php
                     }?></ul><?php
                 }
-                ?></div><?php
+                ?><br /><?php
+                if (!empty($data['RESOURCE_RECOMMENDATIONS'])) {
+                    ?><b><?=tl('manageaccount_element_rec_resources')
+                    ?></b><ul><?php
+                    foreach ($data['RESOURCE_RECOMMENDATIONS'] as
+                        $recommendation) {
+                        $encoded_name = str_replace(" ",
+                            "+", $recommendation[3]); ?><li>
+                        <a href="<?= htmlentities(B\controllerUrl("group")) .
+                            "/$recommendation[0]/$recommendation[2]?" .
+                            "$token&page_id=$recommendation[1]&" .
+                            "sf=$recommendation[4]&arg=media&n=$encoded_name"
+                            ?>" ><?= $recommendation[3] ?></a></li>
+                        <?php
+                    }?></div><?php
+                }
             }
             ?>
         </div>
@@ -320,4 +336,4 @@ class ManageaccountElement extends Element
         </div>
         </div><?php
     }
-}
+}
\ No newline at end of file
ViewGit