diff --git a/src/library/LRUCache.php b/src/library/LRUCache.php index db3ed8231..bc0077556 100644 --- a/src/library/LRUCache.php +++ b/src/library/LRUCache.php @@ -98,4 +98,13 @@ class LRUCache return $this->cache[$key]; } } -} + /** + * Returns all the items currently in cache + * + * @return array $this->cache + */ + public function getAll() + { + return $this->cache; + } +} \ No newline at end of file diff --git a/src/library/media_jobs/RecommendationJob.php b/src/library/media_jobs/RecommendationJob.php index f772f88ac..d96a15f6e 100644 --- a/src/library/media_jobs/RecommendationJob.php +++ b/src/library/media_jobs/RecommendationJob.php @@ -330,7 +330,8 @@ class RecommendationJob extends MediaJob $term_embeddings = []; $item_terms = []; while ($row = $db->fetchArray($results)) { - $this->lru_cache->put($row['ID'], unserialize($row['VECTOR'])); + $this->lru_cache->put($row['ID'], + base64_decode($row['VECTOR'], true)); } $context_distance_sum = (self::CONTEXT_WINDOW_LENGTH * (self::CONTEXT_WINDOW_LENGTH + 1)) / 2.0; @@ -353,35 +354,35 @@ class RecommendationJob extends MediaJob $item_terms[$item_id] = [$terms, $row['GROUP_ID']]; for ($i = 0; $i < count($terms); $i++) { [$term_id, $term] = $terms[$i]; - $term_hash = $term_id % self::EMBEDDING_VECTOR_SIZE; + $term_hash = $term_id % self::EMBEDDING_VECTOR_SIZE + 1; $term_sign_hash = hash(self::SIGN_HASH_ALGORITHM, $term, true); - $term_sign = unpack('n', $term_sign_hash)[1] % 2 == 0 ? -1 : 1; + $term_sign = unpack('N', $term_sign_hash)[1] % 2 == 0 ? 
-1 : 1; $term_embedding = $this->getTermEmbedding($term_id, C\THREAD_RECOMMENDATION); - $term_embedding = unpack("d$this->size", $term_embedding); + $term_embedding = unpack("E*", $term_embedding); for ($j = $i - 1; $j >= 0 && $j >= $i - self::CONTEXT_WINDOW_LENGTH; $j--) { [$context_term_id, $context_term] = $terms[$j]; $context_term_embedding = $this->getTermEmbedding( $context_term_id, C\THREAD_RECOMMENDATION); - $context_term_embedding = unpack("d$this->size", + $context_term_embedding = unpack("E*", $context_term_embedding); $weight = exp(-1 * pow(($i - $j) / $std_deviation, 2)); $context_term_hash = $context_term_id % - self::EMBEDDING_VECTOR_SIZE; + self::EMBEDDING_VECTOR_SIZE + 1; $context_term_sign_hash = hash(self::SIGN_HASH_ALGORITHM, $context_term, true); - $context_term_sign = unpack('n', $context_term_sign_hash)[1] + $context_term_sign = unpack('N', $context_term_sign_hash)[1] % 2 == 0 ? -1 : 1; $term_embedding[$context_term_hash] += $context_term_sign * $weight; $context_term_embedding[$term_hash] += $term_sign * $weight; - $context_term_embedding = pack("d$this->size", + $context_term_embedding = pack("E*", ...$context_term_embedding); $this->updateTermEmbeddingCache($context_term_id, $context_term_embedding, C\THREAD_RECOMMENDATION); } - $term_embedding = pack("d$this->size", ...$term_embedding); + $term_embedding = pack("E*", ...$term_embedding); $this->updateTermEmbeddingCache($term_id, $term_embedding, C\THREAD_RECOMMENDATION); } @@ -402,16 +403,16 @@ class RecommendationJob extends MediaJob $db = $this->db; $updated_item_embeddings = []; foreach ($item_terms as $item_id => [$terms, $group_id]) { - $item_embedding = array_fill(0, self::EMBEDDING_VECTOR_SIZE, 0); + $item_embedding = array_fill(1, self::EMBEDDING_VECTOR_SIZE, 0); foreach ($terms as [$term_id, $term]) { $term_embedding = $this->getTermEmbedding($term_id, C\THREAD_RECOMMENDATION, true); - $term_embedding = unpack("d$this->size", $term_embedding); + $term_embedding = unpack("E*", 
$term_embedding); $item_embedding = LinearAlgebra::add($item_embedding, $term_embedding); } $item_embedding = LinearAlgebra::normalize($item_embedding); - $item_embedding = pack("d$this->size", ...$item_embedding); + $item_embedding = pack("E*", ...$item_embedding); $updated_item_embeddings[$item_id] = [$item_embedding, $group_id]; } $base_delete_sql = "DELETE FROM RECOMMENDATION_ITEM_EMBEDDING" . @@ -424,7 +425,7 @@ class RecommendationJob extends MediaJob $item_type = C\THREAD_RECOMMENDATION; foreach ($updated_item_embeddings as $item_id => [$embedding, $parent_id]) { - $embedding = serialize(unpack("d$this->size", $embedding)); + $embedding = base64_encode($embedding); $insert_sql .= "$comma($item_id, $item_type, " . "'$embedding', $parent_id)"; $delete_sql .= "$comma $item_id"; @@ -478,12 +479,12 @@ class RecommendationJob extends MediaJob $user_id = $row['USER_ID']; $item_ids = explode(",", $row['ITEM_IDS']); $item_ids = array_unique($item_ids); - $item_user_embeddings[$user_id] = array_fill(0, + $item_user_embeddings[$user_id] = array_fill(1, self::EMBEDDING_VECTOR_SIZE, 0); $user_items[$user_id] = []; foreach ($item_ids as $item_id) { if (array_key_exists($item_id, $item_embeddings)) { - $item_embedding = unpack("d$this->size", + $item_embedding = unpack("E*", $item_embeddings[$item_id][0]); $item_user_embeddings[$user_id] = LinearAlgebra::add( $item_user_embeddings[$user_id], $item_embedding); @@ -492,7 +493,7 @@ class RecommendationJob extends MediaJob } $item_user_embeddings[$user_id] = LinearAlgebra::normalize( $item_user_embeddings[$user_id]); - $item_user_embeddings[$user_id] = pack("d$this->size", + $item_user_embeddings[$user_id] = pack("E*", ...$item_user_embeddings[$user_id]); } return [$item_user_embeddings, $user_items]; @@ -526,7 +527,7 @@ class RecommendationJob extends MediaJob } $item_user_recommendations = []; foreach ($item_user_embeddings as $user_id => $embedding) { - $embedding = unpack("d$this->size", $embedding); + $embedding = 
unpack("E*", $embedding); if (array_key_exists($user_id, $user_groups)) { foreach ($item_embeddings as $item_id => [$item_embedding, $parent_id]) { @@ -534,7 +535,7 @@ class RecommendationJob extends MediaJob !in_array($parent_id, $user_groups[$user_id])) { continue; } - $item_embedding = unpack("d$this->size", $item_embedding); + $item_embedding = unpack("E*", $item_embedding); $similarity = LinearAlgebra::similarity( $item_embedding, $embedding); $item_user_recommendations[] = [$user_id, @@ -584,19 +585,19 @@ class RecommendationJob extends MediaJob $updated_group_embeddings = []; foreach ($item_embeddings as $item_id => [$embedding, $parent_id]) { if (array_key_exists($parent_id, $updated_group_embeddings)) { - $embedding = unpack("d$this->size", $embedding); - $group_embedding = unpack("d$this->size", + $embedding = unpack("E*", $embedding); + $group_embedding = unpack("E*", $updated_group_embeddings[$parent_id]); - $updated_group_embeddings[$parent_id] = pack("d$this->size", + $updated_group_embeddings[$parent_id] = pack("E*", ...LinearAlgebra::add($embedding, $group_embedding)); } else { $updated_group_embeddings[$parent_id] = $embedding; } } foreach ($updated_group_embeddings as $group_id => $embedding) { - $embedding = unpack("d$this->size", $embedding); + $embedding = unpack("E*", $embedding); $embedding = LinearAlgebra::normalize($embedding); - $updated_group_embeddings[$group_id] = pack("d$this->size", + $updated_group_embeddings[$group_id] = pack("E*", ...$embedding); } $base_delete_sql = "DELETE FROM RECOMMENDATION_ITEM_EMBEDDING" . @@ -608,7 +609,7 @@ class RecommendationJob extends MediaJob $insert_count = 0; $item_type = C\GROUP_RECOMMENDATION; foreach ($updated_group_embeddings as $group_id => $embedding) { - $embedding = serialize(unpack("d$this->size", $embedding)); + $embedding = base64_encode($embedding); $insert_sql .= "$comma($group_id, $item_type, " . 
"'$embedding', $group_id)"; $delete_sql .= "$comma $group_id"; @@ -663,19 +664,19 @@ class RecommendationJob extends MediaJob $user_id = $row['USER_ID']; $group_ids = explode(",", $row['ITEM_IDS']); $group_ids = array_unique($group_ids); - $group_user_embeddings[$user_id] = array_fill(0, + $group_user_embeddings[$user_id] = array_fill(1, self::EMBEDDING_VECTOR_SIZE, 0); $user_groups[$user_id] = []; foreach ($group_ids as $group_id) { if (array_key_exists($group_id, $group_embeddings)) { - $embedding = unpack("d$this->size", + $embedding = unpack("E*", $group_embeddings[$group_id]); $group_user_embeddings[$user_id] = LinearAlgebra::add( $group_user_embeddings[$user_id], $embedding); $user_groups[$user_id][] = $group_id; } } - $group_user_embeddings[$user_id] = pack("d$this->size", + $group_user_embeddings[$user_id] = pack("E*", ...LinearAlgebra::normalize($group_user_embeddings[$user_id])); } return [$group_user_embeddings, $user_groups]; @@ -704,14 +705,14 @@ class RecommendationJob extends MediaJob } $group_user_recommendations = []; foreach ($group_user_embeddings as $user_id => $embedding) { + $embedding = unpack("E*", $embedding); foreach ($group_embeddings as $group_id => $group_embedding) { if (in_array($group_id, $exclude_group_ids) || in_array($group_id, $user_groups[$user_id]) || in_array($group_id, $user_group_impression[$user_id])) { continue; } - $embedding = unpack("d$this->size", $embedding); - $group_embedding = unpack("d$this->size", $group_embedding); + $group_embedding = unpack("E*", $group_embedding); $similarity = LinearAlgebra::similarity($embedding, $group_embedding); $group_user_recommendations[] = [$user_id, $group_id, @@ -801,6 +802,9 @@ class RecommendationJob extends MediaJob $descriptions = []; $resource_metadata = []; foreach ($thumb_folders as $thumb_folder) { + if (empty($thumb_folder)) { + continue; + } list($group_id, $page_id, $folder) = explode("###", $thumb_folder); $folder = trim($folder, " \n\r\t\v\x00"); $files = 
$this->getDescriptionFiles($folder); @@ -872,7 +876,8 @@ class RecommendationJob extends MediaJob $resource_terms = []; $meta_details_terms = []; while ($row = $db->fetchArray($results)) { - $this->lru_cache->put($row['ID'], unserialize($row['VECTOR'])); + $this->lru_cache->put($row['ID'], + base64_decode($row['VECTOR'], true)); } $context_distance_sum = (self::CONTEXT_WINDOW_LENGTH * (self::CONTEXT_WINDOW_LENGTH + 1)) / 2.0; @@ -902,36 +907,38 @@ class RecommendationJob extends MediaJob $terms = $resource_terms[$resource_id]; for ($i = 0; $i < count($terms); $i++) { [$term_id, $term] = $terms[$i]; - $term_hash = $term_id % self::EMBEDDING_VECTOR_SIZE; + $term_hash = $term_id % self::EMBEDDING_VECTOR_SIZE + 1; $term_sign_hash = hash(self::SIGN_HASH_ALGORITHM, $term, true); - $term_sign = unpack('n', $term_sign_hash)[1] + $term_sign = unpack('N', $term_sign_hash)[1] % 2 == 0 ? -1 : 1; $term_embedding = $this->getTermEmbedding($term_id, C\RESOURCE_RECOMMENDATION); - $term_embedding = unpack("d$this->size", $term_embedding); + $term_embedding = unpack("E*", $term_embedding); for ($j = $i - 1; $j >= 0 && $j >= $i - self::CONTEXT_WINDOW_LENGTH; $j--) { [$context_term_id, $context_term] = $terms[$j]; $context_term_embedding = $this->getTermEmbedding( $context_term_id, C\RESOURCE_RECOMMENDATION); - $context_term_embedding = unpack("d$this->size", + $context_term_embedding = unpack("E*", $context_term_embedding); $weight = exp(-1 * pow(($i - $j) / $std_deviation, 2)); $context_term_hash = $context_term_id % - self::EMBEDDING_VECTOR_SIZE; + self::EMBEDDING_VECTOR_SIZE + 1; $context_term_sign_hash = hash( self::SIGN_HASH_ALGORITHM, $context_term, true); - $context_term_sign = unpack('n', + $context_term_sign = unpack('N', $context_term_sign_hash)[1] % 2 == 0 ? 
-1 : 1; $term_embedding[$context_term_hash] += $context_term_sign * $weight; - $context_term_embedding = pack("d$this->size", + $context_term_embedding[$term_hash] += + $term_sign * $weight; + $context_term_embedding = pack("E*", ...$context_term_embedding); $this->updateTermEmbeddingCache($context_term_id, $context_term_embedding, C\RESOURCE_RECOMMENDATION); } - $term_embedding = pack("d$this->size", ...$term_embedding); + $term_embedding = pack("E*", ...$term_embedding); $this->updateTermEmbeddingCache($term_id, $term_embedding, C\RESOURCE_RECOMMENDATION); } @@ -974,7 +981,7 @@ class RecommendationJob extends MediaJob in_array($term, self::DESCRIPTION_STOP_WORDS)) { continue; } - $term_id = unpack('n', md5($term, true))[1]; + $term_id = unpack('N', md5($term, true))[1]; $term_ids[] = [$term_id, $term]; } return $term_ids; @@ -994,22 +1001,22 @@ class RecommendationJob extends MediaJob $db = $this->db; $updated_item_embeddings = []; foreach ($resource_terms as $resource_id => $terms) { - $item_embedding = array_fill(0, self::EMBEDDING_VECTOR_SIZE, 0); + $item_embedding = array_fill(1, self::EMBEDDING_VECTOR_SIZE, 0); foreach ($terms as [$term_id, $term]) { $term_embedding = $this->getTermEmbedding($term_id, C\RESOURCE_RECOMMENDATION, true); - $term_embedding = unpack("d$this->size", $term_embedding); + $term_embedding = unpack("E*", $term_embedding); $item_embedding = LinearAlgebra::add($item_embedding, $term_embedding); } - $updated_item_embeddings[$resource_id] = pack("d$this->size", + $updated_item_embeddings[$resource_id] = pack("E*", ...$item_embedding); } foreach ($meta_details_terms as $resource_id => $meta_terms) { if (!array_key_exists($resource_id, $updated_item_embeddings)) { - $item_embedding = array_fill(0, self::EMBEDDING_VECTOR_SIZE, 0); + $item_embedding = array_fill(1, self::EMBEDDING_VECTOR_SIZE, 0); } else { - $item_embedding = unpack("d$this->size", + $item_embedding = unpack("E*", $updated_item_embeddings[$resource_id]); } foreach ($meta_terms 
as [$meta_term_id, $meta_term]) { @@ -1017,16 +1024,16 @@ class RecommendationJob extends MediaJob continue; } $sign_hash = hash(self::SIGN_HASH_ALGORITHM, $meta_term, true); - $sign = unpack('n', $sign_hash)[1] % 2 == 0 ? -1 : 1; - $item_embedding[$meta_term_id%self::EMBEDDING_VECTOR_SIZE] += - $sign * 1.0; + $sign = unpack('N', $sign_hash)[1] % 2 == 0 ? -1 : 1; + $item_embedding[$meta_term_id%self::EMBEDDING_VECTOR_SIZE + 1] + += $sign * 1.0; } - $updated_item_embeddings[$resource_id] = pack("d$this->size", + $updated_item_embeddings[$resource_id] = pack("E*", ...$item_embedding); } foreach ($updated_item_embeddings as $item_id => $embedding) { - $embedding = unpack("d$this->size", $embedding); - $updated_item_embeddings[$item_id] = pack("d$this->size", + $embedding = unpack("E*", $embedding); + $updated_item_embeddings[$item_id] = pack("E*", ...LinearAlgebra::normalize($embedding)); } $delete_sql = "DELETE FROM RECOMMENDATION_ITEM_EMBEDDING WHERE" . @@ -1038,7 +1045,7 @@ class RecommendationJob extends MediaJob $insert_count = 0; $item_type = C\RESOURCE_RECOMMENDATION; foreach ($updated_item_embeddings as $resource_id => $embedding) { - $embedding = serialize(unpack("d$this->size", $embedding)); + $embedding = base64_encode($embedding); $insert_sql .= "$comma($resource_id, $item_type," . 
" '$embedding', $resource_id)"; $comma = ","; @@ -1087,21 +1094,20 @@ class RecommendationJob extends MediaJob $user_id = $row['USER_ID']; $item_ids = explode(",", $row['ITEM_IDS']); $item_ids = array_unique($item_ids); - $user_embeddings[$user_id] = array_fill(0, + $user_embeddings[$user_id] = array_fill(1, self::EMBEDDING_VECTOR_SIZE, 0); $user_items[$user_id] = []; foreach ($item_ids as $item_id) { if (array_key_exists($item_id, $item_embeddings)) { - $embedding = unpack("d$this->size", + $embedding = unpack("E*", $item_embeddings[$item_id]); $user_embeddings[$user_id] = LinearAlgebra::add( $user_embeddings[$user_id], $embedding); $user_items[$user_id][] = $item_id; } } - $user_embeddings[$user_id] = pack("d$this->size", - ...LinearAlgebra::normalize( - $user_embeddings[$user_id])); + $user_embeddings[$user_id] = pack("E*", + ...LinearAlgebra::normalize($user_embeddings[$user_id])); } return [$user_embeddings, $user_items]; } @@ -1120,13 +1126,13 @@ class RecommendationJob extends MediaJob $db = $this->db; $recommendations = []; foreach ($user_embeddings as $user_id => $user_embedding) { - $user_embedding = unpack("d$this->size", $user_embedding); + $user_embedding = unpack("E*", $user_embedding); foreach ($item_embeddings as $item_id => $item_embedding) { if (in_array($item_id, $user_items[$user_id]) || !array_key_exists($item_id, $resource_metadata)) { continue; } - $item_embedding = unpack("d$this->size", $item_embedding); + $item_embedding = unpack("E*", $item_embedding); $similarity = LinearAlgebra::similarity($user_embedding, $item_embedding); list($group_id, $page_id, $resource_path) = @@ -1182,10 +1188,10 @@ class RecommendationJob extends MediaJob $result = $db->execute($sql, [$item_type, $term_id]); $row = $db->fetchArray($result); if (!$row) { - $term_embedding = pack("d$this->size", ...array_fill(0, + $term_embedding = pack("E*", ...array_fill(1, self::EMBEDDING_VECTOR_SIZE, 0.0)); } else { - $term_embedding = unserialize($row['VECTOR']); + 
$term_embedding = base64_decode($row['VECTOR'], true); } } if ($update) { @@ -1213,7 +1219,7 @@ class RecommendationJob extends MediaJob $db->execute($sql, [$item_type, $evicted_item[0]]); $sql = "INSERT INTO RECOMMENDATION_TERM_EMBEDDING VALUES (?, ?, ?)"; $db->execute($sql, [$evicted_item[0], $item_type, - serialize($evicted_item[1])]); + base64_encode($evicted_item[1])]); } } /** @@ -1223,6 +1229,7 @@ class RecommendationJob extends MediaJob */ public function saveTermEmbeddingsCacheToDb($item_type) { + $db = $this->db; $base_delete_sql = "DELETE FROM RECOMMENDATION_TERM_EMBEDDING" . " WHERE ITEM_TYPE = ? AND ID IN ("; $delete_sql = $base_delete_sql; @@ -1230,9 +1237,9 @@ class RecommendationJob extends MediaJob $insert_sql = $base_insert_sql; $comma = ""; $insert_count = 0; - foreach ($this->lru_cache as $id => $embedding) { - $embedding = serialize($embedding); - $insert_sql .= "$comma($id, $item_type, $embedding)"; + foreach ($this->lru_cache->getAll() as $id => $embedding) { + $embedding = base64_encode($embedding); + $insert_sql .= "$comma($id, $item_type, '$embedding')"; $delete_sql .= "$comma $id"; $comma = ","; $insert_count++;