Fix compressor bug caused when changing an active crawl's seed sites, a=chris

Chris Pollett [2022-07-29]
Fix compressor bug caused when changing an active crawl's seed sites, a=chris
Filename
src/library/IndexDocumentBundle.php
src/library/PartitionDocumentBundle.php
diff --git a/src/library/IndexDocumentBundle.php b/src/library/IndexDocumentBundle.php
index 32ebd2e33..c5d280b8d 100644
--- a/src/library/IndexDocumentBundle.php
+++ b/src/library/IndexDocumentBundle.php
@@ -240,7 +240,7 @@ class IndexDocumentBundle implements CrawlConstants
                 $archive_info_path));
             $just_got_info = true;
         }
-        if (floatval($this->archive_info['VERSION']) < 3.1) {
+        if ($this->archive_info['VERSION'] < "3.1") {
             $this->archive_info['RECORD_COMPRESSOR'] =
                 C\NS_COMPRESSORS . "GzipCompressor";
         }
@@ -258,7 +258,6 @@ class IndexDocumentBundle implements CrawlConstants
         $this->archive_info['BPLUS_BLOB_COMPRESSOR'] ??=
             C\NS_COMPRESSORS . "NonCompressor";
         $bplus_blob_compressor = $this->archive_info['BPLUS_BLOB_COMPRESSOR'];
-        $blob_compressor = C\NS_COMPRESSORS . "GzipCompressor";
         if (!$read_only_archive && !$just_got_info) {
             file_put_contents($archive_info_path,
                 serialize($this->archive_info));
@@ -1386,7 +1385,8 @@ class IndexDocumentBundle implements CrawlConstants
         if (!is_array($table_info)) {
             $table_info = [];
         }
-        return array_merge($info, $table_info);
+        $info = array_merge($table_info, $info);;
+        return $info;
     }
     /**
      * Sets the archive info struct for the web archive bundle associated with
@@ -1395,11 +1395,20 @@ class IndexDocumentBundle implements CrawlConstants
      * timestamp, etc).
      *
      * @param string $dir_name folder with archive bundle
-     * @param array $info struct with above fields
+     * @param array $update_info struct with above fields
      */
-    public static function setArchiveInfo($dir_name, $info)
+    public static function setArchiveInfo($dir_name, $update_info)
     {
         $archive_info_path = $dir_name. "/" . self::ARCHIVE_INFO_FILE;
+        if (file_exists($archive_info_path)) {
+            $info = self::getArchiveInfo($dir_name);
+        }
+        if (empty($info) || !is_array($info)) {
+            $info = [];
+        }
+        foreach ($update_info as $field => $value) {
+            $info[$field] = $value;
+        }
         if (empty($info["VERSION"])) {
             $info["VERSION"] = self::DEFAULT_VERSION;
         }
diff --git a/src/library/PartitionDocumentBundle.php b/src/library/PartitionDocumentBundle.php
index 2d6c8bfbe..8b7a72eeb 100644
--- a/src/library/PartitionDocumentBundle.php
+++ b/src/library/PartitionDocumentBundle.php
@@ -200,7 +200,7 @@ class PartitionDocumentBundle
         $initial_parameters["PARTITION_SIZE_THRESHOLD"] =
             $partition_size_threshold;
         $initial_parameters["MAX_ITEMS_PER_FILE"] = $max_items_per_file;
-        $initial_parameters["RECORD_COMPRESSOR"] = $blob_compressor_type;
+        $initial_parameters["RECORD_COMPRESSOR"] = $record_compressor_type;
         $initial_parameters["BLOB_COMPRESSOR"] = $blob_compressor_type;
         $this->record_compressor = new $record_compressor_type();
         $this->blob_compressor = new $blob_compressor_type();
ViewGit