From b8b789c0027472421ec0808179bdc2d0f585602b Mon Sep 17 00:00:00 2001
From: Georgy Moshkin <gmoshkin@picodata.io>
Date: Thu, 8 Aug 2024 19:05:04 +0300
Subject: [PATCH] fix: migration file md5 checksums were wrong
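
The old calculate_migration_hash_from_file re-hashed a fixed-size
scratch buffer on every iteration: md5::compute was applied to the
whole 1040-byte array (BUF_SIZE + HASH_SIZE), which also contained the
previous digest and whatever stale bytes were left over from earlier
reads, instead of hashing only the bytes actually read from the file.
As a result the stored checksum did not match the md5 of the file
contents.

Switch to the incremental API of the md5 crate: feed each chunk read
from the file into an md5::Context and compute the digest once at the
end. Hashing in chunks this way yields the same digest as hashing all
of the bytes at once, e.g. (illustrative fragment, assuming the md5
crate is in scope; the data and chunk size are arbitrary):

    let data = b"arbitrary migration file contents";
    let mut context = md5::Context::new();
    for chunk in data.chunks(4) {
        // only the bytes of each chunk go into the hash state
        context.consume(chunk);
    }
    assert_eq!(context.compute(), md5::compute(data));

Also extend test_migration_separate_command to compare the hashes
stored in _pico_plugin_migration against hashlib.md5 of the migration
files on disk.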

---
 src/plugin/migration.rs | 25 ++++++++++---------------
 test/int/test_plugin.py | 12 ++++++++++++
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/src/plugin/migration.rs b/src/plugin/migration.rs
index b9dd9e0268..c2bcfc9e06 100644
--- a/src/plugin/migration.rs
+++ b/src/plugin/migration.rs
@@ -7,7 +7,6 @@ use crate::traft::op::{Dml, Op};
 use crate::util::Lexer;
 use crate::util::QuoteEscapingStyle;
 use crate::{error_injection, sql, tlog, traft};
-use md5::Digest;
 use std::fs::File;
 use std::io;
 use std::io::{ErrorKind, Read};
@@ -98,26 +97,22 @@ pub fn calculate_migration_hash(filename: &str) -> Result<md5::Digest, Error> {
     let (sender, receiver) = cbus::oneshot::channel(ENDPOINT_NAME);
 
     fn calculate_migration_hash_from_file(filename: &str) -> Result<md5::Digest, io::Error> {
-        const BUF_SIZE: usize = 1024;
-        const HASH_SIZE: usize = 16;
+        const BUF_SIZE: usize = 4096;
 
         let mut f = File::open(filename)?;
-        let mut buffer = [0; BUF_SIZE + HASH_SIZE];
-        let mut digest = Digest([0; HASH_SIZE]);
+        let mut context = md5::Context::new();
+        let mut buffer = [0; BUF_SIZE];
 
-        let (file_part, digest_part) = buffer.split_at_mut(BUF_SIZE);
-        assert_eq!(file_part.len(), BUF_SIZE);
-        assert_eq!(digest_part.len(), HASH_SIZE);
-        let mut n = f.read(file_part)?;
-
-        while n != 0 {
-            digest = md5::compute(buffer);
+        loop {
+            let n = f.read(&mut buffer)?;
+            if n == 0 {
+                break;
+            }
 
-            let (file_part, digest_part) = buffer.split_at_mut(BUF_SIZE);
-            n = f.read(file_part)?;
-            digest_part.copy_from_slice(&digest.0);
+            context.consume(&buffer[..n]);
         }
 
+        let digest = context.compute();
         Ok(digest)
     }
 
diff --git a/test/int/test_plugin.py b/test/int/test_plugin.py
index dbc837d4e5..b1a3f0747b 100644
--- a/test/int/test_plugin.py
+++ b/test/int/test_plugin.py
@@ -4,6 +4,8 @@ from typing import Any, Dict, List, Optional
 import pytest
 import uuid
 import msgpack  # type: ignore
+import os
+import hashlib
 from conftest import (
     Cluster,
     ReturnError,
@@ -747,6 +749,16 @@ def test_migration_separate_command(cluster: Cluster):
     expected_state = expected_state.set_data(_DATA_V_0_1_0)
     expected_state.assert_data_synced()
 
+    # check migration file checksums are calculated correctly
+    rows = i1.sql(""" SELECT "migration_file", "hash" FROM "_pico_plugin_migration" """)
+    assert i1.plugin_dir
+    plugin_dir = os.path.join(i1.plugin_dir, _PLUGIN_WITH_MIGRATION, "0.1.0")
+    for filename, checksum in rows:
+        fullpath = os.path.join(plugin_dir, filename)
+        with open(fullpath, "rb") as f:
+            digest = hashlib.md5(f.read())
+        assert checksum == digest.hexdigest(), filename
+
     # increase a version to v0.2.0
     i1.call("pico.install_plugin", _PLUGIN_WITH_MIGRATION, "0.2.0", timeout=5)
     i1.call("pico.migration_up", _PLUGIN_WITH_MIGRATION, "0.2.0")
-- 
GitLab