changeset 6142:93d408173903 attach-custom-data

merge
author Sebastien Jodogne <s.jodogne@gmail.com>
date Fri, 30 May 2025 12:23:19 +0200
parents 6cf5caab6117 (current diff) 98776c72a9bc (diff)
children d6c777a2511f
files
diffstat 6 files changed, 196 insertions(+), 85 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Fri May 30 12:22:52 2025 +0200
+++ b/NEWS	Fri May 30 12:23:19 2025 +0200
@@ -22,6 +22,7 @@
   - If "LimitMainDicomTagsReconstructLevel" was set, files were not transcoded if they had to.
     The "LimitMainDicomTagsReconstructLevel" configuration is now ignored when a full processing
     is required.
+* Fix computation of MD5 hashes for memory buffers whose size is larger than 2^31 bytes.
 
 
 Version 1.12.7 (2025-04-07)
--- a/OrthancFramework/Sources/SystemToolbox.cpp	Fri May 30 12:22:52 2025 +0200
+++ b/OrthancFramework/Sources/SystemToolbox.cpp	Fri May 30 12:23:19 2025 +0200
@@ -455,27 +455,60 @@
   }
 
 #if ORTHANC_ENABLE_MD5 == 1
+  void SystemToolbox::ComputeStreamMD5(std::string& result,  // out: MD5 of the stream, as exported by MD5Context
+                                       std::istream& inputStream)  // in: consumed until it stops being good()
+  {
+    Toolbox::MD5Context context;
+
+    const size_t bufferSize = 1024;  // the stream is hashed by blocks of at most 1024 bytes
+    char buffer[bufferSize];
+
+    while (inputStream.good())  // loop until end-of-stream or a stream error
+    {
+      inputStream.read(buffer, bufferSize);
+      std::streamsize bytesRead = inputStream.gcount();  // bytes actually extracted by read()
+
+      if (bytesRead > 0)  // the last read() can be partial or empty
+      {
+        context.Append(buffer, bytesRead);
+      }
+    }
+
+    context.Export(result);
+  }
+
+
   void SystemToolbox::ComputeFileMD5(std::string& result,
                                      const std::string& path)
   {
-    std::ifstream fileStream(path, std::ifstream::binary);
-    Toolbox::ComputeMD5(result, fileStream);
+    boost::filesystem::ifstream fileStream;
+    fileStream.open(path, std::ifstream::in | std::ifstream::binary);  // binary mode: no newline translation of the hashed bytes
+
+    if (!fileStream.good())  // any failure to open the file is reported as ErrorCode_InexistentFile
+    {
+      throw OrthancException(ErrorCode_InexistentFile, "File not found: " + path);
+    }
+
+    ComputeStreamMD5(result, fileStream);
   }
 
+
   bool SystemToolbox::CompareFilesMD5(const std::string& path1,
                                       const std::string& path2)
   {
-    if (SystemToolbox::GetFileSize(path1) != SystemToolbox::GetFileSize(path2))
+    if (GetFileSize(path1) != GetFileSize(path2))  // different sizes imply different content: no hashing needed
     {
       return false;
     }
-
-    std::string path1md5, path2md5;
+    else
+    {
+      std::string path1md5, path2md5;
     
-    SystemToolbox::ComputeFileMD5(path1md5, path1);
-    SystemToolbox::ComputeFileMD5(path2md5, path2);
+      ComputeFileMD5(path1md5, path1);
+      ComputeFileMD5(path2md5, path2);
 
-    return path1md5 == path2md5;
+      return path1md5 == path2md5;  // same size and same MD5
+    }
   }
 #endif
 
--- a/OrthancFramework/Sources/SystemToolbox.h	Fri May 30 12:22:52 2025 +0200
+++ b/OrthancFramework/Sources/SystemToolbox.h	Fri May 30 12:23:19 2025 +0200
@@ -30,6 +30,10 @@
 #  error The macro ORTHANC_SANDBOXED must be defined
 #endif
 
+#if !defined(ORTHANC_ENABLE_MD5)
+#  error The macro ORTHANC_ENABLE_MD5 must be defined
+#endif
+
 #if ORTHANC_SANDBOXED == 1
 #  error The namespace SystemToolbox cannot be used in sandboxed environments
 #endif
@@ -84,6 +88,9 @@
     static uint64_t GetFileSize(const std::string& path);
 
 #if ORTHANC_ENABLE_MD5 == 1
+    static void ComputeStreamMD5(std::string& result,
+                                 std::istream& stream);
+
     static void ComputeFileMD5(std::string& result,
                                const std::string& path);
 
--- a/OrthancFramework/Sources/Toolbox.cpp	Fri May 30 12:22:52 2025 +0200
+++ b/OrthancFramework/Sources/Toolbox.cpp	Fri May 30 12:23:19 2025 +0200
@@ -64,6 +64,7 @@
 #include <boost/algorithm/string/join.hpp>
 #include <boost/lexical_cast.hpp>
 #include <boost/regex.hpp>
+#include <cassert>
 
 #if BOOST_VERSION >= 106600
 #  include <boost/uuid/detail/sha1.hpp>
@@ -207,6 +208,112 @@
 
 namespace Orthanc
 {
+#if ORTHANC_ENABLE_MD5 == 1
+  static char GetHexadecimalCharacter(uint8_t value)  // maps 0..15 to '0'..'9', 'a'..'f'
+  {
+    assert(value < 16);
+
+    if (value < 10)
+    {
+      return value + '0';  // decimal digit
+    }
+    else
+    {
+      return (value - 10) + 'a';  // lowercase letter
+    }
+  }
+
+
+  struct Toolbox::MD5Context::PImpl
+  {
+    md5_state_s  state_;  // state handed to md5_init() / md5_append() / md5_finish()
+    bool         done_;  // true once Export() has run; Append() and Export() then throw
+
+    PImpl() :
+      done_(false)
+    {
+      md5_init(&state_);
+    }
+  };
+
+
+  Toolbox::MD5Context::MD5Context() :
+    pimpl_(new PImpl)  // the PImpl constructor initializes the MD5 state
+  {
+  }
+
+
+  void Toolbox::MD5Context::Append(const void* data,
+                                   size_t size)
+  {
+    static const size_t MAX_SIZE = 128 * 1024 * 1024;  // maximum number of bytes given to one md5_append() call
+
+    if (pimpl_->done_)  // no more data is accepted after Export()
+    {
+      throw OrthancException(ErrorCode_BadSequenceOfCalls);
+    }
+
+    const uint8_t *p = reinterpret_cast<const uint8_t*>(data);
+
+    while (size > 0)  // a zero "size" leaves the hash untouched
+    {
+      /**
+       * The built-in implementation of MD5 requires that "size" can
+       * be cast to "int", so we feed it in chunks of at most
+       * 128MB. This fixes an incorrect behavior in Orthanc <= 1.12.7.
+       **/
+
+      int chunkSize;
+      if (size > MAX_SIZE)
+      {
+        chunkSize = static_cast<int>(MAX_SIZE);
+      }
+      else
+      {
+        chunkSize = static_cast<int>(size);
+      }
+
+      md5_append(&pimpl_->state_, reinterpret_cast<const md5_byte_t*>(p), chunkSize);
+
+      p += chunkSize;
+
+      assert(static_cast<size_t>(chunkSize) <= size);
+      size -= chunkSize;
+    }
+  }
+
+
+  void Toolbox::MD5Context::Append(const std::string& source)
+  {
+    if (source.size() > 0)  // an empty string contributes no bytes to the hash
+    {
+      Append(source.c_str(), source.size());
+    }
+  }
+
+
+  void Toolbox::MD5Context::Export(std::string& target)
+  {
+    if (pimpl_->done_)  // the hash can only be exported once
+    {
+      throw OrthancException(ErrorCode_BadSequenceOfCalls);
+    }
+
+    pimpl_->done_ = true;  // from now on, Append() and Export() throw
+
+    md5_byte_t actualHash[16];  // raw digest (16 bytes)
+    md5_finish(&pimpl_->state_, actualHash);
+
+    target.resize(32);  // 2 hexadecimal characters per byte of the digest
+    for (unsigned int i = 0; i < 16; i++)
+    {
+      target[2 * i] = GetHexadecimalCharacter(static_cast<uint8_t>(actualHash[i] / 16));  // high nibble
+      target[2 * i + 1] = GetHexadecimalCharacter(static_cast<uint8_t>(actualHash[i] % 16));  // low nibble
+    }
+  }
+#endif  /* ORTHANC_ENABLE_MD5 */
+
+
   void Toolbox::LinesIterator::FindEndOfLine()
   {
     lineEnd_ = lineStart_;
@@ -444,21 +551,6 @@
 
 
 #if ORTHANC_ENABLE_MD5 == 1
-  static char GetHexadecimalCharacter(uint8_t value)
-  {
-    assert(value < 16);
-
-    if (value < 10)
-    {
-      return value + '0';
-    }
-    else
-    {
-      return (value - 10) + 'a';
-    }
-  }
-
-
   void Toolbox::ComputeMD5(std::string& result,
                            const std::string& data)
   {
@@ -477,62 +569,12 @@
                            const void* data,
                            size_t size)
   {
-    md5_state_s state;
-    md5_init(&state);
-
-    if (size > 0)
-    {
-      md5_append(&state, 
-                 reinterpret_cast<const md5_byte_t*>(data), 
-                 static_cast<int>(size));
-    }
-
-    md5_byte_t actualHash[16];
-    md5_finish(&state, actualHash);
-
-    result.resize(32);
-    for (unsigned int i = 0; i < 16; i++)
-    {
-      result[2 * i] = GetHexadecimalCharacter(static_cast<uint8_t>(actualHash[i] / 16));
-      result[2 * i + 1] = GetHexadecimalCharacter(static_cast<uint8_t>(actualHash[i] % 16));
-    }
+    MD5Context context;
+    context.Append(data, size);
+    context.Export(result);
   }
 
   void Toolbox::ComputeMD5(std::string& result,
-                           std::istream& inputStream)
-  {
-    md5_state_s state;
-    md5_init(&state);
-
-    const size_t bufferSize = 1024;
-    char buffer[bufferSize];
-    
-    while (inputStream.good())
-    {
-      inputStream.read(buffer, bufferSize);
-      std::streamsize bytesRead = inputStream.gcount();
-
-      if (bytesRead > 0)
-      {
-        md5_append(&state, 
-                   reinterpret_cast<const md5_byte_t*>(buffer), 
-                 static_cast<int>(bytesRead));
-      }
-    }
-
-    md5_byte_t actualHash[16];
-    md5_finish(&state, actualHash);
-
-    result.resize(32);
-    for (unsigned int i = 0; i < 16; i++)
-    {
-      result[2 * i] = GetHexadecimalCharacter(static_cast<uint8_t>(actualHash[i] / 16));
-      result[2 * i + 1] = GetHexadecimalCharacter(static_cast<uint8_t>(actualHash[i] % 16));
-    }
-  }
-
-
-  void Toolbox::ComputeMD5(std::string& result,
                            const std::set<std::string>& data)
   {
     std::string s;
--- a/OrthancFramework/Sources/Toolbox.h	Fri May 30 12:22:52 2025 +0200
+++ b/OrthancFramework/Sources/Toolbox.h	Fri May 30 12:23:19 2025 +0200
@@ -82,6 +82,25 @@
   class ORTHANC_PUBLIC Toolbox
   {
   public:
+#if ORTHANC_ENABLE_MD5 == 1
+    class ORTHANC_PUBLIC MD5Context : public boost::noncopyable  // incremental computation of a MD5 hash
+    {
+    private:
+      struct PImpl;  // same class-key ("struct") as its definition in Toolbox.cpp, avoiding mismatched-tags warnings
+      boost::shared_ptr<PImpl> pimpl_;
+
+    public:
+      MD5Context();
+
+      void Append(const void* data,  // hashes "size" more bytes; throws if called after Export()
+                  size_t size);
+
+      void Append(const std::string& source);  // same as above, for the content of a string
+
+      void Export(std::string& target);  // writes the 32-character hexadecimal hash; can only be called once
+    };
+#endif
+
     class ORTHANC_PUBLIC LinesIterator : public boost::noncopyable
     {
     private:
@@ -134,9 +153,6 @@
 
     static void ComputeMD5(std::string& result,
                            const std::set<std::string>& data);
-
-    static void ComputeMD5(std::string& result,
-                           std::istream& stream);
 #endif
 
     static void ComputeSHA1(std::string& result,
--- a/OrthancFramework/UnitTestsSources/FrameworkTests.cpp	Fri May 30 12:22:52 2025 +0200
+++ b/OrthancFramework/UnitTestsSources/FrameworkTests.cpp	Fri May 30 12:23:19 2025 +0200
@@ -398,9 +398,24 @@
   Toolbox::ComputeMD5(s, set);
   ASSERT_EQ("d1aaf4767a3c10a473407a4e47b02da6", s); // set md5 same as string with the values sorted
 
-  std::istringstream iss(std::string("aaabbbccc"));
-  Toolbox::ComputeMD5(s, iss);
-  ASSERT_EQ("d1aaf4767a3c10a473407a4e47b02da6", s);
+  {
+    Toolbox::MD5Context context;
+    context.Append("");
+    context.Append(NULL, 0);
+    context.Append("Hello");
+    context.Export(s);
+    ASSERT_EQ("8b1a9953c4611296a827abf8c47804d7", s);
+    ASSERT_THROW(context.Append("World"), OrthancException);
+    ASSERT_THROW(context.Export(s), OrthancException);
+  }
+
+#if ORTHANC_SANDBOXED != 1
+  {
+    std::istringstream iss(std::string("aaabbbccc"));
+    SystemToolbox::ComputeStreamMD5(s, iss);
+    ASSERT_EQ("d1aaf4767a3c10a473407a4e47b02da6", s);
+  }
+#endif
 }
 
 TEST(Toolbox, ComputeSHA1)
@@ -1598,8 +1613,6 @@
 #if ORTHANC_SANDBOXED != 1 && ORTHANC_ENABLE_MD5 == 1
 TEST(Toolbox, FileMD5)
 {
-  std::string path;
-
   {
     TemporaryFile tmp1, tmp2;
     std::string s = "aaabbbccc";
@@ -1635,7 +1648,6 @@
 
     ASSERT_FALSE(SystemToolbox::CompareFilesMD5(tmp1.GetPath(), tmp2.GetPath()));
   }
-
 }
 #endif