changeset 6889:5f1e9d5cfb52

fix slow ingest of files containing huge arrays of float/double values in their DICOM tags
author Alain Mazy <am@orthanc.team>
date Mon, 01 Jun 2026 17:22:29 +0200
parents b2b7a61b3c05
children 4705abdce449
files NEWS OrthancFramework/Sources/DicomParsing/FromDcmtkBridge.cpp OrthancFramework/Sources/DicomParsing/FromDcmtkBridge.h OrthancFramework/Sources/DicomParsing/ParsedDicomFile.cpp OrthancServer/Sources/Search/DatabaseLookup.cpp OrthancServer/Sources/Search/HierarchicalMatcher.cpp
diffstat 6 files changed, 57 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Mon Jun 01 16:09:18 2026 +0200
+++ b/NEWS	Mon Jun 01 17:22:29 2026 +0200
@@ -40,6 +40,8 @@
 * Fix usage of "LocalAet" in C-Find and "queries/../answers/../retrieve".
 * Fix Orthanc::ImageAccessor that was broken in Orthanc Framework 1.12.11
 * Fix a memory leak in C-GET (https://github.com/orthanc-server/orthanc-builder/issues/36)
+* Fix slow ingest of files containing huge arrays of float/double values in their
+  DICOM tags (https://discourse.orthanc-server.org/t/orthanc-1-12-11-performance-issues-with-deformable-registrations/6448)
 * Fix a Denial of Service via Deeply Nested DICOM Sequences
   https://orthanc.uclouvain.be/bugs/show_bug.cgi?id=258
   Security issue reported by Jose Lopez Martinez (aka elpe_pinillo) from Deloitte.
--- a/OrthancFramework/Sources/DicomParsing/FromDcmtkBridge.cpp	Mon Jun 01 16:09:18 2026 +0200
+++ b/OrthancFramework/Sources/DicomParsing/FromDcmtkBridge.cpp	Mon Jun 01 17:22:29 2026 +0200
@@ -634,8 +634,8 @@
       {
         target.SetValueInternal(element->getTag().getGTag(),
                                 element->getTag().getETag(),
-                                ConvertLeafElement(*element, DicomToJsonFlags_Default, maxStringLength, encoding,
-                                                   hasCodeExtensions, ignoreTagLength, Convert(element->getVR())));
+                                ConvertLeafElement(*element, DicomToJsonFlags_Default, maxStringLength, maxStringLength /* maxBinaryArrayLength: consider the same value as the maxStringLength */, 
+                                                   encoding, hasCodeExtensions, ignoreTagLength, Convert(element->getVR())));
       }
       else
       {
@@ -694,6 +694,7 @@
   DicomValue* FromDcmtkBridge::ConvertLeafElement(DcmElement& element,
                                                   DicomToJsonFlags flags,
                                                   unsigned int maxStringLength,
+                                                  unsigned int maxBinaryArrayLength,
                                                   Encoding encoding,
                                                   bool hasCodeExtensions,
                                                   const std::set<DicomTag>& ignoreTagLength,
@@ -894,8 +895,21 @@
               throw OrthancException(ErrorCode_BadFileFormat);
             }
 
-            const unsigned long numFloats = element.getLength() / sizeof(Float32);
+            uint64_t numFloats = static_cast<uint64_t>(element.getLength()) / sizeof(Float32);
+            numFloats = std::min(numFloats, static_cast<uint64_t>(maxBinaryArrayLength));
+
+            uint64_t preAllocateStringSize = static_cast<uint64_t>(maxStringLength); // We have seen files with 100M floats: https://discourse.orthanc-server.org/t/orthanc-1-12-11-performance-issues-with-deformable-registrations/6448
+            if (maxStringLength == 0) // no limit, use an heuristic to reserve the string: 12 chars per float
+            {
+              preAllocateStringSize = 12ull * numFloats;
+              if (preAllocateStringSize > static_cast<uint64_t>(std::numeric_limits<size_t>::max()))
+              {
+                throw OrthancException(ErrorCode_BadFileFormat, "Too many values in binary array");
+              }
+            }
+
             std::string result;
+            result.reserve(static_cast<size_t>(preAllocateStringSize));
             for (unsigned long i = 0; i < numFloats; i++)
             {
               if (i > 0)
@@ -927,8 +941,21 @@
               throw OrthancException(ErrorCode_BadFileFormat);
             }
 
-            const unsigned long numDoubles = element.getLength() / sizeof(Float64);
+            uint64_t numDoubles = static_cast<uint64_t>(element.getLength()) / sizeof(Float64);
+            numDoubles = std::min(numDoubles, static_cast<uint64_t>(maxBinaryArrayLength));
+
+            uint64_t preAllocateStringSize = static_cast<uint64_t>(maxStringLength); // We have seen files with 100M floats: https://discourse.orthanc-server.org/t/orthanc-1-12-11-performance-issues-with-deformable-registrations/6448
+            if (maxStringLength == 0) // no limit, use an heuristic to reserve the string: 20 chars per float
+            {
+              preAllocateStringSize = 20ull * numDoubles;
+              if (preAllocateStringSize > static_cast<uint64_t>(std::numeric_limits<size_t>::max()))
+              {
+                throw OrthancException(ErrorCode_BadFileFormat, "Too many values in binary array");
+              }
+            }
+
             std::string result;
+            result.reserve(static_cast<size_t>(preAllocateStringSize));
             for (unsigned long i = 0; i < numDoubles; i++)
             {
               if (i > 0)
@@ -961,7 +988,19 @@
               throw OrthancException(ErrorCode_BadFileFormat);
             }
 
-            const unsigned long numValues = element.getLength() / sizeof(Uint32);
+            uint64_t numValues = static_cast<uint64_t>(element.getLength()) / sizeof(Uint32);
+            numValues = std::min(numValues, static_cast<uint64_t>(maxBinaryArrayLength));
+
+            uint64_t preAllocateStringSize = static_cast<uint64_t>(maxStringLength); // We have seen files with 100M floats: https://discourse.orthanc-server.org/t/orthanc-1-12-11-performance-issues-with-deformable-registrations/6448
+            if (maxStringLength == 0) // no limit, use an heuristic to reserve the string: 10 chars per float
+            {
+              preAllocateStringSize = 10ull * numValues;
+              if (preAllocateStringSize > static_cast<uint64_t>(std::numeric_limits<size_t>::max()))
+              {
+                throw OrthancException(ErrorCode_BadFileFormat, "Too many values in binary array");
+              }
+            }
+
             std::string result;
             for (unsigned long i = 0; i < numValues; i++)
             {
@@ -1203,9 +1242,14 @@
 
     if (element.isLeaf())
     {
-      // The "0" below lets "LeafValueToJson()" take care of "TooLong" values
+      // The "maxStringLength=0" below lets "LeafValueToJson()" take care of "TooLong" values
+      // And the "maxBinaryArrayLength=maxStringLength" is there to limit the size of binary arrays that are actually returned 
+      // as string that is later clipped in LeafValueToJson.
+      // Since we have seen files with 100M float values, we don't want to generate a 1GB string that would be clipped to 256 bytes
+      // in LeafValueToJson.
+      // https://discourse.orthanc-server.org/t/orthanc-1-12-11-performance-issues-with-deformable-registrations/6448
       std::unique_ptr<DicomValue> v(FromDcmtkBridge::ConvertLeafElement
-                                    (element, flags, 0, encoding, hasCodeExtensions, ignoreTagLength, Convert(element.getVR())));
+                                    (element, flags, 0, maxStringLength /* maxBinaryArrayLength: consider the same value as the maxStringLength */, encoding, hasCodeExtensions, ignoreTagLength, Convert(element.getVR())));
 
       if (ignoreTagLength.find(GetTag(element)) == ignoreTagLength.end())
       {
--- a/OrthancFramework/Sources/DicomParsing/FromDcmtkBridge.h	Mon Jun 01 16:09:18 2026 +0200
+++ b/OrthancFramework/Sources/DicomParsing/FromDcmtkBridge.h	Mon Jun 01 17:22:29 2026 +0200
@@ -190,6 +190,7 @@
     static DicomValue* ConvertLeafElement(DcmElement& element,
                                           DicomToJsonFlags flags,
                                           unsigned int maxStringLength,
+                                          unsigned int maxBinaryArrayLength,
                                           Encoding encoding,
                                           bool hasCodeExtensions,
                                           const std::set<DicomTag>& ignoreTagLength,
--- a/OrthancFramework/Sources/DicomParsing/ParsedDicomFile.cpp	Mon Jun 01 16:09:18 2026 +0200
+++ b/OrthancFramework/Sources/DicomParsing/ParsedDicomFile.cpp	Mon Jun 01 17:22:29 2026 +0200
@@ -892,7 +892,7 @@
       std::set<DicomTag> tmp;
       std::unique_ptr<DicomValue> v(FromDcmtkBridge::ConvertLeafElement
                                     (*element, DicomToJsonFlags_Default, 
-                                     0, encoding, hasCodeExtensions, tmp, FromDcmtkBridge::Convert(element->getVR())));
+                                     0, 0, encoding, hasCodeExtensions, tmp, FromDcmtkBridge::Convert(element->getVR())));
       
       if (v.get() == NULL ||
           v->IsNull())
--- a/OrthancServer/Sources/Search/DatabaseLookup.cpp	Mon Jun 01 16:09:18 2026 +0200
+++ b/OrthancServer/Sources/Search/DatabaseLookup.cpp	Mon Jun 01 17:22:29 2026 +0200
@@ -111,7 +111,7 @@
 
       std::set<DicomTag> ignoreTagLength;
       std::unique_ptr<DicomValue> value(FromDcmtkBridge::ConvertLeafElement
-                                        (*element, DicomToJsonFlags_None, 0, encoding, hasCodeExtensions,
+                                        (*element, DicomToJsonFlags_None, 0, 0, encoding, hasCodeExtensions,
                                          ignoreTagLength, FromDcmtkBridge::Convert(element->getVR())));
 
       // WARNING: Also modify "HierarchicalMatcher::Setup()" if modifying this code
--- a/OrthancServer/Sources/Search/HierarchicalMatcher.cpp	Mon Jun 01 16:09:18 2026 +0200
+++ b/OrthancServer/Sources/Search/HierarchicalMatcher.cpp	Mon Jun 01 17:22:29 2026 +0200
@@ -114,7 +114,7 @@
 
         std::set<DicomTag> ignoreTagLength;
         std::unique_ptr<DicomValue> value(FromDcmtkBridge::ConvertLeafElement
-                                          (*element, DicomToJsonFlags_None, 0, encoding, hasCodeExtensions,
+                                          (*element, DicomToJsonFlags_None, 0, 0, encoding, hasCodeExtensions,
                                            ignoreTagLength, FromDcmtkBridge::Convert(element->getVR())));
 
         // WARNING: Also modify "DatabaseLookup::IsMatch()" if modifying this code