changeset 6608:9722dc04e6a6

support of UTF-8 in ZIP archives, added configuration option "ZipUseUtf8"
author Sebastien Jodogne <s.jodogne@gmail.com>
date Sun, 01 Mar 2026 12:19:35 +0100
parents f30191e9ffbf
children e703805398d8
files OrthancServer/Sources/OrthancRestApi/OrthancRestArchive.cpp OrthancServer/Sources/ServerJobs/ArchiveJob.cpp OrthancServer/Sources/ServerJobs/ArchiveJob.h OrthancServer/UnitTestsSources/ServerJobsTests.cpp TODO
diffstat 5 files changed, 85 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/OrthancServer/Sources/OrthancRestApi/OrthancRestArchive.cpp	Sun Mar 01 11:21:54 2026 +0100
+++ b/OrthancServer/Sources/OrthancRestApi/OrthancRestArchive.cpp	Sun Mar 01 12:19:35 2026 +0100
@@ -45,6 +45,7 @@
   static const char* const KEY_LOSSY_QUALITY = "LossyQuality";
   static const char* const KEY_FILENAME = "Filename";
   static const char* const KEY_USER_DATA = "UserData";
+  static const char* const KEY_ALLOW_UTF8 = "Utf8";
 
   static const char* const GET_TRANSCODE = "transcode";
   static const char* const GET_LOSSY_QUALITY = "lossy-quality";
@@ -52,6 +53,7 @@
   static const char* const GET_RESOURCES = "resources";
 
   static const char* const CONFIG_LOADER_THREADS = "ZipLoaderThreads";
+  static const char* const CONFIG_ALLOW_UTF8 = "ZipUseUtf8";
 
 
   static void AddResourcesOfInterestFromString(ArchiveJob& job,
@@ -126,9 +128,11 @@
                                unsigned int& loaderThreads,  /* out */
                                std::string& filename,        /* out */
                                Json::Value& userData,        /* out */
+                               bool& allowUtf8,              /* out (new in Orthanc 1.12.11) */
                                const Json::Value& body,      /* in */
                                const bool defaultExtended    /* in */,
-                               const std::string& defaultFilename /* in */)
+                               const std::string& defaultFilename /* in */,
+                               const bool defaultUtf8        /* in (new in Orthanc 1.12.11) */)
   {
     synchronous = OrthancRestApi::IsSynchronousJobRequest
       (true /* synchronous by default */, body);
@@ -186,7 +190,17 @@
       OrthancConfiguration::ReaderLock lock;
       loaderThreads = lock.GetConfiguration().GetUnsignedIntegerParameter(CONFIG_LOADER_THREADS, 0);  // New in Orthanc 1.10.0
     }
-   
+
+    // New in Orthanc 1.12.11
+    if (body.type() == Json::objectValue &&
+        body.isMember(KEY_ALLOW_UTF8))
+    {
+      allowUtf8 = SerializationToolbox::ReadBoolean(body, KEY_ALLOW_UTF8);
+    }
+    else
+    {
+      allowUtf8 = defaultUtf8;
+    }
   }
 
 
@@ -550,7 +564,7 @@
       .SetRequestField(KEY_LOSSY_QUALITY, RestApiCallDocumentation::Type_Number,
                         "If transcoding to a lossy transfer syntax, this entry defines the quality "
                         "as an integer between 1 and 100.  If not provided, the value is defined "
-                        "by the \"DicomLossyTranscodingQuality\" configuration. (new in v1.12.7)", false)
+                        "by the \"DicomLossyTranscodingQuality\" configuration. (new in 1.12.7)", false)
       .SetRequestField(KEY_FILENAME, RestApiCallDocumentation::Type_String,
                         "Filename to set in the \"Content-Disposition\" HTTP header "
                         "(including file extension)", false)
@@ -558,6 +572,11 @@
                        "In asynchronous mode, the priority of the job. The higher the value, the higher the priority.", false)
       .SetRequestField(KEY_USER_DATA, RestApiCallDocumentation::Type_JsonObject,
                        "In asynchronous mode, user data that will be attached to the job.", false)
+      .SetRequestField(KEY_ALLOW_UTF8, RestApiCallDocumentation::Type_Boolean, "If `true`, filenames will be encoded "
+                       "using UTF-8 in the ZIP archive, which may not be supported by your operating system or by your "
+                       "ZIP uncompression software. If `false`, filenames will be encoded using plain ASCII, which was "
+                       "the default in Orthanc <= 1.12.10. Default value is defined by the \"" +
+                       std::string(CONFIG_ALLOW_UTF8) + "\" configuration option. (new in 1.12.11)", false)
       .AddAnswerType(MimeType_Zip, "In synchronous mode, the ZIP file containing the archive")
       .AddAnswerType(MimeType_Json, "In asynchronous mode, information about the job that has been submitted to "
                      "generate the archive: https://orthanc.uclouvain.be/book/users/advanced-rest.html#jobs")
@@ -597,6 +616,13 @@
     Json::Value body;
     if (call.ParseJsonRequest(body))
     {
+      bool defaultUtf8;
+
+      {
+        OrthancConfiguration::ReaderLock lock;
+        defaultUtf8 = lock.GetConfiguration().GetBooleanParameter(CONFIG_ALLOW_UTF8, false);  // New in Orthanc 1.12.11
+      }
+
       bool synchronous, extended, transcode;
       DicomTransferSyntax transferSyntax;
       int priority;
@@ -604,9 +630,11 @@
       std::string filename;
       unsigned int lossyQuality;
       Json::Value userData;
+      bool allowUtf8;
 
       GetJobParameters(synchronous, extended, transcode, transferSyntax, lossyQuality,
-                       priority, loaderThreads, filename, userData, body, DEFAULT_IS_EXTENDED, "Archive.zip");
+                       priority, loaderThreads, filename, userData, allowUtf8,
+                       body, DEFAULT_IS_EXTENDED, "Archive.zip", defaultUtf8);
       
       std::unique_ptr<ArchiveJob> job(new ArchiveJob(context, IS_MEDIA, extended, ResourceType_Patient));
       AddResourcesOfInterest(*job, body);
@@ -618,6 +646,7 @@
       }
       
       job->SetLoaderThreads(loaderThreads);
+      job->SetAllowUtf8(allowUtf8);
       job->SetUserData(userData);
 
       SubmitJob(call.GetOutput(), context, job, priority, synchronous, filename);
@@ -789,6 +818,13 @@
     Json::Value body;
     if (call.ParseJsonRequest(body))
     {
+      bool defaultUtf8;
+
+      {
+        OrthancConfiguration::ReaderLock lock;
+        defaultUtf8 = lock.GetConfiguration().GetBooleanParameter(CONFIG_ALLOW_UTF8, false);  // New in Orthanc 1.12.11
+      }
+
       bool synchronous, extended, transcode;
       DicomTransferSyntax transferSyntax;
       int priority;
@@ -796,9 +832,11 @@
       std::string filename;
       unsigned int lossyQuality;
       Json::Value userData;
+      bool allowUtf8;
 
       GetJobParameters(synchronous, extended, transcode, transferSyntax, lossyQuality,
-                       priority, loaderThreads, filename, userData, body, false /* by default, not extented */, id + ".zip");
+                       priority, loaderThreads, filename, userData, allowUtf8,
+                       body, false /* by default, not extented */, id + ".zip", defaultUtf8);
       
       std::unique_ptr<ArchiveJob> job(new ArchiveJob(context, IS_MEDIA, extended, LEVEL));
       job->AddResource(id, true, LEVEL);
@@ -810,6 +848,7 @@
       }
 
       job->SetLoaderThreads(loaderThreads);
+      job->SetAllowUtf8(allowUtf8);
       job->SetUserData(userData);
 
       SubmitJob(call.GetOutput(), context, job, priority, synchronous, filename);
--- a/OrthancServer/Sources/ServerJobs/ArchiveJob.cpp	Sun Mar 01 11:21:54 2026 +0100
+++ b/OrthancServer/Sources/ServerJobs/ArchiveJob.cpp	Sun Mar 01 12:19:35 2026 +0100
@@ -55,6 +55,7 @@
 static const char* const KEY_UNCOMPRESSED_SIZE = "UncompressedSize";
 static const char* const KEY_ARCHIVE_SIZE = "ArchiveSize";
 static const char* const KEY_TRANSCODE = "Transcode";
+static const char* const KEY_ALLOW_UTF8 = "Utf8";
 
 static boost::mutex loaderThreadsCounterMutex;
 static uint32_t loaderThreadsCounter = 0;
@@ -1095,18 +1096,21 @@
     std::unique_ptr<DicomDirWriter>         dicomDir_;
     bool                                    isMedia_;
     bool                                    isStream_;
+    bool                                    allowUtf8_;
 
   public:
     ZipWriterIterator(ServerContext& context,
                       InstanceLoader& instanceLoader,
                       ArchiveIndex& archive,
                       bool isMedia,
-                      bool enableExtendedSopClass) :
+                      bool enableExtendedSopClass,
+                      bool allowUtf8) :
       context_(context),
       instanceLoader_(instanceLoader),
       commands_(instanceLoader),
       isMedia_(isMedia),
-      isStream_(false)
+      isStream_(false),
+      allowUtf8_(allowUtf8)
     {
       if (isMedia)
       {
@@ -1134,6 +1138,7 @@
       {
         zip_.reset(new HierarchicalZipWriter(path));
         zip_->SetZip64(commands_.IsZip64());
+        zip_->SetAllowUtf8(allowUtf8_);
         isStream_ = false;
       }
       else
@@ -1149,6 +1154,7 @@
       if (zip_.get() == NULL)
       {
         zip_.reset(new HierarchicalZipWriter(protection.release(), commands_.IsZip64()));
+        zip_->SetAllowUtf8(allowUtf8_);
         isStream_ = true;
       }
       else
@@ -1267,7 +1273,8 @@
     transcode_(false),
     transferSyntax_(DicomTransferSyntax_LittleEndianImplicit),
     lossyQuality_(100),
-    loaderThreads_(0)
+    loaderThreads_(0),
+    allowUtf8_(false)
   {
   }
 
@@ -1393,6 +1400,19 @@
   }
 
 
+  void ArchiveJob::SetAllowUtf8(bool allowUtf8)  // Records the UTF-8 flag that is handed to ZipWriterIterator when the writer is created
+  {
+    if (writer_.get() != NULL)   // Already started: the writer was built with the previous flag value, so changing it now is rejected
+    {
+      throw OrthancException(ErrorCode_BadSequenceOfCalls);
+    }
+    else
+    {
+      allowUtf8_ = allowUtf8;
+    }
+  }
+
+
   void ArchiveJob::Reset()
   {
     throw OrthancException(ErrorCode_BadSequenceOfCalls,
@@ -1433,7 +1453,7 @@
           assert(asynchronousTarget_.get() != NULL);
           asynchronousTarget_->Touch();  // Make sure we can write to the temporary file
           
-          writer_.reset(new ZipWriterIterator(context_, *instanceLoader_, *archive_, isMedia_, enableExtendedSopClass_));
+          writer_.reset(new ZipWriterIterator(context_, *instanceLoader_, *archive_, isMedia_, enableExtendedSopClass_, allowUtf8_));
           writer_->SetOutputFile(asynchronousTarget_->GetPath());
         }
       }
@@ -1441,7 +1461,7 @@
       {
         assert(synchronousTarget_.get() != NULL);
     
-        writer_.reset(new ZipWriterIterator(context_, *instanceLoader_, *archive_, isMedia_, enableExtendedSopClass_));
+        writer_.reset(new ZipWriterIterator(context_, *instanceLoader_, *archive_, isMedia_, enableExtendedSopClass_, allowUtf8_));
         writer_->AcquireOutputStream(synchronousTarget_.release());
       }
 
@@ -1612,6 +1632,9 @@
     {
       value[KEY_TRANSCODE] = GetTransferSyntaxUid(transferSyntax_);
     }
+
+    // New in Orthanc 1.12.11
+    value[KEY_ALLOW_UTF8] = allowUtf8_;
   }
 
 
--- a/OrthancServer/Sources/ServerJobs/ArchiveJob.h	Sun Mar 01 11:21:54 2026 +0100
+++ b/OrthancServer/Sources/ServerJobs/ArchiveJob.h	Sun Mar 01 12:19:35 2026 +0100
@@ -75,6 +75,9 @@
     // New in Orthanc 1.10.0
     unsigned int         loaderThreads_;
 
+    // New in Orthanc 1.12.11
+    bool                 allowUtf8_;
+
     void FinalizeTarget();
     
   public:
@@ -111,6 +114,8 @@
 
     void SetLoaderThreads(unsigned int loaderThreads);
 
+    void SetAllowUtf8(bool allowUtf8);
+
     virtual void Reset() ORTHANC_OVERRIDE;
 
     virtual void Start() ORTHANC_OVERRIDE;
--- a/OrthancServer/UnitTestsSources/ServerJobsTests.cpp	Sun Mar 01 11:21:54 2026 +0100
+++ b/OrthancServer/UnitTestsSources/ServerJobsTests.cpp	Sun Mar 01 12:19:35 2026 +0100
@@ -783,6 +783,14 @@
   {
     ArchiveJob job(GetContext(), false, false, ResourceType_Patient);
     ASSERT_FALSE(job.Serialize(s));  // Cannot serialize this
+
+    Json::Value content;
+    job.GetPublicContent(content);
+    ASSERT_FALSE(content["Utf8"].asBool());
+
+    job.SetAllowUtf8(true);
+    job.GetPublicContent(content);
+    ASSERT_TRUE(content["Utf8"].asBool());
   }
 
   // DicomModalityStoreJob
--- a/TODO	Sun Mar 01 11:21:54 2026 +0100
+++ b/TODO	Sun Mar 01 12:19:35 2026 +0100
@@ -171,11 +171,6 @@
   The patch that was initialy provided was breaking the IngestTranscoding.
   This might require a DCMTK decoding plugin ?
   https://discourse.orthanc-server.org/t/orthanc-convert-ybr-to-rgb-but-does-not-change-metadata/3533/9
-* Support full UTF-8 path in zip internal folders.  Right now, in EnsureUniqueFilename,
-  we call KeepAlphanumeric().  Also allow '+' and '-' characters.  Make sure to
-  always avoid control characters like '\r\n'.
-  https://discourse.orthanc-server.org/t/seriesdescription-characters-and-removed-during-oe2-zip-export/6397
-
 
 
 ---------