changeset 1695:18c02c6987d5

fix for encodings
author Sebastien Jodogne <s.jodogne@gmail.com>
date Thu, 08 Oct 2015 14:34:19 +0200
parents 06d579e82bb8
children b62c2e706c13
files OrthancServer/DicomModification.cpp OrthancServer/DicomModification.h OrthancServer/FromDcmtkBridge.cpp OrthancServer/FromDcmtkBridge.h OrthancServer/OrthancRestApi/OrthancRestAnonymizeModify.cpp OrthancServer/ParsedDicomFile.cpp OrthancServer/ParsedDicomFile.h UnitTestsSources/FromDcmtkTests.cpp
diffstat 8 files changed, 91 insertions(+), 59 deletions(-) [+]
line wrap: on
line diff
--- a/OrthancServer/DicomModification.cpp	Thu Oct 08 13:49:20 2015 +0200
+++ b/OrthancServer/DicomModification.cpp	Thu Oct 08 14:34:19 2015 +0200
@@ -136,12 +136,12 @@
   }
 
   void DicomModification::Replace(const DicomTag& tag,
-                                  const std::string& value,
+                                  const std::string& utf8Value,
                                   bool safeForAnonymization)
   {
     removals_.erase(tag);
     privateTagsToKeep_.erase(tag);
-    replacements_[tag] = value;
+    replacements_[tag] = utf8Value;
 
     if (!safeForAnonymization)
     {
--- a/OrthancServer/DicomModification.h	Thu Oct 08 13:49:20 2015 +0200
+++ b/OrthancServer/DicomModification.h	Thu Oct 08 14:34:19 2015 +0200
@@ -73,7 +73,7 @@
     bool IsRemoved(const DicomTag& tag) const;
 
     void Replace(const DicomTag& tag,
-                 const std::string& value,
+                 const std::string& utf8Value,
                  bool safeForAnonymization = false);
 
     bool IsReplaced(const DicomTag& tag) const;
--- a/OrthancServer/FromDcmtkBridge.cpp	Thu Oct 08 13:49:20 2015 +0200
+++ b/OrthancServer/FromDcmtkBridge.cpp	Thu Oct 08 14:34:19 2015 +0200
@@ -1146,17 +1146,23 @@
 
   void FromDcmtkBridge::FillElementWithString(DcmElement& element,
                                               const DicomTag& tag,
-                                              const std::string& value,
-                                              bool decodeBinaryTags)
+                                              const std::string& utf8Value,
+                                              bool decodeBinaryTags,
+                                              Encoding dicomEncoding)
   {
     std::string binary;
-    const std::string* decoded = &value;
+    const std::string* decoded = &utf8Value;
 
     if (decodeBinaryTags &&
-        boost::starts_with(value, "data:application/octet-stream;base64,"))
+        boost::starts_with(utf8Value, "data:application/octet-stream;base64,"))
     {
       std::string mime;
-      Toolbox::DecodeDataUriScheme(mime, binary, value);
+      Toolbox::DecodeDataUriScheme(mime, binary, utf8Value);
+      decoded = &binary;
+    }
+    else if (dicomEncoding != Encoding_Utf8)
+    {
+      binary = Toolbox::ConvertFromUtf8(utf8Value, dicomEncoding);
       decoded = &binary;
     }
 
@@ -1312,7 +1318,8 @@
 
   DcmElement* FromDcmtkBridge::FromJson(const DicomTag& tag,
                                         const Json::Value& value,
-                                        bool decodeBinaryTags)
+                                        bool decodeBinaryTags,
+                                        Encoding dicomEncoding)
   {
     std::auto_ptr<DcmElement> element;
 
@@ -1320,7 +1327,7 @@
     {
       case Json::stringValue:
         element.reset(CreateElementForTag(tag));
-        FillElementWithString(*element, tag, value.asString(), decodeBinaryTags);
+        FillElementWithString(*element, tag, value.asString(), decodeBinaryTags, dicomEncoding);
         break;
 
       case Json::arrayValue:
@@ -1341,7 +1348,7 @@
           Json::Value::Members members = value[i].getMemberNames();
           for (Json::Value::ArrayIndex j = 0; j < members.size(); j++)
           {
-            item->insert(FromJson(ParseTag(members[j]), value[i][members[j]], decodeBinaryTags));
+            item->insert(FromJson(ParseTag(members[j]), value[i][members[j]], decodeBinaryTags, dicomEncoding));
           }
 
           sequence->append(item.release());
--- a/OrthancServer/FromDcmtkBridge.h	Thu Oct 08 13:49:20 2015 +0200
+++ b/OrthancServer/FromDcmtkBridge.h	Thu Oct 08 14:34:19 2015 +0200
@@ -124,11 +124,13 @@
     
     static void FillElementWithString(DcmElement& element,
                                       const DicomTag& tag,
-                                      const std::string& value,
-                                      bool interpretBinaryTags);
+                                      const std::string& utf8Value,  // Encoded using UTF-8
+                                      bool interpretBinaryTags,
+                                      Encoding dicomEncoding);  // Encoding the UTF-8 value is converted to
 
     static DcmElement* FromJson(const DicomTag& tag,
-                                const Json::Value& element,
-                                bool interpretBinaryTags);
+                                const Json::Value& element,  // Encoded using UTF-8
+                                bool interpretBinaryTags,
+                                Encoding dicomEncoding);  // Forwarded to FillElementWithString()
   };
 }
--- a/OrthancServer/OrthancRestApi/OrthancRestAnonymizeModify.cpp	Thu Oct 08 13:49:20 2015 +0200
+++ b/OrthancServer/OrthancRestApi/OrthancRestAnonymizeModify.cpp	Thu Oct 08 14:34:19 2015 +0200
@@ -504,7 +504,6 @@
         throw OrthancException(ErrorCode_CreateDicomNotString);
       }
 
-      std::string value = tags[name].asString();
       DicomTag tag = FromDcmtkBridge::ParseTag(name);
 
       if (tag != DICOM_TAG_SPECIFIC_CHARACTER_SET)
@@ -529,16 +528,9 @@
         {
           throw OrthancException(ErrorCode_CreateDicomUseContent);
         }
-        else if (decodeBinaryTags &&
-                 boost::starts_with(value, "data:application/octet-stream;base64,"))
-        {
-          std::string mime, binary;
-          Toolbox::DecodeDataUriScheme(mime, binary, value);
-          dicom.Replace(tag, binary);
-        }
         else
         {
-          dicom.Replace(tag, Toolbox::ConvertFromUtf8(value, dicom.GetEncoding()));
+          dicom.Replace(tag, tags[name], decodeBinaryTags);
         }
       }
     }
--- a/OrthancServer/ParsedDicomFile.cpp	Thu Oct 08 13:49:20 2015 +0200
+++ b/OrthancServer/ParsedDicomFile.cpp	Thu Oct 08 14:34:19 2015 +0200
@@ -149,7 +149,6 @@
   struct ParsedDicomFile::PImpl
   {
     std::auto_ptr<DcmFileFormat> file_;
-    Encoding encoding_;
   };
 
 
@@ -174,8 +173,6 @@
     }
     pimpl_->file_->loadAllDataIntoMemory();
     pimpl_->file_->transferEnd();
-
-    pimpl_->encoding_ = FromDcmtkBridge::DetectEncoding(*pimpl_->file_->getDataset());
   }
 
 
@@ -594,10 +591,10 @@
 
 
   void ParsedDicomFile::Insert(const DicomTag& tag,
-                               const std::string& value)
+                               const std::string& utf8Value)
   {
     std::auto_ptr<DcmElement> element(FromDcmtkBridge::CreateElementForTag(tag));
-    FromDcmtkBridge::FillElementWithString(*element, tag, value, false);
+    FromDcmtkBridge::FillElementWithString(*element, tag, utf8Value, false, GetEncoding());
     InsertInternal(*pimpl_->file_->getDataset(), element.release());
   }
 
@@ -606,7 +603,7 @@
                                const Json::Value& value,
                                bool decodeBinaryTags)
   {
-    std::auto_ptr<DcmElement> element(FromDcmtkBridge::FromJson(tag, value, decodeBinaryTags));
+    std::auto_ptr<DcmElement> element(FromDcmtkBridge::FromJson(tag, value, decodeBinaryTags, GetEncoding()));
     InsertInternal(*pimpl_->file_->getDataset(), element.release());
   }
 
@@ -640,7 +637,7 @@
 
 
   void ParsedDicomFile::UpdateStorageUid(const DicomTag& tag,
-                                         const std::string& value,
+                                         const std::string& utf8Value,
                                          bool decodeBinaryTags)
   {
     if (tag != DICOM_TAG_SOP_CLASS_UID &&
@@ -650,15 +647,24 @@
     }
 
     std::string binary;
-    const std::string* decoded = &value;
+    const std::string* decoded = &utf8Value;
 
     if (decodeBinaryTags &&
-        boost::starts_with(value, "data:application/octet-stream;base64,"))
+        boost::starts_with(utf8Value, "data:application/octet-stream;base64,"))
     {
       std::string mime;
-      Toolbox::DecodeDataUriScheme(mime, binary, value);
+      Toolbox::DecodeDataUriScheme(mime, binary, utf8Value);
       decoded = &binary;
     }
+    else
+    {
+      const Encoding encoding = GetEncoding();  // Query once: GetEncoding() runs DetectEncoding() at each call
+      if (encoding != Encoding_Utf8)
+      {
+        binary = Toolbox::ConvertFromUtf8(utf8Value, encoding);  // Transcode the UTF-8 input to the file's encoding
+        decoded = &binary;
+      }
+    }
 
     /**
      * dcmodify will automatically correct 'Media Storage SOP Class
@@ -682,13 +688,13 @@
 
 
   void ParsedDicomFile::Replace(const DicomTag& tag,
-                                const std::string& value,
+                                const std::string& utf8Value,
                                 DicomReplaceMode mode)
   {
     std::auto_ptr<DcmElement> element(FromDcmtkBridge::CreateElementForTag(tag));
-    FromDcmtkBridge::FillElementWithString(*element, tag, value, false);
+    FromDcmtkBridge::FillElementWithString(*element, tag, utf8Value, false, GetEncoding());
     ReplaceInternal(*pimpl_->file_->getDataset(), element, mode);
-    UpdateStorageUid(tag, value, false);
+    UpdateStorageUid(tag, utf8Value, false);
   }
 
     
@@ -697,7 +703,7 @@
                                 bool decodeBinaryTags,
                                 DicomReplaceMode mode)
   {
-    std::auto_ptr<DcmElement> element(FromDcmtkBridge::FromJson(tag, value, decodeBinaryTags));
+    std::auto_ptr<DcmElement> element(FromDcmtkBridge::FromJson(tag, value, decodeBinaryTags, GetEncoding()));
     ReplaceInternal(*pimpl_->file_->getDataset(), element, mode);
 
     if (tag == DICOM_TAG_SOP_CLASS_UID ||
@@ -766,7 +772,7 @@
         return false;
       }
 
-      std::auto_ptr<DicomValue> v(FromDcmtkBridge::ConvertLeafElement(*element, pimpl_->encoding_));
+      std::auto_ptr<DicomValue> v(FromDcmtkBridge::ConvertLeafElement(*element, GetEncoding()));
       
       if (v.get() == NULL)
       {
@@ -846,7 +852,6 @@
   ParsedDicomFile::ParsedDicomFile() : pimpl_(new PImpl)
   {
     pimpl_->file_.reset(new DcmFileFormat);
-    pimpl_->encoding_ = Encoding_Ascii;
     Replace(DICOM_TAG_PATIENT_ID, FromDcmtkBridge::GenerateUniqueIdentifier(ResourceType_Patient));
     Replace(DICOM_TAG_STUDY_INSTANCE_UID, FromDcmtkBridge::GenerateUniqueIdentifier(ResourceType_Study));
     Replace(DICOM_TAG_SERIES_INSTANCE_UID, FromDcmtkBridge::GenerateUniqueIdentifier(ResourceType_Series));
@@ -876,7 +881,6 @@
     pimpl_(new PImpl)
   {
     pimpl_->file_.reset(dynamic_cast<DcmFileFormat*>(other.pimpl_->file_->clone()));
-    pimpl_->encoding_ = other.pimpl_->encoding_;
 
     // Create a new instance-level identifier
     Replace(DICOM_TAG_SOP_INSTANCE_UID, FromDcmtkBridge::GenerateUniqueIdentifier(ResourceType_Instance));
@@ -1106,7 +1110,7 @@
 
   Encoding ParsedDicomFile::GetEncoding() const
   {
-    return pimpl_->encoding_;
+    return FromDcmtkBridge::DetectEncoding(*pimpl_->file_->getDataset());  // No cached value: detected from the dataset at each call
   }
 
 
@@ -1119,8 +1123,6 @@
       return;
     }
 
-    pimpl_->encoding_ = encoding;
-
     std::string s = GetDicomSpecificCharacterSet(encoding);
     Replace(DICOM_TAG_SPECIFIC_CHARACTER_SET, s, DicomReplaceMode_InsertIfAbsent);
   }
--- a/OrthancServer/ParsedDicomFile.h	Thu Oct 08 13:49:20 2015 +0200
+++ b/OrthancServer/ParsedDicomFile.h	Thu Oct 08 14:34:19 2015 +0200
@@ -79,21 +79,21 @@
     void Remove(const DicomTag& tag);
 
     void Insert(const DicomTag& tag,
-                const std::string& value);
+                const std::string& utf8Value);
+
+    void Replace(const DicomTag& tag,
+                 const std::string& utf8Value,
+                 DicomReplaceMode mode = DicomReplaceMode_InsertIfAbsent);
 
     void Insert(const DicomTag& tag,
-                const Json::Value& value,
+                const Json::Value& value,   // Assumed to be encoded with UTF-8
                 bool decodeBinaryTags);
 
     void Replace(const DicomTag& tag,
-                 const Json::Value& value,
+                 const Json::Value& value,  // Assumed to be encoded with UTF-8
                  bool decodeBinaryTags,
                  DicomReplaceMode mode = DicomReplaceMode_InsertIfAbsent);
 
-    void Replace(const DicomTag& tag,
-                 const std::string& value,
-                 DicomReplaceMode mode = DicomReplaceMode_InsertIfAbsent);
-
     void RemovePrivateTags()
     {
       RemovePrivateTagsInternal(NULL);
--- a/UnitTestsSources/FromDcmtkTests.cpp	Thu Oct 08 13:49:20 2015 +0200
+++ b/UnitTestsSources/FromDcmtkTests.cpp	Thu Oct 08 14:34:19 2015 +0200
@@ -207,7 +207,7 @@
     std::string source(testEncodingsEncoded[i]);
     std::string expected(testEncodingsExpected[i]);
     std::string s = Toolbox::ConvertToUtf8(source, testEncodings[i]);
-    std::cout << EnumerationToString(testEncodings[i]) << std::endl;
+    //std::cout << EnumerationToString(testEncodings[i]) << std::endl;
     EXPECT_EQ(expected, s);
   }
 }
@@ -262,13 +262,15 @@
 {
   for (unsigned int i = 0; i < testEncodingsCount; i++)
   {
-    std::cout << EnumerationToString(testEncodings[i]) << std::endl;
+    //std::cout << EnumerationToString(testEncodings[i]) << std::endl;
     std::string dicom;
 
     {
       ParsedDicomFile f;
       f.SetEncoding(testEncodings[i]);
-      f.Insert(DICOM_TAG_PATIENT_NAME, testEncodingsEncoded[i]);
+
+      std::string s = Toolbox::ConvertToUtf8(testEncodingsEncoded[i], testEncodings[i]);
+      f.Insert(DICOM_TAG_PATIENT_NAME, s);
       f.SaveToMemoryBuffer(dicom);
     }
 
@@ -334,7 +336,7 @@
   {
     Json::Value a;
     a = "Hello";
-    element.reset(FromDcmtkBridge::FromJson(DICOM_TAG_PATIENT_NAME, a, false));
+    element.reset(FromDcmtkBridge::FromJson(DICOM_TAG_PATIENT_NAME, a, false, Encoding_Utf8));
 
     Json::Value b;
     FromDcmtkBridge::ToJson(b, *element, DicomToJsonFormat_Short, 0, Encoding_Ascii);
@@ -345,20 +347,20 @@
     Json::Value a;
     a = "Hello";
     // Cannot assign a string to a sequence
-    ASSERT_THROW(element.reset(FromDcmtkBridge::FromJson(REFERENCED_STUDY_SEQUENCE, a, false)), OrthancException);
+    ASSERT_THROW(element.reset(FromDcmtkBridge::FromJson(REFERENCED_STUDY_SEQUENCE, a, false, Encoding_Utf8)), OrthancException);
   }
 
   {
     Json::Value a = Json::arrayValue;
     a.append("Hello");
     // Cannot assign an array to a string
-    ASSERT_THROW(element.reset(FromDcmtkBridge::FromJson(DICOM_TAG_PATIENT_NAME, a, false)), OrthancException);
+    ASSERT_THROW(element.reset(FromDcmtkBridge::FromJson(DICOM_TAG_PATIENT_NAME, a, false, Encoding_Utf8)), OrthancException);
   }
 
   {
     Json::Value a;
     a = "data:application/octet-stream;base64,SGVsbG8=";  // echo -n "Hello" | base64
-    element.reset(FromDcmtkBridge::FromJson(DICOM_TAG_PATIENT_NAME, a, true));
+    element.reset(FromDcmtkBridge::FromJson(DICOM_TAG_PATIENT_NAME, a, true, Encoding_Utf8));
 
     Json::Value b;
     FromDcmtkBridge::ToJson(b, *element, DicomToJsonFormat_Short, 0, Encoding_Ascii);
@@ -368,7 +370,7 @@
   {
     Json::Value a = Json::arrayValue;
     CreateSampleJson(a);
-    element.reset(FromDcmtkBridge::FromJson(REFERENCED_STUDY_SEQUENCE, a, true));
+    element.reset(FromDcmtkBridge::FromJson(REFERENCED_STUDY_SEQUENCE, a, true, Encoding_Utf8));
 
     {
       Json::Value b;
@@ -491,3 +493,30 @@
   ASSERT_TRUE(f.GetTagValue(s, DICOM_TAG_MEDIA_STORAGE_SOP_CLASS_UID));  // Implicitly modified by (**)
   ASSERT_EQ(s, "Tata");
 }
+
+
+TEST(ParsedDicomFile, JsonEncoding)  // Writes a UTF-8 patient name with Replace() and reads it back with ToJson(), per test encoding
+{
+  ParsedDicomFile f;
+
+  for (unsigned int i = 0; i < testEncodingsCount; i++)
+  {
+    if (testEncodings[i] != Encoding_Windows1251)  // NOTE(review): Windows-1251 is skipped; the reason is not visible in this changeset
+    {
+      //std::cout << EnumerationToString(testEncodings[i]) << std::endl;
+      f.SetEncoding(testEncodings[i]);
+
+      if (testEncodings[i] != Encoding_Ascii)  // NOTE(review): presumably Ascii is not read back unchanged by GetEncoding() -- confirm
+      {
+        ASSERT_EQ(testEncodings[i], f.GetEncoding());
+      }
+
+      Json::Value s = Toolbox::ConvertToUtf8(testEncodingsEncoded[i], testEncodings[i]);  // UTF-8 form of the encoded sample
+      f.Replace(DICOM_TAG_PATIENT_NAME, s, false);
+
+      Json::Value v;
+      f.ToJson(v, DicomToJsonFormat_Simple, 0);
+      ASSERT_EQ(v["PatientName"].asString(), std::string(testEncodingsExpected[i]));
+    }
+  }
+}