changeset 3217:cf8cbeb35f33

preliminary support of Korean character set
author Sebastien Jodogne <s.jodogne@gmail.com>
date Wed, 13 Feb 2019 17:46:12 +0100
parents c9a71eb4edcf
children 9a83d94b2a1e
files .hgignore Core/DicomParsing/DicomDirWriter.cpp Core/DicomParsing/FromDcmtkBridge.cpp Core/DicomParsing/FromDcmtkBridge.h Core/DicomParsing/ParsedDicomFile.cpp Core/DicomParsing/ParsedDicomFile.h Core/Enumerations.cpp Core/Enumerations.h Core/Toolbox.cpp Core/Toolbox.h NEWS OrthancServer/OrthancRestApi/OrthancRestAnonymizeModify.cpp OrthancServer/Search/DatabaseLookup.cpp OrthancServer/Search/DatabaseLookup.h OrthancServer/Search/HierarchicalMatcher.cpp OrthancServer/Search/HierarchicalMatcher.h UnitTestsSources/DicomMapTests.cpp UnitTestsSources/FromDcmtkTests.cpp UnitTestsSources/UnitTestsMain.cpp
diffstat 19 files changed, 615 insertions(+), 162 deletions(-) [+]
line wrap: on
line diff
--- a/.hgignore	Tue Feb 12 17:27:33 2019 +0100
+++ b/.hgignore	Wed Feb 13 17:46:12 2019 +0100
@@ -4,3 +4,4 @@
 *.cpp.orig
 *.h.orig
 .vs/
+*~
--- a/Core/DicomParsing/DicomDirWriter.cpp	Tue Feb 12 17:27:33 2019 +0100
+++ b/Core/DicomParsing/DicomDirWriter.cpp	Wed Feb 13 17:46:12 2019 +0100
@@ -161,6 +161,7 @@
     static bool GetUtf8TagValue(std::string& result,
                                 DcmItem& source,
                                 Encoding encoding,
+                                bool hasCodeExtensions,
                                 const DcmTagKey& key)
     {
       DcmElement* element = NULL;
@@ -174,7 +175,7 @@
         {
           if (s != NULL)
           {
-            result = Toolbox::ConvertToUtf8(s, encoding);
+            result = Toolbox::ConvertToUtf8(s, encoding, hasCodeExtensions);
           }
           
           return true;
@@ -202,6 +203,7 @@
     static bool CopyString(DcmDirectoryRecord& target,
                            DcmDataset& source,
                            Encoding encoding,
+                           bool hasCodeExtensions,
                            const DcmTagKey& key,
                            bool optional,
                            bool copyEmpty)
@@ -214,7 +216,7 @@
       }
 
       std::string value;
-      bool found = GetUtf8TagValue(value, source, encoding, key);
+      bool found = GetUtf8TagValue(value, source, encoding, hasCodeExtensions, key);
 
       if (!found)
       {
@@ -231,33 +233,37 @@
     static void CopyStringType1(DcmDirectoryRecord& target,
                                 DcmDataset& source,
                                 Encoding encoding,
+                                bool hasCodeExtensions,
                                 const DcmTagKey& key)
     {
-      CopyString(target, source, encoding, key, false, false);
+      CopyString(target, source, encoding, hasCodeExtensions, key, false, false);
     }
 
     static void CopyStringType1C(DcmDirectoryRecord& target,
                                  DcmDataset& source,
                                  Encoding encoding,
+                                 bool hasCodeExtensions,
                                  const DcmTagKey& key)
     {
-      CopyString(target, source, encoding, key, true, false);
+      CopyString(target, source, encoding, hasCodeExtensions, key, true, false);
     }
 
     static void CopyStringType2(DcmDirectoryRecord& target,
                                 DcmDataset& source,
                                 Encoding encoding,
+                                bool hasCodeExtensions,
                                 const DcmTagKey& key)
     {
-      CopyString(target, source, encoding, key, false, true);
+      CopyString(target, source, encoding, hasCodeExtensions, key, false, true);
     }
 
     static void CopyStringType3(DcmDirectoryRecord& target,
                                 DcmDataset& source,
                                 Encoding encoding,
+                                bool hasCodeExtensions,
                                 const DcmTagKey& key)
     {
-      CopyString(target, source, encoding, key, true, true);
+      CopyString(target, source, encoding, hasCodeExtensions, key, true, true);
     }
 
 
@@ -298,17 +304,19 @@
 
     void FillPatient(DcmDirectoryRecord& record,
                      DcmDataset& dicom,
-                     Encoding encoding)
+                     Encoding encoding,
+                     bool hasCodeExtensions)
     {
       // cf. "DicomDirInterface::buildPatientRecord()"
 
-      CopyStringType1C(record, dicom, encoding, DCM_PatientID);
-      CopyStringType2(record, dicom, encoding, DCM_PatientName);
+      CopyStringType1C(record, dicom, encoding, hasCodeExtensions, DCM_PatientID);
+      CopyStringType2(record, dicom, encoding, hasCodeExtensions, DCM_PatientName);
     }
 
     void FillStudy(DcmDirectoryRecord& record,
                    DcmDataset& dicom,
-                   Encoding encoding)
+                   Encoding encoding,
+                   bool hasCodeExtensions)
     {
       // cf. "DicomDirInterface::buildStudyRecord()"
 
@@ -316,19 +324,19 @@
       SystemToolbox::GetNowDicom(nowDate, nowTime, utc_);
 
       std::string studyDate;
-      if (!GetUtf8TagValue(studyDate, dicom, encoding, DCM_StudyDate) &&
-          !GetUtf8TagValue(studyDate, dicom, encoding, DCM_SeriesDate) &&
-          !GetUtf8TagValue(studyDate, dicom, encoding, DCM_AcquisitionDate) &&
-          !GetUtf8TagValue(studyDate, dicom, encoding, DCM_ContentDate))
+      if (!GetUtf8TagValue(studyDate, dicom, encoding, hasCodeExtensions, DCM_StudyDate) &&
+          !GetUtf8TagValue(studyDate, dicom, encoding, hasCodeExtensions, DCM_SeriesDate) &&
+          !GetUtf8TagValue(studyDate, dicom, encoding, hasCodeExtensions, DCM_AcquisitionDate) &&
+          !GetUtf8TagValue(studyDate, dicom, encoding, hasCodeExtensions, DCM_ContentDate))
       {
         studyDate = nowDate;
       }
           
       std::string studyTime;
-      if (!GetUtf8TagValue(studyTime, dicom, encoding, DCM_StudyTime) &&
-          !GetUtf8TagValue(studyTime, dicom, encoding, DCM_SeriesTime) &&
-          !GetUtf8TagValue(studyTime, dicom, encoding, DCM_AcquisitionTime) &&
-          !GetUtf8TagValue(studyTime, dicom, encoding, DCM_ContentTime))
+      if (!GetUtf8TagValue(studyTime, dicom, encoding, hasCodeExtensions, DCM_StudyTime) &&
+          !GetUtf8TagValue(studyTime, dicom, encoding, hasCodeExtensions, DCM_SeriesTime) &&
+          !GetUtf8TagValue(studyTime, dicom, encoding, hasCodeExtensions, DCM_AcquisitionTime) &&
+          !GetUtf8TagValue(studyTime, dicom, encoding, hasCodeExtensions, DCM_ContentTime))
       {
         studyTime = nowTime;
       }
@@ -336,52 +344,54 @@
       /* copy attribute values from dataset to study record */
       SetTagValue(record, DCM_StudyDate, studyDate);
       SetTagValue(record, DCM_StudyTime, studyTime);
-      CopyStringType2(record, dicom, encoding, DCM_StudyDescription);
-      CopyStringType1(record, dicom, encoding, DCM_StudyInstanceUID);
+      CopyStringType2(record, dicom, encoding, hasCodeExtensions, DCM_StudyDescription);
+      CopyStringType1(record, dicom, encoding, hasCodeExtensions, DCM_StudyInstanceUID);
       /* use type 1C instead of 1 in order to avoid unwanted overwriting */
-      CopyStringType1C(record, dicom, encoding, DCM_StudyID);
-      CopyStringType2(record, dicom, encoding, DCM_AccessionNumber);
+      CopyStringType1C(record, dicom, encoding, hasCodeExtensions, DCM_StudyID);
+      CopyStringType2(record, dicom, encoding, hasCodeExtensions, DCM_AccessionNumber);
     }
 
     void FillSeries(DcmDirectoryRecord& record,
                     DcmDataset& dicom,
-                    Encoding encoding)
+                    Encoding encoding,
+                    bool hasCodeExtensions)
     {
       // cf. "DicomDirInterface::buildSeriesRecord()"
 
       /* copy attribute values from dataset to series record */
-      CopyStringType1(record, dicom, encoding, DCM_Modality);
-      CopyStringType1(record, dicom, encoding, DCM_SeriesInstanceUID);
+      CopyStringType1(record, dicom, encoding, hasCodeExtensions, DCM_Modality);
+      CopyStringType1(record, dicom, encoding, hasCodeExtensions, DCM_SeriesInstanceUID);
       /* use type 1C instead of 1 in order to avoid unwanted overwriting */
-      CopyStringType1C(record, dicom, encoding, DCM_SeriesNumber);
+      CopyStringType1C(record, dicom, encoding, hasCodeExtensions, DCM_SeriesNumber);
 
       // Add extended (non-standard) type 3 tags, those are not generated by DCMTK
       // http://dicom.nema.org/medical/Dicom/2016a/output/chtml/part02/sect_7.3.html
       // https://groups.google.com/d/msg/orthanc-users/Y7LOvZMDeoc/9cp3kDgxAwAJ
       if (extendedSopClass_)
       {
-        CopyStringType3(record, dicom, encoding, DCM_SeriesDescription);
+        CopyStringType3(record, dicom, encoding, hasCodeExtensions, DCM_SeriesDescription);
       }
     }
 
     void FillInstance(DcmDirectoryRecord& record,
                       DcmDataset& dicom,
                       Encoding encoding,
+                      bool hasCodeExtensions,
                       DcmMetaInfo& metaInfo,
                       const char* path)
     {
       // cf. "DicomDirInterface::buildImageRecord()"
 
       /* copy attribute values from dataset to image record */
-      CopyStringType1(record, dicom, encoding, DCM_InstanceNumber);
-      //CopyElementType1C(record, dicom, encoding, DCM_ImageType);
+      CopyStringType1(record, dicom, encoding, hasCodeExtensions, DCM_InstanceNumber);
+      //CopyElementType1C(record, dicom, encoding, hasCodeExtensions, DCM_ImageType);
 
       // REMOVED since 0.9.7: copyElementType1C(dicom, DCM_ReferencedImageSequence, record);
 
       std::string sopClassUid, sopInstanceUid, transferSyntaxUid;
-      if (!GetUtf8TagValue(sopClassUid, dicom, encoding, DCM_SOPClassUID) ||
-          !GetUtf8TagValue(sopInstanceUid, dicom, encoding, DCM_SOPInstanceUID) ||
-          !GetUtf8TagValue(transferSyntaxUid, metaInfo, encoding, DCM_TransferSyntaxUID))
+      if (!GetUtf8TagValue(sopClassUid, dicom, encoding, hasCodeExtensions, DCM_SOPClassUID) ||
+          !GetUtf8TagValue(sopInstanceUid, dicom, encoding, hasCodeExtensions, DCM_SOPInstanceUID) ||
+          !GetUtf8TagValue(transferSyntaxUid, metaInfo, encoding, hasCodeExtensions, DCM_TransferSyntaxUID))
       {
         throw OrthancException(ErrorCode_BadFileFormat);
       }
@@ -401,7 +411,9 @@
                         const char* path)
     {
       DcmDataset& dataset = *dicom.GetDcmtkObject().getDataset();
-      Encoding encoding = dicom.GetEncoding();
+
+      bool hasCodeExtensions;
+      Encoding encoding = dicom.DetectEncoding(hasCodeExtensions);
 
       bool found;
       std::string id;
@@ -410,7 +422,7 @@
       switch (level)
       {
         case ResourceType_Patient:
-          if (!GetUtf8TagValue(id, dataset, encoding, DCM_PatientID))
+          if (!GetUtf8TagValue(id, dataset, encoding, hasCodeExtensions, DCM_PatientID))
           {
             // Be tolerant about missing patient ID. Fixes issue #124
             // (GET /studies/ID/media fails for certain dicom file).
@@ -422,17 +434,17 @@
           break;
 
         case ResourceType_Study:
-          found = GetUtf8TagValue(id, dataset, encoding, DCM_StudyInstanceUID);
+          found = GetUtf8TagValue(id, dataset, encoding, hasCodeExtensions, DCM_StudyInstanceUID);
           type = ERT_Study;
           break;
 
         case ResourceType_Series:
-          found = GetUtf8TagValue(id, dataset, encoding, DCM_SeriesInstanceUID);
+          found = GetUtf8TagValue(id, dataset, encoding, hasCodeExtensions, DCM_SeriesInstanceUID);
           type = ERT_Series;
           break;
 
         case ResourceType_Instance:
-          found = GetUtf8TagValue(id, dataset, encoding, DCM_SOPInstanceUID);
+          found = GetUtf8TagValue(id, dataset, encoding, hasCodeExtensions, DCM_SOPInstanceUID);
           type = ERT_Image;
           break;
 
@@ -459,26 +471,26 @@
       switch (level)
       {
         case ResourceType_Patient:
-          FillPatient(*record, dataset, encoding);
+          FillPatient(*record, dataset, encoding, hasCodeExtensions);
           break;
 
         case ResourceType_Study:
-          FillStudy(*record, dataset, encoding);
+          FillStudy(*record, dataset, encoding, hasCodeExtensions);
           break;
 
         case ResourceType_Series:
-          FillSeries(*record, dataset, encoding);
+          FillSeries(*record, dataset, encoding, hasCodeExtensions);
           break;
 
         case ResourceType_Instance:
-          FillInstance(*record, dataset, encoding, *dicom.GetDcmtkObject().getMetaInfo(), path);
+          FillInstance(*record, dataset, encoding, hasCodeExtensions, *dicom.GetDcmtkObject().getMetaInfo(), path);
           break;
 
         default:
           throw OrthancException(ErrorCode_InternalError);
       }
 
-      CopyStringType1C(*record, dataset, encoding, DCM_SpecificCharacterSet);
+      CopyStringType1C(*record, dataset, encoding, hasCodeExtensions, DCM_SpecificCharacterSet);
 
       target = record.get();
       GetRoot().insertSub(record.release());
--- a/Core/DicomParsing/FromDcmtkBridge.cpp	Tue Feb 12 17:27:33 2019 +0100
+++ b/Core/DicomParsing/FromDcmtkBridge.cpp	Wed Feb 13 17:46:12 2019 +0100
@@ -414,37 +414,49 @@
   }
 
 
-  Encoding FromDcmtkBridge::DetectEncoding(DcmItem& dataset,
+  Encoding FromDcmtkBridge::DetectEncoding(bool& hasCodeExtensions,
+                                           DcmItem& dataset,
                                            Encoding defaultEncoding)
   {
-    Encoding encoding = defaultEncoding;
+    // http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.12.html#sect_C.12.1.1.2
 
     OFString tmp;
-    if (dataset.findAndGetOFString(DCM_SpecificCharacterSet, tmp).good())
+    if (dataset.findAndGetOFStringArray(DCM_SpecificCharacterSet, tmp).good())
     {
-      std::string characterSet = Toolbox::StripSpaces(std::string(tmp.c_str()));
-
-      if (characterSet.empty())
+      std::vector<std::string> tokens;
+      Toolbox::TokenizeString(tokens, std::string(tmp.c_str()), '\\');
+
+      hasCodeExtensions = (tokens.size() > 1);
+
+      for (size_t i = 0; i < tokens.size(); i++)
       {
-        // Empty specific character set tag: Use the default encoding
-      }
-      else if (GetDicomEncoding(encoding, characterSet.c_str()))
-      {
-        // The specific character set is supported by the Orthanc core
-      }
-      else
-      {
-        LOG(WARNING) << "Value of Specific Character Set (0008,0005) is not supported: " << characterSet
-                     << ", fallback to ASCII (remove all special characters)";
-        encoding = Encoding_Ascii;
+        std::string characterSet = Toolbox::StripSpaces(tokens[i]);
+
+        if (!characterSet.empty())
+        {
+          Encoding encoding;
+          
+          if (GetDicomEncoding(encoding, characterSet.c_str()))
+          {
+            // The specific character set is supported by the Orthanc core
+            return encoding;
+          }
+          else
+          {
+            LOG(WARNING) << "Value of Specific Character Set (0008,0005) is not supported: " << characterSet
+                         << ", fallback to ASCII (remove all special characters)";
+            return Encoding_Ascii;
+          }
+        }
       }
     }
     else
     {
-      // No specific character set tag: Use the default encoding
+      hasCodeExtensions = false;
     }
-
-    return encoding;
+    
+    // No specific character set tag: Use the default encoding
+    return defaultEncoding;
   }
 
 
@@ -454,8 +466,9 @@
                                             Encoding defaultEncoding)
   {
     std::set<DicomTag> ignoreTagLength;
-    
-    Encoding encoding = DetectEncoding(dataset, defaultEncoding);
+
+    bool hasCodeExtensions;
+    Encoding encoding = DetectEncoding(hasCodeExtensions, dataset, defaultEncoding);
 
     target.Clear();
     for (unsigned long i = 0; i < dataset.card(); i++)
@@ -466,7 +479,7 @@
         target.SetValue(element->getTag().getGTag(),
                         element->getTag().getETag(),
                         ConvertLeafElement(*element, DicomToJsonFlags_Default,
-                                           maxStringLength, encoding, ignoreTagLength));
+                                           maxStringLength, encoding, hasCodeExtensions, ignoreTagLength));
       }
     }
   }
@@ -488,6 +501,7 @@
                                                   DicomToJsonFlags flags,
                                                   unsigned int maxStringLength,
                                                   Encoding encoding,
+                                                  bool hasCodeExtensions,
                                                   const std::set<DicomTag>& ignoreTagLength)
   {
     if (!element.isLeaf())
@@ -507,7 +521,7 @@
       else
       {
         std::string s(c);
-        std::string utf8 = Toolbox::ConvertToUtf8(s, encoding);
+        std::string utf8 = Toolbox::ConvertToUtf8(s, encoding, hasCodeExtensions);
 
         if (maxStringLength != 0 &&
             utf8.size() > maxStringLength &&
@@ -855,6 +869,7 @@
                                       DicomToJsonFlags flags,
                                       unsigned int maxStringLength,
                                       Encoding encoding,
+                                      bool hasCodeExtensions,
                                       const std::set<DicomTag>& ignoreTagLength)
   {
     if (parent.type() == Json::nullValue)
@@ -869,7 +884,7 @@
     {
       // The "0" below lets "LeafValueToJson()" take care of "TooLong" values
       std::auto_ptr<DicomValue> v(FromDcmtkBridge::ConvertLeafElement
-                                  (element, flags, 0, encoding, ignoreTagLength));
+                                  (element, flags, 0, encoding, hasCodeExtensions, ignoreTagLength));
 
       if (ignoreTagLength.find(GetTag(element)) == ignoreTagLength.end())
       {
@@ -894,7 +909,7 @@
       {
         DcmItem* child = sequence.getItem(i);
         Json::Value& v = target.append(Json::objectValue);
-        DatasetToJson(v, *child, format, flags, maxStringLength, encoding, ignoreTagLength);
+        DatasetToJson(v, *child, format, flags, maxStringLength, encoding, hasCodeExtensions, ignoreTagLength);
       }
     }
   }
@@ -906,6 +921,7 @@
                                       DicomToJsonFlags flags,
                                       unsigned int maxStringLength,
                                       Encoding encoding,
+                                      bool hasCodeExtensions,
                                       const std::set<DicomTag>& ignoreTagLength)
   {
     assert(parent.type() == Json::objectValue);
@@ -952,7 +968,7 @@
       }
 
       FromDcmtkBridge::ElementToJson(parent, *element, format, flags,
-                                     maxStringLength, encoding, ignoreTagLength);
+                                     maxStringLength, encoding, hasCodeExtensions, ignoreTagLength);
     }
   }
 
@@ -965,10 +981,11 @@
                                            Encoding defaultEncoding,
                                            const std::set<DicomTag>& ignoreTagLength)
   {
-    Encoding encoding = DetectEncoding(dataset, defaultEncoding);
+    bool hasCodeExtensions;
+    Encoding encoding = DetectEncoding(hasCodeExtensions, dataset, defaultEncoding);
 
     target = Json::objectValue;
-    DatasetToJson(target, dataset, format, flags, maxStringLength, encoding, ignoreTagLength);
+    DatasetToJson(target, dataset, format, flags, maxStringLength, encoding, hasCodeExtensions, ignoreTagLength);
   }
 
 
@@ -980,7 +997,7 @@
   {
     std::set<DicomTag> ignoreTagLength;
     target = Json::objectValue;
-    DatasetToJson(target, dataset, format, flags, maxStringLength, Encoding_Ascii, ignoreTagLength);
+    DatasetToJson(target, dataset, format, flags, maxStringLength, Encoding_Ascii, false, ignoreTagLength);
   }
 
 
@@ -2033,6 +2050,7 @@
 
   void FromDcmtkBridge::ChangeStringEncoding(DcmItem& dataset,
                                              Encoding source,
+                                             bool hasSourceCodeExtensions,
                                              Encoding target)
   {
     // Recursive exploration of a dataset to change the encoding of
@@ -2055,7 +2073,7 @@
               element->getString(c).good() && 
               c != NULL)
           {
-            std::string a = Toolbox::ConvertToUtf8(c, source);
+            std::string a = Toolbox::ConvertToUtf8(c, source, hasSourceCodeExtensions);
             std::string b = Toolbox::ConvertFromUtf8(a, target);
             element->putString(b.c_str());
           }
@@ -2069,7 +2087,7 @@
 
           for (unsigned long j = 0; j < sequence.card(); j++)
           {
-            ChangeStringEncoding(*sequence.getItem(j), source, target);
+            ChangeStringEncoding(*sequence.getItem(j), source, hasSourceCodeExtensions, target);
           }
         }
       }
@@ -2192,13 +2210,15 @@
                                     ITagVisitor& visitor,
                                     const std::vector<DicomTag>& parentTags,
                                     const std::vector<size_t>& parentIndexes,
-                                    Encoding encoding);
+                                    Encoding encoding,
+                                    bool hasCodeExtensions);
  
   static void ApplyVisitorToDataset(DcmItem& dataset,
                                     ITagVisitor& visitor,
                                     const std::vector<DicomTag>& parentTags,
                                     const std::vector<size_t>& parentIndexes,
-                                    Encoding encoding)
+                                    Encoding encoding,
+                                    bool hasCodeExtensions)
   {
     assert(parentTags.size() == parentIndexes.size());
 
@@ -2211,7 +2231,7 @@
       }
       else
       {
-        ApplyVisitorToElement(*element, visitor, parentTags, parentIndexes, encoding);
+        ApplyVisitorToElement(*element, visitor, parentTags, parentIndexes, encoding, hasCodeExtensions);
       }      
     }
   }
@@ -2222,7 +2242,8 @@
                                  const std::vector<DicomTag>& parentTags,
                                  const std::vector<size_t>& parentIndexes,
                                  const DicomTag& tag,
-                                 Encoding encoding)
+                                 Encoding encoding,
+                                 bool hasCodeExtensions)
   {
     // TODO - Merge this function, that is more recent, with ConvertLeafElement()
 
@@ -2299,7 +2320,7 @@
       if (c != NULL)  // This case corresponds to the empty string
       {
         std::string s(c);
-        utf8 = Toolbox::ConvertToUtf8(s, encoding);
+        utf8 = Toolbox::ConvertToUtf8(s, encoding, hasCodeExtensions);
       }
 
       std::string newValue;
@@ -2380,7 +2401,7 @@
               std::string s(reinterpret_cast<const char*>(data), l);
               ITagVisitor::Action action = visitor.VisitString
                 (ignored, parentTags, parentIndexes, tag, vr,
-                 Toolbox::ConvertToUtf8(s, encoding));
+                 Toolbox::ConvertToUtf8(s, encoding, hasCodeExtensions));
 
               if (action != ITagVisitor::Action_None)
               {
@@ -2608,7 +2629,8 @@
                                     ITagVisitor& visitor,
                                     const std::vector<DicomTag>& parentTags,
                                     const std::vector<size_t>& parentIndexes,
-                                    Encoding encoding)
+                                    Encoding encoding,
+                                    bool hasCodeExtensions)
   {
     assert(parentTags.size() == parentIndexes.size());
 
@@ -2616,7 +2638,7 @@
 
     if (element.isLeaf())
     {
-      ApplyVisitorToLeaf(element, visitor, parentTags, parentIndexes, tag, encoding);
+      ApplyVisitorToLeaf(element, visitor, parentTags, parentIndexes, tag, encoding, hasCodeExtensions);
     }
     else
     {
@@ -2640,7 +2662,7 @@
         {
           indexes.back() = static_cast<size_t>(i);
           DcmItem* child = sequence.getItem(i);
-          ApplyVisitorToDataset(*child, visitor, tags, indexes, encoding);
+          ApplyVisitorToDataset(*child, visitor, tags, indexes, encoding, hasCodeExtensions);
         }
       }
     }
@@ -2653,7 +2675,8 @@
   {
     std::vector<DicomTag> parentTags;
     std::vector<size_t> parentIndexes;
-    Encoding encoding = DetectEncoding(dataset, defaultEncoding);
-    ApplyVisitorToDataset(dataset, visitor, parentTags, parentIndexes, encoding);
+    bool hasCodeExtensions;
+    Encoding encoding = DetectEncoding(hasCodeExtensions, dataset, defaultEncoding);
+    ApplyVisitorToDataset(dataset, visitor, parentTags, parentIndexes, encoding, hasCodeExtensions);
   }
 }
--- a/Core/DicomParsing/FromDcmtkBridge.h	Tue Feb 12 17:27:33 2019 +0100
+++ b/Core/DicomParsing/FromDcmtkBridge.h	Wed Feb 13 17:46:12 2019 +0100
@@ -92,6 +92,7 @@
                               DicomToJsonFlags flags,
                               unsigned int maxStringLength,
                               Encoding encoding,
+                              bool hasCodeExtensions,
                               const std::set<DicomTag>& ignoreTagLength);
 
     static void ElementToJson(Json::Value& parent,
@@ -100,6 +101,7 @@
                               DicomToJsonFlags flags,
                               unsigned int maxStringLength,
                               Encoding dicomEncoding,
+                              bool hasCodeExtensions,
                               const std::set<DicomTag>& ignoreTagLength);
 
     static void ExtractDicomAsJson(Json::Value& target, 
@@ -112,6 +114,7 @@
 
     static void ChangeStringEncoding(DcmItem& dataset,
                                      Encoding source,
+                                     bool hasSourceCodeExtensions,
                                      Encoding target);
 
   public:
@@ -124,7 +127,8 @@
                                       unsigned int maxMultiplicity,
                                       const std::string& privateCreator);
 
-    static Encoding DetectEncoding(DcmItem& dataset,
+    static Encoding DetectEncoding(bool& hasCodeExtensions,
+                                   DcmItem& dataset,
                                    Encoding defaultEncoding);
 
     static DicomTag Convert(const DcmTag& tag);
@@ -137,6 +141,7 @@
                                           DicomToJsonFlags flags,
                                           unsigned int maxStringLength,
                                           Encoding encoding,
+                                          bool hasCodeExtensions,
                                           const std::set<DicomTag>& ignoreTagLength);
 
     static void ExtractHeaderAsJson(Json::Value& target, 
--- a/Core/DicomParsing/ParsedDicomFile.cpp	Tue Feb 12 17:27:33 2019 +0100
+++ b/Core/DicomParsing/ParsedDicomFile.cpp	Wed Feb 13 17:46:12 2019 +0100
@@ -645,7 +645,10 @@
     }
 
     InvalidateCache();
-    std::auto_ptr<DcmElement> element(FromDcmtkBridge::FromJson(tag, value, decodeDataUriScheme, GetEncoding()));
+
+    bool hasCodeExtensions;
+    Encoding encoding = DetectEncoding(hasCodeExtensions);
+    std::auto_ptr<DcmElement> element(FromDcmtkBridge::FromJson(tag, value, decodeDataUriScheme, encoding));
     InsertInternal(*pimpl_->file_->getDataset(), element.release());
   }
 
@@ -706,8 +709,9 @@
     }
     else
     {
-      Encoding encoding = GetEncoding();
-      if (GetEncoding() != Encoding_Utf8)
+      bool hasCodeExtensions;
+      Encoding encoding = DetectEncoding(hasCodeExtensions);
+      if (encoding != Encoding_Utf8)
       {
         binary = Toolbox::ConvertFromUtf8(utf8Value, encoding);
         decoded = &binary;
@@ -766,7 +770,10 @@
       }
 
       std::auto_ptr<DcmElement> element(FromDcmtkBridge::CreateElementForTag(tag));
-      FromDcmtkBridge::FillElementWithString(*element, tag, utf8Value, decodeDataUriScheme, GetEncoding());
+
+      bool hasCodeExtensions;
+      Encoding encoding = DetectEncoding(hasCodeExtensions);
+      FromDcmtkBridge::FillElementWithString(*element, tag, utf8Value, decodeDataUriScheme, encoding);
 
       InsertInternal(dicom, element.release());
       UpdateStorageUid(tag, utf8Value, false);
@@ -805,7 +812,9 @@
         }
       }
 
-      InsertInternal(dicom, FromDcmtkBridge::FromJson(tag, value, decodeDataUriScheme, GetEncoding()));
+      bool hasCodeExtensions;
+      Encoding encoding = DetectEncoding(hasCodeExtensions);
+      InsertInternal(dicom, FromDcmtkBridge::FromJson(tag, value, decodeDataUriScheme, encoding));
 
       if (tag == DICOM_TAG_SOP_CLASS_UID ||
           tag == DICOM_TAG_SOP_INSTANCE_UID)
@@ -875,10 +884,13 @@
         return false;
       }
 
+      bool hasCodeExtensions;
+      Encoding encoding = DetectEncoding(hasCodeExtensions);
+      
       std::set<DicomTag> tmp;
       std::auto_ptr<DicomValue> v(FromDcmtkBridge::ConvertLeafElement
                                   (*element, DicomToJsonFlags_Default, 
-                                   0, GetEncoding(), tmp));
+                                   0, encoding, hasCodeExtensions, tmp));
       
       if (v.get() == NULL ||
           v->IsNull())
@@ -1294,9 +1306,10 @@
   }
 
   
-  Encoding ParsedDicomFile::GetEncoding() const
+  Encoding ParsedDicomFile::DetectEncoding(bool& hasCodeExtensions) const
   {
-    return FromDcmtkBridge::DetectEncoding(*pimpl_->file_->getDataset(),
+    return FromDcmtkBridge::DetectEncoding(hasCodeExtensions,
+                                           *pimpl_->file_->getDataset(),
                                            GetDefaultDicomEncoding());
   }
 
@@ -1532,12 +1545,13 @@
 
   void ParsedDicomFile::ChangeEncoding(Encoding target)
   {
-    Encoding source = GetEncoding();
+    bool hasCodeExtensions;
+    Encoding source = DetectEncoding(hasCodeExtensions);
 
     if (source != target)  // Avoid unnecessary conversion
     {
       ReplacePlainString(DICOM_TAG_SPECIFIC_CHARACTER_SET, GetDicomSpecificCharacterSet(target));
-      FromDcmtkBridge::ChangeStringEncoding(*pimpl_->file_->getDataset(), source, target);
+      FromDcmtkBridge::ChangeStringEncoding(*pimpl_->file_->getDataset(), source, hasCodeExtensions, target);
     }
   }
 
--- a/Core/DicomParsing/ParsedDicomFile.h	Tue Feb 12 17:27:33 2019 +0100
+++ b/Core/DicomParsing/ParsedDicomFile.h	Wed Feb 13 17:46:12 2019 +0100
@@ -186,7 +186,7 @@
     void EmbedImage(MimeType mime,
                     const std::string& content);
 
-    Encoding GetEncoding() const;
+    Encoding DetectEncoding(bool& hasCodeExtensions) const;
 
     // WARNING: This function only sets the encoding, it will not
     // convert the encoding of the tags. Use "ChangeEncoding()" if need be.
--- a/Core/Enumerations.cpp	Tue Feb 12 17:27:33 2019 +0100
+++ b/Core/Enumerations.cpp	Wed Feb 13 17:46:12 2019 +0100
@@ -647,6 +647,9 @@
       case Encoding_Chinese:
         return "Chinese";
 
+      case Encoding_Korean:
+        return "Korean";
+
       default:
         throw OrthancException(ErrorCode_ParameterOutOfRange);
     }
@@ -1202,6 +1205,11 @@
       return Encoding_Chinese;
     }
 
+    if (s == "KOREAN")
+    {
+      return Encoding_Korean;
+    }
+
     throw OrthancException(ErrorCode_ParameterOutOfRange);
   }
 
@@ -1836,11 +1844,13 @@
     {
       encoding = Encoding_Hebrew;
     }
-    else if (s == "ISO_IR 166" || s == "ISO 2022 IR 166")
+    else if (s == "ISO_IR 166" ||
+             s == "ISO 2022 IR 166")
     {
       encoding = Encoding_Thai;
     }
-    else if (s == "ISO_IR 13" || s == "ISO 2022 IR 13")
+    else if (s == "ISO_IR 13" ||
+             s == "ISO 2022 IR 13")
     {
       encoding = Encoding_Japanese;
     }
@@ -1855,11 +1865,11 @@
        **/
       encoding = Encoding_Chinese;
     }
+    else if (s == "ISO 2022 IR 149")
+    {
+      encoding = Encoding_Korean;
+    }
     /*
-      else if (s == "ISO 2022 IR 149")
-      {
-      TODO
-      }
       else if (s == "ISO 2022 IR 159")
       {
       TODO
@@ -2013,6 +2023,9 @@
       case Encoding_Thai:
         return "ISO_IR 166";
 
+      case Encoding_Korean:
+        return "ISO 2022 IR 149";
+
       default:
         throw OrthancException(ErrorCode_ParameterOutOfRange);
     }
--- a/Core/Enumerations.h	Tue Feb 12 17:27:33 2019 +0100
+++ b/Core/Enumerations.h	Wed Feb 13 17:46:12 2019 +0100
@@ -442,10 +442,10 @@
     Encoding_Hebrew,
     Encoding_Thai,                          // TIS 620-2533
     Encoding_Japanese,                      // JIS X 0201 (Shift JIS): Katakana
-    Encoding_Chinese                        // GB18030 - Chinese simplified
+    Encoding_Chinese,                       // GB18030 - Chinese simplified
     //Encoding_JapaneseKanji,               // Multibyte - JIS X 0208: Kanji
     //Encoding_JapaneseSupplementaryKanji,  // Multibyte - JIS X 0212: Supplementary Kanji set
-    //Encoding_Korean,                      // Multibyte - KS X 1001: Hangul and Hanja
+    Encoding_Korean                         // Multibyte - KS X 1001: Hangul and Hanja
   };
 
 
--- a/Core/Toolbox.cpp	Tue Feb 12 17:27:33 2019 +0100
+++ b/Core/Toolbox.cpp	Wed Feb 13 17:46:12 2019 +0100
@@ -514,6 +514,10 @@
         return "TIS620.2533-0";
         break;
 
+      case Encoding_Korean:
+        return "ISO-IR-149";
+        break;
+
       default:
         throw OrthancException(ErrorCode_NotImplemented);
     }
@@ -522,27 +526,46 @@
 
 
 #if ORTHANC_ENABLE_LOCALE == 1
+  // http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.12.html#sect_C.12.1.1.2
   std::string Toolbox::ConvertToUtf8(const std::string& source,
-                                     Encoding sourceEncoding)
+                                     Encoding sourceEncoding,
+                                     bool hasCodeExtensions)
   {
     // The "::skip" flag makes boost skip invalid UTF-8
     // characters. This can occur in badly-encoded DICOM files.
     
     try
     {
-      if (sourceEncoding == Encoding_Utf8)
-      {
-        // Already in UTF-8: No conversion is required
-        return boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip);
-      }
-      else if (sourceEncoding == Encoding_Ascii)
+      if (sourceEncoding == Encoding_Ascii)
       {
         return ConvertToAscii(source);
       }
-      else
+      else 
       {
-        const char* encoding = GetBoostLocaleEncoding(sourceEncoding);
-        return boost::locale::conv::to_utf<char>(source, encoding, boost::locale::conv::skip);
+        std::string s;
+        
+        if (sourceEncoding == Encoding_Utf8)
+        {
+          // Already in UTF-8: No conversion is required, but we ensure
+          // the output is correctly encoded
+          s = boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip);
+        }
+        else
+        {
+          const char* encoding = GetBoostLocaleEncoding(sourceEncoding);
+          s = boost::locale::conv::to_utf<char>(source, encoding, boost::locale::conv::skip);
+        }
+
+        if (hasCodeExtensions)
+        {
+          std::string t;
+          RemoveIso2022EscapeSequences(t, s);
+          return t;
+        }
+        else
+        {
+          return s;
+        }        
       }
     }
     catch (std::runtime_error&)
@@ -1593,6 +1616,182 @@
 
     return boost::regex_replace(source, pattern, formatter);
   }
+
+
+  namespace Iso2022
+  {
+    /**
+       Returns whether the string s contains a single-byte control message
+       at index i
+    **/
+    static inline bool IsControlMessage1(const std::string& s, size_t i)
+    {
+      if (i < s.size())
+      {
+        char c = s[i];
+        return
+          (c == '\x0f') || // Locking shift zero
+          (c == '\x0e');   // Locking shift one
+      }
+      else
+      {
+        return false;
+      }
+    }
+
+    /**
+       Returns whether the string s contains a double-byte control message
+       at index i
+    **/
+    static inline size_t IsControlMessage2(const std::string& s, size_t i)
+    {
+      if (i + 1 < s.size())
+      {
+        char c1 = s[i];
+        char c2 = s[i + 1];
+        return (c1 == 0x1b) && (
+          (c2 == '\x6e') || // Locking shift two
+          (c2 == '\x6f') || // Locking shift three
+          (c2 == '\x4e') || // Single shift two (alt)
+          (c2 == '\x4f') || // Single shift three (alt)
+          (c2 == '\x7c') || // Locking shift three right
+          (c2 == '\x7d') || // Locking shift two right
+          (c2 == '\x7e')    // Locking shift one right
+          );
+      }
+      else
+      {
+        return false;
+      }
+    }
+
+    /**
+       Returns whether the string s contains a triple-byte control message
+       at index i
+    **/
+    static inline size_t IsControlMessage3(const std::string& s, size_t i)
+    {
+      if (i + 2 < s.size())
+      {
+        char c1 = s[i];
+        char c2 = s[i + 1];
+        char c3 = s[i + 2];
+        return ((c1 == '\x8e' && c2 == 0x1b && c3 == '\x4e') ||
+                (c1 == '\x8f' && c2 == 0x1b && c3 == '\x4f'));
+      }
+      else
+      {
+        return false;
+      }
+    }
+
+    /**
+       This function returns true if the index i in the supplied string s:
+       - is valid
+       - contains the c character
+       This function returns false otherwise.
+    **/
+    static inline bool TestCharValue(
+      const std::string& s, size_t i, char c)
+    {
+      if (i < s.size())
+        return s[i] == c;
+      else
+        return false;
+    }
+
+    /**
+       This function returns true if the index i in the supplied string s:
+       - is valid
+       - has a c character that is >= cMin and <= cMax (included)
+       This function returns false otherwise.
+    **/
+    static inline bool TestCharRange(
+      const std::string& s, size_t i, char cMin, char cMax)
+    {
+      if (i < s.size())
+        return (s[i] >= cMin) && (s[i] <= cMax);
+      else
+        return false;
+    }
+
+    /**
+       This function returns the total length in bytes of the escape sequence
+       located in string s at index i, if there is one, or 0 otherwise.
+    **/
+    static inline size_t GetEscapeSequenceLength(const std::string& s, size_t i)
+    {
+      if (TestCharValue(s, i, 0x1b))
+      {
+        size_t j = i+1;
+
+        // advance reading cursor while we are in a sequence 
+        while (TestCharRange(s, j, '\x20', '\x2f'))
+          ++j;
+
+        // check there is a valid termination byte AND we're long enough (there
+        // must be at least one byte between 0x20 and 0x2f
+        if (TestCharRange(s, j, '\x30', '\x7f') && (j - i) >= 2)
+          return j - i + 1;
+        else
+          return 0;
+      }
+      else
+        return 0;
+    }
+  }
+
+  
+
+  /**
+     This function will strip all ISO/IEC 2022 control codes and escape
+     sequences.
+     Please see https://en.wikipedia.org/wiki/ISO/IEC_2022 (as of 2019-02)
+     for a list of those.
+
+     Please note that this operation is potentially destructive, because
+     it removes the character set information from the byte stream.
+
+     However, in the case where the encoding is unique, then suppressing
+     the escape sequences allows to provide us with a clean string after
+     conversion to utf-8 with boost.
+  **/
+  void Toolbox::RemoveIso2022EscapeSequences(std::string& dest, const std::string& src)
+  {
+    // we need AT MOST the same size as the source string in the output
+    dest.clear();
+    if (dest.capacity() < src.size())
+      dest.reserve(src.size());
+
+    size_t i = 0;
+
+    // uint8_t view to the string
+    while (i < src.size())
+    {
+      size_t j = i;
+
+      // The i index will only be incremented if a message is detected
+      // in that case, the message is skipped and the index is set to the
+      // next position to read
+      if (Iso2022::IsControlMessage1(src, i))
+        i += 1;
+      else if (Iso2022::IsControlMessage2(src, i))
+        i += 2;
+      else if (Iso2022::IsControlMessage3(src, i))
+        i += 3;
+      else
+        i += Iso2022::GetEscapeSequenceLength(src, i);
+
+      // if the index was NOT incremented, this means there was no message at
+      // this location: we then may copy the character at this index and 
+      // increment the index to point to the next read position
+      if (j == i)
+      {
+        dest.push_back(src[i]);
+        i++;
+      }
+    }
+  }
 }
 
 
--- a/Core/Toolbox.h	Tue Feb 12 17:27:33 2019 +0100
+++ b/Core/Toolbox.h	Wed Feb 13 17:46:12 2019 +0100
@@ -163,7 +163,8 @@
 
 #if ORTHANC_ENABLE_LOCALE == 1
     std::string ConvertToUtf8(const std::string& source,
-                              Encoding sourceEncoding);
+                              Encoding sourceEncoding,
+                              bool hasCodeExtensions);
 
     std::string ConvertFromUtf8(const std::string& source,
                                 Encoding targetEncoding);
@@ -248,6 +249,9 @@
 
     std::string SubstituteVariables(const std::string& source,
                                     const std::map<std::string, std::string>& dictionary);
+
+    void RemoveIso2022EscapeSequences(std::string& dest,
+                                      const std::string& src);
   }
 }
 
--- a/NEWS	Tue Feb 12 17:27:33 2019 +0100
+++ b/NEWS	Wed Feb 13 17:46:12 2019 +0100
@@ -1,6 +1,8 @@
 Pending changes in the mainline
 ===============================
 
+* Basic support for character sets with code extensions
+
 
 Version 1.5.4 (2019-02-08)
 ==========================
--- a/OrthancServer/OrthancRestApi/OrthancRestAnonymizeModify.cpp	Tue Feb 12 17:27:33 2019 +0100
+++ b/OrthancServer/OrthancRestApi/OrthancRestAnonymizeModify.cpp	Wed Feb 13 17:46:12 2019 +0100
@@ -519,7 +519,10 @@
           else if (tag["Type"] == "String")
           {
             std::string value = tag["Value"].asString();
-            dicom.ReplacePlainString(*it, Toolbox::ConvertFromUtf8(value, dicom.GetEncoding()));
+
+            bool hasCodeExtensions;
+            Encoding encoding = dicom.DetectEncoding(hasCodeExtensions);
+            dicom.ReplacePlainString(*it, Toolbox::ConvertFromUtf8(value, encoding));
           }
         }
       }
--- a/OrthancServer/Search/DatabaseLookup.cpp	Tue Feb 12 17:27:33 2019 +0100
+++ b/OrthancServer/Search/DatabaseLookup.cpp	Wed Feb 13 17:46:12 2019 +0100
@@ -95,7 +95,8 @@
 
 
   bool DatabaseLookup::IsMatch(DcmItem& item,
-                               Encoding encoding) const
+                               Encoding encoding,
+                               bool hasCodeExtensions) const
   {
     for (size_t i = 0; i < constraints_.size(); i++)
     {
@@ -118,7 +119,7 @@
       std::set<DicomTag> ignoreTagLength;
       std::auto_ptr<DicomValue> value(FromDcmtkBridge::ConvertLeafElement
                                       (*element, DicomToJsonFlags_None, 
-                                       0, encoding, ignoreTagLength));
+                                       0, encoding, hasCodeExtensions, ignoreTagLength));
 
       // WARNING: Also modify "HierarchicalMatcher::Setup()" if modifying this code
       if (value.get() == NULL ||
--- a/OrthancServer/Search/DatabaseLookup.h	Tue Feb 12 17:27:33 2019 +0100
+++ b/OrthancServer/Search/DatabaseLookup.h	Wed Feb 13 17:46:12 2019 +0100
@@ -74,7 +74,8 @@
     bool IsMatch(const DicomMap& value) const;
 
     bool IsMatch(DcmItem& item,
-                 Encoding encoding) const;
+                 Encoding encoding,
+                 bool hasCodeExtensions) const;
 
     void AddDicomConstraint(const DicomTag& tag,
                             const std::string& dicomQuery,
--- a/OrthancServer/Search/HierarchicalMatcher.cpp	Tue Feb 12 17:27:33 2019 +0100
+++ b/OrthancServer/Search/HierarchicalMatcher.cpp	Wed Feb 13 17:46:12 2019 +0100
@@ -53,7 +53,9 @@
       caseSensitivePN = lock.GetConfiguration().GetBooleanParameter("CaseSensitivePN", false);
     }
 
-    Setup(*query.GetDcmtkObject().getDataset(), caseSensitivePN, query.GetEncoding());
+    bool hasCodeExtensions;
+    Encoding encoding = query.DetectEncoding(hasCodeExtensions);
+    Setup(*query.GetDcmtkObject().getDataset(), caseSensitivePN, encoding, hasCodeExtensions);
   }
 
 
@@ -72,7 +74,8 @@
 
   void HierarchicalMatcher::Setup(DcmItem& dataset,
                                   bool caseSensitivePN,
-                                  Encoding encoding)
+                                  Encoding encoding,
+                                  bool hasCodeExtensions)
   {
     for (unsigned long i = 0; i < dataset.card(); i++)
     {
@@ -108,7 +111,7 @@
         }
         else if (sequence.card() == 1)
         {
-          sequences_[tag] = new HierarchicalMatcher(*sequence.getItem(0), caseSensitivePN, encoding);
+          sequences_[tag] = new HierarchicalMatcher(*sequence.getItem(0), caseSensitivePN, encoding, hasCodeExtensions);
         }
         else
         {
@@ -122,7 +125,7 @@
         std::set<DicomTag> ignoreTagLength;
         std::auto_ptr<DicomValue> value(FromDcmtkBridge::ConvertLeafElement
                                         (*element, DicomToJsonFlags_None, 
-                                         0, encoding, ignoreTagLength));
+                                         0, encoding, hasCodeExtensions, ignoreTagLength));
 
         // WARNING: Also modify "DatabaseLookup::IsMatch()" if modifying this code
         if (value.get() == NULL ||
@@ -197,15 +200,19 @@
 
   bool HierarchicalMatcher::Match(ParsedDicomFile& dicom) const
   {
+    bool hasCodeExtensions;
+    Encoding encoding = dicom.DetectEncoding(hasCodeExtensions);
+    
     return MatchInternal(*dicom.GetDcmtkObject().getDataset(),
-                         dicom.GetEncoding());
+                         encoding, hasCodeExtensions);
   }
 
 
   bool HierarchicalMatcher::MatchInternal(DcmItem& item,
-                                          Encoding encoding) const
+                                          Encoding encoding,
+                                          bool hasCodeExtensions) const
   {
-    if (!flatConstraints_.IsMatch(item, encoding))
+    if (!flatConstraints_.IsMatch(item, encoding, hasCodeExtensions))
     {
       return false;
     }
@@ -228,7 +235,7 @@
 
         for (unsigned long i = 0; i < sequence->card(); i++)
         {
-          if (it->second->MatchInternal(*sequence->getItem(i), encoding))
+          if (it->second->MatchInternal(*sequence->getItem(i), encoding, hasCodeExtensions))
           {
             match = true;
             break;
@@ -247,7 +254,8 @@
 
 
   DcmDataset* HierarchicalMatcher::ExtractInternal(DcmItem& source,
-                                                   Encoding encoding) const
+                                                   Encoding encoding,
+                                                   bool hasCodeExtensions) const
   {
     std::auto_ptr<DcmDataset> target(new DcmDataset);
 
@@ -283,13 +291,13 @@
           {
             cloned->append(new DcmItem(*sequence->getItem(i)));
           }
-          else if (it->second->MatchInternal(*sequence->getItem(i), encoding))  // TODO Might be optimized
+          else if (it->second->MatchInternal(*sequence->getItem(i), encoding, hasCodeExtensions))  // TODO Might be optimized
           {
             // It is necessary to encapsulate the child dataset into a
             // "DcmItem" object before it can be included in a
             // sequence. Otherwise, "dciodvfy" reports an error "Bad
             // tag in sequence - Expecting Item or Sequence Delimiter."
-            std::auto_ptr<DcmDataset> child(it->second->ExtractInternal(*sequence->getItem(i), encoding));
+            std::auto_ptr<DcmDataset> child(it->second->ExtractInternal(*sequence->getItem(i), encoding, hasCodeExtensions));
             cloned->append(new DcmItem(*child));
           }
         }
@@ -304,11 +312,14 @@
 
   ParsedDicomFile* HierarchicalMatcher::Extract(ParsedDicomFile& dicom) const
   {
+    bool hasCodeExtensions;
+    Encoding encoding = dicom.DetectEncoding(hasCodeExtensions);
+    
     std::auto_ptr<DcmDataset> dataset(ExtractInternal(*dicom.GetDcmtkObject().getDataset(),
-                                                      dicom.GetEncoding()));
+                                                      encoding, hasCodeExtensions));
 
     std::auto_ptr<ParsedDicomFile> result(new ParsedDicomFile(*dataset));
-    result->SetEncoding(dicom.GetEncoding());
+    result->SetEncoding(encoding);
 
     return result.release();
   }
--- a/OrthancServer/Search/HierarchicalMatcher.h	Tue Feb 12 17:27:33 2019 +0100
+++ b/OrthancServer/Search/HierarchicalMatcher.h	Wed Feb 13 17:46:12 2019 +0100
@@ -51,20 +51,24 @@
 
     void Setup(DcmItem& query,
                bool caseSensitivePN,
-               Encoding encoding);
+               Encoding encoding,
+               bool hasCodeExtensions);
 
     HierarchicalMatcher(DcmItem& query,
                         bool caseSensitivePN,
-                        Encoding encoding)
+                        Encoding encoding,
+                        bool hasCodeExtensions)
     {
-      Setup(query, caseSensitivePN, encoding);
+      Setup(query, caseSensitivePN, encoding, hasCodeExtensions);
     }
 
     bool MatchInternal(DcmItem& dicom,
-                       Encoding encoding) const;
+                       Encoding encoding,
+                       bool hasCodeExtensions) const;
 
     DcmDataset* ExtractInternal(DcmItem& dicom,
-                                Encoding encoding) const;
+                                Encoding encoding,
+                                bool hasCodeExtensions) const;
 
   public:
     HierarchicalMatcher(ParsedDicomFile& query);
--- a/UnitTestsSources/DicomMapTests.cpp	Tue Feb 12 17:27:33 2019 +0100
+++ b/UnitTestsSources/DicomMapTests.cpp	Wed Feb 13 17:46:12 2019 +0100
@@ -425,7 +425,7 @@
   const unsigned char raw[] = { 0x63, 0x72, 0xe2, 0x6e, 0x65 };
   std::string latin1((char*) &raw[0], sizeof(raw) / sizeof(char));
 
-  std::string utf8 = Toolbox::ConvertToUtf8(latin1, Encoding_Latin1);
+  std::string utf8 = Toolbox::ConvertToUtf8(latin1, Encoding_Latin1, false);
 
   ParsedDicomFile dicom(false);
   dicom.SetEncoding(Encoding_Latin1);
--- a/UnitTestsSources/FromDcmtkTests.cpp	Tue Feb 12 17:27:33 2019 +0100
+++ b/UnitTestsSources/FromDcmtkTests.cpp	Wed Feb 13 17:46:12 2019 +0100
@@ -217,7 +217,7 @@
   {
     std::string source(testEncodingsEncoded[i]);
     std::string expected(testEncodingsExpected[i]);
-    std::string s = Toolbox::ConvertToUtf8(source, testEncodings[i]);
+    std::string s = Toolbox::ConvertToUtf8(source, testEncodings[i], false);
     //std::cout << EnumerationToString(testEncodings[i]) << std::endl;
     EXPECT_EQ(expected, s);
   }
@@ -262,7 +262,7 @@
   // http://dicom.nema.org/medical/dicom/current/output/html/part03.html#table_C.12-4
   ASSERT_FALSE(GetDicomEncoding(e, "ISO 2022 IR 87"));   //ASSERT_EQ(Encoding_JapaneseKanji, e);
   ASSERT_FALSE(GetDicomEncoding(e, "ISO 2022 IR 159"));  //ASSERT_EQ(Encoding_JapaneseKanjiSupplementary, e);
-  ASSERT_FALSE(GetDicomEncoding(e, "ISO 2022 IR 149"));  //ASSERT_EQ(Encoding_Korean, e);
+  ASSERT_TRUE(GetDicomEncoding(e, "ISO 2022 IR 149"));   ASSERT_EQ(Encoding_Korean, e);
 
   // http://dicom.nema.org/medical/dicom/current/output/html/part03.html#table_C.12-5
   ASSERT_TRUE(GetDicomEncoding(e, "ISO_IR 192"));  ASSERT_EQ(Encoding_Utf8, e);
@@ -282,7 +282,7 @@
       ParsedDicomFile f(true);
       f.SetEncoding(testEncodings[i]);
 
-      std::string s = Toolbox::ConvertToUtf8(testEncodingsEncoded[i], testEncodings[i]);
+      std::string s = Toolbox::ConvertToUtf8(testEncodingsEncoded[i], testEncodings[i], false);
       f.Insert(DICOM_TAG_PATIENT_NAME, s, false);
       f.SaveToMemoryBuffer(dicom);
     }
@@ -293,7 +293,9 @@
 
       if (testEncodings[i] != Encoding_Ascii)
       {
-        ASSERT_EQ(testEncodings[i], g.GetEncoding());
+        bool hasCodeExtensions;
+        ASSERT_EQ(testEncodings[i], g.DetectEncoding(hasCodeExtensions));
+        ASSERT_FALSE(hasCodeExtensions);
       }
 
       std::string tag;
@@ -405,16 +407,16 @@
       ignoreTagLength.insert(DICOM_TAG_PATIENT_ID);
 
       FromDcmtkBridge::ElementToJson(b, *element, DicomToJsonFormat_Short,
-                                     DicomToJsonFlags_Default, 0, Encoding_Ascii, ignoreTagLength);
+                                     DicomToJsonFlags_Default, 0, Encoding_Ascii, false, ignoreTagLength);
       ASSERT_TRUE(b.isMember("0010,0010"));
       ASSERT_EQ("Hello", b["0010,0010"].asString());
 
       FromDcmtkBridge::ElementToJson(b, *element, DicomToJsonFormat_Short,
-                                     DicomToJsonFlags_Default, 3, Encoding_Ascii, ignoreTagLength);
+                                     DicomToJsonFlags_Default, 3, Encoding_Ascii, false, ignoreTagLength);
       ASSERT_TRUE(b["0010,0010"].isNull()); // "Hello" has more than 3 characters
 
       FromDcmtkBridge::ElementToJson(b, *element, DicomToJsonFormat_Full,
-                                     DicomToJsonFlags_Default, 3, Encoding_Ascii, ignoreTagLength);
+                                     DicomToJsonFlags_Default, 3, Encoding_Ascii, false, ignoreTagLength);
       ASSERT_TRUE(b["0010,0010"].isObject());
       ASSERT_EQ("PatientName", b["0010,0010"]["Name"].asString());
       ASSERT_EQ("TooLong", b["0010,0010"]["Type"].asString());
@@ -422,7 +424,7 @@
 
       ignoreTagLength.insert(DICOM_TAG_PATIENT_NAME);
       FromDcmtkBridge::ElementToJson(b, *element, DicomToJsonFormat_Short,
-                                     DicomToJsonFlags_Default, 3, Encoding_Ascii, ignoreTagLength);
+                                     DicomToJsonFlags_Default, 3, Encoding_Ascii, false, ignoreTagLength);
       ASSERT_EQ("Hello", b["0010,0010"].asString());
     }
 
@@ -448,7 +450,7 @@
       Json::Value b;
       std::set<DicomTag> ignoreTagLength;
       FromDcmtkBridge::ElementToJson(b, *element, DicomToJsonFormat_Short,
-                                     DicomToJsonFlags_Default, 0, Encoding_Ascii, ignoreTagLength);
+                                     DicomToJsonFlags_Default, 0, Encoding_Ascii, false, ignoreTagLength);
       ASSERT_EQ("Hello", b["0010,0010"].asString());
     }
 
@@ -461,7 +463,7 @@
         Json::Value b;
         std::set<DicomTag> ignoreTagLength;
         FromDcmtkBridge::ElementToJson(b, *element, DicomToJsonFormat_Short,
-                                       DicomToJsonFlags_Default, 0, Encoding_Ascii, ignoreTagLength);
+                                       DicomToJsonFlags_Default, 0, Encoding_Ascii, false, ignoreTagLength);
         ASSERT_EQ(Json::arrayValue, b["0008,1110"].type());
         ASSERT_EQ(2u, b["0008,1110"].size());
       
@@ -480,7 +482,7 @@
         Json::Value b;
         std::set<DicomTag> ignoreTagLength;
         FromDcmtkBridge::ElementToJson(b, *element, DicomToJsonFormat_Full,
-                                       DicomToJsonFlags_Default, 0, Encoding_Ascii, ignoreTagLength);
+                                       DicomToJsonFlags_Default, 0, Encoding_Ascii, false, ignoreTagLength);
 
         Json::Value c;
         ServerToolbox::SimplifyTags(c, b, DicomToJsonFormat_Human);
@@ -599,10 +601,12 @@
 
       if (testEncodings[i] != Encoding_Ascii)
       {
-        ASSERT_EQ(testEncodings[i], f.GetEncoding());
+        bool hasCodeExtensions;
+        ASSERT_EQ(testEncodings[i], f.DetectEncoding(hasCodeExtensions));
+        ASSERT_FALSE(hasCodeExtensions);
       }
 
-      Json::Value s = Toolbox::ConvertToUtf8(testEncodingsEncoded[i], testEncodings[i]);
+      Json::Value s = Toolbox::ConvertToUtf8(testEncodingsEncoded[i], testEncodings[i], false);
       f.Replace(DICOM_TAG_PATIENT_NAME, s, false, DicomReplaceMode_InsertIfAbsent);
 
       Json::Value v;
@@ -1161,7 +1165,7 @@
         // Sanity check to test the proper behavior of "EncodingTests.py"
         std::string encoded = Toolbox::ConvertFromUtf8(testEncodingsExpected[i], testEncodings[i]);
         ASSERT_STREQ(testEncodingsEncoded[i], encoded.c_str());
-        std::string decoded = Toolbox::ConvertToUtf8(encoded, testEncodings[i]);
+        std::string decoded = Toolbox::ConvertToUtf8(encoded, testEncodings[i], false);
         ASSERT_STREQ(testEncodingsExpected[i], decoded.c_str());
 
         if (testEncodings[i] != Encoding_Chinese)
@@ -1169,7 +1173,7 @@
           // A specific source string is used in "EncodingTests.py" to
           // test against Chinese, it is normal that it does not correspond to UTF8
 
-          std::string encoded = Toolbox::ConvertToUtf8(Toolbox::ConvertFromUtf8(utf8, testEncodings[i]), testEncodings[i]);
+          std::string encoded = Toolbox::ConvertToUtf8(Toolbox::ConvertFromUtf8(utf8, testEncodings[i]), testEncodings[i], false);
           ASSERT_STREQ(testEncodingsExpected[i], encoded.c_str());
         }
       }
@@ -1227,7 +1231,9 @@
       std::string tag;
 
       ParsedDicomFile dicom(m, Encoding_Utf8);
-      ASSERT_EQ(Encoding_Utf8, dicom.GetEncoding());
+      bool hasCodeExtensions;
+      ASSERT_EQ(Encoding_Utf8, dicom.DetectEncoding(hasCodeExtensions));
+      ASSERT_FALSE(hasCodeExtensions);
       ASSERT_TRUE(dicom.GetTagValue(tag, DICOM_TAG_PATIENT_NAME));
       ASSERT_EQ(tag, testEncodingsExpected[i]);
 
@@ -1240,7 +1246,8 @@
 
       dicom.ChangeEncoding(testEncodings[i]);
 
-      ASSERT_EQ(testEncodings[i], dicom.GetEncoding());
+      ASSERT_EQ(testEncodings[i], dicom.DetectEncoding(hasCodeExtensions));
+      ASSERT_FALSE(hasCodeExtensions);
       
       const char* c = NULL;
       ASSERT_TRUE(dicom.GetDcmtkObject().getDataset()->findAndGetString(DCM_PatientName, c).good());
@@ -1275,7 +1282,10 @@
     m.SetValue(DICOM_TAG_PATIENT_NAME, "HELLO", false);
 
     ParsedDicomFile d(m, Encoding_Latin3 /* default encoding */);
-    ASSERT_EQ(Encoding_Latin3, d.GetEncoding());
+
+    bool hasCodeExtensions;
+    ASSERT_EQ(Encoding_Latin3, d.DetectEncoding(hasCodeExtensions));
+    ASSERT_FALSE(hasCodeExtensions);
   }
   
   {
@@ -1285,7 +1295,10 @@
     m.SetValue(DICOM_TAG_PATIENT_NAME, "HELLO", false);
 
     ParsedDicomFile d(m, Encoding_Latin3 /* default encoding */);
-    ASSERT_EQ(Encoding_Japanese, d.GetEncoding());
+
+    bool hasCodeExtensions;
+    ASSERT_EQ(Encoding_Japanese, d.DetectEncoding(hasCodeExtensions));
+    ASSERT_FALSE(hasCodeExtensions);
   }
   
   {
@@ -1314,6 +1327,152 @@
     m.SetValue(DICOM_TAG_PATIENT_NAME, "HELLO", false);
 
     ParsedDicomFile d(m, Encoding_Latin3 /* default encoding */);
-    ASSERT_EQ(Encoding_Latin3, d.GetEncoding());
+
+    bool hasCodeExtensions;
+    ASSERT_EQ(Encoding_Latin3, d.DetectEncoding(hasCodeExtensions));
+    ASSERT_FALSE(hasCodeExtensions);
   }
 }
+
+
+
+TEST(Toolbox, RemoveIso2022EscapeSequences)
+{
+  // +----------------------------------+
+  // | one-byte control messages        |
+  // +----------------------------------+
+
+  static const uint8_t iso2022_cstr_oneByteControl[] = {
+    0x0f, 0x41, 
+    0x0e, 0x42, 
+    0x8e, 0x1b, 0x4e, 0x43, 
+    0x8f, 0x1b, 0x4f, 0x44,
+    0x8e, 0x1b, 0x4a, 0x45, 
+    0x8f, 0x1b, 0x4a, 0x46,
+    0x50, 0x51, 0x52, 0x00
+  };
+  
+  static const uint8_t iso2022_cstr_oneByteControl_ref[] = {
+    0x41,
+    0x42,
+    0x43,
+    0x44,
+    0x8e, 0x1b, 0x4a, 0x45, 
+    0x8f, 0x1b, 0x4a, 0x46,
+    0x50, 0x51, 0x52, 0x00
+  };
+
+  // +----------------------------------+
+  // | two-byte control messages        |
+  // +----------------------------------+
+
+  static const uint8_t iso2022_cstr_twoByteControl[] = {
+    0x1b, 0x6e, 0x41,
+    0x1b, 0x6f, 0x42,
+    0x1b, 0x4e, 0x43,
+    0x1b, 0x4f, 0x44,
+    0x1b, 0x7e, 0x45,
+    0x1b, 0x7d, 0x46,
+    0x1b, 0x7c, 0x47, 0x00
+  };
+  
+  static const uint8_t iso2022_cstr_twoByteControl_ref[] = {
+    0x41,
+    0x42,
+    0x43,
+    0x44,
+    0x45,
+    0x46,
+    0x47, 0x00
+  };
+
+  // +----------------------------------+
+  // | various-length escape sequences  |
+  // +----------------------------------+
+
+  static const uint8_t iso2022_cstr_escapeSequence[] = {
+    0x1b, 0x40, 0x41, // 1b and 40 should not be removed (invalid esc seq)
+    0x1b, 0x50, 0x42, // ditto 
+    0x1b, 0x7f, 0x43, // ditto
+    0x1b, 0x21, 0x4a, 0x44, // this will match
+    0x1b, 0x20, 0x21, 0x2f, 0x40, 0x45, // this will match
+    0x1b, 0x20, 0x21, 0x2f, 0x2f, 0x40, 0x46, // this will match too
+    0x1b, 0x20, 0x21, 0x2f, 0x1f, 0x47, 0x48, 0x00 // this will NOT match!
+  };
+  
+  static const uint8_t iso2022_cstr_escapeSequence_ref[] = {
+    0x1b, 0x40, 0x41, // 1b and 40 should not be removed (invalid esc seq)
+    0x1b, 0x50, 0x42, // ditto 
+    0x1b, 0x7f, 0x43, // ditto
+    0x44, // this will match
+    0x45, // this will match
+    0x46, // this will match too
+    0x1b, 0x20, 0x21, 0x2f, 0x1f, 0x47, 0x48, 0x00 // this will NOT match!
+  };
+
+  
+  // +----------------------------------+
+  // | a real-world japanese sample     |
+  // +----------------------------------+
+
+  static const uint8_t iso2022_cstr_real_ir13[] = {
+    0xd4, 0xcf, 0xc0, 0xde, 0x5e, 0xc0, 0xdb, 0xb3,
+    0x3d, 0x1b, 0x24, 0x42, 0x3b, 0x33, 0x45, 0x44,
+    0x1b, 0x28, 0x4a, 0x5e, 0x1b, 0x24, 0x42, 0x42,
+    0x40, 0x4f, 0x3a, 0x1b, 0x28, 0x4a, 0x3d, 0x1b,
+    0x24, 0x42, 0x24, 0x64, 0x24, 0x5e, 0x24, 0x40,
+    0x1b, 0x28, 0x4a, 0x5e, 0x1b, 0x24, 0x42, 0x24,
+    0x3f, 0x24, 0x6d, 0x24, 0x26, 0x1b, 0x28, 0x4a, 0x00
+  };
+
+  static const uint8_t iso2022_cstr_real_ir13_ref[] = {
+    0xd4, 0xcf, 0xc0, 0xde, 0x5e, 0xc0, 0xdb, 0xb3,
+    0x3d,
+    0x3b, 0x33, 0x45, 0x44,
+    0x5e,
+    0x42,
+    0x40, 0x4f, 0x3a,
+    0x3d,
+    0x24, 0x64, 0x24, 0x5e, 0x24, 0x40,
+    0x5e,
+    0x24,
+    0x3f, 0x24, 0x6d, 0x24, 0x26, 0x00
+  };
+
+
+
+  // +----------------------------------+
+  // | the actual test                  |
+  // +----------------------------------+
+
+  std::string iso2022_str_oneByteControl(
+    reinterpret_cast<const char*>(iso2022_cstr_oneByteControl));
+  std::string iso2022_str_oneByteControl_ref(
+    reinterpret_cast<const char*>(iso2022_cstr_oneByteControl_ref));
+  std::string iso2022_str_twoByteControl(
+    reinterpret_cast<const char*>(iso2022_cstr_twoByteControl));
+  std::string iso2022_str_twoByteControl_ref(
+    reinterpret_cast<const char*>(iso2022_cstr_twoByteControl_ref));
+  std::string iso2022_str_escapeSequence(
+    reinterpret_cast<const char*>(iso2022_cstr_escapeSequence));
+  std::string iso2022_str_escapeSequence_ref(
+    reinterpret_cast<const char*>(iso2022_cstr_escapeSequence_ref));
+  std::string iso2022_str_real_ir13(
+    reinterpret_cast<const char*>(iso2022_cstr_real_ir13));
+  std::string iso2022_str_real_ir13_ref(
+    reinterpret_cast<const char*>(iso2022_cstr_real_ir13_ref));
+
+  std::string dest;
+
+  Toolbox::RemoveIso2022EscapeSequences(dest, iso2022_str_oneByteControl);
+  ASSERT_EQ(dest, iso2022_str_oneByteControl_ref);
+
+  Toolbox::RemoveIso2022EscapeSequences(dest, iso2022_str_twoByteControl);
+  ASSERT_EQ(dest, iso2022_str_twoByteControl_ref);
+
+  Toolbox::RemoveIso2022EscapeSequences(dest, iso2022_str_escapeSequence);
+  ASSERT_EQ(dest, iso2022_str_escapeSequence_ref);
+
+  Toolbox::RemoveIso2022EscapeSequences(dest, iso2022_str_real_ir13);
+  ASSERT_EQ(dest, iso2022_str_real_ir13_ref);
+}
--- a/UnitTestsSources/UnitTestsMain.cpp	Tue Feb 12 17:27:33 2019 +0100
+++ b/UnitTestsSources/UnitTestsMain.cpp	Wed Feb 13 17:46:12 2019 +0100
@@ -450,7 +450,7 @@
   ASSERT_EQ("&abc", Toolbox::ConvertToAscii(s));
 
   // Open in Emacs, then save with UTF-8 encoding, then "hexdump -C"
-  std::string utf8 = Toolbox::ConvertToUtf8(s, Encoding_Latin1);
+  std::string utf8 = Toolbox::ConvertToUtf8(s, Encoding_Latin1, false);
   ASSERT_EQ(15u, utf8.size());
   ASSERT_EQ(0xc3, static_cast<unsigned char>(utf8[0]));
   ASSERT_EQ(0xa0, static_cast<unsigned char>(utf8[1]));
@@ -477,8 +477,8 @@
 
   std::string s((char*) &latin1[0], sizeof(latin1) / sizeof(char));
 
-  ASSERT_EQ(s, Toolbox::ConvertFromUtf8(Toolbox::ConvertToUtf8(s, Encoding_Latin1), Encoding_Latin1));
-  ASSERT_EQ("cre", Toolbox::ConvertToUtf8(s, Encoding_Utf8));
+  ASSERT_EQ(s, Toolbox::ConvertFromUtf8(Toolbox::ConvertToUtf8(s, Encoding_Latin1, false), Encoding_Latin1));
+  ASSERT_EQ("cre", Toolbox::ConvertToUtf8(s, Encoding_Utf8, false));
 }
 
 
@@ -690,6 +690,7 @@
   ASSERT_EQ(Encoding_Japanese, StringToEncoding(EnumerationToString(Encoding_Japanese)));
   ASSERT_EQ(Encoding_Chinese, StringToEncoding(EnumerationToString(Encoding_Chinese)));
   ASSERT_EQ(Encoding_Thai, StringToEncoding(EnumerationToString(Encoding_Thai)));
+  ASSERT_EQ(Encoding_Korean, StringToEncoding(EnumerationToString(Encoding_Korean)));
 
   ASSERT_EQ(ResourceType_Patient, StringToResourceType(EnumerationToString(ResourceType_Patient)));
   ASSERT_EQ(ResourceType_Study, StringToResourceType(EnumerationToString(ResourceType_Study)));