diff UnitTestsSources/FromDcmtkTests.cpp @ 3217:cf8cbeb35f33

preliminary support of Korean character set
author Sebastien Jodogne <s.jodogne@gmail.com>
date Wed, 13 Feb 2019 17:46:12 +0100
parents 683d572424b6
children 4351f52f15d5
line wrap: on
line diff
--- a/UnitTestsSources/FromDcmtkTests.cpp	Tue Feb 12 17:27:33 2019 +0100
+++ b/UnitTestsSources/FromDcmtkTests.cpp	Wed Feb 13 17:46:12 2019 +0100
@@ -217,7 +217,7 @@
   {
     std::string source(testEncodingsEncoded[i]);
     std::string expected(testEncodingsExpected[i]);
-    std::string s = Toolbox::ConvertToUtf8(source, testEncodings[i]);
+    std::string s = Toolbox::ConvertToUtf8(source, testEncodings[i], false);
     //std::cout << EnumerationToString(testEncodings[i]) << std::endl;
     EXPECT_EQ(expected, s);
   }
@@ -262,7 +262,7 @@
   // http://dicom.nema.org/medical/dicom/current/output/html/part03.html#table_C.12-4
   ASSERT_FALSE(GetDicomEncoding(e, "ISO 2022 IR 87"));   //ASSERT_EQ(Encoding_JapaneseKanji, e);
   ASSERT_FALSE(GetDicomEncoding(e, "ISO 2022 IR 159"));  //ASSERT_EQ(Encoding_JapaneseKanjiSupplementary, e);
-  ASSERT_FALSE(GetDicomEncoding(e, "ISO 2022 IR 149"));  //ASSERT_EQ(Encoding_Korean, e);
+  ASSERT_TRUE(GetDicomEncoding(e, "ISO 2022 IR 149"));   ASSERT_EQ(Encoding_Korean, e);
 
   // http://dicom.nema.org/medical/dicom/current/output/html/part03.html#table_C.12-5
   ASSERT_TRUE(GetDicomEncoding(e, "ISO_IR 192"));  ASSERT_EQ(Encoding_Utf8, e);
@@ -282,7 +282,7 @@
       ParsedDicomFile f(true);
       f.SetEncoding(testEncodings[i]);
 
-      std::string s = Toolbox::ConvertToUtf8(testEncodingsEncoded[i], testEncodings[i]);
+      std::string s = Toolbox::ConvertToUtf8(testEncodingsEncoded[i], testEncodings[i], false);
       f.Insert(DICOM_TAG_PATIENT_NAME, s, false);
       f.SaveToMemoryBuffer(dicom);
     }
@@ -293,7 +293,9 @@
 
       if (testEncodings[i] != Encoding_Ascii)
       {
-        ASSERT_EQ(testEncodings[i], g.GetEncoding());
+        bool hasCodeExtensions;
+        ASSERT_EQ(testEncodings[i], g.DetectEncoding(hasCodeExtensions));
+        ASSERT_FALSE(hasCodeExtensions);
       }
 
       std::string tag;
@@ -405,16 +407,16 @@
       ignoreTagLength.insert(DICOM_TAG_PATIENT_ID);
 
       FromDcmtkBridge::ElementToJson(b, *element, DicomToJsonFormat_Short,
-                                     DicomToJsonFlags_Default, 0, Encoding_Ascii, ignoreTagLength);
+                                     DicomToJsonFlags_Default, 0, Encoding_Ascii, false, ignoreTagLength);
       ASSERT_TRUE(b.isMember("0010,0010"));
       ASSERT_EQ("Hello", b["0010,0010"].asString());
 
       FromDcmtkBridge::ElementToJson(b, *element, DicomToJsonFormat_Short,
-                                     DicomToJsonFlags_Default, 3, Encoding_Ascii, ignoreTagLength);
+                                     DicomToJsonFlags_Default, 3, Encoding_Ascii, false, ignoreTagLength);
       ASSERT_TRUE(b["0010,0010"].isNull()); // "Hello" has more than 3 characters
 
       FromDcmtkBridge::ElementToJson(b, *element, DicomToJsonFormat_Full,
-                                     DicomToJsonFlags_Default, 3, Encoding_Ascii, ignoreTagLength);
+                                     DicomToJsonFlags_Default, 3, Encoding_Ascii, false, ignoreTagLength);
       ASSERT_TRUE(b["0010,0010"].isObject());
       ASSERT_EQ("PatientName", b["0010,0010"]["Name"].asString());
       ASSERT_EQ("TooLong", b["0010,0010"]["Type"].asString());
@@ -422,7 +424,7 @@
 
       ignoreTagLength.insert(DICOM_TAG_PATIENT_NAME);
       FromDcmtkBridge::ElementToJson(b, *element, DicomToJsonFormat_Short,
-                                     DicomToJsonFlags_Default, 3, Encoding_Ascii, ignoreTagLength);
+                                     DicomToJsonFlags_Default, 3, Encoding_Ascii, false, ignoreTagLength);
       ASSERT_EQ("Hello", b["0010,0010"].asString());
     }
 
@@ -448,7 +450,7 @@
       Json::Value b;
       std::set<DicomTag> ignoreTagLength;
       FromDcmtkBridge::ElementToJson(b, *element, DicomToJsonFormat_Short,
-                                     DicomToJsonFlags_Default, 0, Encoding_Ascii, ignoreTagLength);
+                                     DicomToJsonFlags_Default, 0, Encoding_Ascii, false, ignoreTagLength);
       ASSERT_EQ("Hello", b["0010,0010"].asString());
     }
 
@@ -461,7 +463,7 @@
         Json::Value b;
         std::set<DicomTag> ignoreTagLength;
         FromDcmtkBridge::ElementToJson(b, *element, DicomToJsonFormat_Short,
-                                       DicomToJsonFlags_Default, 0, Encoding_Ascii, ignoreTagLength);
+                                       DicomToJsonFlags_Default, 0, Encoding_Ascii, false, ignoreTagLength);
         ASSERT_EQ(Json::arrayValue, b["0008,1110"].type());
         ASSERT_EQ(2u, b["0008,1110"].size());
       
@@ -480,7 +482,7 @@
         Json::Value b;
         std::set<DicomTag> ignoreTagLength;
         FromDcmtkBridge::ElementToJson(b, *element, DicomToJsonFormat_Full,
-                                       DicomToJsonFlags_Default, 0, Encoding_Ascii, ignoreTagLength);
+                                       DicomToJsonFlags_Default, 0, Encoding_Ascii, false, ignoreTagLength);
 
         Json::Value c;
         ServerToolbox::SimplifyTags(c, b, DicomToJsonFormat_Human);
@@ -599,10 +601,12 @@
 
       if (testEncodings[i] != Encoding_Ascii)
       {
-        ASSERT_EQ(testEncodings[i], f.GetEncoding());
+        bool hasCodeExtensions;
+        ASSERT_EQ(testEncodings[i], f.DetectEncoding(hasCodeExtensions));
+        ASSERT_FALSE(hasCodeExtensions);
       }
 
-      Json::Value s = Toolbox::ConvertToUtf8(testEncodingsEncoded[i], testEncodings[i]);
+      Json::Value s = Toolbox::ConvertToUtf8(testEncodingsEncoded[i], testEncodings[i], false);
       f.Replace(DICOM_TAG_PATIENT_NAME, s, false, DicomReplaceMode_InsertIfAbsent);
 
       Json::Value v;
@@ -1161,7 +1165,7 @@
         // Sanity check to test the proper behavior of "EncodingTests.py"
         std::string encoded = Toolbox::ConvertFromUtf8(testEncodingsExpected[i], testEncodings[i]);
         ASSERT_STREQ(testEncodingsEncoded[i], encoded.c_str());
-        std::string decoded = Toolbox::ConvertToUtf8(encoded, testEncodings[i]);
+        std::string decoded = Toolbox::ConvertToUtf8(encoded, testEncodings[i], false);
         ASSERT_STREQ(testEncodingsExpected[i], decoded.c_str());
 
         if (testEncodings[i] != Encoding_Chinese)
@@ -1169,7 +1173,7 @@
           // A specific source string is used in "EncodingTests.py" to
           // test against Chinese, it is normal that it does not correspond to UTF8
 
-          std::string encoded = Toolbox::ConvertToUtf8(Toolbox::ConvertFromUtf8(utf8, testEncodings[i]), testEncodings[i]);
+          std::string encoded = Toolbox::ConvertToUtf8(Toolbox::ConvertFromUtf8(utf8, testEncodings[i]), testEncodings[i], false);
           ASSERT_STREQ(testEncodingsExpected[i], encoded.c_str());
         }
       }
@@ -1227,7 +1231,9 @@
       std::string tag;
 
       ParsedDicomFile dicom(m, Encoding_Utf8);
-      ASSERT_EQ(Encoding_Utf8, dicom.GetEncoding());
+      bool hasCodeExtensions;
+      ASSERT_EQ(Encoding_Utf8, dicom.DetectEncoding(hasCodeExtensions));
+      ASSERT_FALSE(hasCodeExtensions);
       ASSERT_TRUE(dicom.GetTagValue(tag, DICOM_TAG_PATIENT_NAME));
       ASSERT_EQ(tag, testEncodingsExpected[i]);
 
@@ -1240,7 +1246,8 @@
 
       dicom.ChangeEncoding(testEncodings[i]);
 
-      ASSERT_EQ(testEncodings[i], dicom.GetEncoding());
+      ASSERT_EQ(testEncodings[i], dicom.DetectEncoding(hasCodeExtensions));
+      ASSERT_FALSE(hasCodeExtensions);
       
       const char* c = NULL;
       ASSERT_TRUE(dicom.GetDcmtkObject().getDataset()->findAndGetString(DCM_PatientName, c).good());
@@ -1275,7 +1282,10 @@
     m.SetValue(DICOM_TAG_PATIENT_NAME, "HELLO", false);
 
     ParsedDicomFile d(m, Encoding_Latin3 /* default encoding */);
-    ASSERT_EQ(Encoding_Latin3, d.GetEncoding());
+
+    bool hasCodeExtensions;
+    ASSERT_EQ(Encoding_Latin3, d.DetectEncoding(hasCodeExtensions));
+    ASSERT_FALSE(hasCodeExtensions);
   }
   
   {
@@ -1285,7 +1295,10 @@
     m.SetValue(DICOM_TAG_PATIENT_NAME, "HELLO", false);
 
     ParsedDicomFile d(m, Encoding_Latin3 /* default encoding */);
-    ASSERT_EQ(Encoding_Japanese, d.GetEncoding());
+
+    bool hasCodeExtensions;
+    ASSERT_EQ(Encoding_Japanese, d.DetectEncoding(hasCodeExtensions));
+    ASSERT_FALSE(hasCodeExtensions);
   }
   
   {
@@ -1314,6 +1327,152 @@
     m.SetValue(DICOM_TAG_PATIENT_NAME, "HELLO", false);
 
     ParsedDicomFile d(m, Encoding_Latin3 /* default encoding */);
-    ASSERT_EQ(Encoding_Latin3, d.GetEncoding());
+
+    bool hasCodeExtensions;
+    ASSERT_EQ(Encoding_Latin3, d.DetectEncoding(hasCodeExtensions));
+    ASSERT_FALSE(hasCodeExtensions);
   }
 }
+
+
+
+TEST(Toolbox, RemoveIso2022EscapeSequences)
+{
+  // Feeds hand-built byte strings to Toolbox::RemoveIso2022EscapeSequences()
+  // and compares each result with a "*_ref" array listing the bytes expected
+  // to remain. Every array ends with 0x00 because it is later read as a C string.
+  // +----------------------------------+
+  // | one-byte control messages        |
+  // +----------------------------------+
+
+  static const uint8_t iso2022_cstr_oneByteControl[] = {
+    0x0f, 0x41,              // 0x0f is expected to be dropped
+    0x0e, 0x42,              // 0x0e is expected to be dropped
+    0x8e, 0x1b, 0x4e, 0x43,  // the three leading bytes are expected to be dropped
+    0x8f, 0x1b, 0x4f, 0x44,  // ditto
+    0x8e, 0x1b, 0x4a, 0x45,  // expected to be left untouched
+    0x8f, 0x1b, 0x4a, 0x46,  // ditto
+    0x50, 0x51, 0x52, 0x00   // plain bytes, then the terminator
+  };
+
+  static const uint8_t iso2022_cstr_oneByteControl_ref[] = {
+    0x41,
+    0x42,
+    0x43,
+    0x44,
+    0x8e, 0x1b, 0x4a, 0x45,
+    0x8f, 0x1b, 0x4a, 0x46,
+    0x50, 0x51, 0x52, 0x00
+  };
+
+  // +----------------------------------+
+  // | two-byte control messages        |
+  // +----------------------------------+
+
+  static const uint8_t iso2022_cstr_twoByteControl[] = {
+    0x1b, 0x6e, 0x41,  // each "0x1b, xx" pair in this array is expected to be dropped
+    0x1b, 0x6f, 0x42,
+    0x1b, 0x4e, 0x43,
+    0x1b, 0x4f, 0x44,
+    0x1b, 0x7e, 0x45,
+    0x1b, 0x7d, 0x46,
+    0x1b, 0x7c, 0x47, 0x00
+  };
+
+  static const uint8_t iso2022_cstr_twoByteControl_ref[] = {
+    0x41,
+    0x42,
+    0x43,
+    0x44,
+    0x45,
+    0x46,
+    0x47, 0x00
+  };
+
+  // +----------------------------------+
+  // | various-length escape sequences  |
+  // +----------------------------------+
+
+  static const uint8_t iso2022_cstr_escapeSequence[] = {
+    0x1b, 0x40, 0x41, // 1b and 40 should not be removed (invalid esc seq)
+    0x1b, 0x50, 0x42, // ditto
+    0x1b, 0x7f, 0x43, // ditto
+    0x1b, 0x21, 0x4a, 0x44, // this will match
+    0x1b, 0x20, 0x21, 0x2f, 0x40, 0x45, // this will match
+    0x1b, 0x20, 0x21, 0x2f, 0x2f, 0x40, 0x46, // this will match too
+    0x1b, 0x20, 0x21, 0x2f, 0x1f, 0x47, 0x48, 0x00 // this will NOT match!
+  };
+
+  static const uint8_t iso2022_cstr_escapeSequence_ref[] = {
+    0x1b, 0x40, 0x41, // 1b and 40 should not be removed (invalid esc seq)
+    0x1b, 0x50, 0x42, // ditto
+    0x1b, 0x7f, 0x43, // ditto
+    0x44, // this will match
+    0x45, // this will match
+    0x46, // this will match too
+    0x1b, 0x20, 0x21, 0x2f, 0x1f, 0x47, 0x48, 0x00 // this will NOT match!
+  };
+
+  // +----------------------------------+
+  // | a real-world japanese sample     |
+  // +----------------------------------+
+
+  static const uint8_t iso2022_cstr_real_ir13[] = {
+    0xd4, 0xcf, 0xc0, 0xde, 0x5e, 0xc0, 0xdb, 0xb3,
+    0x3d, 0x1b, 0x24, 0x42, 0x3b, 0x33, 0x45, 0x44,
+    0x1b, 0x28, 0x4a, 0x5e, 0x1b, 0x24, 0x42, 0x42,
+    0x40, 0x4f, 0x3a, 0x1b, 0x28, 0x4a, 0x3d, 0x1b,
+    0x24, 0x42, 0x24, 0x64, 0x24, 0x5e, 0x24, 0x40,
+    0x1b, 0x28, 0x4a, 0x5e, 0x1b, 0x24, 0x42, 0x24,
+    0x3f, 0x24, 0x6d, 0x24, 0x26, 0x1b, 0x28, 0x4a, 0x00
+  };
+  // Same bytes once every 0x1b 0x24 0x42 and 0x1b 0x28 0x4a sequence is removed
+  static const uint8_t iso2022_cstr_real_ir13_ref[] = {
+    0xd4, 0xcf, 0xc0, 0xde, 0x5e, 0xc0, 0xdb, 0xb3,
+    0x3d,
+    0x3b, 0x33, 0x45, 0x44,
+    0x5e,
+    0x42,
+    0x40, 0x4f, 0x3a,
+    0x3d,
+    0x24, 0x64, 0x24, 0x5e, 0x24, 0x40,
+    0x5e,
+    0x24,
+    0x3f, 0x24, 0x6d, 0x24, 0x26, 0x00
+  };
+
+  // +----------------------------------+
+  // | the actual test                  |
+  // +----------------------------------+
+
+  std::string iso2022_str_oneByteControl(
+    reinterpret_cast<const char*>(iso2022_cstr_oneByteControl));
+  std::string iso2022_str_oneByteControl_ref(
+    reinterpret_cast<const char*>(iso2022_cstr_oneByteControl_ref));
+  std::string iso2022_str_twoByteControl(
+    reinterpret_cast<const char*>(iso2022_cstr_twoByteControl));
+  std::string iso2022_str_twoByteControl_ref(
+    reinterpret_cast<const char*>(iso2022_cstr_twoByteControl_ref));
+  std::string iso2022_str_escapeSequence(
+    reinterpret_cast<const char*>(iso2022_cstr_escapeSequence));
+  std::string iso2022_str_escapeSequence_ref(
+    reinterpret_cast<const char*>(iso2022_cstr_escapeSequence_ref));
+  std::string iso2022_str_real_ir13(
+    reinterpret_cast<const char*>(iso2022_cstr_real_ir13));
+  std::string iso2022_str_real_ir13_ref(
+    reinterpret_cast<const char*>(iso2022_cstr_real_ir13_ref));
+
+  std::string dest;
+  // Expected value first, then the actual result
+  Toolbox::RemoveIso2022EscapeSequences(dest, iso2022_str_oneByteControl);
+  ASSERT_EQ(iso2022_str_oneByteControl_ref, dest);
+
+  Toolbox::RemoveIso2022EscapeSequences(dest, iso2022_str_twoByteControl);
+  ASSERT_EQ(iso2022_str_twoByteControl_ref, dest);
+
+  Toolbox::RemoveIso2022EscapeSequences(dest, iso2022_str_escapeSequence);
+  ASSERT_EQ(iso2022_str_escapeSequence_ref, dest);
+
+  Toolbox::RemoveIso2022EscapeSequences(dest, iso2022_str_real_ir13);
+  ASSERT_EQ(iso2022_str_real_ir13_ref, dest);
+}