changeset 3224:6399f5f25498

unit testing simplified chinese
author Sebastien Jodogne <s.jodogne@gmail.com>
date Thu, 14 Feb 2019 14:48:43 +0100
parents c94f23198946
children c85510b5f21d
files TODO UnitTestsSources/FromDcmtkTests.cpp
diffstat 2 files changed, 91 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/TODO	Thu Feb 14 14:16:35 2019 +0100
+++ b/TODO	Thu Feb 14 14:48:43 2019 +0100
@@ -93,7 +93,6 @@
 * Support Supplementary Kanji set (ISO 2022 IR 159)
 * Implement the following unit tests:
   - Chinese:
-    http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/chapter_J.html
     http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/chapter_K.html
 
 
--- a/UnitTestsSources/FromDcmtkTests.cpp	Thu Feb 14 14:16:35 2019 +0100
+++ b/UnitTestsSources/FromDcmtkTests.cpp	Thu Feb 14 14:48:43 2019 +0100
@@ -54,6 +54,7 @@
 
 #include <dcmtk/dcmdata/dcelem.h>
 #include <dcmtk/dcmdata/dcdeftag.h>
+#include <boost/algorithm/string/predicate.hpp>
 
 #if ORTHANC_ENABLE_PUGIXML == 1
 #  include <pugixml.hpp>
@@ -1665,3 +1666,93 @@
   ASSERT_EQ(utf8.substr(37), node.node().text().as_string());
 #endif  
 }
+
+
+
+TEST(Toolbox, EncodingsChinese3)
+{
+  // Person name sample from the DICOM standard, stored with the "GB18030" character set:
+  // http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_J.3.html
+  static const uint8_t encodedName[] = {
+    // "Wang^XiaoDong=" in ASCII
+    0x57, 0x61, 0x6e, 0x67, 0x5e, 0x58, 0x69, 0x61, 0x6f, 0x44, 0x6f, 0x6e, 0x67, 0x3d,
+    // Two multi-byte components separated by '^' (0x5e), followed by a trailing '=' (0x3d)
+    0xcd, 0xf5, 0x5e, 0xd0, 0xa1, 0xb6, 0xab, 0x3d
+  };
+
+  // Bytes expected for the first multi-byte component (CDF5 in GB18030)
+  static const uint8_t firstRaw[] = { 0xe7, 0x8e, 0x8b };
+
+  // Bytes expected for the second multi-byte component (D0A1 then B6AB in GB18030)
+  static const uint8_t secondRaw[] = {
+    0xe5, 0xb0, 0x8f,
+    0xe4, 0xb8, 0x9c
+  };
+
+  const std::string expectedFirst(reinterpret_cast<const char*>(firstRaw), sizeof(firstRaw));
+  const std::string expectedSecond(reinterpret_cast<const char*>(secondRaw), sizeof(secondRaw));
+
+  ParsedDicomFile parsed(false);
+  parsed.ReplacePlainString(DICOM_TAG_SPECIFIC_CHARACTER_SET, "GB18030");
+  ASSERT_TRUE(parsed.GetDcmtkObject().getDataset()->putAndInsertString
+              (DCM_PatientName, reinterpret_cast<const char*>(encodedName), sizeof(encodedName), true).good());
+
+  std::string patientName;
+  ASSERT_TRUE(parsed.GetTagValue(patientName, DICOM_TAG_PATIENT_NAME));
+
+  // The value is split on '='; as the sample ends with '=', a third, empty token is expected
+  std::vector<std::string> groups;
+  Orthanc::Toolbox::TokenizeString(groups, patientName, '=');
+  ASSERT_EQ(3u, groups.size());
+  ASSERT_EQ("Wang^XiaoDong", groups[0]);
+  ASSERT_TRUE(groups[2].empty());
+
+  std::vector<std::string> components;
+  Orthanc::Toolbox::TokenizeString(components, groups[1], '^');
+  ASSERT_EQ(2u, components.size());
+  ASSERT_EQ(expectedFirst, components[0]);
+  ASSERT_EQ(expectedSecond, components[1]);
+}
+
+
+TEST(Toolbox, EncodingsChinese4)
+{
+  // Multi-line text sample from the DICOM standard, stored with the "GB18030" character set:
+  // http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_J.4.html
+  static const uint8_t chinese[] = {
+    0x54, 0x68, 0x65, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x6c, 0x69, 0x6e,
+    0x65, 0x20, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x73, 0xd6, 0xd0, 0xce,
+    0xc4, 0x2e, 0x0d, 0x0a, 0x54, 0x68, 0x65, 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e,
+    0x64, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64,
+    0x65, 0x73, 0xd6, 0xd0, 0xce, 0xc4, 0x2c, 0x20, 0x74, 0x6f, 0x6f, 0x2e, 0x0d,
+    0x0a, 0x54, 0x68, 0x65, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x20, 0x6c, 0x69,
+    0x6e, 0x65, 0x2e, 0x0d, 0x0a
+  };
+  // Bytes expected in the decoded value in place of the "d6 d0 ce c4" sequence of the input
+  static const uint8_t patternRaw[] = {
+    0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87
+  };
+
+  const std::string pattern(reinterpret_cast<const char*>(patternRaw), sizeof(patternRaw));
+
+  ParsedDicomFile dicom(false);
+  dicom.ReplacePlainString(DICOM_TAG_SPECIFIC_CHARACTER_SET, "GB18030");
+  ASSERT_TRUE(dicom.GetDcmtkObject().getDataset()->putAndInsertString
+              (DCM_PatientComments, reinterpret_cast<const char*>(chinese), sizeof(chinese), true).good());
+
+  std::string value;
+  ASSERT_TRUE(dicom.GetTagValue(value, DICOM_TAG_PATIENT_COMMENTS));
+  // Splitting on '\n' is expected to leave the '\r' of each CR LF pair, plus a fourth, empty token
+  std::vector<std::string> lines;
+  Orthanc::Toolbox::TokenizeString(lines, value, '\n');
+  ASSERT_EQ(4u, lines.size());
+  ASSERT_TRUE(boost::starts_with(lines[0], "The first line includes"));
+  ASSERT_TRUE(boost::ends_with(lines[0], ".\r"));
+  ASSERT_TRUE(lines[0].find(pattern) != std::string::npos);
+  ASSERT_TRUE(boost::starts_with(lines[1], "The second line includes"));
+  ASSERT_TRUE(boost::ends_with(lines[1], ", too.\r"));
+  ASSERT_TRUE(lines[1].find(pattern) != std::string::npos);
+  ASSERT_EQ("The third line.\r", lines[2]);
+  ASSERT_TRUE(lines[2].find(pattern) == std::string::npos);  // The third line has no multi-byte text
+  ASSERT_TRUE(lines[3].empty());
+}