# HG changeset patch # User Sebastien Jodogne # Date 1550152123 -3600 # Node ID 6399f5f254980dfd076c85516ca75c0577efe721 # Parent c94f231989460e84a2f1bd22be46da678237667c unit testing simplified chinese diff -r c94f23198946 -r 6399f5f25498 TODO --- a/TODO Thu Feb 14 14:16:35 2019 +0100 +++ b/TODO Thu Feb 14 14:48:43 2019 +0100 @@ -93,7 +93,6 @@ * Support Supplementary Kanji set (ISO 2022 IR 159) * Implement the following unit tests: - Chinese: - http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/chapter_J.html http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/chapter_K.html diff -r c94f23198946 -r 6399f5f25498 UnitTestsSources/FromDcmtkTests.cpp --- a/UnitTestsSources/FromDcmtkTests.cpp Thu Feb 14 14:16:35 2019 +0100 +++ b/UnitTestsSources/FromDcmtkTests.cpp Thu Feb 14 14:48:43 2019 +0100 @@ -54,6 +54,7 @@ #include #include +#include #if ORTHANC_ENABLE_PUGIXML == 1 # include @@ -1665,3 +1666,93 @@ ASSERT_EQ(utf8.substr(37), node.node().text().as_string()); #endif } + + + +TEST(Toolbox, EncodingsChinese3) +{ + // http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_J.3.html + + static const uint8_t chinese[] = { + 0x57, 0x61, 0x6e, 0x67, 0x5e, 0x58, 0x69, 0x61, 0x6f, 0x44, 0x6f, + 0x6e, 0x67, 0x3d, 0xcd, 0xf5, 0x5e, 0xd0, 0xa1, 0xb6, 0xab, 0x3d + }; + + ParsedDicomFile dicom(false); + dicom.ReplacePlainString(DICOM_TAG_SPECIFIC_CHARACTER_SET, "GB18030"); + ASSERT_TRUE(dicom.GetDcmtkObject().getDataset()->putAndInsertString + (DCM_PatientName, reinterpret_cast(chinese), sizeof(chinese), true).good()); + + std::string value; + ASSERT_TRUE(dicom.GetTagValue(value, DICOM_TAG_PATIENT_NAME)); + + std::vector tokens; + Orthanc::Toolbox::TokenizeString(tokens, value, '='); + ASSERT_EQ(3u, tokens.size()); + ASSERT_EQ("Wang^XiaoDong", tokens[0]); + ASSERT_TRUE(tokens[2].empty()); + + std::vector middle; + Orthanc::Toolbox::TokenizeString(middle, tokens[1], '^'); + ASSERT_EQ(2u, middle.size()); + ASSERT_EQ(3u, middle[0].size()); + ASSERT_EQ(6u, middle[1].size()); + + // CDF5 in GB18030 + ASSERT_EQ(static_cast(0xe7), middle[0][0]); + ASSERT_EQ(static_cast(0x8e), middle[0][1]); + ASSERT_EQ(static_cast(0x8b), middle[0][2]); + + // D0A1 in GB18030 + ASSERT_EQ(static_cast(0xe5), middle[1][0]); + ASSERT_EQ(static_cast(0xb0), middle[1][1]); + ASSERT_EQ(static_cast(0x8f), middle[1][2]); + + // B6AB in GB18030 + ASSERT_EQ(static_cast(0xe4), middle[1][3]); + ASSERT_EQ(static_cast(0xb8), middle[1][4]); + ASSERT_EQ(static_cast(0x9c), middle[1][5]); +} + + +TEST(Toolbox, EncodingsChinese4) +{ + // http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_J.4.html + + static const uint8_t chinese[] = { + 0x54, 0x68, 0x65, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x6c, 0x69, 0x6e, + 0x65, 0x20, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x73, 0xd6, 0xd0, 0xce, + 0xc4, 0x2e, 0x0d, 0x0a, 0x54, 0x68, 0x65, 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, + 0x64, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, + 0x65, 0x73, 0xd6, 0xd0, 0xce, 0xc4, 0x2c, 0x20, 0x74, 0x6f, 0x6f, 0x2e, 0x0d, + 0x0a, 0x54, 0x68, 0x65, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x20, 0x6c, 0x69, + 0x6e, 0x65, 0x2e, 0x0d, 0x0a + }; + + static const uint8_t patternRaw[] = { + 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87 + }; + + const std::string pattern(reinterpret_cast(patternRaw), sizeof(patternRaw)); + + ParsedDicomFile dicom(false); + dicom.ReplacePlainString(DICOM_TAG_SPECIFIC_CHARACTER_SET, "GB18030"); + ASSERT_TRUE(dicom.GetDcmtkObject().getDataset()->putAndInsertString + (DCM_PatientComments, reinterpret_cast(chinese), sizeof(chinese), true).good()); + + std::string value; + ASSERT_TRUE(dicom.GetTagValue(value, DICOM_TAG_PATIENT_COMMENTS)); + + std::vector lines; + Orthanc::Toolbox::TokenizeString(lines, value, '\n'); + ASSERT_EQ(4u, lines.size()); + ASSERT_TRUE(boost::starts_with(lines[0], "The first line includes")); + ASSERT_TRUE(boost::ends_with(lines[0], ".\r")); + ASSERT_TRUE(lines[0].find(pattern) != std::string::npos); + ASSERT_TRUE(boost::starts_with(lines[1], "The second line includes")); + ASSERT_TRUE(boost::ends_with(lines[1], ", too.\r")); + ASSERT_TRUE(lines[1].find(pattern) != std::string::npos); + ASSERT_EQ("The third line.\r", lines[2]); + ASSERT_FALSE(lines[1].find(pattern) == std::string::npos); + ASSERT_TRUE(lines[3].empty()); +}