comparison UnitTestsSources/FromDcmtkTests.cpp @ 3224:6399f5f25498

unit testing simplified chinese
author Sebastien Jodogne <s.jodogne@gmail.com>
date Thu, 14 Feb 2019 14:48:43 +0100
parents c94f23198946
children c85510b5f21d
comparison
equal deleted inserted replaced
3223:c94f23198946 3224:6399f5f25498
52 #include "../Plugins/Engine/PluginsEnumerations.h" 52 #include "../Plugins/Engine/PluginsEnumerations.h"
53 #include "../Resources/EncodingTests.h" 53 #include "../Resources/EncodingTests.h"
54 54
55 #include <dcmtk/dcmdata/dcelem.h> 55 #include <dcmtk/dcmdata/dcelem.h>
56 #include <dcmtk/dcmdata/dcdeftag.h> 56 #include <dcmtk/dcmdata/dcdeftag.h>
57 #include <boost/algorithm/string/predicate.hpp>
57 58
58 #if ORTHANC_ENABLE_PUGIXML == 1 59 #if ORTHANC_ENABLE_PUGIXML == 1
59 # include <pugixml.hpp> 60 # include <pugixml.hpp>
60 #endif 61 #endif
61 62
1663 1664
1664 node = doc.select_single_node("//NativeDicomModel/DicomAttribute[@tag=\"00100010\"]/PersonName/Phonetic/GivenName"); 1665 node = doc.select_single_node("//NativeDicomModel/DicomAttribute[@tag=\"00100010\"]/PersonName/Phonetic/GivenName");
1665 ASSERT_EQ(utf8.substr(37), node.node().text().as_string()); 1666 ASSERT_EQ(utf8.substr(37), node.node().text().as_string());
1666 #endif 1667 #endif
1667 } 1668 }
1669
1670
1671
1672 TEST(Toolbox, EncodingsChinese3)
1673 {
1674 // http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_J.3.html
1675
1676 static const uint8_t chinese[] = {
1677 0x57, 0x61, 0x6e, 0x67, 0x5e, 0x58, 0x69, 0x61, 0x6f, 0x44, 0x6f,
1678 0x6e, 0x67, 0x3d, 0xcd, 0xf5, 0x5e, 0xd0, 0xa1, 0xb6, 0xab, 0x3d
1679 };
1680
1681 ParsedDicomFile dicom(false);
1682 dicom.ReplacePlainString(DICOM_TAG_SPECIFIC_CHARACTER_SET, "GB18030");
1683 ASSERT_TRUE(dicom.GetDcmtkObject().getDataset()->putAndInsertString
1684 (DCM_PatientName, reinterpret_cast<const char*>(chinese), sizeof(chinese), true).good());
1685
1686 std::string value;
1687 ASSERT_TRUE(dicom.GetTagValue(value, DICOM_TAG_PATIENT_NAME));
1688
1689 std::vector<std::string> tokens;
1690 Orthanc::Toolbox::TokenizeString(tokens, value, '=');
1691 ASSERT_EQ(3u, tokens.size());
1692 ASSERT_EQ("Wang^XiaoDong", tokens[0]);
1693 ASSERT_TRUE(tokens[2].empty());
1694
1695 std::vector<std::string> middle;
1696 Orthanc::Toolbox::TokenizeString(middle, tokens[1], '^');
1697 ASSERT_EQ(2u, middle.size());
1698 ASSERT_EQ(3u, middle[0].size());
1699 ASSERT_EQ(6u, middle[1].size());
1700
1701 // CDF5 in GB18030
1702 ASSERT_EQ(static_cast<char>(0xe7), middle[0][0]);
1703 ASSERT_EQ(static_cast<char>(0x8e), middle[0][1]);
1704 ASSERT_EQ(static_cast<char>(0x8b), middle[0][2]);
1705
1706 // D0A1 in GB18030
1707 ASSERT_EQ(static_cast<char>(0xe5), middle[1][0]);
1708 ASSERT_EQ(static_cast<char>(0xb0), middle[1][1]);
1709 ASSERT_EQ(static_cast<char>(0x8f), middle[1][2]);
1710
1711 // B6AB in GB18030
1712 ASSERT_EQ(static_cast<char>(0xe4), middle[1][3]);
1713 ASSERT_EQ(static_cast<char>(0xb8), middle[1][4]);
1714 ASSERT_EQ(static_cast<char>(0x9c), middle[1][5]);
1715 }
1716
1717
1718 TEST(Toolbox, EncodingsChinese4)
1719 {
1720 // http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_J.4.html
1721
1722 static const uint8_t chinese[] = {
1723 0x54, 0x68, 0x65, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x6c, 0x69, 0x6e,
1724 0x65, 0x20, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x73, 0xd6, 0xd0, 0xce,
1725 0xc4, 0x2e, 0x0d, 0x0a, 0x54, 0x68, 0x65, 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e,
1726 0x64, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64,
1727 0x65, 0x73, 0xd6, 0xd0, 0xce, 0xc4, 0x2c, 0x20, 0x74, 0x6f, 0x6f, 0x2e, 0x0d,
1728 0x0a, 0x54, 0x68, 0x65, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x20, 0x6c, 0x69,
1729 0x6e, 0x65, 0x2e, 0x0d, 0x0a
1730 };
1731
1732 static const uint8_t patternRaw[] = {
1733 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87
1734 };
1735
1736 const std::string pattern(reinterpret_cast<const char*>(patternRaw), sizeof(patternRaw));
1737
1738 ParsedDicomFile dicom(false);
1739 dicom.ReplacePlainString(DICOM_TAG_SPECIFIC_CHARACTER_SET, "GB18030");
1740 ASSERT_TRUE(dicom.GetDcmtkObject().getDataset()->putAndInsertString
1741 (DCM_PatientComments, reinterpret_cast<const char*>(chinese), sizeof(chinese), true).good());
1742
1743 std::string value;
1744 ASSERT_TRUE(dicom.GetTagValue(value, DICOM_TAG_PATIENT_COMMENTS));
1745
1746 std::vector<std::string> lines;
1747 Orthanc::Toolbox::TokenizeString(lines, value, '\n');
1748 ASSERT_EQ(4u, lines.size());
1749 ASSERT_TRUE(boost::starts_with(lines[0], "The first line includes"));
1750 ASSERT_TRUE(boost::ends_with(lines[0], ".\r"));
1751 ASSERT_TRUE(lines[0].find(pattern) != std::string::npos);
1752 ASSERT_TRUE(boost::starts_with(lines[1], "The second line includes"));
1753 ASSERT_TRUE(boost::ends_with(lines[1], ", too.\r"));
1754 ASSERT_TRUE(lines[1].find(pattern) != std::string::npos);
1755 ASSERT_EQ("The third line.\r", lines[2]);
1756 ASSERT_FALSE(lines[1].find(pattern) == std::string::npos);
1757 ASSERT_TRUE(lines[3].empty());
1758 }