comparison UnitTestsSources/FromDcmtkTests.cpp @ 3226:45e1631febbb

support of simplified chinese
author Sebastien Jodogne <s.jodogne@gmail.com>
date Thu, 14 Feb 2019 15:25:41 +0100
parents c85510b5f21d
children 4b9cfd92d1ae
comparison
equal deleted inserted replaced
3225:c85510b5f21d 3226:45e1631febbb
267 267
268 // http://dicom.nema.org/medical/dicom/current/output/html/part03.html#table_C.12-4 268 // http://dicom.nema.org/medical/dicom/current/output/html/part03.html#table_C.12-4
269 ASSERT_TRUE(GetDicomEncoding(e, "ISO 2022 IR 87")); ASSERT_EQ(Encoding_JapaneseKanji, e); 269 ASSERT_TRUE(GetDicomEncoding(e, "ISO 2022 IR 87")); ASSERT_EQ(Encoding_JapaneseKanji, e);
270 ASSERT_FALSE(GetDicomEncoding(e, "ISO 2022 IR 159")); //ASSERT_EQ(Encoding_JapaneseKanjiSupplementary, e); 270 ASSERT_FALSE(GetDicomEncoding(e, "ISO 2022 IR 159")); //ASSERT_EQ(Encoding_JapaneseKanjiSupplementary, e);
271 ASSERT_TRUE(GetDicomEncoding(e, "ISO 2022 IR 149")); ASSERT_EQ(Encoding_Korean, e); 271 ASSERT_TRUE(GetDicomEncoding(e, "ISO 2022 IR 149")); ASSERT_EQ(Encoding_Korean, e);
272 ASSERT_TRUE(GetDicomEncoding(e, "ISO 2022 IR 58")); ASSERT_EQ(Encoding_SimplifiedChinese, e);
272 273
273 // http://dicom.nema.org/medical/dicom/current/output/html/part03.html#table_C.12-5 274 // http://dicom.nema.org/medical/dicom/current/output/html/part03.html#table_C.12-5
274 ASSERT_TRUE(GetDicomEncoding(e, "ISO_IR 192")); ASSERT_EQ(Encoding_Utf8, e); 275 ASSERT_TRUE(GetDicomEncoding(e, "ISO_IR 192")); ASSERT_EQ(Encoding_Utf8, e);
275 ASSERT_TRUE(GetDicomEncoding(e, "GB18030")); ASSERT_EQ(Encoding_Chinese, e); 276 ASSERT_TRUE(GetDicomEncoding(e, "GB18030")); ASSERT_EQ(Encoding_Chinese, e);
276 ASSERT_TRUE(GetDicomEncoding(e, "GBK")); ASSERT_EQ(Encoding_Chinese, e); 277 ASSERT_TRUE(GetDicomEncoding(e, "GBK")); ASSERT_EQ(Encoding_Chinese, e);
1519 1520
1520 1521
1521 1522
1522 TEST(Toolbox, EncodingsKorean) 1523 TEST(Toolbox, EncodingsKorean)
1523 { 1524 {
1524 // http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_I.2.html 1525 // http://dicom.nema.org/MEDICAL/dicom/current/output/chtml/part05/sect_I.2.html
1525 1526
1526 std::string korean = DecodeFromSpecification( 1527 std::string korean = DecodeFromSpecification(
1527 "04/08 06/15 06/14 06/07 05/14 04/07 06/09 06/12 06/04 06/15 06/14 06/07 03/13 " 1528 "04/08 06/15 06/14 06/07 05/14 04/07 06/09 06/12 06/04 06/15 06/14 06/07 03/13 "
1528 "01/11 02/04 02/09 04/03 15/11 15/03 05/14 01/11 02/04 02/09 04/03 13/01 12/14 " 1529 "01/11 02/04 02/09 04/03 15/11 15/03 05/14 01/11 02/04 02/09 04/03 13/01 12/14 "
1529 "13/04 13/07 03/13 01/11 02/04 02/09 04/03 12/08 10/11 05/14 01/11 02/04 02/09 " 1530 "13/04 13/07 03/13 01/11 02/04 02/09 04/03 12/08 10/11 05/14 01/11 02/04 02/09 "
1598 1599
1599 1600
1600 1601
1601 TEST(Toolbox, EncodingsJapaneseKanji) 1602 TEST(Toolbox, EncodingsJapaneseKanji)
1602 { 1603 {
1603 // http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_H.3.html 1604 // http://dicom.nema.org/MEDICAL/dicom/current/output/chtml/part05/sect_H.3.html
1604 1605
1605 std::string japanese = DecodeFromSpecification( 1606 std::string japanese = DecodeFromSpecification(
1606 "05/09 06/01 06/13 06/01 06/04 06/01 05/14 05/04 06/01 07/02 06/15 07/05 03/13 " 1607 "05/09 06/01 06/13 06/01 06/04 06/01 05/14 05/04 06/01 07/02 06/15 07/05 03/13 "
1607 "01/11 02/04 04/02 03/11 03/03 04/05 04/04 01/11 02/08 04/02 05/14 01/11 02/04 " 1608 "01/11 02/04 04/02 03/11 03/03 04/05 04/04 01/11 02/08 04/02 05/14 01/11 02/04 "
1608 "04/02 04/02 04/00 04/15 03/10 01/11 02/08 04/02 03/13 01/11 02/04 04/02 02/04 " 1609 "04/02 04/02 04/00 04/15 03/10 01/11 02/08 04/02 03/13 01/11 02/04 04/02 02/04 "
1679 1680
1680 1681
1681 1682
1682 TEST(Toolbox, EncodingsChinese3) 1683 TEST(Toolbox, EncodingsChinese3)
1683 { 1684 {
1684 // http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_J.3.html 1685 // http://dicom.nema.org/MEDICAL/dicom/current/output/chtml/part05/sect_J.3.html
1685 1686
1686 static const uint8_t chinese[] = { 1687 static const uint8_t chinese[] = {
1687 0x57, 0x61, 0x6e, 0x67, 0x5e, 0x58, 0x69, 0x61, 0x6f, 0x44, 0x6f, 1688 0x57, 0x61, 0x6e, 0x67, 0x5e, 0x58, 0x69, 0x61, 0x6f, 0x44, 0x6f,
1688 0x6e, 0x67, 0x3d, 0xcd, 0xf5, 0x5e, 0xd0, 0xa1, 0xb6, 0xab, 0x3d 1689 0x6e, 0x67, 0x3d, 0xcd, 0xf5, 0x5e, 0xd0, 0xa1, 0xb6, 0xab, 0x3d
1689 }; 1690 };
1730 } 1731 }
1731 1732
1732 1733
1733 TEST(Toolbox, EncodingsChinese4) 1734 TEST(Toolbox, EncodingsChinese4)
1734 { 1735 {
1735 // http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_J.4.html 1736 // http://dicom.nema.org/MEDICAL/dicom/current/output/chtml/part05/sect_J.4.html
1736 1737
1737 static const uint8_t chinese[] = { 1738 static const uint8_t chinese[] = {
1738 0x54, 0x68, 0x65, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x6c, 0x69, 0x6e, 1739 0x54, 0x68, 0x65, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x6c, 0x69, 0x6e,
1739 0x65, 0x20, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x73, 0xd6, 0xd0, 0xce, 1740 0x65, 0x20, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x73, 0xd6, 0xd0, 0xce,
1740 0xc4, 0x2e, 0x0d, 0x0a, 0x54, 0x68, 0x65, 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 1741 0xc4, 0x2e, 0x0d, 0x0a, 0x54, 0x68, 0x65, 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e,
1774 ASSERT_TRUE(lines[1].find(pattern) != std::string::npos); 1775 ASSERT_TRUE(lines[1].find(pattern) != std::string::npos);
1775 ASSERT_EQ("The third line.\r", lines[2]); 1776 ASSERT_EQ("The third line.\r", lines[2]);
1776 ASSERT_FALSE(lines[1].find(pattern) == std::string::npos); 1777 ASSERT_FALSE(lines[1].find(pattern) == std::string::npos);
1777 ASSERT_TRUE(lines[3].empty()); 1778 ASSERT_TRUE(lines[3].empty());
1778 } 1779 }
1780
1781
1782 TEST(Toolbox, EncodingsSimplifiedChinese2)
1783 {
1784 // http://dicom.nema.org/MEDICAL/dicom/current/output/chtml/part05/sect_K.2.html
1785
1786 static const uint8_t chinese[] = {
1787 0x5a, 0x68, 0x61, 0x6e, 0x67, 0x5e, 0x58, 0x69, 0x61, 0x6f, 0x44, 0x6f,
1788 0x6e, 0x67, 0x3d, 0x1b, 0x24, 0x29, 0x41, 0xd5, 0xc5, 0x5e, 0x1b, 0x24,
1789 0x29, 0x41, 0xd0, 0xa1, 0xb6, 0xab, 0x3d, 0x20
1790 };
1791
1792 // echo -n "Zhang^XiaoDong=..." | hexdump -v -e '14/1 "0x%02x, "' -e '"\n"'
1793 static const uint8_t utf8[] = {
1794 0x5a, 0x68, 0x61, 0x6e, 0x67, 0x5e, 0x58, 0x69, 0x61, 0x6f, 0x44, 0x6f, 0x6e, 0x67,
1795 0x3d, 0xe5, 0xbc, 0xa0, 0x5e, 0xe5, 0xb0, 0x8f, 0xe4, 0xb8, 0x9c, 0x3d
1796 };
1797
1798 ParsedDicomFile dicom(false);
1799 dicom.ReplacePlainString(DICOM_TAG_SPECIFIC_CHARACTER_SET, "\\ISO 2022 IR 58");
1800 ASSERT_TRUE(dicom.GetDcmtkObject().getDataset()->putAndInsertString
1801 (DCM_PatientName, reinterpret_cast<const char*>(chinese), sizeof(chinese), true).good());
1802
1803 bool hasCodeExtensions;
1804 Encoding encoding = dicom.DetectEncoding(hasCodeExtensions);
1805 ASSERT_EQ(Encoding_SimplifiedChinese, encoding);
1806 ASSERT_TRUE(hasCodeExtensions);
1807
1808 std::string value;
1809 ASSERT_TRUE(dicom.GetTagValue(value, DICOM_TAG_PATIENT_NAME));
1810 ASSERT_EQ(value, std::string(reinterpret_cast<const char*>(utf8), sizeof(utf8)));
1811 }
1812
1813
1814 TEST(Toolbox, EncodingsSimplifiedChinese3)
1815 {
1816 // http://dicom.nema.org/MEDICAL/dicom/current/output/chtml/part05/sect_K.2.html
1817
1818 static const uint8_t chinese[] = {
1819 0x31, 0x2e, 0x1b, 0x24, 0x29, 0x41, 0xb5, 0xda, 0xd2, 0xbb, 0xd0, 0xd0, 0xce, 0xc4, 0xd7, 0xd6, 0xa1, 0xa3, 0x0d, 0x0a,
1820 0x32, 0x2e, 0x1b, 0x24, 0x29, 0x41, 0xb5, 0xda, 0xb6, 0xfe, 0xd0, 0xd0, 0xce, 0xc4, 0xd7, 0xd6, 0xa1, 0xa3, 0x0d, 0x0a,
1821 0x33, 0x2e, 0x1b, 0x24, 0x29, 0x41, 0xb5, 0xda, 0xc8, 0xfd, 0xd0, 0xd0, 0xce, 0xc4, 0xd7, 0xd6, 0xa1, 0xa3, 0x0d, 0x0a
1822 };
1823
1824 static const uint8_t line1[] = {
1825 0x31, 0x2e, 0xe7, 0xac, 0xac, 0xe4, 0xb8, 0x80, 0xe8, 0xa1, 0x8c, 0xe6, 0x96, 0x87,
1826 0xe5, 0xad, 0x97, 0xe3, 0x80, 0x82, '\r'
1827 };
1828
1829 static const uint8_t line2[] = {
1830 0x32, 0x2e, 0xe7, 0xac, 0xac, 0xe4, 0xba, 0x8c, 0xe8, 0xa1, 0x8c, 0xe6, 0x96, 0x87,
1831 0xe5, 0xad, 0x97, 0xe3, 0x80, 0x82, '\r'
1832 };
1833
1834 static const uint8_t line3[] = {
1835 0x33, 0x2e, 0xe7, 0xac, 0xac, 0xe4, 0xb8, 0x89, 0xe8, 0xa1, 0x8c, 0xe6, 0x96, 0x87,
1836 0xe5, 0xad, 0x97, 0xe3, 0x80, 0x82, '\r'
1837 };
1838
1839 ParsedDicomFile dicom(false);
1840 dicom.ReplacePlainString(DICOM_TAG_SPECIFIC_CHARACTER_SET, "\\ISO 2022 IR 58");
1841 ASSERT_TRUE(dicom.GetDcmtkObject().getDataset()->putAndInsertString
1842 (DCM_PatientName, reinterpret_cast<const char*>(chinese), sizeof(chinese), true).good());
1843
1844 bool hasCodeExtensions;
1845 Encoding encoding = dicom.DetectEncoding(hasCodeExtensions);
1846 ASSERT_EQ(Encoding_SimplifiedChinese, encoding);
1847 ASSERT_TRUE(hasCodeExtensions);
1848
1849 std::string value;
1850 ASSERT_TRUE(dicom.GetTagValue(value, DICOM_TAG_PATIENT_NAME));
1851
1852 std::vector<std::string> lines;
1853 Toolbox::TokenizeString(lines, value, '\n');
1854 ASSERT_EQ(4u, lines.size());
1855 ASSERT_EQ(std::string(reinterpret_cast<const char*>(line1), sizeof(line1)), lines[0]);
1856 ASSERT_EQ(std::string(reinterpret_cast<const char*>(line2), sizeof(line2)), lines[1]);
1857 ASSERT_EQ(std::string(reinterpret_cast<const char*>(line3), sizeof(line3)), lines[2]);
1858 ASSERT_TRUE(lines[3].empty());
1859 }
1860