Mercurial > hg > orthanc
changeset 3226:45e1631febbb
support of simplified chinese
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Thu, 14 Feb 2019 15:25:41 +0100 |
parents | c85510b5f21d |
children | 53bb1f4b3844 |
files | Core/Enumerations.cpp Core/Enumerations.h Core/Toolbox.cpp NEWS Resources/Configuration.json TODO UnitTestsSources/FromDcmtkTests.cpp UnitTestsSources/UnitTestsMain.cpp |
diffstat | 8 files changed, 113 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/Core/Enumerations.cpp Thu Feb 14 14:56:31 2019 +0100 +++ b/Core/Enumerations.cpp Thu Feb 14 15:25:41 2019 +0100 @@ -653,6 +653,9 @@ case Encoding_JapaneseKanji: return "JapaneseKanji"; + case Encoding_SimplifiedChinese: + return "SimplifiedChinese"; + default: throw OrthancException(ErrorCode_ParameterOutOfRange); } @@ -1218,6 +1221,11 @@ return Encoding_JapaneseKanji; } + if (s == "SIMPLIFIEDCHINESE") + { + return Encoding_SimplifiedChinese; + } + throw OrthancException(ErrorCode_ParameterOutOfRange); } @@ -1881,15 +1889,15 @@ { encoding = Encoding_JapaneseKanji; } + else if (s == "ISO 2022 IR 58") + { + encoding = Encoding_SimplifiedChinese; + } /* else if (s == "ISO 2022 IR 159") { TODO - Supplementary Kanji set } - else if (s == "ISO 2022 IR 58") - { - TODO - Simplified Chinese - } */ else { @@ -2041,6 +2049,9 @@ case Encoding_JapaneseKanji: return "ISO 2022 IR 87"; + case Encoding_SimplifiedChinese: + return "ISO 2022 IR 58"; + default: throw OrthancException(ErrorCode_ParameterOutOfRange); }
--- a/Core/Enumerations.h Thu Feb 14 14:56:31 2019 +0100 +++ b/Core/Enumerations.h Thu Feb 14 15:25:41 2019 +0100 @@ -445,7 +445,8 @@ Encoding_Chinese, // GB18030 - Chinese simplified Encoding_JapaneseKanji, // Multibyte - JIS X 0208: Kanji //Encoding_JapaneseSupplementaryKanji, // Multibyte - JIS X 0212: Supplementary Kanji set - Encoding_Korean // Multibyte - KS X 1001: Hangul and Hanja + Encoding_Korean, // Multibyte - KS X 1001: Hangul and Hanja + Encoding_SimplifiedChinese // ISO 2022 IR 58 };
--- a/Core/Toolbox.cpp Thu Feb 14 14:56:31 2019 +0100 +++ b/Core/Toolbox.cpp Thu Feb 14 15:25:41 2019 +0100 @@ -507,6 +507,9 @@ case Encoding_JapaneseKanji: return "JIS"; + case Encoding_SimplifiedChinese: + return "GB2312"; + default: throw OrthancException(ErrorCode_NotImplemented); }
--- a/NEWS Thu Feb 14 14:56:31 2019 +0100 +++ b/NEWS Thu Feb 14 15:25:41 2019 +0100 @@ -2,7 +2,10 @@ =============================== * Separation of ideographic and phonetic characters in DICOMweb JSON and XML -* Support of Japanese Kanji (ISO 2022 IR 87) and Korean (ISO 2022 IR 149) encodings +* Support of the following multi-byte specific character sets: + - Japanese Kanji (ISO 2022 IR 87) + - Korean (ISO 2022 IR 149) + - Simplified Chinese (ISO 2022 IR 58) * Basic support for character sets with code extensions (ISO 2022 escape sequences)
--- a/Resources/Configuration.json Thu Feb 14 14:56:31 2019 +0100 +++ b/Resources/Configuration.json Thu Feb 14 15:25:41 2019 +0100 @@ -111,7 +111,8 @@ // C-Find requests (including worklists). The allowed values are // "Ascii", "Utf8", "Latin1", "Latin2", "Latin3", "Latin4", // "Latin5", "Cyrillic", "Windows1251", "Arabic", "Greek", "Hebrew", - // "Thai", "Japanese", "Chinese", "JapaneseKanji" and "Korean". + // "Thai", "Japanese", "Chinese", "JapaneseKanji", "Korean", and + // "SimplifiedChinese". "DefaultEncoding" : "Latin1", // The transfer syntaxes that are accepted by Orthanc C-Store SCP
--- a/TODO Thu Feb 14 14:56:31 2019 +0100 +++ b/TODO Thu Feb 14 15:25:41 2019 +0100 @@ -89,11 +89,7 @@ * Support multiple specific character sets (cf. "SCSH32" in orthanc-tests) - http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.12.html#sect_C.12.1.1.2 - Japanese test: http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_H.3.2.html -* Support Simplified Chinese (ISO 2022 IR 58) * Support Supplementary Kanji set (ISO 2022 IR 159) -* Implement the following unit tests: - - Chinese: - http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/chapter_K.html =======
--- a/UnitTestsSources/FromDcmtkTests.cpp Thu Feb 14 14:56:31 2019 +0100 +++ b/UnitTestsSources/FromDcmtkTests.cpp Thu Feb 14 15:25:41 2019 +0100 @@ -269,6 +269,7 @@ ASSERT_TRUE(GetDicomEncoding(e, "ISO 2022 IR 87")); ASSERT_EQ(Encoding_JapaneseKanji, e); ASSERT_FALSE(GetDicomEncoding(e, "ISO 2022 IR 159")); //ASSERT_EQ(Encoding_JapaneseKanjiSupplementary, e); ASSERT_TRUE(GetDicomEncoding(e, "ISO 2022 IR 149")); ASSERT_EQ(Encoding_Korean, e); + ASSERT_TRUE(GetDicomEncoding(e, "ISO 2022 IR 58")); ASSERT_EQ(Encoding_SimplifiedChinese, e); // http://dicom.nema.org/medical/dicom/current/output/html/part03.html#table_C.12-5 ASSERT_TRUE(GetDicomEncoding(e, "ISO_IR 192")); ASSERT_EQ(Encoding_Utf8, e); @@ -1521,7 +1522,7 @@ TEST(Toolbox, EncodingsKorean) { - // http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_I.2.html + // http://dicom.nema.org/MEDICAL/dicom/current/output/chtml/part05/sect_I.2.html std::string korean = DecodeFromSpecification( "04/08 06/15 06/14 06/07 05/14 04/07 06/09 06/12 06/04 06/15 06/14 06/07 03/13 " @@ -1600,7 +1601,7 @@ TEST(Toolbox, EncodingsJapaneseKanji) { - // http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_H.3.html + // http://dicom.nema.org/MEDICAL/dicom/current/output/chtml/part05/sect_H.3.html std::string japanese = DecodeFromSpecification( "05/09 06/01 06/13 06/01 06/04 06/01 05/14 05/04 06/01 07/02 06/15 07/05 03/13 " @@ -1681,7 +1682,7 @@ TEST(Toolbox, EncodingsChinese3) { - // http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_J.3.html + // http://dicom.nema.org/MEDICAL/dicom/current/output/chtml/part05/sect_J.3.html static const uint8_t chinese[] = { 0x57, 0x61, 0x6e, 0x67, 0x5e, 0x58, 0x69, 0x61, 0x6f, 0x44, 0x6f, @@ -1732,7 +1733,7 @@ TEST(Toolbox, EncodingsChinese4) { - // http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_J.4.html + // http://dicom.nema.org/MEDICAL/dicom/current/output/chtml/part05/sect_J.4.html static const uint8_t chinese[] = { 0x54, 0x68, 
0x65, 0x20, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x6c, 0x69, 0x6e, @@ -1776,3 +1777,84 @@ ASSERT_FALSE(lines[1].find(pattern) == std::string::npos); ASSERT_TRUE(lines[3].empty()); } + + +TEST(Toolbox, EncodingsSimplifiedChinese2) +{ + // http://dicom.nema.org/MEDICAL/dicom/current/output/chtml/part05/sect_K.2.html + + static const uint8_t chinese[] = { + 0x5a, 0x68, 0x61, 0x6e, 0x67, 0x5e, 0x58, 0x69, 0x61, 0x6f, 0x44, 0x6f, + 0x6e, 0x67, 0x3d, 0x1b, 0x24, 0x29, 0x41, 0xd5, 0xc5, 0x5e, 0x1b, 0x24, + 0x29, 0x41, 0xd0, 0xa1, 0xb6, 0xab, 0x3d, 0x20 + }; + + // echo -n "Zhang^XiaoDong=..." | hexdump -v -e '14/1 "0x%02x, "' -e '"\n"' + static const uint8_t utf8[] = { + 0x5a, 0x68, 0x61, 0x6e, 0x67, 0x5e, 0x58, 0x69, 0x61, 0x6f, 0x44, 0x6f, 0x6e, 0x67, + 0x3d, 0xe5, 0xbc, 0xa0, 0x5e, 0xe5, 0xb0, 0x8f, 0xe4, 0xb8, 0x9c, 0x3d + }; + + ParsedDicomFile dicom(false); + dicom.ReplacePlainString(DICOM_TAG_SPECIFIC_CHARACTER_SET, "\\ISO 2022 IR 58"); + ASSERT_TRUE(dicom.GetDcmtkObject().getDataset()->putAndInsertString + (DCM_PatientName, reinterpret_cast<const char*>(chinese), sizeof(chinese), true).good()); + + bool hasCodeExtensions; + Encoding encoding = dicom.DetectEncoding(hasCodeExtensions); + ASSERT_EQ(Encoding_SimplifiedChinese, encoding); + ASSERT_TRUE(hasCodeExtensions); + + std::string value; + ASSERT_TRUE(dicom.GetTagValue(value, DICOM_TAG_PATIENT_NAME)); + ASSERT_EQ(value, std::string(reinterpret_cast<const char*>(utf8), sizeof(utf8))); +} + + +TEST(Toolbox, EncodingsSimplifiedChinese3) +{ + // http://dicom.nema.org/MEDICAL/dicom/current/output/chtml/part05/sect_K.3.html + + static const uint8_t chinese[] = { + 0x31, 0x2e, 0x1b, 0x24, 0x29, 0x41, 0xb5, 0xda, 0xd2, 0xbb, 0xd0, 0xd0, 0xce, 0xc4, 0xd7, 0xd6, 0xa1, 0xa3, 0x0d, 0x0a, + 0x32, 0x2e, 0x1b, 0x24, 0x29, 0x41, 0xb5, 0xda, 0xb6, 0xfe, 0xd0, 0xd0, 0xce, 0xc4, 0xd7, 0xd6, 0xa1, 0xa3, 0x0d, 0x0a, + 0x33, 0x2e, 0x1b, 0x24, 0x29, 0x41, 0xb5, 0xda, 0xc8, 0xfd, 0xd0, 0xd0, 0xce, 0xc4, 0xd7, 0xd6, 0xa1, 0xa3, 0x0d, 
0x0a + }; + + static const uint8_t line1[] = { + 0x31, 0x2e, 0xe7, 0xac, 0xac, 0xe4, 0xb8, 0x80, 0xe8, 0xa1, 0x8c, 0xe6, 0x96, 0x87, + 0xe5, 0xad, 0x97, 0xe3, 0x80, 0x82, '\r' + }; + + static const uint8_t line2[] = { + 0x32, 0x2e, 0xe7, 0xac, 0xac, 0xe4, 0xba, 0x8c, 0xe8, 0xa1, 0x8c, 0xe6, 0x96, 0x87, + 0xe5, 0xad, 0x97, 0xe3, 0x80, 0x82, '\r' + }; + + static const uint8_t line3[] = { + 0x33, 0x2e, 0xe7, 0xac, 0xac, 0xe4, 0xb8, 0x89, 0xe8, 0xa1, 0x8c, 0xe6, 0x96, 0x87, + 0xe5, 0xad, 0x97, 0xe3, 0x80, 0x82, '\r' + }; + + ParsedDicomFile dicom(false); + dicom.ReplacePlainString(DICOM_TAG_SPECIFIC_CHARACTER_SET, "\\ISO 2022 IR 58"); + ASSERT_TRUE(dicom.GetDcmtkObject().getDataset()->putAndInsertString + (DCM_PatientName, reinterpret_cast<const char*>(chinese), sizeof(chinese), true).good()); + + bool hasCodeExtensions; + Encoding encoding = dicom.DetectEncoding(hasCodeExtensions); + ASSERT_EQ(Encoding_SimplifiedChinese, encoding); + ASSERT_TRUE(hasCodeExtensions); + + std::string value; + ASSERT_TRUE(dicom.GetTagValue(value, DICOM_TAG_PATIENT_NAME)); + + std::vector<std::string> lines; + Toolbox::TokenizeString(lines, value, '\n'); + ASSERT_EQ(4u, lines.size()); + ASSERT_EQ(std::string(reinterpret_cast<const char*>(line1), sizeof(line1)), lines[0]); + ASSERT_EQ(std::string(reinterpret_cast<const char*>(line2), sizeof(line2)), lines[1]); + ASSERT_EQ(std::string(reinterpret_cast<const char*>(line3), sizeof(line3)), lines[2]); + ASSERT_TRUE(lines[3].empty()); +} +
--- a/UnitTestsSources/UnitTestsMain.cpp Thu Feb 14 14:56:31 2019 +0100 +++ b/UnitTestsSources/UnitTestsMain.cpp Thu Feb 14 15:25:41 2019 +0100 @@ -692,6 +692,7 @@ ASSERT_EQ(Encoding_Thai, StringToEncoding(EnumerationToString(Encoding_Thai))); ASSERT_EQ(Encoding_Korean, StringToEncoding(EnumerationToString(Encoding_Korean))); ASSERT_EQ(Encoding_JapaneseKanji, StringToEncoding(EnumerationToString(Encoding_JapaneseKanji))); + ASSERT_EQ(Encoding_SimplifiedChinese, StringToEncoding(EnumerationToString(Encoding_SimplifiedChinese))); ASSERT_EQ(ResourceType_Patient, StringToResourceType(EnumerationToString(ResourceType_Patient))); ASSERT_EQ(ResourceType_Study, StringToResourceType(EnumerationToString(ResourceType_Study)));