# HG changeset patch # User Sebastien Jodogne # Date 1550149444 -3600 # Node ID 4be505c2ac565df203c7b4a2991fd7f13ac9cca6 # Parent 1a0b4db799e85563810a783c1f27113429511497 Separation of ideographic and phonetic characters in DICOMweb JSON and XML diff -r 1a0b4db799e8 -r 4be505c2ac56 Core/DicomParsing/DicomWebJsonVisitor.cpp --- a/Core/DicomParsing/DicomWebJsonVisitor.cpp Thu Feb 14 12:15:28 2019 +0100 +++ b/Core/DicomParsing/DicomWebJsonVisitor.cpp Thu Feb 14 14:04:04 2019 +0100 @@ -43,9 +43,12 @@ static const char* const KEY_ALPHABETIC = "Alphabetic"; +static const char* const KEY_IDEOGRAPHIC = "Ideographic"; +static const char* const KEY_PHONETIC = "Phonetic"; static const char* const KEY_BULK_DATA_URI = "BulkDataURI"; static const char* const KEY_INLINE_BINARY = "InlineBinary"; static const char* const KEY_SQ = "SQ"; +static const char* const KEY_TAG = "tag"; static const char* const KEY_VALUE = "Value"; static const char* const KEY_VR = "vr"; @@ -53,9 +56,42 @@ namespace Orthanc { #if ORTHANC_ENABLE_PUGIXML == 1 + static void DecomposeXmlPersonName(pugi::xml_node& target, + const std::string& source) + { + std::vector<std::string> tokens; + Toolbox::TokenizeString(tokens, source, '^'); + + if (tokens.size() >= 1) + { + target.append_child("FamilyName").text() = tokens[0].c_str(); + } + + if (tokens.size() >= 2) + { + target.append_child("GivenName").text() = tokens[1].c_str(); + } + + if (tokens.size() >= 3) + { + target.append_child("MiddleName").text() = tokens[2].c_str(); + } + + if (tokens.size() >= 4) + { + target.append_child("NamePrefix").text() = tokens[3].c_str(); + } + + if (tokens.size() >= 5) + { + target.append_child("NameSuffix").text() = tokens[4].c_str(); + } + } + static void ExploreXmlDataset(pugi::xml_node& target, const Json::Value& source) { + // http://dicom.nema.org/medical/dicom/current/output/chtml/part18/sect_F.3.html#table_F.3.1-1 assert(source.type() == Json::objectValue); Json::Value::Members members = source.getMemberNames(); @@ -65,15 +101,15
@@ const Json::Value& content = source[members[i]]; assert(content.type() == Json::objectValue && - content.isMember("vr") && - content["vr"].type() == Json::stringValue); - const std::string vr = content["vr"].asString(); + content.isMember(KEY_VR) && + content[KEY_VR].type() == Json::stringValue); + const std::string vr = content[KEY_VR].asString(); const std::string keyword = FromDcmtkBridge::GetTagName(tag, ""); pugi::xml_node node = target.append_child("DicomAttribute"); - node.append_attribute("tag").set_value(members[i].c_str()); - node.append_attribute("vr").set_value(vr.c_str()); + node.append_attribute(KEY_TAG).set_value(members[i].c_str()); + node.append_attribute(KEY_VR).set_value(vr.c_str()); if (keyword != std::string(DcmTag_ERROR_TagName)) { @@ -99,40 +135,38 @@ } if (vr == "PN") { - if (content[KEY_VALUE][j].isMember(KEY_ALPHABETIC) && - content[KEY_VALUE][j][KEY_ALPHABETIC].type() == Json::stringValue) + bool hasAlphabetic = (content[KEY_VALUE][j].isMember(KEY_ALPHABETIC) && + content[KEY_VALUE][j][KEY_ALPHABETIC].type() == Json::stringValue); + + bool hasIdeographic = (content[KEY_VALUE][j].isMember(KEY_IDEOGRAPHIC) && + content[KEY_VALUE][j][KEY_IDEOGRAPHIC].type() == Json::stringValue); + + bool hasPhonetic = (content[KEY_VALUE][j].isMember(KEY_PHONETIC) && + content[KEY_VALUE][j][KEY_PHONETIC].type() == Json::stringValue); + + if (hasAlphabetic || + hasIdeographic || + hasPhonetic) { - std::vector<std::string> tokens; - Toolbox::TokenizeString(tokens, content[KEY_VALUE][j][KEY_ALPHABETIC].asString(), '^'); - pugi::xml_node child = node.append_child("PersonName"); child.append_attribute("number").set_value(number.c_str()); - - pugi::xml_node name = child.append_child(KEY_ALPHABETIC); - - if (tokens.size() >= 1) + + if (hasAlphabetic) { - name.append_child("FamilyName").text() = tokens[0].c_str(); - } - - if (tokens.size() >= 2) - { - name.append_child("GivenName").text() = tokens[1].c_str(); + pugi::xml_node name = child.append_child(KEY_ALPHABETIC); +
DecomposeXmlPersonName(name, content[KEY_VALUE][j][KEY_ALPHABETIC].asString()); } - - if (tokens.size() >= 3) + + if (hasIdeographic) { - name.append_child("MiddleName").text() = tokens[2].c_str(); + pugi::xml_node name = child.append_child(KEY_IDEOGRAPHIC); + DecomposeXmlPersonName(name, content[KEY_VALUE][j][KEY_IDEOGRAPHIC].asString()); } - - if (tokens.size() >= 4) + + if (hasPhonetic) { - name.append_child("NamePrefix").text() = tokens[3].c_str(); - } - - if (tokens.size() >= 5) - { - name.append_child("NameSuffix").text() = tokens[4].c_str(); + pugi::xml_node name = child.append_child(KEY_PHONETIC); + DecomposeXmlPersonName(name, content[KEY_VALUE][j][KEY_PHONETIC].asString()); } } } @@ -517,8 +551,25 @@ Json::Value value = Json::objectValue; if (!tokens[i].empty()) { - value[KEY_ALPHABETIC] = tokens[i]; + std::vector<std::string> components; + Toolbox::TokenizeString(components, tokens[i], '='); + + if (components.size() >= 1) + { + value[KEY_ALPHABETIC] = components[0]; + } + + if (components.size() >= 2) + { + value[KEY_IDEOGRAPHIC] = components[1]; + } + + if (components.size() >= 3) + { + value[KEY_PHONETIC] = components[2]; + } } + node[KEY_VALUE].append(value); break; } diff -r 1a0b4db799e8 -r 4be505c2ac56 Core/Toolbox.cpp --- a/Core/Toolbox.cpp Thu Feb 14 12:15:28 2019 +0100 +++ b/Core/Toolbox.cpp Thu Feb 14 14:04:04 2019 +0100 @@ -464,63 +464,48 @@ case Encoding_Latin1: return "ISO-8859-1"; - break; case Encoding_Latin2: return "ISO-8859-2"; - break; case Encoding_Latin3: return "ISO-8859-3"; - break; case Encoding_Latin4: return "ISO-8859-4"; - break; case Encoding_Latin5: return "ISO-8859-9"; - break; case Encoding_Cyrillic: return "ISO-8859-5"; - break; case Encoding_Windows1251: return "WINDOWS-1251"; - break; case Encoding_Arabic: return "ISO-8859-6"; - break; case Encoding_Greek: return "ISO-8859-7"; - break; case Encoding_Hebrew: return "ISO-8859-8"; - break; case Encoding_Japanese: return "SHIFT-JIS"; - break; case Encoding_Chinese: return "GB18030"; -
break; case Encoding_Thai: return "TIS620.2533-0"; - break; case Encoding_Korean: return "ISO-IR-149"; - break; case Encoding_JapaneseKanji: return "JIS"; - break; default: throw OrthancException(ErrorCode_NotImplemented); diff -r 1a0b4db799e8 -r 4be505c2ac56 NEWS --- a/NEWS Thu Feb 14 12:15:28 2019 +0100 +++ b/NEWS Thu Feb 14 14:04:04 2019 +0100 @@ -1,6 +1,7 @@ Pending changes in the mainline =============================== +* Separation of ideographic and phonetic characters in DICOMweb JSON and XML * Support of Japanese Kanji (ISO 2022 IR 87) and Korean (ISO 2022 IR 149) encodings * Basic support for character sets with code extensions (ISO 2022 escape sequences) diff -r 1a0b4db799e8 -r 4be505c2ac56 TODO --- a/TODO Thu Feb 14 12:15:28 2019 +0100 +++ b/TODO Thu Feb 14 14:04:04 2019 +0100 @@ -92,7 +92,6 @@ * Support Supplementary Kanji set (ISO 2022 IR 159) * Implement the following unit tests: - Japanese: http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/chapter_H.html - - Korean: http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/chapter_I.html - Chinese: http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/chapter_J.html http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/chapter_K.html diff -r 1a0b4db799e8 -r 4be505c2ac56 UnitTestsSources/FromDcmtkTests.cpp --- a/UnitTestsSources/FromDcmtkTests.cpp Thu Feb 14 12:15:28 2019 +0100 +++ b/UnitTestsSources/FromDcmtkTests.cpp Thu Feb 14 14:04:04 2019 +0100 @@ -36,6 +36,7 @@ #include "../Core/DicomNetworking/DicomFindAnswers.h" #include "../Core/DicomParsing/DicomModification.h" +#include "../Core/DicomParsing/DicomWebJsonVisitor.h" #include "../Core/DicomParsing/FromDcmtkBridge.h" #include "../Core/DicomParsing/Internals/DicomImageDecoder.h" #include "../Core/DicomParsing/ToDcmtkBridge.h" @@ -54,6 +55,10 @@ #include #include +#if ORTHANC_ENABLE_PUGIXML == 1 +# include +#endif + using namespace Orthanc; TEST(DicomFormat, Tag) @@ -1476,3 +1481,112 @@ 
Toolbox::RemoveIso2022EscapeSequences(dest, iso2022_str_real_ir13); ASSERT_EQ(dest, iso2022_str_real_ir13_ref); } + + + +static std::string DecodeFromSpecification(const std::string& s) +{ + std::vector<std::string> tokens; + Toolbox::TokenizeString(tokens, s, ' '); + + std::string result; + result.resize(tokens.size()); + + for (size_t i = 0; i < tokens.size(); i++) + { + std::vector<std::string> components; + Toolbox::TokenizeString(components, tokens[i], '/'); + + if (components.size() != 2) + { + throw; + } + + int a = boost::lexical_cast<int>(components[0]); + int b = boost::lexical_cast<int>(components[1]); + if (a < 0 || a > 15 || + b < 0 || b > 15) + { + throw; + } + + result[i] = static_cast<char>(a * 16 + b); + } + + return result; +} + + + +TEST(Toolbox, EncodingsKorean) +{ + // http://dicom.nema.org/MEDICAL/dicom/2017c/output/chtml/part05/sect_I.2.html + + std::string korean = DecodeFromSpecification( + "04/08 06/15 06/14 06/07 05/14 04/07 06/09 06/12 06/04 06/15 06/14 06/07 03/13 " + "01/11 02/04 02/09 04/03 15/11 15/03 05/14 01/11 02/04 02/09 04/03 13/01 12/14 " + "13/04 13/07 03/13 01/11 02/04 02/09 04/03 12/08 10/11 05/14 01/11 02/04 02/09 " + "04/03 11/01 14/06 11/05 11/15"); + + // This array can be re-generated using command-line: + // echo -n "Hong^Gildong=..."
| hexdump -v -e '14/1 "0x%02x, "' -e '"\n"' + static const uint8_t utf8raw[] = { + 0x48, 0x6f, 0x6e, 0x67, 0x5e, 0x47, 0x69, 0x6c, 0x64, 0x6f, 0x6e, 0x67, 0x3d, 0xe6, + 0xb4, 0xaa, 0x5e, 0xe5, 0x90, 0x89, 0xe6, 0xb4, 0x9e, 0x3d, 0xed, 0x99, 0x8d, 0x5e, + 0xea, 0xb8, 0xb8, 0xeb, 0x8f, 0x99 + }; + + std::string utf8(reinterpret_cast<const char*>(utf8raw), sizeof(utf8raw)); + + ParsedDicomFile dicom(false); + dicom.ReplacePlainString(DICOM_TAG_SPECIFIC_CHARACTER_SET, "\\ISO 2022 IR 149"); + ASSERT_TRUE(dicom.GetDcmtkObject().getDataset()->putAndInsertString + (DCM_PatientName, korean.c_str(), korean.size(), true).good()); + + std::string value; + ASSERT_TRUE(dicom.GetTagValue(value, DICOM_TAG_PATIENT_NAME)); + ASSERT_EQ(utf8, value); + + DicomWebJsonVisitor visitor; + dicom.Apply(visitor); + ASSERT_EQ(utf8.substr(0, 12), visitor.GetResult()["00100010"]["Value"][0]["Alphabetic"].asString()); + ASSERT_EQ(utf8.substr(13, 10), visitor.GetResult()["00100010"]["Value"][0]["Ideographic"].asString()); + ASSERT_EQ(utf8.substr(24), visitor.GetResult()["00100010"]["Value"][0]["Phonetic"].asString()); + +#if ORTHANC_ENABLE_PUGIXML == 1 + // http://dicom.nema.org/medical/dicom/current/output/chtml/part18/sect_F.3.html#table_F.3.1-1 + std::string xml; + visitor.FormatXml(xml); + + pugi::xml_document doc; + doc.load_string(xml.c_str()); + + pugi::xpath_node node = doc.select_single_node("//NativeDicomModel/DicomAttribute[@tag=\"00080005\"]/Value"); + ASSERT_STREQ("ISO_IR 192", node.node().text().as_string()); + + node = doc.select_single_node("//NativeDicomModel/DicomAttribute[@tag=\"00080005\"]"); + ASSERT_STREQ("CS", node.node().attribute("vr").value()); + + node = doc.select_single_node("//NativeDicomModel/DicomAttribute[@tag=\"00100010\"]"); + ASSERT_STREQ("PN", node.node().attribute("vr").value()); + + node = doc.select_single_node("//NativeDicomModel/DicomAttribute[@tag=\"00100010\"]/PersonName/Alphabetic/FamilyName"); + ASSERT_STREQ("Hong", node.node().text().as_string()); + + node =
doc.select_single_node("//NativeDicomModel/DicomAttribute[@tag=\"00100010\"]/PersonName/Alphabetic/GivenName"); + ASSERT_STREQ("Gildong", node.node().text().as_string()); + + node = doc.select_single_node("//NativeDicomModel/DicomAttribute[@tag=\"00100010\"]/PersonName/Ideographic/FamilyName"); + ASSERT_EQ(utf8.substr(13, 3), node.node().text().as_string()); + + node = doc.select_single_node("//NativeDicomModel/DicomAttribute[@tag=\"00100010\"]/PersonName/Ideographic/GivenName"); + ASSERT_EQ(utf8.substr(17, 6), node.node().text().as_string()); + + node = doc.select_single_node("//NativeDicomModel/DicomAttribute[@tag=\"00100010\"]/PersonName/Phonetic/FamilyName"); + ASSERT_EQ(utf8.substr(24, 3), node.node().text().as_string()); + + node = doc.select_single_node("//NativeDicomModel/DicomAttribute[@tag=\"00100010\"]/PersonName/Phonetic/GivenName"); + ASSERT_EQ(utf8.substr(28), node.node().text().as_string()); +#endif +} +