Mercurial > hg > orthanc
comparison Core/DicomParsing/FromDcmtkBridge.cpp @ 3217:cf8cbeb35f33
preliminary support of Korean character set
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Wed, 13 Feb 2019 17:46:12 +0100 |
parents | f6374c36a671 |
children | 8ed445e94486 |
comparison
legend: equal | deleted | inserted | replaced
3216:c9a71eb4edcf | 3217:cf8cbeb35f33 |
---|---|
412 locker->addEntry(entry.release()); | 412 locker->addEntry(entry.release()); |
413 } | 413 } |
414 } | 414 } |
415 | 415 |
416 | 416 |
417 Encoding FromDcmtkBridge::DetectEncoding(DcmItem& dataset, | 417 Encoding FromDcmtkBridge::DetectEncoding(bool& hasCodeExtensions, |
| 418 DcmItem& dataset, |
418 Encoding defaultEncoding) | 419 Encoding defaultEncoding) |
419 { | 420 { |
420 Encoding encoding = defaultEncoding; | 421 // http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.12.html#sect_C.12.1.1.2 |
421 | 422 |
422 OFString tmp; | 423 OFString tmp; |
423 if (dataset.findAndGetOFString(DCM_SpecificCharacterSet, tmp).good()) | 424 if (dataset.findAndGetOFStringArray(DCM_SpecificCharacterSet, tmp).good()) |
424 { | 425 { |
425 std::string characterSet = Toolbox::StripSpaces(std::string(tmp.c_str())); | 426 std::vector<std::string> tokens; |
426 | 427 Toolbox::TokenizeString(tokens, std::string(tmp.c_str()), '\\'); |
427 if (characterSet.empty()) | 428 |
428 { | 429 hasCodeExtensions = (tokens.size() > 1); |
429 // Empty specific character set tag: Use the default encoding | 430 |
430 } | 431 for (size_t i = 0; i < tokens.size(); i++) |
431 else if (GetDicomEncoding(encoding, characterSet.c_str())) | 432 { |
432 { | 433 std::string characterSet = Toolbox::StripSpaces(tokens[i]); |
433 // The specific character set is supported by the Orthanc core | 434 |
434 } | 435 if (!characterSet.empty()) |
435 else | 436 { |
436 { | 437 Encoding encoding; |
437 LOG(WARNING) << "Value of Specific Character Set (0008,0005) is not supported: " << characterSet | 438 |
438 << ", fallback to ASCII (remove all special characters)"; | 439 if (GetDicomEncoding(encoding, characterSet.c_str())) |
439 encoding = Encoding_Ascii; | 440 { |
| 441 // The specific character set is supported by the Orthanc core |
| 442 return encoding; |
| 443 } |
| 444 else |
| 445 { |
| 446 LOG(WARNING) << "Value of Specific Character Set (0008,0005) is not supported: " << characterSet |
| 447 << ", fallback to ASCII (remove all special characters)"; |
| 448 return Encoding_Ascii; |
| 449 } |
| 450 } |
440 } | 451 } |
441 } | 452 } |
442 else | 453 else |
443 { | 454 { |
444 // No specific character set tag: Use the default encoding | 455 hasCodeExtensions = false; |
445 } | 456 } |
446 | 457 |
447 return encoding; | 458 // No specific character set tag: Use the default encoding |
| 459 return defaultEncoding; |
448 } | 460 } |
449 | 461 |
450 | 462 |
451 void FromDcmtkBridge::ExtractDicomSummary(DicomMap& target, | 463 void FromDcmtkBridge::ExtractDicomSummary(DicomMap& target, |
452 DcmItem& dataset, | 464 DcmItem& dataset, |
453 unsigned int maxStringLength, | 465 unsigned int maxStringLength, |
454 Encoding defaultEncoding) | 466 Encoding defaultEncoding) |
455 { | 467 { |
456 std::set<DicomTag> ignoreTagLength; | 468 std::set<DicomTag> ignoreTagLength; |
457 | 469 |
458 Encoding encoding = DetectEncoding(dataset, defaultEncoding); | 470 bool hasCodeExtensions; |
| 471 Encoding encoding = DetectEncoding(hasCodeExtensions, dataset, defaultEncoding); |
459 | 472 |
460 target.Clear(); | 473 target.Clear(); |
461 for (unsigned long i = 0; i < dataset.card(); i++) | 474 for (unsigned long i = 0; i < dataset.card(); i++) |
462 { | 475 { |
463 DcmElement* element = dataset.getElement(i); | 476 DcmElement* element = dataset.getElement(i); |
464 if (element && element->isLeaf()) | 477 if (element && element->isLeaf()) |
465 { | 478 { |
466 target.SetValue(element->getTag().getGTag(), | 479 target.SetValue(element->getTag().getGTag(), |
467 element->getTag().getETag(), | 480 element->getTag().getETag(), |
468 ConvertLeafElement(*element, DicomToJsonFlags_Default, | 481 ConvertLeafElement(*element, DicomToJsonFlags_Default, |
469 maxStringLength, encoding, ignoreTagLength)); | 482 maxStringLength, encoding, hasCodeExtensions, ignoreTagLength)); |
470 } | 483 } |
471 } | 484 } |
472 } | 485 } |
473 | 486 |
474 | 487 |
486 | 499 |
487 DicomValue* FromDcmtkBridge::ConvertLeafElement(DcmElement& element, | 500 DicomValue* FromDcmtkBridge::ConvertLeafElement(DcmElement& element, |
488 DicomToJsonFlags flags, | 501 DicomToJsonFlags flags, |
489 unsigned int maxStringLength, | 502 unsigned int maxStringLength, |
490 Encoding encoding, | 503 Encoding encoding, |
| 504 bool hasCodeExtensions, |
491 const std::set<DicomTag>& ignoreTagLength) | 505 const std::set<DicomTag>& ignoreTagLength) |
492 { | 506 { |
493 if (!element.isLeaf()) | 507 if (!element.isLeaf()) |
494 { | 508 { |
495 // This function is only applicable to leaf elements | 509 // This function is only applicable to leaf elements |
505 return new DicomValue("", false); | 519 return new DicomValue("", false); |
506 } | 520 } |
507 else | 521 else |
508 { | 522 { |
509 std::string s(c); | 523 std::string s(c); |
510 std::string utf8 = Toolbox::ConvertToUtf8(s, encoding); | 524 std::string utf8 = Toolbox::ConvertToUtf8(s, encoding, hasCodeExtensions); |
511 | 525 |
512 if (maxStringLength != 0 && | 526 if (maxStringLength != 0 && |
513 utf8.size() > maxStringLength && | 527 utf8.size() > maxStringLength && |
514 ignoreTagLength.find(GetTag(element)) == ignoreTagLength.end()) | 528 ignoreTagLength.find(GetTag(element)) == ignoreTagLength.end()) |
515 { | 529 { |
853 DcmElement& element, | 867 DcmElement& element, |
854 DicomToJsonFormat format, | 868 DicomToJsonFormat format, |
855 DicomToJsonFlags flags, | 869 DicomToJsonFlags flags, |
856 unsigned int maxStringLength, | 870 unsigned int maxStringLength, |
857 Encoding encoding, | 871 Encoding encoding, |
| 872 bool hasCodeExtensions, |
858 const std::set<DicomTag>& ignoreTagLength) | 873 const std::set<DicomTag>& ignoreTagLength) |
859 { | 874 { |
860 if (parent.type() == Json::nullValue) | 875 if (parent.type() == Json::nullValue) |
861 { | 876 { |
862 parent = Json::objectValue; | 877 parent = Json::objectValue; |
867 | 882 |
868 if (element.isLeaf()) | 883 if (element.isLeaf()) |
869 { | 884 { |
870 // The "0" below lets "LeafValueToJson()" take care of "TooLong" values | 885 // The "0" below lets "LeafValueToJson()" take care of "TooLong" values |
871 std::auto_ptr<DicomValue> v(FromDcmtkBridge::ConvertLeafElement | 886 std::auto_ptr<DicomValue> v(FromDcmtkBridge::ConvertLeafElement |
872 (element, flags, 0, encoding, ignoreTagLength)); | 887 (element, flags, 0, encoding, hasCodeExtensions, ignoreTagLength)); |
873 | 888 |
874 if (ignoreTagLength.find(GetTag(element)) == ignoreTagLength.end()) | 889 if (ignoreTagLength.find(GetTag(element)) == ignoreTagLength.end()) |
875 { | 890 { |
876 LeafValueToJson(target, *v, format, flags, maxStringLength); | 891 LeafValueToJson(target, *v, format, flags, maxStringLength); |
877 } | 892 } |
892 | 907 |
893 for (unsigned long i = 0; i < sequence.card(); i++) | 908 for (unsigned long i = 0; i < sequence.card(); i++) |
894 { | 909 { |
895 DcmItem* child = sequence.getItem(i); | 910 DcmItem* child = sequence.getItem(i); |
896 Json::Value& v = target.append(Json::objectValue); | 911 Json::Value& v = target.append(Json::objectValue); |
897 DatasetToJson(v, *child, format, flags, maxStringLength, encoding, ignoreTagLength); | 912 DatasetToJson(v, *child, format, flags, maxStringLength, encoding, hasCodeExtensions, ignoreTagLength); |
898 } | 913 } |
899 } | 914 } |
900 } | 915 } |
901 | 916 |
902 | 917 |
904 DcmItem& item, | 919 DcmItem& item, |
905 DicomToJsonFormat format, | 920 DicomToJsonFormat format, |
906 DicomToJsonFlags flags, | 921 DicomToJsonFlags flags, |
907 unsigned int maxStringLength, | 922 unsigned int maxStringLength, |
908 Encoding encoding, | 923 Encoding encoding, |
| 924 bool hasCodeExtensions, |
909 const std::set<DicomTag>& ignoreTagLength) | 925 const std::set<DicomTag>& ignoreTagLength) |
910 { | 926 { |
911 assert(parent.type() == Json::objectValue); | 927 assert(parent.type() == Json::objectValue); |
912 | 928 |
913 for (unsigned long i = 0; i < item.card(); i++) | 929 for (unsigned long i = 0; i < item.card(); i++) |
950 continue; | 966 continue; |
951 } | 967 } |
952 } | 968 } |
953 | 969 |
954 FromDcmtkBridge::ElementToJson(parent, *element, format, flags, | 970 FromDcmtkBridge::ElementToJson(parent, *element, format, flags, |
955 maxStringLength, encoding, ignoreTagLength); | 971 maxStringLength, encoding, hasCodeExtensions, ignoreTagLength); |
956 } | 972 } |
957 } | 973 } |
958 | 974 |
959 | 975 |
960 void FromDcmtkBridge::ExtractDicomAsJson(Json::Value& target, | 976 void FromDcmtkBridge::ExtractDicomAsJson(Json::Value& target, |
963 DicomToJsonFlags flags, | 979 DicomToJsonFlags flags, |
964 unsigned int maxStringLength, | 980 unsigned int maxStringLength, |
965 Encoding defaultEncoding, | 981 Encoding defaultEncoding, |
966 const std::set<DicomTag>& ignoreTagLength) | 982 const std::set<DicomTag>& ignoreTagLength) |
967 { | 983 { |
968 Encoding encoding = DetectEncoding(dataset, defaultEncoding); | 984 bool hasCodeExtensions; |
| 985 Encoding encoding = DetectEncoding(hasCodeExtensions, dataset, defaultEncoding); |
969 | 986 |
970 target = Json::objectValue; | 987 target = Json::objectValue; |
971 DatasetToJson(target, dataset, format, flags, maxStringLength, encoding, ignoreTagLength); | 988 DatasetToJson(target, dataset, format, flags, maxStringLength, encoding, hasCodeExtensions, ignoreTagLength); |
972 } | 989 } |
973 | 990 |
974 | 991 |
975 void FromDcmtkBridge::ExtractHeaderAsJson(Json::Value& target, | 992 void FromDcmtkBridge::ExtractHeaderAsJson(Json::Value& target, |
976 DcmMetaInfo& dataset, | 993 DcmMetaInfo& dataset, |
978 DicomToJsonFlags flags, | 995 DicomToJsonFlags flags, |
979 unsigned int maxStringLength) | 996 unsigned int maxStringLength) |
980 { | 997 { |
981 std::set<DicomTag> ignoreTagLength; | 998 std::set<DicomTag> ignoreTagLength; |
982 target = Json::objectValue; | 999 target = Json::objectValue; |
983 DatasetToJson(target, dataset, format, flags, maxStringLength, Encoding_Ascii, ignoreTagLength); | 1000 DatasetToJson(target, dataset, format, flags, maxStringLength, Encoding_Ascii, false, ignoreTagLength); |
984 } | 1001 } |
985 | 1002 |
986 | 1003 |
987 | 1004 |
988 static std::string GetTagNameInternal(DcmTag& tag) | 1005 static std::string GetTagNameInternal(DcmTag& tag) |
2031 } | 2048 } |
2032 | 2049 |
2033 | 2050 |
2034 void FromDcmtkBridge::ChangeStringEncoding(DcmItem& dataset, | 2051 void FromDcmtkBridge::ChangeStringEncoding(DcmItem& dataset, |
2035 Encoding source, | 2052 Encoding source, |
| 2053 bool hasSourceCodeExtensions, |
2036 Encoding target) | 2054 Encoding target) |
2037 { | 2055 { |
2038 // Recursive exploration of a dataset to change the encoding of | 2056 // Recursive exploration of a dataset to change the encoding of |
2039 // each string-like element | 2057 // each string-like element |
2040 | 2058 |
2053 char *c = NULL; | 2071 char *c = NULL; |
2054 if (element->isaString() && | 2072 if (element->isaString() && |
2055 element->getString(c).good() && | 2073 element->getString(c).good() && |
2056 c != NULL) | 2074 c != NULL) |
2057 { | 2075 { |
2058 std::string a = Toolbox::ConvertToUtf8(c, source); | 2076 std::string a = Toolbox::ConvertToUtf8(c, source, hasSourceCodeExtensions); |
2059 std::string b = Toolbox::ConvertFromUtf8(a, target); | 2077 std::string b = Toolbox::ConvertFromUtf8(a, target); |
2060 element->putString(b.c_str()); | 2078 element->putString(b.c_str()); |
2061 } | 2079 } |
2062 } | 2080 } |
2063 else | 2081 else |
2067 // etc. are not." The following dynamic_cast is thus OK. | 2085 // etc. are not." The following dynamic_cast is thus OK. |
2068 DcmSequenceOfItems& sequence = dynamic_cast<DcmSequenceOfItems&>(*element); | 2086 DcmSequenceOfItems& sequence = dynamic_cast<DcmSequenceOfItems&>(*element); |
2069 | 2087 |
2070 for (unsigned long j = 0; j < sequence.card(); j++) | 2088 for (unsigned long j = 0; j < sequence.card(); j++) |
2071 { | 2089 { |
2072 ChangeStringEncoding(*sequence.getItem(j), source, target); | 2090 ChangeStringEncoding(*sequence.getItem(j), source, hasSourceCodeExtensions, target); |
2073 } | 2091 } |
2074 } | 2092 } |
2075 } | 2093 } |
2076 } | 2094 } |
2077 } | 2095 } |
2190 // Forward declaration | 2208 // Forward declaration |
2191 static void ApplyVisitorToElement(DcmElement& element, | 2209 static void ApplyVisitorToElement(DcmElement& element, |
2192 ITagVisitor& visitor, | 2210 ITagVisitor& visitor, |
2193 const std::vector<DicomTag>& parentTags, | 2211 const std::vector<DicomTag>& parentTags, |
2194 const std::vector<size_t>& parentIndexes, | 2212 const std::vector<size_t>& parentIndexes, |
2195 Encoding encoding); | 2213 Encoding encoding, |
| 2214 bool hasCodeExtensions); |
2196 | 2215 |
2197 static void ApplyVisitorToDataset(DcmItem& dataset, | 2216 static void ApplyVisitorToDataset(DcmItem& dataset, |
2198 ITagVisitor& visitor, | 2217 ITagVisitor& visitor, |
2199 const std::vector<DicomTag>& parentTags, | 2218 const std::vector<DicomTag>& parentTags, |
2200 const std::vector<size_t>& parentIndexes, | 2219 const std::vector<size_t>& parentIndexes, |
2201 Encoding encoding) | 2220 Encoding encoding, |
| 2221 bool hasCodeExtensions) |
2202 { | 2222 { |
2203 assert(parentTags.size() == parentIndexes.size()); | 2223 assert(parentTags.size() == parentIndexes.size()); |
2204 | 2224 |
2205 for (unsigned long i = 0; i < dataset.card(); i++) | 2225 for (unsigned long i = 0; i < dataset.card(); i++) |
2206 { | 2226 { |
2209 { | 2229 { |
2210 throw OrthancException(ErrorCode_InternalError); | 2230 throw OrthancException(ErrorCode_InternalError); |
2211 } | 2231 } |
2212 else | 2232 else |
2213 { | 2233 { |
2214 ApplyVisitorToElement(*element, visitor, parentTags, parentIndexes, encoding); | 2234 ApplyVisitorToElement(*element, visitor, parentTags, parentIndexes, encoding, hasCodeExtensions); |
2215 } | 2235 } |
2216 } | 2236 } |
2217 } | 2237 } |
2218 | 2238 |
2219 | 2239 |
2220 static void ApplyVisitorToLeaf(DcmElement& element, | 2240 static void ApplyVisitorToLeaf(DcmElement& element, |
2221 ITagVisitor& visitor, | 2241 ITagVisitor& visitor, |
2222 const std::vector<DicomTag>& parentTags, | 2242 const std::vector<DicomTag>& parentTags, |
2223 const std::vector<size_t>& parentIndexes, | 2243 const std::vector<size_t>& parentIndexes, |
2224 const DicomTag& tag, | 2244 const DicomTag& tag, |
2225 Encoding encoding) | 2245 Encoding encoding, |
| 2246 bool hasCodeExtensions) |
2226 { | 2247 { |
2227 // TODO - Merge this function, that is more recent, with ConvertLeafElement() | 2248 // TODO - Merge this function, that is more recent, with ConvertLeafElement() |
2228 | 2249 |
2229 assert(element.isLeaf()); | 2250 assert(element.isLeaf()); |
2230 | 2251 |
2297 std::string utf8; | 2318 std::string utf8; |
2298 | 2319 |
2299 if (c != NULL) // This case corresponds to the empty string | 2320 if (c != NULL) // This case corresponds to the empty string |
2300 { | 2321 { |
2301 std::string s(c); | 2322 std::string s(c); |
2302 utf8 = Toolbox::ConvertToUtf8(s, encoding); | 2323 utf8 = Toolbox::ConvertToUtf8(s, encoding, hasCodeExtensions); |
2303 } | 2324 } |
2304 | 2325 |
2305 std::string newValue; | 2326 std::string newValue; |
2306 ITagVisitor::Action action = visitor.VisitString | 2327 ITagVisitor::Action action = visitor.VisitString |
2307 (newValue, parentTags, parentIndexes, tag, vr, utf8); | 2328 (newValue, parentTags, parentIndexes, tag, vr, utf8); |
2378 { | 2399 { |
2379 std::string ignored; | 2400 std::string ignored; |
2380 std::string s(reinterpret_cast<const char*>(data), l); | 2401 std::string s(reinterpret_cast<const char*>(data), l); |
2381 ITagVisitor::Action action = visitor.VisitString | 2402 ITagVisitor::Action action = visitor.VisitString |
2382 (ignored, parentTags, parentIndexes, tag, vr, | 2403 (ignored, parentTags, parentIndexes, tag, vr, |
2383 Toolbox::ConvertToUtf8(s, encoding)); | 2404 Toolbox::ConvertToUtf8(s, encoding, hasCodeExtensions)); |
2384 | 2405 |
2385 if (action != ITagVisitor::Action_None) | 2406 if (action != ITagVisitor::Action_None) |
2386 { | 2407 { |
2387 LOG(WARNING) << "Cannot replace this string tag: " | 2408 LOG(WARNING) << "Cannot replace this string tag: " |
2388 << FromDcmtkBridge::GetTagName(element) | 2409 << FromDcmtkBridge::GetTagName(element) |
2606 | 2627 |
2607 static void ApplyVisitorToElement(DcmElement& element, | 2628 static void ApplyVisitorToElement(DcmElement& element, |
2608 ITagVisitor& visitor, | 2629 ITagVisitor& visitor, |
2609 const std::vector<DicomTag>& parentTags, | 2630 const std::vector<DicomTag>& parentTags, |
2610 const std::vector<size_t>& parentIndexes, | 2631 const std::vector<size_t>& parentIndexes, |
2611 Encoding encoding) | 2632 Encoding encoding, |
| 2633 bool hasCodeExtensions) |
2612 { | 2634 { |
2613 assert(parentTags.size() == parentIndexes.size()); | 2635 assert(parentTags.size() == parentIndexes.size()); |
2614 | 2636 |
2615 DicomTag tag(FromDcmtkBridge::Convert(element.getTag())); | 2637 DicomTag tag(FromDcmtkBridge::Convert(element.getTag())); |
2616 | 2638 |
2617 if (element.isLeaf()) | 2639 if (element.isLeaf()) |
2618 { | 2640 { |
2619 ApplyVisitorToLeaf(element, visitor, parentTags, parentIndexes, tag, encoding); | 2641 ApplyVisitorToLeaf(element, visitor, parentTags, parentIndexes, tag, encoding, hasCodeExtensions); |
2620 } | 2642 } |
2621 else | 2643 else |
2622 { | 2644 { |
2623 // "All subclasses of DcmElement except for DcmSequenceOfItems | 2645 // "All subclasses of DcmElement except for DcmSequenceOfItems |
2624 // are leaf nodes, while DcmSequenceOfItems, DcmItem, DcmDataset | 2646 // are leaf nodes, while DcmSequenceOfItems, DcmItem, DcmDataset |
2638 | 2660 |
2639 for (unsigned long i = 0; i < sequence.card(); i++) | 2661 for (unsigned long i = 0; i < sequence.card(); i++) |
2640 { | 2662 { |
2641 indexes.back() = static_cast<size_t>(i); | 2663 indexes.back() = static_cast<size_t>(i); |
2642 DcmItem* child = sequence.getItem(i); | 2664 DcmItem* child = sequence.getItem(i); |
2643 ApplyVisitorToDataset(*child, visitor, tags, indexes, encoding); | 2665 ApplyVisitorToDataset(*child, visitor, tags, indexes, encoding, hasCodeExtensions); |
2644 } | 2666 } |
2645 } | 2667 } |
2646 } | 2668 } |
2647 } | 2669 } |
2648 | 2670 |
2651 ITagVisitor& visitor, | 2673 ITagVisitor& visitor, |
2652 Encoding defaultEncoding) | 2674 Encoding defaultEncoding) |
2653 { | 2675 { |
2654 std::vector<DicomTag> parentTags; | 2676 std::vector<DicomTag> parentTags; |
2655 std::vector<size_t> parentIndexes; | 2677 std::vector<size_t> parentIndexes; |
2656 Encoding encoding = DetectEncoding(dataset, defaultEncoding); | 2678 bool hasCodeExtensions; |
2657 ApplyVisitorToDataset(dataset, visitor, parentTags, parentIndexes, encoding); | 2679 Encoding encoding = DetectEncoding(hasCodeExtensions, dataset, defaultEncoding); |
| 2680 ApplyVisitorToDataset(dataset, visitor, parentTags, parentIndexes, encoding, hasCodeExtensions); |
2658 } | 2681 } |
2659 } | 2682 } |