comparison Core/DicomParsing/FromDcmtkBridge.cpp @ 3217:cf8cbeb35f33

preliminary support of Korean character set
author Sebastien Jodogne <s.jodogne@gmail.com>
date Wed, 13 Feb 2019 17:46:12 +0100
parents f6374c36a671
children 8ed445e94486
comparison
equal deleted inserted replaced
3216:c9a71eb4edcf 3217:cf8cbeb35f33
412 locker->addEntry(entry.release()); 412 locker->addEntry(entry.release());
413 } 413 }
414 } 414 }
415 415
416 416
417 Encoding FromDcmtkBridge::DetectEncoding(DcmItem& dataset, 417 Encoding FromDcmtkBridge::DetectEncoding(bool& hasCodeExtensions,
418 DcmItem& dataset,
418 Encoding defaultEncoding) 419 Encoding defaultEncoding)
419 { 420 {
420 Encoding encoding = defaultEncoding; 421 // http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.12.html#sect_C.12.1.1.2
421 422
422 OFString tmp; 423 OFString tmp;
423 if (dataset.findAndGetOFString(DCM_SpecificCharacterSet, tmp).good()) 424 if (dataset.findAndGetOFStringArray(DCM_SpecificCharacterSet, tmp).good())
424 { 425 {
425 std::string characterSet = Toolbox::StripSpaces(std::string(tmp.c_str())); 426 std::vector<std::string> tokens;
426 427 Toolbox::TokenizeString(tokens, std::string(tmp.c_str()), '\\');
427 if (characterSet.empty()) 428
428 { 429 hasCodeExtensions = (tokens.size() > 1);
429 // Empty specific character set tag: Use the default encoding 430
430 } 431 for (size_t i = 0; i < tokens.size(); i++)
431 else if (GetDicomEncoding(encoding, characterSet.c_str())) 432 {
432 { 433 std::string characterSet = Toolbox::StripSpaces(tokens[i]);
433 // The specific character set is supported by the Orthanc core 434
434 } 435 if (!characterSet.empty())
435 else 436 {
436 { 437 Encoding encoding;
437 LOG(WARNING) << "Value of Specific Character Set (0008,0005) is not supported: " << characterSet 438
438 << ", fallback to ASCII (remove all special characters)"; 439 if (GetDicomEncoding(encoding, characterSet.c_str()))
439 encoding = Encoding_Ascii; 440 {
441 // The specific character set is supported by the Orthanc core
442 return encoding;
443 }
444 else
445 {
446 LOG(WARNING) << "Value of Specific Character Set (0008,0005) is not supported: " << characterSet
447 << ", fallback to ASCII (remove all special characters)";
448 return Encoding_Ascii;
449 }
450 }
440 } 451 }
441 } 452 }
442 else 453 else
443 { 454 {
444 // No specific character set tag: Use the default encoding 455 hasCodeExtensions = false;
445 } 456 }
446 457
447 return encoding; 458 // No specific character set tag: Use the default encoding
459 return defaultEncoding;
448 } 460 }
449 461
450 462
451 void FromDcmtkBridge::ExtractDicomSummary(DicomMap& target, 463 void FromDcmtkBridge::ExtractDicomSummary(DicomMap& target,
452 DcmItem& dataset, 464 DcmItem& dataset,
453 unsigned int maxStringLength, 465 unsigned int maxStringLength,
454 Encoding defaultEncoding) 466 Encoding defaultEncoding)
455 { 467 {
456 std::set<DicomTag> ignoreTagLength; 468 std::set<DicomTag> ignoreTagLength;
457 469
458 Encoding encoding = DetectEncoding(dataset, defaultEncoding); 470 bool hasCodeExtensions;
471 Encoding encoding = DetectEncoding(hasCodeExtensions, dataset, defaultEncoding);
459 472
460 target.Clear(); 473 target.Clear();
461 for (unsigned long i = 0; i < dataset.card(); i++) 474 for (unsigned long i = 0; i < dataset.card(); i++)
462 { 475 {
463 DcmElement* element = dataset.getElement(i); 476 DcmElement* element = dataset.getElement(i);
464 if (element && element->isLeaf()) 477 if (element && element->isLeaf())
465 { 478 {
466 target.SetValue(element->getTag().getGTag(), 479 target.SetValue(element->getTag().getGTag(),
467 element->getTag().getETag(), 480 element->getTag().getETag(),
468 ConvertLeafElement(*element, DicomToJsonFlags_Default, 481 ConvertLeafElement(*element, DicomToJsonFlags_Default,
469 maxStringLength, encoding, ignoreTagLength)); 482 maxStringLength, encoding, hasCodeExtensions, ignoreTagLength));
470 } 483 }
471 } 484 }
472 } 485 }
473 486
474 487
486 499
487 DicomValue* FromDcmtkBridge::ConvertLeafElement(DcmElement& element, 500 DicomValue* FromDcmtkBridge::ConvertLeafElement(DcmElement& element,
488 DicomToJsonFlags flags, 501 DicomToJsonFlags flags,
489 unsigned int maxStringLength, 502 unsigned int maxStringLength,
490 Encoding encoding, 503 Encoding encoding,
504 bool hasCodeExtensions,
491 const std::set<DicomTag>& ignoreTagLength) 505 const std::set<DicomTag>& ignoreTagLength)
492 { 506 {
493 if (!element.isLeaf()) 507 if (!element.isLeaf())
494 { 508 {
495 // This function is only applicable to leaf elements 509 // This function is only applicable to leaf elements
505 return new DicomValue("", false); 519 return new DicomValue("", false);
506 } 520 }
507 else 521 else
508 { 522 {
509 std::string s(c); 523 std::string s(c);
510 std::string utf8 = Toolbox::ConvertToUtf8(s, encoding); 524 std::string utf8 = Toolbox::ConvertToUtf8(s, encoding, hasCodeExtensions);
511 525
512 if (maxStringLength != 0 && 526 if (maxStringLength != 0 &&
513 utf8.size() > maxStringLength && 527 utf8.size() > maxStringLength &&
514 ignoreTagLength.find(GetTag(element)) == ignoreTagLength.end()) 528 ignoreTagLength.find(GetTag(element)) == ignoreTagLength.end())
515 { 529 {
853 DcmElement& element, 867 DcmElement& element,
854 DicomToJsonFormat format, 868 DicomToJsonFormat format,
855 DicomToJsonFlags flags, 869 DicomToJsonFlags flags,
856 unsigned int maxStringLength, 870 unsigned int maxStringLength,
857 Encoding encoding, 871 Encoding encoding,
872 bool hasCodeExtensions,
858 const std::set<DicomTag>& ignoreTagLength) 873 const std::set<DicomTag>& ignoreTagLength)
859 { 874 {
860 if (parent.type() == Json::nullValue) 875 if (parent.type() == Json::nullValue)
861 { 876 {
862 parent = Json::objectValue; 877 parent = Json::objectValue;
867 882
868 if (element.isLeaf()) 883 if (element.isLeaf())
869 { 884 {
870 // The "0" below lets "LeafValueToJson()" take care of "TooLong" values 885 // The "0" below lets "LeafValueToJson()" take care of "TooLong" values
871 std::auto_ptr<DicomValue> v(FromDcmtkBridge::ConvertLeafElement 886 std::auto_ptr<DicomValue> v(FromDcmtkBridge::ConvertLeafElement
872 (element, flags, 0, encoding, ignoreTagLength)); 887 (element, flags, 0, encoding, hasCodeExtensions, ignoreTagLength));
873 888
874 if (ignoreTagLength.find(GetTag(element)) == ignoreTagLength.end()) 889 if (ignoreTagLength.find(GetTag(element)) == ignoreTagLength.end())
875 { 890 {
876 LeafValueToJson(target, *v, format, flags, maxStringLength); 891 LeafValueToJson(target, *v, format, flags, maxStringLength);
877 } 892 }
892 907
893 for (unsigned long i = 0; i < sequence.card(); i++) 908 for (unsigned long i = 0; i < sequence.card(); i++)
894 { 909 {
895 DcmItem* child = sequence.getItem(i); 910 DcmItem* child = sequence.getItem(i);
896 Json::Value& v = target.append(Json::objectValue); 911 Json::Value& v = target.append(Json::objectValue);
897 DatasetToJson(v, *child, format, flags, maxStringLength, encoding, ignoreTagLength); 912 DatasetToJson(v, *child, format, flags, maxStringLength, encoding, hasCodeExtensions, ignoreTagLength);
898 } 913 }
899 } 914 }
900 } 915 }
901 916
902 917
904 DcmItem& item, 919 DcmItem& item,
905 DicomToJsonFormat format, 920 DicomToJsonFormat format,
906 DicomToJsonFlags flags, 921 DicomToJsonFlags flags,
907 unsigned int maxStringLength, 922 unsigned int maxStringLength,
908 Encoding encoding, 923 Encoding encoding,
924 bool hasCodeExtensions,
909 const std::set<DicomTag>& ignoreTagLength) 925 const std::set<DicomTag>& ignoreTagLength)
910 { 926 {
911 assert(parent.type() == Json::objectValue); 927 assert(parent.type() == Json::objectValue);
912 928
913 for (unsigned long i = 0; i < item.card(); i++) 929 for (unsigned long i = 0; i < item.card(); i++)
950 continue; 966 continue;
951 } 967 }
952 } 968 }
953 969
954 FromDcmtkBridge::ElementToJson(parent, *element, format, flags, 970 FromDcmtkBridge::ElementToJson(parent, *element, format, flags,
955 maxStringLength, encoding, ignoreTagLength); 971 maxStringLength, encoding, hasCodeExtensions, ignoreTagLength);
956 } 972 }
957 } 973 }
958 974
959 975
960 void FromDcmtkBridge::ExtractDicomAsJson(Json::Value& target, 976 void FromDcmtkBridge::ExtractDicomAsJson(Json::Value& target,
963 DicomToJsonFlags flags, 979 DicomToJsonFlags flags,
964 unsigned int maxStringLength, 980 unsigned int maxStringLength,
965 Encoding defaultEncoding, 981 Encoding defaultEncoding,
966 const std::set<DicomTag>& ignoreTagLength) 982 const std::set<DicomTag>& ignoreTagLength)
967 { 983 {
968 Encoding encoding = DetectEncoding(dataset, defaultEncoding); 984 bool hasCodeExtensions;
985 Encoding encoding = DetectEncoding(hasCodeExtensions, dataset, defaultEncoding);
969 986
970 target = Json::objectValue; 987 target = Json::objectValue;
971 DatasetToJson(target, dataset, format, flags, maxStringLength, encoding, ignoreTagLength); 988 DatasetToJson(target, dataset, format, flags, maxStringLength, encoding, hasCodeExtensions, ignoreTagLength);
972 } 989 }
973 990
974 991
975 void FromDcmtkBridge::ExtractHeaderAsJson(Json::Value& target, 992 void FromDcmtkBridge::ExtractHeaderAsJson(Json::Value& target,
976 DcmMetaInfo& dataset, 993 DcmMetaInfo& dataset,
978 DicomToJsonFlags flags, 995 DicomToJsonFlags flags,
979 unsigned int maxStringLength) 996 unsigned int maxStringLength)
980 { 997 {
981 std::set<DicomTag> ignoreTagLength; 998 std::set<DicomTag> ignoreTagLength;
982 target = Json::objectValue; 999 target = Json::objectValue;
983 DatasetToJson(target, dataset, format, flags, maxStringLength, Encoding_Ascii, ignoreTagLength); 1000 DatasetToJson(target, dataset, format, flags, maxStringLength, Encoding_Ascii, false, ignoreTagLength);
984 } 1001 }
985 1002
986 1003
987 1004
988 static std::string GetTagNameInternal(DcmTag& tag) 1005 static std::string GetTagNameInternal(DcmTag& tag)
2031 } 2048 }
2032 2049
2033 2050
2034 void FromDcmtkBridge::ChangeStringEncoding(DcmItem& dataset, 2051 void FromDcmtkBridge::ChangeStringEncoding(DcmItem& dataset,
2035 Encoding source, 2052 Encoding source,
2053 bool hasSourceCodeExtensions,
2036 Encoding target) 2054 Encoding target)
2037 { 2055 {
2038 // Recursive exploration of a dataset to change the encoding of 2056 // Recursive exploration of a dataset to change the encoding of
2039 // each string-like element 2057 // each string-like element
2040 2058
2053 char *c = NULL; 2071 char *c = NULL;
2054 if (element->isaString() && 2072 if (element->isaString() &&
2055 element->getString(c).good() && 2073 element->getString(c).good() &&
2056 c != NULL) 2074 c != NULL)
2057 { 2075 {
2058 std::string a = Toolbox::ConvertToUtf8(c, source); 2076 std::string a = Toolbox::ConvertToUtf8(c, source, hasSourceCodeExtensions);
2059 std::string b = Toolbox::ConvertFromUtf8(a, target); 2077 std::string b = Toolbox::ConvertFromUtf8(a, target);
2060 element->putString(b.c_str()); 2078 element->putString(b.c_str());
2061 } 2079 }
2062 } 2080 }
2063 else 2081 else
2067 // etc. are not." The following dynamic_cast is thus OK. 2085 // etc. are not." The following dynamic_cast is thus OK.
2068 DcmSequenceOfItems& sequence = dynamic_cast<DcmSequenceOfItems&>(*element); 2086 DcmSequenceOfItems& sequence = dynamic_cast<DcmSequenceOfItems&>(*element);
2069 2087
2070 for (unsigned long j = 0; j < sequence.card(); j++) 2088 for (unsigned long j = 0; j < sequence.card(); j++)
2071 { 2089 {
2072 ChangeStringEncoding(*sequence.getItem(j), source, target); 2090 ChangeStringEncoding(*sequence.getItem(j), source, hasSourceCodeExtensions, target);
2073 } 2091 }
2074 } 2092 }
2075 } 2093 }
2076 } 2094 }
2077 } 2095 }
2190 // Forward declaration 2208 // Forward declaration
2191 static void ApplyVisitorToElement(DcmElement& element, 2209 static void ApplyVisitorToElement(DcmElement& element,
2192 ITagVisitor& visitor, 2210 ITagVisitor& visitor,
2193 const std::vector<DicomTag>& parentTags, 2211 const std::vector<DicomTag>& parentTags,
2194 const std::vector<size_t>& parentIndexes, 2212 const std::vector<size_t>& parentIndexes,
2195 Encoding encoding); 2213 Encoding encoding,
2214 bool hasCodeExtensions);
2196 2215
2197 static void ApplyVisitorToDataset(DcmItem& dataset, 2216 static void ApplyVisitorToDataset(DcmItem& dataset,
2198 ITagVisitor& visitor, 2217 ITagVisitor& visitor,
2199 const std::vector<DicomTag>& parentTags, 2218 const std::vector<DicomTag>& parentTags,
2200 const std::vector<size_t>& parentIndexes, 2219 const std::vector<size_t>& parentIndexes,
2201 Encoding encoding) 2220 Encoding encoding,
2221 bool hasCodeExtensions)
2202 { 2222 {
2203 assert(parentTags.size() == parentIndexes.size()); 2223 assert(parentTags.size() == parentIndexes.size());
2204 2224
2205 for (unsigned long i = 0; i < dataset.card(); i++) 2225 for (unsigned long i = 0; i < dataset.card(); i++)
2206 { 2226 {
2209 { 2229 {
2210 throw OrthancException(ErrorCode_InternalError); 2230 throw OrthancException(ErrorCode_InternalError);
2211 } 2231 }
2212 else 2232 else
2213 { 2233 {
2214 ApplyVisitorToElement(*element, visitor, parentTags, parentIndexes, encoding); 2234 ApplyVisitorToElement(*element, visitor, parentTags, parentIndexes, encoding, hasCodeExtensions);
2215 } 2235 }
2216 } 2236 }
2217 } 2237 }
2218 2238
2219 2239
2220 static void ApplyVisitorToLeaf(DcmElement& element, 2240 static void ApplyVisitorToLeaf(DcmElement& element,
2221 ITagVisitor& visitor, 2241 ITagVisitor& visitor,
2222 const std::vector<DicomTag>& parentTags, 2242 const std::vector<DicomTag>& parentTags,
2223 const std::vector<size_t>& parentIndexes, 2243 const std::vector<size_t>& parentIndexes,
2224 const DicomTag& tag, 2244 const DicomTag& tag,
2225 Encoding encoding) 2245 Encoding encoding,
2246 bool hasCodeExtensions)
2226 { 2247 {
2227 // TODO - Merge this function, that is more recent, with ConvertLeafElement() 2248 // TODO - Merge this function, that is more recent, with ConvertLeafElement()
2228 2249
2229 assert(element.isLeaf()); 2250 assert(element.isLeaf());
2230 2251
2297 std::string utf8; 2318 std::string utf8;
2298 2319
2299 if (c != NULL) // This case corresponds to the empty string 2320 if (c != NULL) // This case corresponds to the empty string
2300 { 2321 {
2301 std::string s(c); 2322 std::string s(c);
2302 utf8 = Toolbox::ConvertToUtf8(s, encoding); 2323 utf8 = Toolbox::ConvertToUtf8(s, encoding, hasCodeExtensions);
2303 } 2324 }
2304 2325
2305 std::string newValue; 2326 std::string newValue;
2306 ITagVisitor::Action action = visitor.VisitString 2327 ITagVisitor::Action action = visitor.VisitString
2307 (newValue, parentTags, parentIndexes, tag, vr, utf8); 2328 (newValue, parentTags, parentIndexes, tag, vr, utf8);
2378 { 2399 {
2379 std::string ignored; 2400 std::string ignored;
2380 std::string s(reinterpret_cast<const char*>(data), l); 2401 std::string s(reinterpret_cast<const char*>(data), l);
2381 ITagVisitor::Action action = visitor.VisitString 2402 ITagVisitor::Action action = visitor.VisitString
2382 (ignored, parentTags, parentIndexes, tag, vr, 2403 (ignored, parentTags, parentIndexes, tag, vr,
2383 Toolbox::ConvertToUtf8(s, encoding)); 2404 Toolbox::ConvertToUtf8(s, encoding, hasCodeExtensions));
2384 2405
2385 if (action != ITagVisitor::Action_None) 2406 if (action != ITagVisitor::Action_None)
2386 { 2407 {
2387 LOG(WARNING) << "Cannot replace this string tag: " 2408 LOG(WARNING) << "Cannot replace this string tag: "
2388 << FromDcmtkBridge::GetTagName(element) 2409 << FromDcmtkBridge::GetTagName(element)
2606 2627
2607 static void ApplyVisitorToElement(DcmElement& element, 2628 static void ApplyVisitorToElement(DcmElement& element,
2608 ITagVisitor& visitor, 2629 ITagVisitor& visitor,
2609 const std::vector<DicomTag>& parentTags, 2630 const std::vector<DicomTag>& parentTags,
2610 const std::vector<size_t>& parentIndexes, 2631 const std::vector<size_t>& parentIndexes,
2611 Encoding encoding) 2632 Encoding encoding,
2633 bool hasCodeExtensions)
2612 { 2634 {
2613 assert(parentTags.size() == parentIndexes.size()); 2635 assert(parentTags.size() == parentIndexes.size());
2614 2636
2615 DicomTag tag(FromDcmtkBridge::Convert(element.getTag())); 2637 DicomTag tag(FromDcmtkBridge::Convert(element.getTag()));
2616 2638
2617 if (element.isLeaf()) 2639 if (element.isLeaf())
2618 { 2640 {
2619 ApplyVisitorToLeaf(element, visitor, parentTags, parentIndexes, tag, encoding); 2641 ApplyVisitorToLeaf(element, visitor, parentTags, parentIndexes, tag, encoding, hasCodeExtensions);
2620 } 2642 }
2621 else 2643 else
2622 { 2644 {
2623 // "All subclasses of DcmElement except for DcmSequenceOfItems 2645 // "All subclasses of DcmElement except for DcmSequenceOfItems
2624 // are leaf nodes, while DcmSequenceOfItems, DcmItem, DcmDataset 2646 // are leaf nodes, while DcmSequenceOfItems, DcmItem, DcmDataset
2638 2660
2639 for (unsigned long i = 0; i < sequence.card(); i++) 2661 for (unsigned long i = 0; i < sequence.card(); i++)
2640 { 2662 {
2641 indexes.back() = static_cast<size_t>(i); 2663 indexes.back() = static_cast<size_t>(i);
2642 DcmItem* child = sequence.getItem(i); 2664 DcmItem* child = sequence.getItem(i);
2643 ApplyVisitorToDataset(*child, visitor, tags, indexes, encoding); 2665 ApplyVisitorToDataset(*child, visitor, tags, indexes, encoding, hasCodeExtensions);
2644 } 2666 }
2645 } 2667 }
2646 } 2668 }
2647 } 2669 }
2648 2670
2651 ITagVisitor& visitor, 2673 ITagVisitor& visitor,
2652 Encoding defaultEncoding) 2674 Encoding defaultEncoding)
2653 { 2675 {
2654 std::vector<DicomTag> parentTags; 2676 std::vector<DicomTag> parentTags;
2655 std::vector<size_t> parentIndexes; 2677 std::vector<size_t> parentIndexes;
2656 Encoding encoding = DetectEncoding(dataset, defaultEncoding); 2678 bool hasCodeExtensions;
2657 ApplyVisitorToDataset(dataset, visitor, parentTags, parentIndexes, encoding); 2679 Encoding encoding = DetectEncoding(hasCodeExtensions, dataset, defaultEncoding);
2680 ApplyVisitorToDataset(dataset, visitor, parentTags, parentIndexes, encoding, hasCodeExtensions);
2658 } 2681 }
2659 } 2682 }