comparison Core/Toolbox.cpp @ 2907:0204af4ece6a

Remove invalid characters from badly-encoded UTF-8 strings
author Sebastien Jodogne <s.jodogne@gmail.com>
date Tue, 30 Oct 2018 13:53:29 +0100
parents 2a504fef4ed7
children ad0e7def3338
comparison
equal deleted inserted replaced
2906:2a504fef4ed7 2907:0204af4ece6a
517 517
518 #if ORTHANC_ENABLE_LOCALE == 1 518 #if ORTHANC_ENABLE_LOCALE == 1
519 std::string Toolbox::ConvertToUtf8(const std::string& source, 519 std::string Toolbox::ConvertToUtf8(const std::string& source,
520 Encoding sourceEncoding) 520 Encoding sourceEncoding)
521 { 521 {
522 if (sourceEncoding == Encoding_Utf8) 522 // The "::skip" flag makes boost skip invalid UTF-8
523 { 523 // characters. This can occur in badly-encoded DICOM files.
524 // Already in UTF-8: No conversion is required 524
525 return source;
526 }
527
528 if (sourceEncoding == Encoding_Ascii)
529 {
530 return ConvertToAscii(source);
531 }
532
533 const char* encoding = GetBoostLocaleEncoding(sourceEncoding);
534
535 try 525 try
536 { 526 {
537 return boost::locale::conv::to_utf<char>(source, encoding); 527 if (sourceEncoding == Encoding_Utf8)
528 {
529 // Already in UTF-8: no transcoding is needed, but invalid byte sequences are skipped
530 return boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip);
531 }
532 else if (sourceEncoding == Encoding_Ascii)
533 {
534 return ConvertToAscii(source);
535 }
536 else
537 {
538 const char* encoding = GetBoostLocaleEncoding(sourceEncoding);
539 return boost::locale::conv::to_utf<char>(source, encoding, boost::locale::conv::skip);
540 }
538 } 541 }
539 catch (std::runtime_error&) 542 catch (std::runtime_error&)
540 { 543 {
541 // Bad input string or bad encoding 544 // Bad input string or bad encoding
542 return ConvertToAscii(source); 545 return ConvertToAscii(source);
547 550
548 #if ORTHANC_ENABLE_LOCALE == 1 551 #if ORTHANC_ENABLE_LOCALE == 1
549 std::string Toolbox::ConvertFromUtf8(const std::string& source, 552 std::string Toolbox::ConvertFromUtf8(const std::string& source,
550 Encoding targetEncoding) 553 Encoding targetEncoding)
551 { 554 {
552 if (targetEncoding == Encoding_Utf8) 555 // The "::skip" flag makes boost skip invalid UTF-8
553 { 556 // characters. This can occur in badly-encoded DICOM files.
554 // Already in UTF-8: No conversion is required 557
555 return source;
556 }
557
558 if (targetEncoding == Encoding_Ascii)
559 {
560 return ConvertToAscii(source);
561 }
562
563 const char* encoding = GetBoostLocaleEncoding(targetEncoding);
564
565 try 558 try
566 { 559 {
567 return boost::locale::conv::from_utf<char>(source, encoding); 560 if (targetEncoding == Encoding_Utf8)
561 {
562 // Already in UTF-8: no transcoding is needed, but invalid byte sequences are skipped.
563 return boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip);
564 }
565 else if (targetEncoding == Encoding_Ascii)
566 {
567 return ConvertToAscii(source);
568 }
569 else
570 {
571 const char* encoding = GetBoostLocaleEncoding(targetEncoding);
572 return boost::locale::conv::from_utf<char>(source, encoding, boost::locale::conv::skip);
573 }
568 } 574 }
569 catch (std::runtime_error&) 575 catch (std::runtime_error&)
570 { 576 {
571 // Bad input string or bad encoding 577 // Bad input string or bad encoding
572 return ConvertToAscii(source); 578 return ConvertToAscii(source);
1425 * uses std::string does not work properly. We need to apply it 1431 * uses std::string does not work properly. We need to apply it
1426 * on wide strings (std::wstring). This explains the two calls to 1432 * on wide strings (std::wstring). This explains the two calls to
1427 * "utf_to_utf" in order to convert to/from std::wstring. 1433 * "utf_to_utf" in order to convert to/from std::wstring.
1428 **/ 1434 **/
1429 1435
1430 std::wstring w = boost::locale::conv::utf_to_utf<wchar_t>(source); 1436 std::wstring w = boost::locale::conv::utf_to_utf<wchar_t>(source, boost::locale::conv::skip);
1431 w = boost::algorithm::to_upper_copy<std::wstring>(w, *globalLocale_); 1437 w = boost::algorithm::to_upper_copy<std::wstring>(w, *globalLocale_);
1432 return boost::locale::conv::utf_to_utf<char>(w); 1438 return boost::locale::conv::utf_to_utf<char>(w, boost::locale::conv::skip);
1433 } 1439 }
1434 #endif 1440 #endif
1435 1441
1436 1442
1437 void Toolbox::InitializeOpenSsl() 1443 void Toolbox::InitializeOpenSsl()