Mercurial > hg > orthanc
comparison Core/Toolbox.cpp @ 2907:0204af4ece6a
Remove invalid characters from badly-encoded UTF-8 strings
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Tue, 30 Oct 2018 13:53:29 +0100 |
parents | 2a504fef4ed7 |
children | ad0e7def3338 |
comparison
equal
deleted
inserted
replaced
2906:2a504fef4ed7 | 2907:0204af4ece6a |
---|---|
517 | 517 |
518 #if ORTHANC_ENABLE_LOCALE == 1 | 518 #if ORTHANC_ENABLE_LOCALE == 1 |
519 std::string Toolbox::ConvertToUtf8(const std::string& source, | 519 std::string Toolbox::ConvertToUtf8(const std::string& source, |
520 Encoding sourceEncoding) | 520 Encoding sourceEncoding) |
521 { | 521 { |
522 if (sourceEncoding == Encoding_Utf8) | 522 // The "::skip" flag makes boost skip invalid UTF-8 |
523 { | 523 // characters. This can occur in badly-encoded DICOM files. |
524 // Already in UTF-8: No conversion is required | 524 |
525 return source; | |
526 } | |
527 | |
528 if (sourceEncoding == Encoding_Ascii) | |
529 { | |
530 return ConvertToAscii(source); | |
531 } | |
532 | |
533 const char* encoding = GetBoostLocaleEncoding(sourceEncoding); | |
534 | |
535 try | 525 try |
536 { | 526 { |
537 return boost::locale::conv::to_utf<char>(source, encoding); | 527 if (sourceEncoding == Encoding_Utf8) |
528 { | |
529 // Already in UTF-8: no re-encoding is needed, but invalid byte sequences are still skipped | |
530 return boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip); | |
531 } | |
532 else if (sourceEncoding == Encoding_Ascii) | |
533 { | |
534 return ConvertToAscii(source); | |
535 } | |
536 else | |
537 { | |
538 const char* encoding = GetBoostLocaleEncoding(sourceEncoding); | |
539 return boost::locale::conv::to_utf<char>(source, encoding, boost::locale::conv::skip); | |
540 } | |
538 } | 541 } |
539 catch (std::runtime_error&) | 542 catch (std::runtime_error&) |
540 { | 543 { |
541 // Bad input string or bad encoding | 544 // Bad input string or bad encoding |
542 return ConvertToAscii(source); | 545 return ConvertToAscii(source); |
547 | 550 |
548 #if ORTHANC_ENABLE_LOCALE == 1 | 551 #if ORTHANC_ENABLE_LOCALE == 1 |
549 std::string Toolbox::ConvertFromUtf8(const std::string& source, | 552 std::string Toolbox::ConvertFromUtf8(const std::string& source, |
550 Encoding targetEncoding) | 553 Encoding targetEncoding) |
551 { | 554 { |
552 if (targetEncoding == Encoding_Utf8) | 555 // The "::skip" flag makes boost skip invalid UTF-8 |
553 { | 556 // characters. This can occur in badly-encoded DICOM files. |
554 // Already in UTF-8: No conversion is required | 557 |
555 return source; | |
556 } | |
557 | |
558 if (targetEncoding == Encoding_Ascii) | |
559 { | |
560 return ConvertToAscii(source); | |
561 } | |
562 | |
563 const char* encoding = GetBoostLocaleEncoding(targetEncoding); | |
564 | |
565 try | 558 try |
566 { | 559 { |
567 return boost::locale::conv::from_utf<char>(source, encoding); | 560 if (targetEncoding == Encoding_Utf8) |
561 { | |
562 // Already in UTF-8: no re-encoding is needed, but invalid byte sequences are still skipped. | |
563 return boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip); | |
564 } | |
565 else if (targetEncoding == Encoding_Ascii) | |
566 { | |
567 return ConvertToAscii(source); | |
568 } | |
569 else | |
570 { | |
571 const char* encoding = GetBoostLocaleEncoding(targetEncoding); | |
572 return boost::locale::conv::from_utf<char>(source, encoding, boost::locale::conv::skip); | |
573 } | |
568 } | 574 } |
569 catch (std::runtime_error&) | 575 catch (std::runtime_error&) |
570 { | 576 { |
571 // Bad input string or bad encoding | 577 // Bad input string or bad encoding |
572 return ConvertToAscii(source); | 578 return ConvertToAscii(source); |
1425 * uses std::string does not work properly. We need to apply it | 1431 * uses std::string does not work properly. We need to apply it |
1426 * on wide strings (std::wstring). This explains the two calls to | 1432 * on wide strings (std::wstring). This explains the two calls to |
1427 * "utf_to_utf" in order to convert to/from std::wstring. | 1433 * "utf_to_utf" in order to convert to/from std::wstring. |
1428 **/ | 1434 **/ |
1429 | 1435 |
1430 std::wstring w = boost::locale::conv::utf_to_utf<wchar_t>(source); | 1436 std::wstring w = boost::locale::conv::utf_to_utf<wchar_t>(source, boost::locale::conv::skip); |
1431 w = boost::algorithm::to_upper_copy<std::wstring>(w, *globalLocale_); | 1437 w = boost::algorithm::to_upper_copy<std::wstring>(w, *globalLocale_); |
1432 return boost::locale::conv::utf_to_utf<char>(w); | 1438 return boost::locale::conv::utf_to_utf<char>(w, boost::locale::conv::skip); |
1433 } | 1439 } |
1434 #endif | 1440 #endif |
1435 | 1441 |
1436 | 1442 |
1437 void Toolbox::InitializeOpenSsl() | 1443 void Toolbox::InitializeOpenSsl() |