Mercurial > hg > orthanc
diff Core/Toolbox.cpp @ 2907:0204af4ece6a
Remove invalid characters from badly-encoded UTF-8 strings
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Tue, 30 Oct 2018 13:53:29 +0100 |
parents | 2a504fef4ed7 |
children | ad0e7def3338 |
line wrap: on
line diff
--- a/Core/Toolbox.cpp Tue Oct 30 12:29:55 2018 +0100
+++ b/Core/Toolbox.cpp Tue Oct 30 13:53:29 2018 +0100
@@ -519,22 +519,25 @@
   std::string Toolbox::ConvertToUtf8(const std::string& source,
                                      Encoding sourceEncoding)
   {
-    if (sourceEncoding == Encoding_Utf8)
-    {
-      // Already in UTF-8: No conversion is required
-      return source;
-    }
-
-    if (sourceEncoding == Encoding_Ascii)
-    {
-      return ConvertToAscii(source);
-    }
-
-    const char* encoding = GetBoostLocaleEncoding(sourceEncoding);
-
+    // The "::skip" flag makes boost skip invalid UTF-8
+    // characters. This can occur in badly-encoded DICOM files.
+
     try
     {
-      return boost::locale::conv::to_utf<char>(source, encoding);
+      if (sourceEncoding == Encoding_Utf8)
+      {
+        // Already in UTF-8: No conversion is required
+        return boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip);
+      }
+      else if (sourceEncoding == Encoding_Ascii)
+      {
+        return ConvertToAscii(source);
+      }
+      else
+      {
+        const char* encoding = GetBoostLocaleEncoding(sourceEncoding);
+        return boost::locale::conv::to_utf<char>(source, encoding, boost::locale::conv::skip);
+      }
     }
     catch (std::runtime_error&)
     {
@@ -549,22 +552,25 @@
   std::string Toolbox::ConvertFromUtf8(const std::string& source,
                                        Encoding targetEncoding)
   {
-    if (targetEncoding == Encoding_Utf8)
-    {
-      // Already in UTF-8: No conversion is required
-      return source;
-    }
-
-    if (targetEncoding == Encoding_Ascii)
-    {
-      return ConvertToAscii(source);
-    }
-
-    const char* encoding = GetBoostLocaleEncoding(targetEncoding);
-
+    // The "::skip" flag makes boost skip invalid UTF-8
+    // characters. This can occur in badly-encoded DICOM files.
+
     try
     {
-      return boost::locale::conv::from_utf<char>(source, encoding);
+      if (targetEncoding == Encoding_Utf8)
+      {
+        // Already in UTF-8: No conversion is required.
+        return boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip);
+      }
+      else if (targetEncoding == Encoding_Ascii)
+      {
+        return ConvertToAscii(source);
+      }
+      else
+      {
+        const char* encoding = GetBoostLocaleEncoding(targetEncoding);
+        return boost::locale::conv::from_utf<char>(source, encoding, boost::locale::conv::skip);
+      }
     }
     catch (std::runtime_error&)
     {
@@ -1427,9 +1433,9 @@
      * "utf_to_utf" in order to convert to/from std::wstring.
      **/
 
-    std::wstring w = boost::locale::conv::utf_to_utf<wchar_t>(source);
+    std::wstring w = boost::locale::conv::utf_to_utf<wchar_t>(source, boost::locale::conv::skip);
     w = boost::algorithm::to_upper_copy<std::wstring>(w, *globalLocale_);
-    return boost::locale::conv::utf_to_utf<char>(w);
+    return boost::locale::conv::utf_to_utf<char>(w, boost::locale::conv::skip);
   }
 #endif