diff UnitTestsSources/UnitTestsMain.cpp @ 2907:0204af4ece6a

Remove invalid characters from badly-encoded UTF-8 strings
author Sebastien Jodogne <s.jodogne@gmail.com>
date Tue, 30 Oct 2018 13:53:29 +0100
parents 2a504fef4ed7
children 9d277f8ad698
line wrap: on
line diff
--- a/UnitTestsSources/UnitTestsMain.cpp	Tue Oct 30 12:29:55 2018 +0100
+++ b/UnitTestsSources/UnitTestsMain.cpp	Tue Oct 30 13:53:29 2018 +0100
@@ -444,6 +444,19 @@
   ASSERT_EQ(0x00, static_cast<unsigned char>(utf8[14]));  // Null-terminated string
 }
 
+
+TEST(Toolbox, FixUtf8)
+{
+  // Latin-1 bytes of "crane" with a circumflex accent on the 'a' (0xe2): not a valid UTF-8 sequence
+  const unsigned char latin1[] = { 0x63, 0x72, 0xe2, 0x6e, 0x65 };
+  std::string s(reinterpret_cast<const char*>(latin1), sizeof(latin1));  // Named cast keeps constness
+  // With the correct source encoding, the Latin-1 -> UTF-8 -> Latin-1 round trip must give back "s"
+  ASSERT_EQ(s, Toolbox::ConvertFromUtf8(Toolbox::ConvertToUtf8(s, Encoding_Latin1), Encoding_Latin1));
+  // Wrongly declared as UTF-8, the badly-encoded part must be removed, which leaves only "cre"
+  ASSERT_EQ("cre", Toolbox::ConvertToUtf8(s, Encoding_Utf8));
+}
+
+
 TEST(Toolbox, UrlDecode)
 {
   std::string s;