# HG changeset patch
# User Sebastien Jodogne
# Date 1466669307 -7200
# Node ID 35ccba7adae954df41913ecac8d1e38d6bf1c76a
# Parent  5b93382f88e181a357cdbb9758906d182b8bbb37
Toolbox::UriEncode

diff -r 5b93382f88e1 -r 35ccba7adae9 Core/Toolbox.cpp
--- a/Core/Toolbox.cpp	Wed Jun 22 15:31:07 2016 +0200
+++ b/Core/Toolbox.cpp	Thu Jun 23 10:08:27 2016 +0200
@@ -1506,4 +1506,63 @@
 
     return fopen(path.c_str(), m);
   }
+
+
+
+  static bool IsUnreservedCharacter(char c)
+  {
+    // This function checks whether "c" is an unreserved character
+    // with respect to URI percent-encoding
+    // https://en.wikipedia.org/wiki/Percent-encoding#Percent-encoding%5Fin%5Fa%5FURI
+
+    return ((c >= 'A' && c <= 'Z') ||
+            (c >= 'a' && c <= 'z') ||
+            (c >= '0' && c <= '9') ||
+            c == '-' ||
+            c == '_' ||
+            c == '.' ||
+            c == '~');
+  }
+
+  void Toolbox::UriEncode(std::string& target,
+                          const std::string& source)
+  {
+    // Compute the exact length of the percent-encoded URI
+    size_t length = 0;
+
+    for (size_t i = 0; i < source.size(); i++)
+    {
+      if (IsUnreservedCharacter(source[i]))
+      {
+        length += 1;
+      }
+      else
+      {
+        // This character must be percent-encoded
+        length += 3;
+      }
+    }
+
+    target.clear();
+    target.reserve(length);
+
+    for (size_t i = 0; i < source.size(); i++)
+    {
+      if (IsUnreservedCharacter(source[i]))
+      {
+        target.push_back(source[i]);
+      }
+      else
+      {
+        // This character must be percent-encoded
+        uint8_t byte = static_cast<uint8_t>(source[i]);
+        uint8_t a = byte >> 4;
+        uint8_t b = byte & 0x0f;
+
+        target.push_back('%');
+        target.push_back(a < 10 ? a + '0' : a - 10 + 'A');
+        target.push_back(b < 10 ? b + '0' : b - 10 + 'A');
+      }
+    }
+  }
 }
diff -r 5b93382f88e1 -r 35ccba7adae9 Core/Toolbox.h
--- a/Core/Toolbox.h	Wed Jun 22 15:31:07 2016 +0200
+++ b/Core/Toolbox.h	Thu Jun 23 10:08:27 2016 +0200
@@ -199,5 +199,8 @@
 
     FILE* OpenFile(const std::string& path,
                    FileMode mode);
+
+    void UriEncode(std::string& target,
+                   const std::string& source);
   }
 }
diff -r 5b93382f88e1 -r 35ccba7adae9 UnitTestsSources/UnitTestsMain.cpp
--- a/UnitTestsSources/UnitTestsMain.cpp	Wed Jun 22 15:31:07 2016 +0200
+++ b/UnitTestsSources/UnitTestsMain.cpp	Thu Jun 23 10:08:27 2016 +0200
@@ -926,6 +926,25 @@
 }
 
 
+TEST(Toolbox, UriEncode)
+{
+  std::string s;
+
+  // Unreserved characters must not be modified
+  std::string t = "aAzZ09.-~_";
+  Toolbox::UriEncode(s, t);
+  ASSERT_EQ(t, s);
+
+  Toolbox::UriEncode(s, "!#$&'()*+,/:;=?@[]"); ASSERT_EQ("%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D", s);
+  Toolbox::UriEncode(s, "%"); ASSERT_EQ("%25", s);
+
+  // Encode characters from UTF-8. This is the test string from the
+  // file "../Resources/EncodingTests.py"
+  Toolbox::UriEncode(s, "\x54\x65\x73\x74\xc3\xa9\xc3\xa4\xc3\xb6\xc3\xb2\xd0\x94\xce\x98\xc4\x9d\xd7\x93\xd8\xb5\xc4\xb7\xd1\x9b\xe0\xb9\x9b\xef\xbe\x88\xc4\xb0");
+  ASSERT_EQ("Test%C3%A9%C3%A4%C3%B6%C3%B2%D0%94%CE%98%C4%9D%D7%93%D8%B5%C4%B7%D1%9B%E0%B9%9B%EF%BE%88%C4%B0", s);
+}
+
+
 int main(int argc, char **argv)
 {
   Logging::Initialize();