changeset 3326:b21d4cc8e5d1

speed up base64 decoding + added tests
author Alain Mazy <alain@mazy.be>
date Thu, 21 Mar 2019 11:41:03 +0100
parents 2e7c5c15ba25
children 0f17fd02d401
files CMakeLists.txt Core/Toolbox.cpp Resources/ThirdParty/base64/base64.cpp UnitTestsSources/ToolboxTests.cpp UnitTestsSources/UnitTestsMain.cpp
diffstat 5 files changed, 180 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/CMakeLists.txt	Thu Mar 21 09:57:31 2019 +0100
+++ b/CMakeLists.txt	Thu Mar 21 11:41:03 2019 +0100
@@ -123,6 +123,7 @@
   UnitTestsSources/SQLiteTests.cpp
   UnitTestsSources/ServerIndexTests.cpp
   UnitTestsSources/StreamTests.cpp
+  UnitTestsSources/ToolboxTests.cpp
   UnitTestsSources/UnitTestsMain.cpp
   UnitTestsSources/VersionsTests.cpp
   UnitTestsSources/ZipTests.cpp
@@ -332,6 +333,7 @@
   ${GOOGLE_TEST_SOURCES}
   ${ORTHANC_UNIT_TESTS_PCH}
   ${ORTHANC_UNIT_TESTS_SOURCES}
+  ${BOOST_EXTENDED_SOURCES}
   )
 
 target_link_libraries(UnitTests
--- a/Core/Toolbox.cpp	Thu Mar 21 09:57:31 2019 +0100
+++ b/Core/Toolbox.cpp	Thu Mar 21 11:41:03 2019 +0100
@@ -398,6 +398,7 @@
   void Toolbox::EncodeBase64(std::string& result, 
                              const std::string& data)
   {
+    // Drop whatever "result" contained before delegating to base64_encode().
+    // NOTE(review): presumably base64_encode() appends to its output
+    // argument, which would make this reset necessary -- confirm.
+    result.clear();
     base64_encode(result, data);
   }
 
@@ -416,6 +417,7 @@
       }
     }
 
+    result.clear();
     base64_decode(result, data);
   }
 
--- a/Resources/ThirdParty/base64/base64.cpp	Thu Mar 21 09:57:31 2019 +0100
+++ b/Resources/ThirdParty/base64/base64.cpp	Thu Mar 21 11:41:03 2019 +0100
@@ -23,16 +23,19 @@
 
    René Nyffenegger rene.nyffenegger@adp-gmbh.ch
 
+   ------------------------------
+   This version has been modified (changed the interface + uses another decoding algorithm,
+   inspired by https://stackoverflow.com/a/34571089, which was faster)
 */
 
 #include "base64.h"
 #include <string.h>
+#include <vector>
 
 static const std::string base64_chars = 
-             "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-             "abcdefghijklmnopqrstuvwxyz"
-             "0123456789+/";
-
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    "abcdefghijklmnopqrstuvwxyz"
+    "0123456789+/";
 
 static inline bool is_base64(unsigned char c) {
   return (isalnum(c) || (c == '+') || (c == '/'));
@@ -41,7 +44,7 @@
 void base64_encode(std::string& result, const std::string& stringToEncode)
 {
   const unsigned char* bytes_to_encode = reinterpret_cast<const unsigned char*>
-    (stringToEncode.size() > 0 ? &stringToEncode[0] : NULL);
+      (stringToEncode.size() > 0 ? &stringToEncode[0] : NULL);
   size_t in_len = stringToEncode.size();
   
   result.reserve(result.size() + in_len * 4 / 3 + 10);
@@ -84,8 +87,8 @@
   }
 }
 
-
-void base64_decode(std::string& result, const std::string& encoded_string) {
+// old code from René Nyffenegger.  This code is slower
+void base64_decode_old(std::string& result, const std::string& encoded_string) {
   size_t in_len = encoded_string.size();
   int i = 0;
   int j = 0;
@@ -125,3 +128,34 @@
       result += char_array_3[j];
   }
 }
+
+
+// new code from https://stackoverflow.com/a/34571089
+// note that the encoding algorithm from this page was slower (and buggy!)
+// this code uses a lookup table instead of calling find() for each character
+
+// Lookup table mapping each of the 256 possible byte values to its
+// 6-bit base64 index, or to -1 if the byte is not part of the base64
+// alphabet.  The table is filled once by the constructor of a
+// namespace-scope object, i.e. during static initialization, so that
+// base64_decode() never has to build it lazily on a first call.
+namespace
+{
+  struct Base64DecodeTable
+  {
+    int indexes[256];
+
+    Base64DecodeTable()
+    {
+      for (size_t i = 0; i < 256; ++i)
+        indexes[i] = -1;
+
+      // "char" may be signed: always go through "unsigned char"
+      // before using a character as an array index
+      for (size_t i = 0; i < base64_chars.size(); ++i)
+        indexes[static_cast<unsigned char>(base64_chars[i])] = static_cast<int>(i);
+    }
+  };
+
+  const Base64DecodeTable decode_table;
+}
+
+// Decodes the base64 content of "stringToDecode" and appends the
+// decoded bytes to "result" (the previous content of "result" is kept).
+// Decoding stops silently at the first character that is not part of
+// the base64 alphabet, which includes the '=' padding character.
+void base64_decode(std::string& result, const std::string &stringToDecode) {
+
+  result.reserve(result.size() + stringToDecode.size() * 3 / 4 + 10);
+
+  // "val" accumulates the decoded bits, "valb" is the position of the
+  // lowest bit of the next complete byte inside "val" (negative as long
+  // as fewer than 8 bits are pending).  Only the low 12 bits of "val"
+  // are ever read back, so it is declared unsigned: the high bits are
+  // then discarded by well-defined wrap-around instead of overflowing
+  // a signed integer.
+  unsigned int val = 0;
+  int valb = -8;
+  for (std::string::const_iterator c = stringToDecode.begin(); c != stringToDecode.end(); ++c) {
+    // The cast prevents a negative index for bytes >= 0x80
+    const int index = decode_table.indexes[static_cast<unsigned char>(*c)];
+    if (index == -1)
+      break;
+    val = (val << 6) + static_cast<unsigned int>(index);
+    valb += 6;
+    if (valb >= 0) {
+      result.push_back(static_cast<char>((val >> valb) & 0xFF));
+      valb -= 8;
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/UnitTestsSources/ToolboxTests.cpp	Thu Mar 21 11:41:03 2019 +0100
@@ -0,0 +1,135 @@
+/**
+ * Orthanc - A Lightweight, RESTful DICOM Store
+ * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics
+ * Department, University Hospital of Liege, Belgium
+ * Copyright (C) 2017-2019 Osimis S.A., Belgium
+ *
+ * This program is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * In addition, as a special exception, the copyright holders of this
+ * program give permission to link the code of its release with the
+ * OpenSSL project's "OpenSSL" library (or with modified versions of it
+ * that use the same license as the "OpenSSL" library), and distribute
+ * the linked executables. You must obey the GNU General Public License
+ * in all respects for all of the code used other than "OpenSSL". If you
+ * modify file(s) with this exception, you may extend this exception to
+ * your version of the file(s), but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files
+ * in the program, then also delete it here.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ **/
+
+
+#include "PrecompiledHeadersUnitTests.h"
+#include "gtest/gtest.h"
+#include "../Core/Toolbox.h"
+
+using namespace Orthanc;
+
+// Round-trips a buffer that contains every possible byte value (twice)
+// through the base64 encoder and decoder.
+TEST(Toolbox, Base64_allByteValues)
+{
+  const size_t byteCount = 2*256;
+
+  std::string original;
+  original.reserve(byteCount);
+  for (size_t index = 0; index < byteCount; ++index)
+  {
+    original.push_back(index % 256);
+  }
+
+  std::string encoded;
+  Toolbox::EncodeBase64(encoded, original);
+
+  std::string decoded;
+  Toolbox::DecodeBase64(decoded, encoded);
+
+  ASSERT_EQ(original, decoded);
+}
+
+// Round-trips inputs of 0 to 5 bytes through the base64 encoder and
+// decoder.
+TEST(Toolbox, Base64_multipleSizes)
+{
+  // The buffers are shared by all the iterations: from the second
+  // round on, the output strings still hold the data of the previous
+  // round when they are handed to the encoder/decoder.
+  std::string input;
+  std::string encoded;
+  std::string decoded;
+
+  const size_t maxLength = 5;
+  for (size_t length = 0; length <= maxLength; ++length)
+  {
+    printf("base64, testing size %zu\n", length);
+
+    input.clear();
+    input.reserve(length);
+    for (size_t position = 0; position < length; ++position)
+    {
+      input.push_back(position % 256);
+    }
+
+    Toolbox::EncodeBase64(encoded, input);
+    Toolbox::DecodeBase64(decoded, encoded);
+
+    ASSERT_EQ(input, decoded);
+  }
+}
+
+// Convenience wrapper around Toolbox::EncodeBase64() that returns the
+// encoded string by value, so that it can be used inside an assertion.
+static std::string EncodeBase64Bis(const std::string& s)
+{
+  std::string encoded;
+  Toolbox::EncodeBase64(encoded, s);
+  return encoded;
+}
+
+
+// Checks the encoder and the decoder against known reference values.
+TEST(Toolbox, Base64)
+{
+  const std::string helloEncoded = "SGVsbG8gd29ybGQ=";
+
+  // Encoding of reference strings
+  ASSERT_EQ("", EncodeBase64Bis(""));
+  ASSERT_EQ("YQ==", EncodeBase64Bis("a"));
+  ASSERT_EQ(helloEncoded, EncodeBase64Bis("Hello world"));
+
+  // Decoding of a reference string
+  std::string output;
+  Toolbox::DecodeBase64(output, helloEncoded);
+  ASSERT_EQ("Hello world", output);
+
+  // A character outside of the base64 alphabet must raise an exception
+  ASSERT_THROW(Toolbox::DecodeBase64(output, "?"), OrthancException);
+
+  // A string made of all the allowed characters is decoded without
+  // any check on the decoded value
+  Toolbox::DecodeBase64(output, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=");
+}
+
+
+#if 0 // enable only when compiling in Release with a C++ 11 compiler
+#include <chrono> // I had troubles to link with boost::chrono ...
+
+// Round-trips a 10 MB buffer through the base64 encoder and decoder,
+// and prints the wall-clock time spent in each of the two steps.
+TEST(Toolbox, Base64_largeString)
+{
+  std::string toEncode;
+  std::string base64Result;
+  std::string decodedResult;
+
+  size_t size = 10 * 1024 * 1024;
+  toEncode.reserve(size);
+  for (size_t i = 0; i < size; i++)
+    toEncode.push_back(i % 256);
+
+  std::chrono::high_resolution_clock::time_point start;
+  std::chrono::high_resolution_clock::time_point afterEncoding;
+  std::chrono::high_resolution_clock::time_point afterDecoding;
+
+  start = std::chrono::high_resolution_clock::now();
+  Orthanc::Toolbox::EncodeBase64(base64Result, toEncode);
+  afterEncoding = std::chrono::high_resolution_clock::now();
+  Orthanc::Toolbox::DecodeBase64(decodedResult, base64Result);
+  afterDecoding = std::chrono::high_resolution_clock::now();
+
+  ASSERT_EQ(toEncode, decodedResult);
+
+  // A std::chrono::duration object cannot be passed through the
+  // variadic arguments of printf(): extract its tick count and print
+  // it with a format specifier that matches the type of the argument
+  const long long encodingMs = static_cast<long long>(
+    std::chrono::duration_cast<std::chrono::milliseconds>(afterEncoding - start).count());
+  const long long decodingMs = static_cast<long long>(
+    std::chrono::duration_cast<std::chrono::milliseconds>(afterDecoding - afterEncoding).count());
+
+  printf("encoding took %lld ms\n", encodingMs);
+  printf("decoding took %lld ms\n", decodingMs);
+}
+#endif
--- a/UnitTestsSources/UnitTestsMain.cpp	Thu Mar 21 09:57:31 2019 +0100
+++ b/UnitTestsSources/UnitTestsMain.cpp	Thu Mar 21 11:41:03 2019 +0100
@@ -374,34 +374,6 @@
   ASSERT_EQ("da39a3ee-5e6b4b0d-3255bfef-95601890-afd80709", s);
 }
 
-
-static std::string EncodeBase64Bis(const std::string& s)
-{
-  std::string result;
-  Toolbox::EncodeBase64(result, s);
-  return result;
-}
-
-
-TEST(Toolbox, Base64)
-{
-  ASSERT_EQ("", EncodeBase64Bis(""));
-  ASSERT_EQ("YQ==", EncodeBase64Bis("a"));
-
-  const std::string hello = "SGVsbG8gd29ybGQ=";
-  ASSERT_EQ(hello, EncodeBase64Bis("Hello world"));
-
-  std::string decoded;
-  Toolbox::DecodeBase64(decoded, hello);
-  ASSERT_EQ("Hello world", decoded);
-
-  // Invalid character
-  ASSERT_THROW(Toolbox::DecodeBase64(decoded, "?"), OrthancException);
-
-  // All the allowed characters
-  Toolbox::DecodeBase64(decoded, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=");
-}
-
 TEST(Toolbox, PathToExecutable)
 {
   printf("[%s]\n", SystemToolbox::GetPathToExecutable().c_str());