changeset 3333:2a38e00a0638

Size of the Orthanc static binaries is reduced by compressing ICU data
author Sebastien Jodogne <s.jodogne@gmail.com>
date Wed, 27 Mar 2019 15:25:18 +0100
parents 79178122842c
children af81263c025c
files CMakeLists.txt Core/Toolbox.cpp NEWS Resources/CMake/LibIcuConfiguration.cmake Resources/ThirdParty/icu/Version.cmake
diffstat 5 files changed, 183 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/CMakeLists.txt	Fri Mar 22 14:06:38 2019 +0100
+++ b/CMakeLists.txt	Wed Mar 27 15:25:18 2019 +0100
@@ -193,6 +193,7 @@
     ${ORTHANC_EMBEDDED_FILES}
     ORTHANC_EXPLORER ${CMAKE_CURRENT_SOURCE_DIR}/OrthancExplorer
     ${DCMTK_DICTIONARIES}
+    ${LIBICU_RESOURCES}
     )
 else()
   add_definitions(
@@ -201,6 +202,7 @@
     )
   EmbedResources(
     ${ORTHANC_EMBEDDED_FILES}
+    ${LIBICU_RESOURCES}
     )
 endif()
 
--- a/Core/Toolbox.cpp	Fri Mar 22 14:06:38 2019 +0100
+++ b/Core/Toolbox.cpp	Wed Mar 27 15:25:18 2019 +0100
@@ -115,6 +115,36 @@
 }
 
 
+#if defined(ORTHANC_STATIC_ICU)
+#  if (ORTHANC_STATIC_ICU == 1 && ORTHANC_ENABLE_LOCALE == 1)
+#    include <EmbeddedResources.h>
+#    include <unicode/udata.h>
+#    include <unicode/uloc.h>
+#    include "Compression/GzipCompressor.h"
+
+static std::string  globalIcuData_;
+
+extern "C"
+{
+  // This is a dummy placeholder for the "icudt58_dat" (resp.
+  // "icudt63_dat") global variable from the autogenerated
+  // "icudt58l_dat.c" (resp. "icudt63l_dat.c") file, which contains a
+  // huge C array. In Orthanc, this array is compressed using gzip and
+  // attached as a resource, then uncompressed during the launch of
+  // Orthanc by the static function "InitializeIcu()".
+  struct
+  {
+    double bogus;
+    uint8_t *bytes;
+  } U_ICUDATA_ENTRY_POINT = { 0.0, NULL };
+}
+
+#  endif
+#endif
+ 
+
+
+
 
 namespace Orthanc
 {
@@ -530,6 +560,14 @@
                                      Encoding sourceEncoding,
                                      bool hasCodeExtensions)
   {
+#if ORTHANC_STATIC_ICU == 1
+    if (globalIcuData_.empty())
+    {
+      throw OrthancException(ErrorCode_BadSequenceOfCalls,
+                             "Call Toolbox::InitializeGlobalLocale()");
+    }
+#endif
+
     // The "::skip" flag makes boost skip invalid UTF-8
     // characters. This can occur in badly-encoded DICOM files.
     
@@ -581,6 +619,14 @@
   std::string Toolbox::ConvertFromUtf8(const std::string& source,
                                        Encoding targetEncoding)
   {
+#if ORTHANC_STATIC_ICU == 1
+    if (globalIcuData_.empty())
+    {
+      throw OrthancException(ErrorCode_BadSequenceOfCalls,
+                             "Call Toolbox::InitializeGlobalLocale()");
+    }
+#endif
+
     // The "::skip" flag makes boost skip invalid UTF-8
     // characters. This can occur in badly-encoded DICOM files.
     
@@ -1380,9 +1426,107 @@
 
     return (globalLocale_.get() != NULL);
   }
+
+  
+  static void InitializeIcu()
+  {
+#if ORTHANC_STATIC_ICU == 1
+    if (globalIcuData_.empty())
+    {
+      LOG(INFO) << "Setting up the ICU common data";
+
+      GzipCompressor compressor;
+      compressor.Uncompress(globalIcuData_,
+                            EmbeddedResources::GetFileResourceBuffer(EmbeddedResources::LIBICU_DATA),
+                            EmbeddedResources::GetFileResourceSize(EmbeddedResources::LIBICU_DATA));
+
+      std::string md5;
+      Toolbox::ComputeMD5(md5, globalIcuData_);
+
+      if (md5 != ORTHANC_ICU_DATA_MD5 ||
+          globalIcuData_.empty())
+      {
+        throw OrthancException(ErrorCode_InternalError,
+                               "Cannot decode the ICU common data");
+      }
+
+      // "ICU data is designed to be 16-aligned"
+      // http://userguide.icu-project.org/icudata#TOC-Alignment
+
+      {
+        static const size_t ALIGN = 16;
+
+        UErrorCode status = U_ZERO_ERROR;
+
+        if (reinterpret_cast<intptr_t>(globalIcuData_.c_str()) % ALIGN == 0)
+        {
+          // Data is already properly aligned
+          udata_setCommonData(globalIcuData_.c_str(), &status);  
+        }
+        else
+        {
+          std::string aligned;
+          aligned.resize(globalIcuData_.size() + ALIGN - 1);
+
+          intptr_t offset = reinterpret_cast<intptr_t>(aligned.c_str()) % ALIGN;
+          if (offset != 0)
+          {
+            offset = ALIGN - offset;
+          }
+
+          if (offset + globalIcuData_.size() > aligned.size())
+          {
+            throw OrthancException(ErrorCode_InternalError, "Cannot align on 16-bytes boundary");
+          }
+
+          // Byte-wise copy to the aligned offset (NOTE: "memcpy()" would work as well)
+          const uint8_t* p = reinterpret_cast<uint8_t*>(&globalIcuData_[0]);
+          uint8_t* q = reinterpret_cast<uint8_t*>(&aligned[0]) + offset;
+          for (size_t i = 0; i < globalIcuData_.size(); i++, p++, q++)
+          {
+            *q = *p;
+          }
+        
+          globalIcuData_.swap(aligned);
+
+          const uint8_t* data = reinterpret_cast<const uint8_t*>(globalIcuData_.c_str()) + offset;
+        
+          if (reinterpret_cast<intptr_t>(data) % ALIGN != 0)
+          {
+            throw OrthancException(ErrorCode_InternalError, "Cannot align on 16-bytes boundary");
+          }
+          else
+          {
+            udata_setCommonData(data, &status);  
+          }
+        }
+
+        if (status != U_ZERO_ERROR)
+        {
+          throw OrthancException(ErrorCode_InternalError, "Cannot initialize ICU");
+        }
+      }
+
+      if (Toolbox::DetectEndianness() != Endianness_Little)
+      {
+        // TODO - The data table must be swapped (uint16_t)
+        throw OrthancException(ErrorCode_NotImplemented);
+      }
+
+      // "First-use of ICU from a single thread before the
+      // multi-threaded use of ICU begins", to make sure everything is
+      // properly initialized (should not be mandatory in our
+      // case). We let boost handle calls to "u_init()" and "u_cleanup()".
+      // http://userguide.icu-project.org/design#TOC-ICU-Initialization-and-Termination
+      uloc_getDefault();
+    }
+#endif
+  }
   
   void Toolbox::InitializeGlobalLocale(const char* locale)
   {
+    InitializeIcu();
+
     // Make Orthanc use English, United States locale
     // Linux: use "en_US.UTF-8"
     // Windows: use ""
@@ -1434,7 +1578,16 @@
 
   std::string Toolbox::ToUpperCaseWithAccents(const std::string& source)
   {
-    if (globalLocale_.get() == NULL)
+    bool error = (globalLocale_.get() == NULL);
+
+#if ORTHANC_STATIC_ICU == 1
+    if (globalIcuData_.empty())
+    {
+      error = true;
+    }
+#endif
+    
+    if (error)
     {
       throw OrthancException(ErrorCode_BadSequenceOfCalls,
                              "No global locale was set, call Toolbox::InitializeGlobalLocale()");
--- a/NEWS	Fri Mar 22 14:06:38 2019 +0100
+++ b/NEWS	Wed Mar 27 15:25:18 2019 +0100
@@ -1,6 +1,8 @@
 Pending changes in the mainline
 ===============================
 
+* Size of the Orthanc static binaries is reduced by compressing ICU data
+
 
 Version 1.5.6 (2019-03-01)
 ==========================
--- a/Resources/CMake/LibIcuConfiguration.cmake	Fri Mar 22 14:06:38 2019 +0100
+++ b/Resources/CMake/LibIcuConfiguration.cmake	Wed Mar 27 15:25:18 2019 +0100
@@ -10,31 +10,17 @@
   include(${CMAKE_CURRENT_LIST_DIR}/../ThirdParty/icu/Version.cmake)
   DownloadPackage(${LIBICU_MD5} ${LIBICU_URL} "${LIBICU_SOURCES_DIR}")
 
-  if (MSVC AND
-      CMAKE_SIZEOF_VOID_P EQUAL 8)
-    # In Visual Studio 2015 64bit, we get the following error if using
-    # the plain C version of the ICU data: "icudt58l_dat.c(1638339):
-    # fatal error C1060: compiler is out of heap space" => we use a
-    # precompiled binary generated using MinGW on Linux
-    DownloadCompressedFile(${LIBICU_DATA_WIN64_MD5} ${LIBICU_DATA_WIN64_URL} ${LIBICU_DATA_WIN64})
+  # Use the gzip-compressed data
+  DownloadFile(${LIBICU_DATA_COMPRESSED_MD5} ${LIBICU_DATA_URL})
+  set(LIBICU_RESOURCES
+    LIBICU_DATA  ${CMAKE_SOURCE_DIR}/ThirdPartyDownloads/${LIBICU_DATA}
+    )
 
-    set(LIBICU_LIBRARIES
-      ${CMAKE_BINARY_DIR}/${LIBICU_DATA_WIN64}
-      )
-  else()
-    # Use plain C data library
-    DownloadCompressedFile(${LIBICU_DATA_MD5} ${LIBICU_DATA_URL} ${LIBICU_DATA})
+  set_source_files_properties(
+    ${CMAKE_BINARY_DIR}/${LIBICU_DATA}
+    PROPERTIES COMPILE_DEFINITIONS "char16_t=uint16_t"
+    )
 
-    set_source_files_properties(
-      ${CMAKE_BINARY_DIR}/${LIBICU_DATA}
-      PROPERTIES COMPILE_DEFINITIONS "char16_t=uint16_t"
-      )
-
-    set(LIBICU_SOURCES
-      ${CMAKE_BINARY_DIR}/${LIBICU_DATA}
-      )
-  endif()
-  
   include_directories(BEFORE
     ${LIBICU_SOURCES_DIR}/source/common
     ${LIBICU_SOURCES_DIR}/source/i18n
@@ -50,6 +36,7 @@
 
     #-DUCONFIG_NO_SERVICE=1
     -DU_COMMON_IMPLEMENTATION
+    -DU_STATIC_IMPLEMENTATION
     -DU_ENABLE_DYLOAD=0
     -DU_HAVE_STD_STRING=1
     -DU_I18N_IMPLEMENTATION
@@ -57,6 +44,9 @@
     -DU_STATIC_IMPLEMENTATION=1
     #-DU_CHARSET_IS_UTF8
     -DUNISTR_FROM_STRING_EXPLICIT=
+
+    -DORTHANC_STATIC_ICU=1
+    -DORTHANC_ICU_DATA_MD5="${LIBICU_DATA_UNCOMPRESSED_MD5}"
     )
 
   if (CMAKE_SYSTEM_NAME STREQUAL "Windows")
@@ -83,4 +73,8 @@
   else()
     link_libraries(icuuc icui18n)
   endif()
+
+  add_definitions(
+    -DORTHANC_STATIC_ICU=0
+    )
 endif()
--- a/Resources/ThirdParty/icu/Version.cmake	Fri Mar 22 14:06:38 2019 +0100
+++ b/Resources/ThirdParty/icu/Version.cmake	Wed Mar 27 15:25:18 2019 +0100
@@ -14,24 +14,21 @@
 set(LIBICU_BASE_URL "http://orthanc.osimis.io/ThirdPartyDownloads")
 
 if (USE_LEGACY_LIBICU)
-  # This is the last version of icu that compiles with C++11
-  # support. It can be used for Linux Standard Base and Visual Studio 2008.
+  # This is the latest version of icu that compiles without C++11
+  # support. It is used for Linux Standard Base and Visual Studio 2008.
   set(LIBICU_URL "${LIBICU_BASE_URL}/icu4c-58_2-src.tgz")
   set(LIBICU_MD5 "fac212b32b7ec7ab007a12dff1f3aea1")
   set(LIBICU_DATA_VERSION "icudt58")
-  set(LIBICU_DATA_MD5 "ce2c7791ab637898553c121633155fb6")
-  set(LIBICU_DATA_WIN64_MD5 "8f7edfce3bff7edb0e5714cb66f891cb")
+  set(LIBICU_DATA_COMPRESSED_MD5 "a39b07b38195158c6c3070332cef2173")
+  set(LIBICU_DATA_UNCOMPRESSED_MD5 "54d2593cec5c6a4469373231658153ce")
 else()
   set(LIBICU_URL "${LIBICU_BASE_URL}/icu4c-63_1-src.tgz")
   set(LIBICU_MD5 "9e40f6055294284df958200e308bce50")
   set(LIBICU_DATA_VERSION "icudt63")
-  set(LIBICU_DATA_MD5 "92b5c73a1accd8ecf8c20c89bc6925a9")
-  set(LIBICU_DATA_WIN64_MD5 "edc00315f042c802547d8e4bd95b09f7")
+  set(LIBICU_DATA_COMPRESSED_MD5 "be495c0830de5f377fdfa8301a5faf3d")
+  set(LIBICU_DATA_UNCOMPRESSED_MD5 "99613c3f2ca9426c45dc554ad28cfb79")
 endif()
 
 set(LIBICU_SOURCES_DIR ${CMAKE_BINARY_DIR}/icu)
-set(LIBICU_DATA "${LIBICU_DATA_VERSION}${LIBICU_SUFFIX}_dat.c")
-set(LIBICU_DATA_URL "${LIBICU_BASE_URL}/${LIBICU_DATA}.gz")
-
-set(LIBICU_DATA_WIN64 "${LIBICU_DATA_VERSION}_dat-x86_64-mingw32.lib")
-set(LIBICU_DATA_WIN64_URL "${LIBICU_BASE_URL}/${LIBICU_DATA_WIN64}.gz")
+set(LIBICU_DATA "${LIBICU_DATA_VERSION}${LIBICU_SUFFIX}.dat.gz")
+set(LIBICU_DATA_URL "${LIBICU_BASE_URL}/${LIBICU_DATA}")