# HG changeset patch # User Sebastien Jodogne # Date 1553696718 -3600 # Node ID 2a38e00a063897643fc03de4dd2dab60dde319ce # Parent 79178122842c02ebededd9c9b8881d5e4a8c3b5c Size of the Orthanc static binaries are reduced by compressing ICU data diff -r 79178122842c -r 2a38e00a0638 CMakeLists.txt --- a/CMakeLists.txt Fri Mar 22 14:06:38 2019 +0100 +++ b/CMakeLists.txt Wed Mar 27 15:25:18 2019 +0100 @@ -193,6 +193,7 @@ ${ORTHANC_EMBEDDED_FILES} ORTHANC_EXPLORER ${CMAKE_CURRENT_SOURCE_DIR}/OrthancExplorer ${DCMTK_DICTIONARIES} + ${LIBICU_RESOURCES} ) else() add_definitions( @@ -201,6 +202,7 @@ ) EmbedResources( ${ORTHANC_EMBEDDED_FILES} + ${LIBICU_RESOURCES} ) endif() diff -r 79178122842c -r 2a38e00a0638 Core/Toolbox.cpp --- a/Core/Toolbox.cpp Fri Mar 22 14:06:38 2019 +0100 +++ b/Core/Toolbox.cpp Wed Mar 27 15:25:18 2019 +0100 @@ -115,6 +115,36 @@ } +#if defined(ORTHANC_STATIC_ICU) +# if (ORTHANC_STATIC_ICU == 1 && ORTHANC_ENABLE_LOCALE == 1) +# include +# include +# include +# include "Compression/GzipCompressor.h" + +static std::string globalIcuData_; + +extern "C" +{ + // This is dummy content for the "icudt58_dat" (resp. "icudt63_dat") + // global variable from the autogenerated "icudt58l_dat.c" + // (resp. "icudt63l_dat.c") file that contains a huge C array. In + // Orthanc, this array is compressed using gzip and attached as a + // resource, then uncompressed during the launch of Orthanc by + // static function "InitializeIcu()". + struct + { + double bogus; + uint8_t *bytes; + } U_ICUDATA_ENTRY_POINT = { 0.0, NULL }; +} + +# endif +#endif + + + + namespace Orthanc { @@ -530,6 +560,14 @@ Encoding sourceEncoding, bool hasCodeExtensions) { +#if ORTHANC_STATIC_ICU == 1 + if (globalIcuData_.empty()) + { + throw OrthancException(ErrorCode_BadSequenceOfCalls, + "Call Toolbox::InitializeGlobalLocale()"); + } +#endif + // The "::skip" flag makes boost skip invalid UTF-8 // characters. This can occur in badly-encoded DICOM files. @@ -581,6 +619,14 @@ std::string Toolbox::ConvertFromUtf8(const std::string& source, Encoding targetEncoding) { +#if ORTHANC_STATIC_ICU == 1 + if (globalIcuData_.empty()) + { + throw OrthancException(ErrorCode_BadSequenceOfCalls, + "Call Toolbox::InitializeGlobalLocale()"); + } +#endif + // The "::skip" flag makes boost skip invalid UTF-8 // characters. This can occur in badly-encoded DICOM files. @@ -1380,9 +1426,107 @@ return (globalLocale_.get() != NULL); } + + + static void InitializeIcu() + { +#if ORTHANC_STATIC_ICU == 1 + if (globalIcuData_.empty()) + { + LOG(INFO) << "Setting up the ICU common data"; + + GzipCompressor compressor; + compressor.Uncompress(globalIcuData_, + EmbeddedResources::GetFileResourceBuffer(EmbeddedResources::LIBICU_DATA), + EmbeddedResources::GetFileResourceSize(EmbeddedResources::LIBICU_DATA)); + + std::string md5; + Toolbox::ComputeMD5(md5, globalIcuData_); + + if (md5 != ORTHANC_ICU_DATA_MD5 || + globalIcuData_.empty()) + { + throw OrthancException(ErrorCode_InternalError, + "Cannot decode the ICU common data"); + } + + // "ICU data is designed to be 16-aligned" + // http://userguide.icu-project.org/icudata#TOC-Alignment + + { + static const size_t ALIGN = 16; + + UErrorCode status = U_ZERO_ERROR; + + if (reinterpret_cast(globalIcuData_.c_str()) % ALIGN == 0) + { + // Data is already properly aligned + udata_setCommonData(globalIcuData_.c_str(), &status); + } + else + { + std::string aligned; + aligned.resize(globalIcuData_.size() + ALIGN - 1); + + intptr_t offset = reinterpret_cast(aligned.c_str()) % ALIGN; + if (offset != 0) + { + offset = ALIGN - offset; + } + + if (offset + globalIcuData_.size() > aligned.size()) + { + throw OrthancException(ErrorCode_InternalError, "Cannot align on 16-bytes boundary"); + } + + // We don't use "memcpy()", as it expects its data to be aligned + const uint8_t* p = reinterpret_cast(&globalIcuData_[0]); + uint8_t* q = reinterpret_cast(&aligned[0]) + offset; + for (size_t i = 0; i < globalIcuData_.size(); i++, p++, q++) + { + *q = *p; + } + + globalIcuData_.swap(aligned); + + const uint8_t* data = reinterpret_cast(globalIcuData_.c_str()) + offset; + + if (reinterpret_cast(data) % ALIGN != 0) + { + throw OrthancException(ErrorCode_InternalError, "Cannot align on 16-bytes boundary"); + } + else + { + udata_setCommonData(data, &status); + } + } + + if (status != U_ZERO_ERROR) + { + throw OrthancException(ErrorCode_InternalError, "Cannot initialize ICU"); + } + } + + if (Toolbox::DetectEndianness() != Endianness_Little) + { + // TODO - The data table must be swapped (uint16_t) + throw OrthancException(ErrorCode_NotImplemented); + } + + // "First-use of ICU from a single thread before the + // multi-threaded use of ICU begins", to make sure everything is + // properly initialized (should not be mandatory in our + // case). We let boost handle calls to "u_init()" and "u_cleanup()". + // http://userguide.icu-project.org/design#TOC-ICU-Initialization-and-Termination + uloc_getDefault(); + } +#endif + } void Toolbox::InitializeGlobalLocale(const char* locale) { + InitializeIcu(); + // Make Orthanc use English, United States locale // Linux: use "en_US.UTF-8" // Windows: use "" @@ -1434,7 +1578,16 @@ std::string Toolbox::ToUpperCaseWithAccents(const std::string& source) { - if (globalLocale_.get() == NULL) + bool error = (globalLocale_.get() == NULL); + +#if ORTHANC_STATIC_ICU == 1 + if (globalIcuData_.empty()) + { + error = true; + } +#endif + + if (error) { throw OrthancException(ErrorCode_BadSequenceOfCalls, "No global locale was set, call Toolbox::InitializeGlobalLocale()"); diff -r 79178122842c -r 2a38e00a0638 NEWS --- a/NEWS Fri Mar 22 14:06:38 2019 +0100 +++ b/NEWS Wed Mar 27 15:25:18 2019 +0100 @@ -1,6 +1,8 @@ Pending changes in the mainline =============================== +* Size of the Orthanc static binaries are reduced by compressing ICU data + Version 1.5.6 (2019-03-01) ========================== diff -r 79178122842c -r 2a38e00a0638 Resources/CMake/LibIcuConfiguration.cmake --- a/Resources/CMake/LibIcuConfiguration.cmake Fri Mar 22 14:06:38 2019 +0100 +++ b/Resources/CMake/LibIcuConfiguration.cmake Wed Mar 27 15:25:18 2019 +0100 @@ -10,31 +10,17 @@ include(${CMAKE_CURRENT_LIST_DIR}/../ThirdParty/icu/Version.cmake) DownloadPackage(${LIBICU_MD5} ${LIBICU_URL} "${LIBICU_SOURCES_DIR}") - if (MSVC AND - CMAKE_SIZEOF_VOID_P EQUAL 8) - # In Visual Studio 2015 64bit, we get the following error if using - # the plain C version of the ICU data: "icudt58l_dat.c(1638339): - # fatal error C1060: compiler is out of heap space" => we use a - # precompiled binary generated using MinGW on Linux - DownloadCompressedFile(${LIBICU_DATA_WIN64_MD5} ${LIBICU_DATA_WIN64_URL} ${LIBICU_DATA_WIN64}) + # Use the gzip-compressed data + DownloadFile(${LIBICU_DATA_COMPRESSED_MD5} ${LIBICU_DATA_URL}) + set(LIBICU_RESOURCES + LIBICU_DATA ${CMAKE_SOURCE_DIR}/ThirdPartyDownloads/${LIBICU_DATA} + ) - set(LIBICU_LIBRARIES - ${CMAKE_BINARY_DIR}/${LIBICU_DATA_WIN64} - ) - else() - # Use plain C data library - DownloadCompressedFile(${LIBICU_DATA_MD5} ${LIBICU_DATA_URL} ${LIBICU_DATA}) + set_source_files_properties( + ${CMAKE_BINARY_DIR}/${LIBICU_DATA} + PROPERTIES COMPILE_DEFINITIONS "char16_t=uint16_t" + ) - set_source_files_properties( - ${CMAKE_BINARY_DIR}/${LIBICU_DATA} - PROPERTIES COMPILE_DEFINITIONS "char16_t=uint16_t" - ) - - set(LIBICU_SOURCES - ${CMAKE_BINARY_DIR}/${LIBICU_DATA} - ) - endif() - include_directories(BEFORE ${LIBICU_SOURCES_DIR}/source/common ${LIBICU_SOURCES_DIR}/source/i18n @@ -50,6 +36,7 @@ #-DUCONFIG_NO_SERVICE=1 -DU_COMMON_IMPLEMENTATION + -DU_STATIC_IMPLEMENTATION -DU_ENABLE_DYLOAD=0 -DU_HAVE_STD_STRING=1 -DU_I18N_IMPLEMENTATION @@ -57,6 +44,9 @@ -DU_STATIC_IMPLEMENTATION=1 #-DU_CHARSET_IS_UTF8 -DUNISTR_FROM_STRING_EXPLICIT= + + -DORTHANC_STATIC_ICU=1 + -DORTHANC_ICU_DATA_MD5="${LIBICU_DATA_UNCOMPRESSED_MD5}" ) if (CMAKE_SYSTEM_NAME STREQUAL "Windows") @@ -83,4 +73,8 @@ else() link_libraries(icuuc icui18n) endif() + + add_definitions( + -DORTHANC_STATIC_ICU=0 + ) endif() diff -r 79178122842c -r 2a38e00a0638 Resources/ThirdParty/icu/Version.cmake --- a/Resources/ThirdParty/icu/Version.cmake Fri Mar 22 14:06:38 2019 +0100 +++ b/Resources/ThirdParty/icu/Version.cmake Wed Mar 27 15:25:18 2019 +0100 @@ -14,24 +14,21 @@ set(LIBICU_BASE_URL "http://orthanc.osimis.io/ThirdPartyDownloads") if (USE_LEGACY_LIBICU) - # This is the last version of icu that compiles with C++11 - # support. It can be used for Linux Standard Base and Visual Studio 2008. + # This is the latest version of icu that compiles without C++11 + # support. It is used for Linux Standard Base and Visual Studio 2008. set(LIBICU_URL "${LIBICU_BASE_URL}/icu4c-58_2-src.tgz") set(LIBICU_MD5 "fac212b32b7ec7ab007a12dff1f3aea1") set(LIBICU_DATA_VERSION "icudt58") - set(LIBICU_DATA_MD5 "ce2c7791ab637898553c121633155fb6") - set(LIBICU_DATA_WIN64_MD5 "8f7edfce3bff7edb0e5714cb66f891cb") + set(LIBICU_DATA_COMPRESSED_MD5 "a39b07b38195158c6c3070332cef2173") + set(LIBICU_DATA_UNCOMPRESSED_MD5 "54d2593cec5c6a4469373231658153ce") else() set(LIBICU_URL "${LIBICU_BASE_URL}/icu4c-63_1-src.tgz") set(LIBICU_MD5 "9e40f6055294284df958200e308bce50") set(LIBICU_DATA_VERSION "icudt63") - set(LIBICU_DATA_MD5 "92b5c73a1accd8ecf8c20c89bc6925a9") - set(LIBICU_DATA_WIN64_MD5 "edc00315f042c802547d8e4bd95b09f7") + set(LIBICU_DATA_COMPRESSED_MD5 "be495c0830de5f377fdfa8301a5faf3d") + set(LIBICU_DATA_UNCOMPRESSED_MD5 "99613c3f2ca9426c45dc554ad28cfb79") endif() set(LIBICU_SOURCES_DIR ${CMAKE_BINARY_DIR}/icu) -set(LIBICU_DATA "${LIBICU_DATA_VERSION}${LIBICU_SUFFIX}_dat.c") -set(LIBICU_DATA_URL "${LIBICU_BASE_URL}/${LIBICU_DATA}.gz") - -set(LIBICU_DATA_WIN64 "${LIBICU_DATA_VERSION}_dat-x86_64-mingw32.lib") -set(LIBICU_DATA_WIN64_URL "${LIBICU_BASE_URL}/${LIBICU_DATA_WIN64}.gz") +set(LIBICU_DATA "${LIBICU_DATA_VERSION}${LIBICU_SUFFIX}.dat.gz") +set(LIBICU_DATA_URL "${LIBICU_BASE_URL}/${LIBICU_DATA}")