comparison Core/Toolbox.cpp @ 3333:2a38e00a0638

Size of the Orthanc static binaries is reduced by compressing ICU data
author Sebastien Jodogne <s.jodogne@gmail.com>
date Wed, 27 Mar 2019 15:25:18 +0100
parents f0c92ecd09c8
children a2862d4484f5
comparison
equal deleted inserted replaced
3332:79178122842c 3333:2a38e00a0638
113 # include <uuid/uuid.h> 113 # include <uuid/uuid.h>
114 #endif 114 #endif
115 } 115 }
116 116
117 117
118 #if defined(ORTHANC_STATIC_ICU)
119 # if (ORTHANC_STATIC_ICU == 1 && ORTHANC_ENABLE_LOCALE == 1)
120 # include <EmbeddedResources.h>
121 # include <unicode/udata.h>
122 # include <unicode/uloc.h>
123 # include "Compression/GzipCompressor.h"
124
125 static std::string globalIcuData_;
126
127 extern "C"
128 {
129 // Placeholder definition of the "icudt58_dat" (resp. "icudt63_dat")
130 // global variable that the autogenerated "icudt58l_dat.c"
131 // (resp. "icudt63l_dat.c") file defines as a huge C array. In
132 // Orthanc, that array is compressed using gzip and attached as a
133 // resource, then uncompressed during the launch of Orthanc by the
134 // static function "InitializeIcu()", which passes it to "udata_setCommonData()".
135 struct
136 {
137 double bogus; // NOTE(review): presumably mirrors the leading field of the autogenerated struct - confirm
138 uint8_t *bytes; // Initialized to NULL below: this placeholder carries no data of its own
139 } U_ICUDATA_ENTRY_POINT = { 0.0, NULL };
140 }
141
142 # endif
143 #endif
144
145
146
147
118 148
119 namespace Orthanc 149 namespace Orthanc
120 { 150 {
121 void Toolbox::LinesIterator::FindEndOfLine() 151 void Toolbox::LinesIterator::FindEndOfLine()
122 { 152 {
528 // http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.12.html#sect_C.12.1.1.2 558 // http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.12.html#sect_C.12.1.1.2
529 std::string Toolbox::ConvertToUtf8(const std::string& source, 559 std::string Toolbox::ConvertToUtf8(const std::string& source,
530 Encoding sourceEncoding, 560 Encoding sourceEncoding,
531 bool hasCodeExtensions) 561 bool hasCodeExtensions)
532 { 562 {
563 #if ORTHANC_STATIC_ICU == 1
564 if (globalIcuData_.empty())
565 {
566 throw OrthancException(ErrorCode_BadSequenceOfCalls,
567 "Call Toolbox::InitializeGlobalLocale()");
568 }
569 #endif
570
533 // The "::skip" flag makes boost skip invalid UTF-8 571 // The "::skip" flag makes boost skip invalid UTF-8
534 // characters. This can occur in badly-encoded DICOM files. 572 // characters. This can occur in badly-encoded DICOM files.
535 573
536 try 574 try
537 { 575 {
579 617
580 #if ORTHANC_ENABLE_LOCALE == 1 618 #if ORTHANC_ENABLE_LOCALE == 1
581 std::string Toolbox::ConvertFromUtf8(const std::string& source, 619 std::string Toolbox::ConvertFromUtf8(const std::string& source,
582 Encoding targetEncoding) 620 Encoding targetEncoding)
583 { 621 {
622 #if ORTHANC_STATIC_ICU == 1
623 if (globalIcuData_.empty())
624 {
625 throw OrthancException(ErrorCode_BadSequenceOfCalls,
626 "Call Toolbox::InitializeGlobalLocale()");
627 }
628 #endif
629
584 // The "::skip" flag makes boost skip invalid UTF-8 630 // The "::skip" flag makes boost skip invalid UTF-8
585 // characters. This can occur in badly-encoded DICOM files. 631 // characters. This can occur in badly-encoded DICOM files.
586 632
587 try 633 try
588 { 634 {
1378 { 1424 {
1379 } 1425 }
1380 1426
1381 return (globalLocale_.get() != NULL); 1427 return (globalLocale_.get() != NULL);
1382 } 1428 }
1429
1430
/**
 * Installs the ICU common data on the first call. This is a no-op
 * unless ORTHANC_STATIC_ICU == 1, and a no-op on subsequent calls
 * once the data has been installed successfully.
 *
 * The gzip-compressed ICU data is read from the embedded resources,
 * uncompressed, checked against ORTHANC_ICU_DATA_MD5, moved to a
 * 16-byte aligned address if needed, and handed over to ICU through
 * "udata_setCommonData()".
 *
 * "globalIcuData_" is only left non-empty if every step succeeded:
 * the other functions of this file use "globalIcuData_.empty()" to
 * detect that the initialization is missing, so a failed attempt
 * must not leave a half-initialized buffer behind.
 *
 * Throws OrthancException(ErrorCode_NotImplemented) on platforms
 * that are not little-endian, and
 * OrthancException(ErrorCode_InternalError) if the data cannot be
 * decoded, aligned, or accepted by ICU.
 **/
static void InitializeIcu()
{
#if ORTHANC_STATIC_ICU == 1
  if (!globalIcuData_.empty())
  {
    // The ICU common data was already installed by a previous call
    return;
  }

  // Check the endianness before doing any work, so that nothing is
  // handed over to ICU on unsupported platforms
  if (Toolbox::DetectEndianness() != Endianness_Little)
  {
    // TODO - The data table must be swapped (uint16_t)
    throw OrthancException(ErrorCode_NotImplemented);
  }

  LOG(INFO) << "Setting up the ICU common data";

  // Decode into a local buffer: The global variable is only
  // modified once the content is known to be valid
  std::string decoded;

  {
    GzipCompressor compressor;
    compressor.Uncompress(decoded,
                          EmbeddedResources::GetFileResourceBuffer(EmbeddedResources::LIBICU_DATA),
                          EmbeddedResources::GetFileResourceSize(EmbeddedResources::LIBICU_DATA));
  }

  std::string md5;
  Toolbox::ComputeMD5(md5, decoded);

  if (decoded.empty() ||
      md5 != ORTHANC_ICU_DATA_MD5)
  {
    throw OrthancException(ErrorCode_InternalError,
                           "Cannot decode the ICU common data");
  }

  // "ICU data is designed to be 16-aligned"
  // http://userguide.icu-project.org/icudata#TOC-Alignment
  static const size_t ALIGN = 16;

  // Number of padding bytes that precede the ICU data in the buffer
  size_t offset = 0;

  if (reinterpret_cast<uintptr_t>(decoded.c_str()) % ALIGN != 0)
  {
    // The buffer is not aligned: Copy it into a slightly larger
    // buffer, starting at the first 16-byte boundary of the latter
    std::string aligned;
    aligned.resize(decoded.size() + ALIGN - 1);

    const size_t misalignment = reinterpret_cast<uintptr_t>(aligned.c_str()) % ALIGN;
    if (misalignment != 0)
    {
      offset = ALIGN - misalignment;
    }

    // "offset" is at most "ALIGN - 1", so the copy always fits in
    // the "ALIGN - 1" extra bytes. A plain byte copy has no
    // alignment requirement on its source or destination.
    decoded.copy(&aligned[offset], decoded.size());
    decoded.swap(aligned);
  }

  // Give ownership of the buffer to the global variable *before*
  // computing the pointer given to ICU, as ICU keeps referring to
  // this memory and as swapping strings might invalidate pointers
  globalIcuData_.swap(decoded);

  const char* data = globalIcuData_.c_str() + offset;

  if (reinterpret_cast<uintptr_t>(data) % ALIGN != 0)
  {
    globalIcuData_.clear();
    throw OrthancException(ErrorCode_InternalError, "Cannot align on 16-bytes boundary");
  }

  // Warnings are deliberately treated as errors: A warning status
  // indicates that ICU did not install the data provided here
  UErrorCode status = U_ZERO_ERROR;
  udata_setCommonData(data, &status);

  if (status != U_ZERO_ERROR)
  {
    globalIcuData_.clear();
    throw OrthancException(ErrorCode_InternalError, "Cannot initialize ICU");
  }

  // "First-use of ICU from a single thread before the
  // multi-threaded use of ICU begins", to make sure everything is
  // properly initialized (should not be mandatory in our
  // case). We let boost handle calls to "u_init()" and "u_cleanup()".
  // http://userguide.icu-project.org/design#TOC-ICU-Initialization-and-Termination
  uloc_getDefault();
#endif
}
1383 1525
1384 void Toolbox::InitializeGlobalLocale(const char* locale) 1526 void Toolbox::InitializeGlobalLocale(const char* locale)
1385 { 1527 {
1528 InitializeIcu();
1529
1386 // Make Orthanc use English, United States locale 1530 // Make Orthanc use English, United States locale
1387 // Linux: use "en_US.UTF-8" 1531 // Linux: use "en_US.UTF-8"
1388 // Windows: use "" 1532 // Windows: use ""
1389 // Wine: use NULL 1533 // Wine: use NULL
1390 1534
1432 } 1576 }
1433 1577
1434 1578
1435 std::string Toolbox::ToUpperCaseWithAccents(const std::string& source) 1579 std::string Toolbox::ToUpperCaseWithAccents(const std::string& source)
1436 { 1580 {
1437 if (globalLocale_.get() == NULL) 1581 bool error = (globalLocale_.get() == NULL);
1582
1583 #if ORTHANC_STATIC_ICU == 1
1584 if (globalIcuData_.empty())
1585 {
1586 error = true;
1587 }
1588 #endif
1589
1590 if (error)
1438 { 1591 {
1439 throw OrthancException(ErrorCode_BadSequenceOfCalls, 1592 throw OrthancException(ErrorCode_BadSequenceOfCalls,
1440 "No global locale was set, call Toolbox::InitializeGlobalLocale()"); 1593 "No global locale was set, call Toolbox::InitializeGlobalLocale()");
1441 } 1594 }
1442 1595