comparison OrthancFramework/Sources/Toolbox.cpp @ 4044:d25f4c0fa160 framework

splitting code into OrthancFramework and OrthancServer
author Sebastien Jodogne <s.jodogne@gmail.com>
date Wed, 10 Jun 2020 20:30:34 +0200
parents Core/Toolbox.cpp@058b5ade8acd
children 9214e3a7b0a2
comparison
equal deleted inserted replaced
4043:6c6239aec462 4044:d25f4c0fa160
1 /**
2 * Orthanc - A Lightweight, RESTful DICOM Store
3 * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics
4 * Department, University Hospital of Liege, Belgium
5 * Copyright (C) 2017-2020 Osimis S.A., Belgium
6 *
7 * This program is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation, either version 3 of the
10 * License, or (at your option) any later version.
11 *
12 * In addition, as a special exception, the copyright holders of this
13 * program give permission to link the code of its release with the
14 * OpenSSL project's "OpenSSL" library (or with modified versions of it
15 * that use the same license as the "OpenSSL" library), and distribute
16 * the linked executables. You must obey the GNU General Public License
17 * in all respects for all of the code used other than "OpenSSL". If you
18 * modify file(s) with this exception, you may extend this exception to
19 * your version of the file(s), but you are not obligated to do so. If
20 * you do not wish to do so, delete this exception statement from your
21 * version. If you delete this exception statement from all source files
22 * in the program, then also delete it here.
23 *
24 * This program is distributed in the hope that it will be useful, but
25 * WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 * General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program. If not, see <http://www.gnu.org/licenses/>.
31 **/
32
33
34 #include "PrecompiledHeaders.h"
35 #include "Toolbox.h"
36
37 #include "Compatibility.h"
38 #include "OrthancException.h"
39 #include "Logging.h"
40
41 #include <boost/algorithm/string/case_conv.hpp>
42 #include <boost/algorithm/string/replace.hpp>
43 #include <boost/lexical_cast.hpp>
44 #include <boost/regex.hpp>
45
46 #if BOOST_VERSION >= 106600
47 # include <boost/uuid/detail/sha1.hpp>
48 #else
49 # include <boost/uuid/sha1.hpp>
50 #endif
51
52 #include <string>
53 #include <stdint.h>
54 #include <string.h>
55 #include <algorithm>
56 #include <ctype.h>
57
58
59 #if ORTHANC_ENABLE_MD5 == 1
60 // TODO - Could be replaced by <boost/uuid/detail/md5.hpp> starting
61 // with Boost >= 1.66.0
62 # include "../Resources/ThirdParty/md5/md5.h"
63 #endif
64
65 #if ORTHANC_ENABLE_BASE64 == 1
66 # include "../Resources/ThirdParty/base64/base64.h"
67 #endif
68
69 #if ORTHANC_ENABLE_LOCALE == 1
70 # include <boost/locale.hpp>
71 #endif
72
73 #if ORTHANC_ENABLE_SSL == 1
74 // For OpenSSL initialization and finalization
75 # include <openssl/conf.h>
76 # include <openssl/engine.h>
77 # include <openssl/err.h>
78 # include <openssl/evp.h>
79 # include <openssl/ssl.h>
80 #endif
81
82
83 #if defined(_MSC_VER) && (_MSC_VER < 1800)
84 // Patch for the missing "_strtoll" symbol when compiling with Visual Studio < 2013
// Replacement for "strtoll()", only compiled for Visual Studio < 2013
// (see the enclosing "#if"): The call is forwarded to "_strtoi64()",
// which is declared below with the same parameters.
extern "C"
{
  int64_t _strtoi64(const char *nptr, char **endptr, int base);

  int64_t strtoll(const char *nptr, char **endptr, int base)
  {
    const int64_t parsed = _strtoi64(nptr, endptr, base);
    return parsed;
  }
}
93 #endif
94
95
96 #if defined(_WIN32)
97 # include <windows.h> // For ::Sleep
98 #endif
99
100
101 #if ORTHANC_ENABLE_PUGIXML == 1
102 # include "ChunkedBuffer.h"
103 #endif
104
105
106 // Inclusions for UUID
107 // http://stackoverflow.com/a/1626302
108
109 extern "C"
110 {
111 #if defined(_WIN32)
112 # include <rpc.h>
113 #else
114 # include <uuid/uuid.h>
115 #endif
116 }
117
118
119 #if defined(ORTHANC_STATIC_ICU)
120 # if (ORTHANC_STATIC_ICU == 1 && ORTHANC_ENABLE_LOCALE == 1)
121 # include <OrthancFrameworkResources.h>
122 # include <unicode/udata.h>
123 # include <unicode/uloc.h>
124 # include "Compression/GzipCompressor.h"
125
126 static std::string globalIcuData_;
127
128 extern "C"
129 {
130 // This is dummy content for the "icudt58_dat" (resp. "icudt63_dat")
131 // global variable from the autogenerated "icudt58l_dat.c"
132 // (resp. "icudt63l_dat.c") file that contains a huge C array. In
133 // Orthanc, this array is compressed using gzip and attached as a
134 // resource, then uncompressed during the launch of Orthanc by
135 // static function "InitializeIcu()".
136 struct
137 {
138 double bogus;
139 uint8_t *bytes;
140 } U_ICUDATA_ENTRY_POINT = { 0.0, NULL };
141 }
142
143 # if defined(__LSB_VERSION__)
144 extern "C"
145 {
146 /**
147 * The "tzname" global variable is declared as "extern" but is not
148 * defined in any compilation module, if using Linux Standard Base,
149 * as soon as OpenSSL or cURL is in use on Ubuntu >= 18.04 (glibc >=
150 * 2.27). The variable "__tzname" is always properly declared *and*
151 * defined. The reason is unclear, and is maybe a bug in the gcc 4.8
152 * linker that is used by LSB if facing a weak symbol (as "tzname").
153 * This makes Orthanc crash if the timezone is set to UTC.
154 * https://groups.google.com/d/msg/orthanc-users/0m8sxxwSm1E/2p8du_89CAAJ
155 **/
156 char *tzname[2] = { (char *) "GMT", (char *) "GMT" };
157 }
158 # endif
159
160 # endif
161 #endif
162
163
164
165 #if defined(__unix__) && ORTHANC_SANDBOXED != 1
166 # include "SystemToolbox.h" // Check out "InitializeGlobalLocale()"
167 #endif
168
169
170
171 namespace Orthanc
172 {
173 void Toolbox::LinesIterator::FindEndOfLine()
174 {
175 lineEnd_ = lineStart_;
176
177 while (lineEnd_ < content_.size() &&
178 content_[lineEnd_] != '\n' &&
179 content_[lineEnd_] != '\r')
180 {
181 lineEnd_ += 1;
182 }
183 }
184
185
186 Toolbox::LinesIterator::LinesIterator(const std::string& content) :
187 content_(content),
188 lineStart_(0)
189 {
190 FindEndOfLine();
191 }
192
193
194 bool Toolbox::LinesIterator::GetLine(std::string& target) const
195 {
196 assert(lineStart_ <= content_.size() &&
197 lineEnd_ <= content_.size() &&
198 lineStart_ <= lineEnd_);
199
200 if (lineStart_ == content_.size())
201 {
202 return false;
203 }
204 else
205 {
206 target = content_.substr(lineStart_, lineEnd_ - lineStart_);
207 return true;
208 }
209 }
210
211
212 void Toolbox::LinesIterator::Next()
213 {
214 lineStart_ = lineEnd_;
215
216 if (lineStart_ != content_.size())
217 {
218 assert(content_[lineStart_] == '\r' ||
219 content_[lineStart_] == '\n');
220
221 char second;
222
223 if (content_[lineStart_] == '\r')
224 {
225 second = '\n';
226 }
227 else
228 {
229 second = '\r';
230 }
231
232 lineStart_ += 1;
233
234 if (lineStart_ < content_.size() &&
235 content_[lineStart_] == second)
236 {
237 lineStart_ += 1;
238 }
239
240 FindEndOfLine();
241 }
242 }
243
244
245 void Toolbox::ToUpperCase(std::string& s)
246 {
247 std::transform(s.begin(), s.end(), s.begin(), toupper);
248 }
249
250
251 void Toolbox::ToLowerCase(std::string& s)
252 {
253 std::transform(s.begin(), s.end(), s.begin(), tolower);
254 }
255
256
257 void Toolbox::ToUpperCase(std::string& result,
258 const std::string& source)
259 {
260 result = source;
261 ToUpperCase(result);
262 }
263
264 void Toolbox::ToLowerCase(std::string& result,
265 const std::string& source)
266 {
267 result = source;
268 ToLowerCase(result);
269 }
270
271
272 void Toolbox::SplitUriComponents(UriComponents& components,
273 const std::string& uri)
274 {
275 static const char URI_SEPARATOR = '/';
276
277 components.clear();
278
279 if (uri.size() == 0 ||
280 uri[0] != URI_SEPARATOR)
281 {
282 throw OrthancException(ErrorCode_UriSyntax);
283 }
284
285 // Count the number of slashes in the URI to make an assumption
286 // about the number of components in the URI
287 unsigned int estimatedSize = 0;
288 for (unsigned int i = 0; i < uri.size(); i++)
289 {
290 if (uri[i] == URI_SEPARATOR)
291 estimatedSize++;
292 }
293
294 components.reserve(estimatedSize - 1);
295
296 unsigned int start = 1;
297 unsigned int end = 1;
298 while (end < uri.size())
299 {
300 // This is the loop invariant
301 assert(uri[start - 1] == '/' && (end >= start));
302
303 if (uri[end] == '/')
304 {
305 components.push_back(std::string(&uri[start], end - start));
306 end++;
307 start = end;
308 }
309 else
310 {
311 end++;
312 }
313 }
314
315 if (start < uri.size())
316 {
317 components.push_back(std::string(&uri[start], end - start));
318 }
319
320 for (size_t i = 0; i < components.size(); i++)
321 {
322 if (components[i].size() == 0)
323 {
324 // Empty component, as in: "/coucou//e"
325 throw OrthancException(ErrorCode_UriSyntax);
326 }
327 }
328 }
329
330
331 void Toolbox::TruncateUri(UriComponents& target,
332 const UriComponents& source,
333 size_t fromLevel)
334 {
335 target.clear();
336
337 if (source.size() > fromLevel)
338 {
339 target.resize(source.size() - fromLevel);
340
341 size_t j = 0;
342 for (size_t i = fromLevel; i < source.size(); i++, j++)
343 {
344 target[j] = source[i];
345 }
346
347 assert(j == target.size());
348 }
349 }
350
351
352
353 bool Toolbox::IsChildUri(const UriComponents& baseUri,
354 const UriComponents& testedUri)
355 {
356 if (testedUri.size() < baseUri.size())
357 {
358 return false;
359 }
360
361 for (size_t i = 0; i < baseUri.size(); i++)
362 {
363 if (baseUri[i] != testedUri[i])
364 return false;
365 }
366
367 return true;
368 }
369
370
371 std::string Toolbox::FlattenUri(const UriComponents& components,
372 size_t fromLevel)
373 {
374 if (components.size() <= fromLevel)
375 {
376 return "/";
377 }
378 else
379 {
380 std::string r;
381
382 for (size_t i = fromLevel; i < components.size(); i++)
383 {
384 r += "/" + components[i];
385 }
386
387 return r;
388 }
389 }
390
391
392 #if ORTHANC_ENABLE_MD5 == 1
// Returns the lower-case hexadecimal digit ('0'-'9', 'a'-'f') that
// corresponds to "value", which must be in the range 0..15.
static char GetHexadecimalCharacter(uint8_t value)
{
  assert(value < 16);

  return static_cast<char>(value < 10 ?
                           '0' + value :
                           'a' + (value - 10));
}
406
407
408 void Toolbox::ComputeMD5(std::string& result,
409 const std::string& data)
410 {
411 if (data.size() > 0)
412 {
413 ComputeMD5(result, &data[0], data.size());
414 }
415 else
416 {
417 ComputeMD5(result, NULL, 0);
418 }
419 }
420
421
422 void Toolbox::ComputeMD5(std::string& result,
423 const void* data,
424 size_t size)
425 {
426 md5_state_s state;
427 md5_init(&state);
428
429 if (size > 0)
430 {
431 md5_append(&state,
432 reinterpret_cast<const md5_byte_t*>(data),
433 static_cast<int>(size));
434 }
435
436 md5_byte_t actualHash[16];
437 md5_finish(&state, actualHash);
438
439 result.resize(32);
440 for (unsigned int i = 0; i < 16; i++)
441 {
442 result[2 * i] = GetHexadecimalCharacter(static_cast<uint8_t>(actualHash[i] / 16));
443 result[2 * i + 1] = GetHexadecimalCharacter(static_cast<uint8_t>(actualHash[i] % 16));
444 }
445 }
446 #endif
447
448
449 #if ORTHANC_ENABLE_BASE64 == 1
450 void Toolbox::EncodeBase64(std::string& result,
451 const std::string& data)
452 {
453 result.clear();
454 base64_encode(result, data);
455 }
456
457 void Toolbox::DecodeBase64(std::string& result,
458 const std::string& data)
459 {
460 for (size_t i = 0; i < data.length(); i++)
461 {
462 if (!isalnum(data[i]) &&
463 data[i] != '+' &&
464 data[i] != '/' &&
465 data[i] != '=')
466 {
467 // This is not a valid character for a Base64 string
468 throw OrthancException(ErrorCode_BadFileFormat);
469 }
470 }
471
472 result.clear();
473 base64_decode(result, data);
474 }
475
476
477 bool Toolbox::DecodeDataUriScheme(std::string& mime,
478 std::string& content,
479 const std::string& source)
480 {
481 boost::regex pattern("data:([^;]+);base64,([a-zA-Z0-9=+/]*)",
482 boost::regex::icase /* case insensitive search */);
483
484 boost::cmatch what;
485 if (regex_match(source.c_str(), what, pattern))
486 {
487 mime = what[1];
488 DecodeBase64(content, what[2]);
489 return true;
490 }
491 else
492 {
493 return false;
494 }
495 }
496
497
498 void Toolbox::EncodeDataUriScheme(std::string& result,
499 const std::string& mime,
500 const std::string& content)
501 {
502 result = "data:" + mime + ";base64,";
503 base64_encode(result, content);
504 }
505
506 #endif
507
508
509 #if ORTHANC_ENABLE_LOCALE == 1
510 static const char* GetBoostLocaleEncoding(const Encoding sourceEncoding)
511 {
512 switch (sourceEncoding)
513 {
514 case Encoding_Utf8:
515 return "UTF-8";
516
517 case Encoding_Ascii:
518 return "ASCII";
519
520 case Encoding_Latin1:
521 return "ISO-8859-1";
522
523 case Encoding_Latin2:
524 return "ISO-8859-2";
525
526 case Encoding_Latin3:
527 return "ISO-8859-3";
528
529 case Encoding_Latin4:
530 return "ISO-8859-4";
531
532 case Encoding_Latin5:
533 return "ISO-8859-9";
534
535 case Encoding_Cyrillic:
536 return "ISO-8859-5";
537
538 case Encoding_Windows1251:
539 return "WINDOWS-1251";
540
541 case Encoding_Arabic:
542 return "ISO-8859-6";
543
544 case Encoding_Greek:
545 return "ISO-8859-7";
546
547 case Encoding_Hebrew:
548 return "ISO-8859-8";
549
550 case Encoding_Japanese:
551 return "SHIFT-JIS";
552
553 case Encoding_Chinese:
554 return "GB18030";
555
556 case Encoding_Thai:
557 #if BOOST_LOCALE_WITH_ICU == 1
558 return "tis620.2533";
559 #else
560 return "TIS620.2533-0";
561 #endif
562
563 case Encoding_Korean:
564 return "ISO-IR-149";
565
566 case Encoding_JapaneseKanji:
567 return "JIS";
568
569 case Encoding_SimplifiedChinese:
570 return "GB2312";
571
572 default:
573 throw OrthancException(ErrorCode_NotImplemented);
574 }
575 }
576 #endif
577
578
579 #if ORTHANC_ENABLE_LOCALE == 1
580 // http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.12.html#sect_C.12.1.1.2
581 std::string Toolbox::ConvertToUtf8(const std::string& source,
582 Encoding sourceEncoding,
583 bool hasCodeExtensions)
584 {
585 #if ORTHANC_STATIC_ICU == 1
586 if (globalIcuData_.empty())
587 {
588 throw OrthancException(ErrorCode_BadSequenceOfCalls,
589 "Call Toolbox::InitializeGlobalLocale()");
590 }
591 #endif
592
593 // The "::skip" flag makes boost skip invalid UTF-8
594 // characters. This can occur in badly-encoded DICOM files.
595
596 try
597 {
598 if (sourceEncoding == Encoding_Ascii)
599 {
600 return ConvertToAscii(source);
601 }
602 else
603 {
604 std::string s;
605
606 if (sourceEncoding == Encoding_Utf8)
607 {
608 // Already in UTF-8: No conversion is required, but we ensure
609 // the output is correctly encoded
610 s = boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip);
611 }
612 else
613 {
614 const char* encoding = GetBoostLocaleEncoding(sourceEncoding);
615 s = boost::locale::conv::to_utf<char>(source, encoding, boost::locale::conv::skip);
616 }
617
618 if (hasCodeExtensions)
619 {
620 std::string t;
621 RemoveIso2022EscapeSequences(t, s);
622 return t;
623 }
624 else
625 {
626 return s;
627 }
628 }
629 }
630 catch (std::runtime_error& e)
631 {
632 // Bad input string or bad encoding
633 LOG(INFO) << e.what();
634 return ConvertToAscii(source);
635 }
636 }
637 #endif
638
639
640 #if ORTHANC_ENABLE_LOCALE == 1
641 std::string Toolbox::ConvertFromUtf8(const std::string& source,
642 Encoding targetEncoding)
643 {
644 #if ORTHANC_STATIC_ICU == 1
645 if (globalIcuData_.empty())
646 {
647 throw OrthancException(ErrorCode_BadSequenceOfCalls,
648 "Call Toolbox::InitializeGlobalLocale()");
649 }
650 #endif
651
652 // The "::skip" flag makes boost skip invalid UTF-8
653 // characters. This can occur in badly-encoded DICOM files.
654
655 try
656 {
657 if (targetEncoding == Encoding_Utf8)
658 {
659 // Already in UTF-8: No conversion is required.
660 return boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip);
661 }
662 else if (targetEncoding == Encoding_Ascii)
663 {
664 return ConvertToAscii(source);
665 }
666 else
667 {
668 const char* encoding = GetBoostLocaleEncoding(targetEncoding);
669 return boost::locale::conv::from_utf<char>(source, encoding, boost::locale::conv::skip);
670 }
671 }
672 catch (std::runtime_error&)
673 {
674 // Bad input string or bad encoding
675 return ConvertToAscii(source);
676 }
677 }
678 #endif
679
680
// Tells whether "c" is accepted as an ASCII character: The value
// must be in the range 1..127 and, apart from '\n', must not be a
// control character according to "iscntrl()".
static bool IsAsciiCharacter(uint8_t c)
{
  if (c == 0 ||
      c > 127)
  {
    return false;
  }

  return (c == '\n' || !iscntrl(c));
}
687
688
689 bool Toolbox::IsAsciiString(const void* data,
690 size_t size)
691 {
692 const uint8_t* p = reinterpret_cast<const uint8_t*>(data);
693
694 for (size_t i = 0; i < size; i++, p++)
695 {
696 if (!IsAsciiCharacter(*p))
697 {
698 return false;
699 }
700 }
701
702 return true;
703 }
704
705
706 bool Toolbox::IsAsciiString(const std::string& s)
707 {
708 return IsAsciiString(s.c_str(), s.size());
709 }
710
711
712 std::string Toolbox::ConvertToAscii(const std::string& source)
713 {
714 std::string result;
715
716 result.reserve(source.size() + 1);
717 for (size_t i = 0; i < source.size(); i++)
718 {
719 if (IsAsciiCharacter(source[i]))
720 {
721 result.push_back(source[i]);
722 }
723 }
724
725 return result;
726 }
727
728
729 void Toolbox::ComputeSHA1(std::string& result,
730 const void* data,
731 size_t size)
732 {
733 boost::uuids::detail::sha1 sha1;
734
735 if (size > 0)
736 {
737 sha1.process_bytes(data, size);
738 }
739
740 unsigned int digest[5];
741
742 // Sanity check for the memory layout: A SHA-1 digest is 160 bits wide
743 assert(sizeof(unsigned int) == 4 && sizeof(digest) == (160 / 8));
744
745 sha1.get_digest(digest);
746
747 result.resize(8 * 5 + 4);
748 sprintf(&result[0], "%08x-%08x-%08x-%08x-%08x",
749 digest[0],
750 digest[1],
751 digest[2],
752 digest[3],
753 digest[4]);
754 }
755
756 void Toolbox::ComputeSHA1(std::string& result,
757 const std::string& data)
758 {
759 if (data.size() > 0)
760 {
761 ComputeSHA1(result, data.c_str(), data.size());
762 }
763 else
764 {
765 ComputeSHA1(result, NULL, 0);
766 }
767 }
768
769
770 bool Toolbox::IsSHA1(const void* str,
771 size_t size)
772 {
773 if (size == 0)
774 {
775 return false;
776 }
777
778 const char* start = reinterpret_cast<const char*>(str);
779 const char* end = start + size;
780
781 // Trim the beginning of the string
782 while (start < end)
783 {
784 if (*start == '\0' ||
785 isspace(*start))
786 {
787 start++;
788 }
789 else
790 {
791 break;
792 }
793 }
794
795 // Trim the trailing of the string
796 while (start < end)
797 {
798 if (*(end - 1) == '\0' ||
799 isspace(*(end - 1)))
800 {
801 end--;
802 }
803 else
804 {
805 break;
806 }
807 }
808
809 if (end - start != 44)
810 {
811 return false;
812 }
813
814 for (unsigned int i = 0; i < 44; i++)
815 {
816 if (i == 8 ||
817 i == 17 ||
818 i == 26 ||
819 i == 35)
820 {
821 if (start[i] != '-')
822 return false;
823 }
824 else
825 {
826 if (!isalnum(start[i]))
827 return false;
828 }
829 }
830
831 return true;
832 }
833
834
835 bool Toolbox::IsSHA1(const std::string& s)
836 {
837 if (s.size() == 0)
838 {
839 return false;
840 }
841 else
842 {
843 return IsSHA1(s.c_str(), s.size());
844 }
845 }
846
847
848 std::string Toolbox::StripSpaces(const std::string& source)
849 {
850 size_t first = 0;
851
852 while (first < source.length() &&
853 isspace(source[first]))
854 {
855 first++;
856 }
857
858 if (first == source.length())
859 {
860 // String containing only spaces
861 return "";
862 }
863
864 size_t last = source.length();
865 while (last > first &&
866 isspace(source[last - 1]))
867 {
868 last--;
869 }
870
871 assert(first <= last);
872 return source.substr(first, last - first);
873 }
874
875
// Returns the numeric value of the hexadecimal digit "c". The digits
// '0'-'9' and the letters 'a'-'f' are handled explicitly: Any other
// character is processed as if it were an upper-case letter, without
// any validation.
static char Hex2Dec(char c)
{
  if (c >= '0' && c <= '9')
  {
    return c - '0';
  }

  if (c >= 'a' && c <= 'f')
  {
    return c - 'a' + 10;
  }

  return c - 'A' + 10;
}
881
882 void Toolbox::UrlDecode(std::string& s)
883 {
884 // http://en.wikipedia.org/wiki/Percent-encoding
885 // http://www.w3schools.com/tags/ref_urlencode.asp
886 // http://stackoverflow.com/questions/154536/encode-decode-urls-in-c
887
888 if (s.size() == 0)
889 {
890 return;
891 }
892
893 size_t source = 0;
894 size_t target = 0;
895
896 while (source < s.size())
897 {
898 if (s[source] == '%' &&
899 source + 2 < s.size() &&
900 isalnum(s[source + 1]) &&
901 isalnum(s[source + 2]))
902 {
903 s[target] = (Hex2Dec(s[source + 1]) << 4) | Hex2Dec(s[source + 2]);
904 source += 3;
905 target += 1;
906 }
907 else
908 {
909 if (s[source] == '+')
910 s[target] = ' ';
911 else
912 s[target] = s[source];
913
914 source++;
915 target++;
916 }
917 }
918
919 s.resize(target);
920 }
921
922
923 Endianness Toolbox::DetectEndianness()
924 {
925 // http://sourceforge.net/p/predef/wiki/Endianness/
926
927 uint32_t bufferView;
928
929 uint8_t* buffer = reinterpret_cast<uint8_t*>(&bufferView);
930
931 buffer[0] = 0x00;
932 buffer[1] = 0x01;
933 buffer[2] = 0x02;
934 buffer[3] = 0x03;
935
936 switch (bufferView)
937 {
938 case 0x00010203:
939 return Endianness_Big;
940
941 case 0x03020100:
942 return Endianness_Little;
943
944 default:
945 throw OrthancException(ErrorCode_NotImplemented);
946 }
947 }
948
949 std::string Toolbox::WildcardToRegularExpression(const std::string& source)
950 {
951 // TODO - Speed up this with a regular expression
952
953 std::string result = source;
954
955 // Escape all special characters
956 boost::replace_all(result, "\\", "\\\\");
957 boost::replace_all(result, "^", "\\^");
958 boost::replace_all(result, ".", "\\.");
959 boost::replace_all(result, "$", "\\$");
960 boost::replace_all(result, "|", "\\|");
961 boost::replace_all(result, "(", "\\(");
962 boost::replace_all(result, ")", "\\)");
963 boost::replace_all(result, "[", "\\[");
964 boost::replace_all(result, "]", "\\]");
965 boost::replace_all(result, "+", "\\+");
966 boost::replace_all(result, "/", "\\/");
967 boost::replace_all(result, "{", "\\{");
968 boost::replace_all(result, "}", "\\}");
969
970 // Convert wildcards '*' and '?' to their regex equivalents
971 boost::replace_all(result, "?", ".");
972 boost::replace_all(result, "*", ".*");
973
974 return result;
975 }
976
977
978 void Toolbox::TokenizeString(std::vector<std::string>& result,
979 const std::string& value,
980 char separator)
981 {
982 size_t countSeparators = 0;
983
984 for (size_t i = 0; i < value.size(); i++)
985 {
986 if (value[i] == separator)
987 {
988 countSeparators++;
989 }
990 }
991
992 result.clear();
993 result.reserve(countSeparators + 1);
994
995 std::string currentItem;
996
997 for (size_t i = 0; i < value.size(); i++)
998 {
999 if (value[i] == separator)
1000 {
1001 result.push_back(currentItem);
1002 currentItem.clear();
1003 }
1004 else
1005 {
1006 currentItem.push_back(value[i]);
1007 }
1008 }
1009
1010 result.push_back(currentItem);
1011 }
1012
1013
1014 #if ORTHANC_ENABLE_PUGIXML == 1
1015 class ChunkedBufferWriter : public pugi::xml_writer
1016 {
1017 private:
1018 ChunkedBuffer buffer_;
1019
1020 public:
1021 virtual void write(const void *data, size_t size)
1022 {
1023 if (size > 0)
1024 {
1025 buffer_.AddChunk(reinterpret_cast<const char*>(data), size);
1026 }
1027 }
1028
1029 void Flatten(std::string& s)
1030 {
1031 buffer_.Flatten(s);
1032 }
1033 };
1034
1035
1036 static void JsonToXmlInternal(pugi::xml_node& target,
1037 const Json::Value& source,
1038 const std::string& arrayElement)
1039 {
1040 // http://jsoncpp.sourceforge.net/value_8h_source.html#l00030
1041
1042 switch (source.type())
1043 {
1044 case Json::nullValue:
1045 {
1046 target.append_child(pugi::node_pcdata).set_value("null");
1047 break;
1048 }
1049
1050 case Json::intValue:
1051 {
1052 std::string s = boost::lexical_cast<std::string>(source.asInt());
1053 target.append_child(pugi::node_pcdata).set_value(s.c_str());
1054 break;
1055 }
1056
1057 case Json::uintValue:
1058 {
1059 std::string s = boost::lexical_cast<std::string>(source.asUInt());
1060 target.append_child(pugi::node_pcdata).set_value(s.c_str());
1061 break;
1062 }
1063
1064 case Json::realValue:
1065 {
1066 std::string s = boost::lexical_cast<std::string>(source.asFloat());
1067 target.append_child(pugi::node_pcdata).set_value(s.c_str());
1068 break;
1069 }
1070
1071 case Json::stringValue:
1072 {
1073 target.append_child(pugi::node_pcdata).set_value(source.asString().c_str());
1074 break;
1075 }
1076
1077 case Json::booleanValue:
1078 {
1079 target.append_child(pugi::node_pcdata).set_value(source.asBool() ? "true" : "false");
1080 break;
1081 }
1082
1083 case Json::arrayValue:
1084 {
1085 for (Json::Value::ArrayIndex i = 0; i < source.size(); i++)
1086 {
1087 pugi::xml_node node = target.append_child();
1088 node.set_name(arrayElement.c_str());
1089 JsonToXmlInternal(node, source[i], arrayElement);
1090 }
1091 break;
1092 }
1093
1094 case Json::objectValue:
1095 {
1096 Json::Value::Members members = source.getMemberNames();
1097
1098 for (size_t i = 0; i < members.size(); i++)
1099 {
1100 pugi::xml_node node = target.append_child();
1101 node.set_name(members[i].c_str());
1102 JsonToXmlInternal(node, source[members[i]], arrayElement);
1103 }
1104
1105 break;
1106 }
1107
1108 default:
1109 throw OrthancException(ErrorCode_NotImplemented);
1110 }
1111 }
1112
1113
1114 void Toolbox::JsonToXml(std::string& target,
1115 const Json::Value& source,
1116 const std::string& rootElement,
1117 const std::string& arrayElement)
1118 {
1119 pugi::xml_document doc;
1120
1121 pugi::xml_node n = doc.append_child(rootElement.c_str());
1122 JsonToXmlInternal(n, source, arrayElement);
1123
1124 pugi::xml_node decl = doc.prepend_child(pugi::node_declaration);
1125 decl.append_attribute("version").set_value("1.0");
1126 decl.append_attribute("encoding").set_value("utf-8");
1127
1128 XmlToString(target, doc);
1129 }
1130
1131 void Toolbox::XmlToString(std::string& target,
1132 const pugi::xml_document& source)
1133 {
1134 ChunkedBufferWriter writer;
1135 source.save(writer, " ", pugi::format_default, pugi::encoding_utf8);
1136 writer.Flatten(target);
1137 }
1138 #endif
1139
1140
1141
1142 bool Toolbox::IsInteger(const std::string& str)
1143 {
1144 std::string s = StripSpaces(str);
1145
1146 if (s.size() == 0)
1147 {
1148 return false;
1149 }
1150
1151 size_t pos = 0;
1152 if (s[0] == '-')
1153 {
1154 if (s.size() == 1)
1155 {
1156 return false;
1157 }
1158
1159 pos = 1;
1160 }
1161
1162 while (pos < s.size())
1163 {
1164 if (!isdigit(s[pos]))
1165 {
1166 return false;
1167 }
1168
1169 pos++;
1170 }
1171
1172 return true;
1173 }
1174
1175
1176 void Toolbox::CopyJsonWithoutComments(Json::Value& target,
1177 const Json::Value& source)
1178 {
1179 switch (source.type())
1180 {
1181 case Json::nullValue:
1182 target = Json::nullValue;
1183 break;
1184
1185 case Json::intValue:
1186 target = source.asInt64();
1187 break;
1188
1189 case Json::uintValue:
1190 target = source.asUInt64();
1191 break;
1192
1193 case Json::realValue:
1194 target = source.asDouble();
1195 break;
1196
1197 case Json::stringValue:
1198 target = source.asString();
1199 break;
1200
1201 case Json::booleanValue:
1202 target = source.asBool();
1203 break;
1204
1205 case Json::arrayValue:
1206 {
1207 target = Json::arrayValue;
1208 for (Json::Value::ArrayIndex i = 0; i < source.size(); i++)
1209 {
1210 Json::Value& item = target.append(Json::nullValue);
1211 CopyJsonWithoutComments(item, source[i]);
1212 }
1213
1214 break;
1215 }
1216
1217 case Json::objectValue:
1218 {
1219 target = Json::objectValue;
1220 Json::Value::Members members = source.getMemberNames();
1221 for (Json::Value::ArrayIndex i = 0; i < members.size(); i++)
1222 {
1223 const std::string item = members[i];
1224 CopyJsonWithoutComments(target[item], source[item]);
1225 }
1226
1227 break;
1228 }
1229
1230 default:
1231 break;
1232 }
1233 }
1234
1235
1236 bool Toolbox::StartsWith(const std::string& str,
1237 const std::string& prefix)
1238 {
1239 if (str.size() < prefix.size())
1240 {
1241 return false;
1242 }
1243 else
1244 {
1245 return str.compare(0, prefix.size(), prefix) == 0;
1246 }
1247 }
1248
1249
// Tells whether "c" is an unreserved character with respect to the
// percent-encoding of URIs, i.e. an unaccented Latin letter, a
// decimal digit, or one of '-', '_', '.' and '~'.
// https://en.wikipedia.org/wiki/Percent-encoding#Percent-encoding%5Fin%5Fa%5FURI
static bool IsUnreservedCharacter(char c)
{
  if ((c >= 'A' && c <= 'Z') ||
      (c >= 'a' && c <= 'z') ||
      (c >= '0' && c <= '9'))
  {
    return true;
  }

  switch (c)
  {
    case '-':
    case '_':
    case '.':
    case '~':
      return true;

    default:
      return false;
  }
}
1264
1265 void Toolbox::UriEncode(std::string& target,
1266 const std::string& source)
1267 {
1268 // Estimate the length of the percent-encoded URI
1269 size_t length = 0;
1270
1271 for (size_t i = 0; i < source.size(); i++)
1272 {
1273 if (IsUnreservedCharacter(source[i]))
1274 {
1275 length += 1;
1276 }
1277 else
1278 {
1279 // This character must be percent-encoded
1280 length += 3;
1281 }
1282 }
1283
1284 target.clear();
1285 target.reserve(length);
1286
1287 for (size_t i = 0; i < source.size(); i++)
1288 {
1289 if (IsUnreservedCharacter(source[i]))
1290 {
1291 target.push_back(source[i]);
1292 }
1293 else
1294 {
1295 // This character must be percent-encoded
1296 uint8_t byte = static_cast<uint8_t>(source[i]);
1297 uint8_t a = byte >> 4;
1298 uint8_t b = byte & 0x0f;
1299
1300 target.push_back('%');
1301 target.push_back(a < 10 ? a + '0' : a - 10 + 'A');
1302 target.push_back(b < 10 ? b + '0' : b - 10 + 'A');
1303 }
1304 }
1305 }
1306
1307
1308 static bool HasField(const Json::Value& json,
1309 const std::string& key,
1310 Json::ValueType expectedType)
1311 {
1312 if (json.type() != Json::objectValue ||
1313 !json.isMember(key))
1314 {
1315 return false;
1316 }
1317 else if (json[key].type() == expectedType)
1318 {
1319 return true;
1320 }
1321 else
1322 {
1323 throw OrthancException(ErrorCode_BadParameterType);
1324 }
1325 }
1326
1327
1328 std::string Toolbox::GetJsonStringField(const Json::Value& json,
1329 const std::string& key,
1330 const std::string& defaultValue)
1331 {
1332 if (HasField(json, key, Json::stringValue))
1333 {
1334 return json[key].asString();
1335 }
1336 else
1337 {
1338 return defaultValue;
1339 }
1340 }
1341
1342
1343 bool Toolbox::GetJsonBooleanField(const ::Json::Value& json,
1344 const std::string& key,
1345 bool defaultValue)
1346 {
1347 if (HasField(json, key, Json::booleanValue))
1348 {
1349 return json[key].asBool();
1350 }
1351 else
1352 {
1353 return defaultValue;
1354 }
1355 }
1356
1357
1358 int Toolbox::GetJsonIntegerField(const ::Json::Value& json,
1359 const std::string& key,
1360 int defaultValue)
1361 {
1362 if (HasField(json, key, Json::intValue))
1363 {
1364 return json[key].asInt();
1365 }
1366 else
1367 {
1368 return defaultValue;
1369 }
1370 }
1371
1372
1373 unsigned int Toolbox::GetJsonUnsignedIntegerField(const ::Json::Value& json,
1374 const std::string& key,
1375 unsigned int defaultValue)
1376 {
1377 int v = GetJsonIntegerField(json, key, defaultValue);
1378
1379 if (v < 0)
1380 {
1381 throw OrthancException(ErrorCode_ParameterOutOfRange);
1382 }
1383 else
1384 {
1385 return static_cast<unsigned int>(v);
1386 }
1387 }
1388
1389
1390 bool Toolbox::IsUuid(const std::string& str)
1391 {
1392 if (str.size() != 36)
1393 {
1394 return false;
1395 }
1396
1397 for (size_t i = 0; i < str.length(); i++)
1398 {
1399 if (i == 8 || i == 13 || i == 18 || i == 23)
1400 {
1401 if (str[i] != '-')
1402 return false;
1403 }
1404 else
1405 {
1406 if (!isalnum(str[i]))
1407 return false;
1408 }
1409 }
1410
1411 return true;
1412 }
1413
1414
1415 bool Toolbox::StartsWithUuid(const std::string& str)
1416 {
1417 if (str.size() < 36)
1418 {
1419 return false;
1420 }
1421
1422 if (str.size() == 36)
1423 {
1424 return IsUuid(str);
1425 }
1426
1427 assert(str.size() > 36);
1428 if (!isspace(str[36]))
1429 {
1430 return false;
1431 }
1432
1433 return IsUuid(str.substr(0, 36));
1434 }
1435
1436
1437 #if ORTHANC_ENABLE_LOCALE == 1
1438 static std::unique_ptr<std::locale> globalLocale_;
1439
1440 static bool SetGlobalLocale(const char* locale)
1441 {
1442 try
1443 {
1444 if (locale == NULL)
1445 {
1446 LOG(WARNING) << "Falling back to system-wide default locale";
1447 globalLocale_.reset(new std::locale());
1448 }
1449 else
1450 {
1451 LOG(INFO) << "Using locale: \"" << locale << "\" for case-insensitive comparison of strings";
1452 globalLocale_.reset(new std::locale(locale));
1453 }
1454 }
1455 catch (std::runtime_error& e)
1456 {
1457 LOG(ERROR) << "Cannot set globale locale to "
1458 << (locale ? std::string(locale) : "(null)")
1459 << ": " << e.what();
1460 globalLocale_.reset(NULL);
1461 }
1462
1463 return (globalLocale_.get() != NULL);
1464 }
1465
1466
/**
 * If ICU is statically linked (ORTHANC_STATIC_ICU == 1), uncompresses
 * the embedded ICU common data into "globalIcuData_", checks its MD5,
 * and registers it with ICU at a 16-byte-aligned address. This is
 * only done while "globalIcuData_" is empty. If ICU is not statically
 * linked, this function does nothing.
 **/
static void InitializeIcu()
{
#if ORTHANC_STATIC_ICU == 1
  if (!globalIcuData_.empty())
  {
    // The ICU common data has already been loaded
    return;
  }

  LOG(INFO) << "Setting up the ICU common data";

  GzipCompressor compressor;
  compressor.Uncompress(globalIcuData_,
                        FrameworkResources::GetFileResourceBuffer(FrameworkResources::LIBICU_DATA),
                        FrameworkResources::GetFileResourceSize(FrameworkResources::LIBICU_DATA));

  std::string md5;
  Toolbox::ComputeMD5(md5, globalIcuData_);

  if (globalIcuData_.empty() ||
      md5 != ORTHANC_ICU_DATA_MD5)
  {
    throw OrthancException(ErrorCode_InternalError,
                           "Cannot decode the ICU common data");
  }

  // "ICU data is designed to be 16-aligned"
  // http://userguide.icu-project.org/icudata#TOC-Alignment
  static const size_t ALIGN = 16;

  UErrorCode status = U_ZERO_ERROR;

  const bool isAligned = (reinterpret_cast<intptr_t>(globalIcuData_.c_str()) % ALIGN == 0);

  if (isAligned)
  {
    // The uncompressed buffer can be handed to ICU as such
    udata_setCommonData(globalIcuData_.c_str(), &status);
  }
  else
  {
    // Allocate a larger buffer, in which a 16-byte-aligned address
    // followed by enough room for the data is looked for
    std::string padded;
    padded.resize(globalIcuData_.size() + ALIGN - 1);

    // Number of bytes to skip at the beginning of "padded"
    intptr_t shift = reinterpret_cast<intptr_t>(padded.c_str()) % ALIGN;
    if (shift != 0)
    {
      shift = ALIGN - shift;
    }

    if (shift + globalIcuData_.size() > padded.size())
    {
      throw OrthancException(ErrorCode_InternalError, "Cannot align on 16-bytes boundary");
    }

    // Copy the data byte by byte, starting at the aligned position
    for (size_t i = 0; i < globalIcuData_.size(); i++)
    {
      padded[shift + i] = globalIcuData_[i];
    }

    // The global variable now owns the padded buffer
    globalIcuData_.swap(padded);

    const uint8_t* start = reinterpret_cast<const uint8_t*>(globalIcuData_.c_str()) + shift;

    // Sanity check after the swap
    if (reinterpret_cast<intptr_t>(start) % ALIGN != 0)
    {
      throw OrthancException(ErrorCode_InternalError, "Cannot align on 16-bytes boundary");
    }

    udata_setCommonData(start, &status);
  }

  if (status != U_ZERO_ERROR)
  {
    throw OrthancException(ErrorCode_InternalError, "Cannot initialize ICU");
  }

  if (Toolbox::DetectEndianness() != Endianness_Little)
  {
    // TODO - The data table must be swapped (uint16_t)
    throw OrthancException(ErrorCode_NotImplemented);
  }

  // "First-use of ICU from a single thread before the
  // multi-threaded use of ICU begins", to make sure everything is
  // properly initialized (should not be mandatory in our
  // case). We let boost handle calls to "u_init()" and "u_cleanup()".
  // http://userguide.icu-project.org/design#TOC-ICU-Initialization-and-Termination
  uloc_getDefault();
#endif
}
1561
1562 void Toolbox::InitializeGlobalLocale(const char* locale)
1563 {
1564 InitializeIcu();
1565
1566 #if defined(__unix__) && ORTHANC_SANDBOXED != 1
1567 static const char* LOCALTIME = "/etc/localtime";
1568
1569 if (!SystemToolbox::IsExistingFile(LOCALTIME))
1570 {
1571 // Check out file
1572 // "boost_1_69_0/libs/locale/src/icu/time_zone.cpp": Direct
1573 // access is made to this file if ICU is not used. Crash arises
1574 // in Boost if the file is a symbolic link to a non-existing
1575 // file (such as in Ubuntu 16.04 base Docker image).
1576 throw OrthancException(
1577 ErrorCode_InternalError,
1578 "On UNIX-like systems, the file " + std::string(LOCALTIME) +
1579 " must be present on the filesystem (install \"tzdata\" package on Debian)");
1580 }
1581 #endif
1582
1583 // Make Orthanc use English, United States locale
1584 // Linux: use "en_US.UTF-8"
1585 // Windows: use ""
1586 // Wine: use NULL
1587
1588 #if defined(__MINGW32__)
1589 // Visibly, there is no support of locales in MinGW yet
1590 // http://mingw.5.n7.nabble.com/How-to-use-std-locale-global-with-MinGW-correct-td33048.html
1591 static const char* DEFAULT_LOCALE = NULL;
1592 #elif defined(_WIN32)
1593 // For Windows: use default locale (using "en_US" does not work)
1594 static const char* DEFAULT_LOCALE = "";
1595 #else
1596 // For Linux & cie
1597 static const char* DEFAULT_LOCALE = "en_US.UTF-8";
1598 #endif
1599
1600 bool ok;
1601
1602 if (locale == NULL)
1603 {
1604 ok = SetGlobalLocale(DEFAULT_LOCALE);
1605
1606 #if defined(__MINGW32__)
1607 LOG(WARNING) << "This is a MinGW build, case-insensitive comparison of "
1608 << "strings with accents will not work outside of Wine";
1609 #endif
1610 }
1611 else
1612 {
1613 ok = SetGlobalLocale(locale);
1614 }
1615
1616 if (!ok &&
1617 !SetGlobalLocale(NULL))
1618 {
1619 throw OrthancException(ErrorCode_InternalError,
1620 "Cannot initialize global locale");
1621 }
1622
1623 }
1624
1625
1626 void Toolbox::FinalizeGlobalLocale()
1627 {
1628 globalLocale_.reset();
1629 }
1630
1631
1632 std::string Toolbox::ToUpperCaseWithAccents(const std::string& source)
1633 {
1634 bool error = (globalLocale_.get() == NULL);
1635
1636 #if ORTHANC_STATIC_ICU == 1
1637 if (globalIcuData_.empty())
1638 {
1639 error = true;
1640 }
1641 #endif
1642
1643 if (error)
1644 {
1645 throw OrthancException(ErrorCode_BadSequenceOfCalls,
1646 "No global locale was set, call Toolbox::InitializeGlobalLocale()");
1647 }
1648
1649 /**
1650 * A few notes about locales:
1651 *
1652 * (1) We don't use "case folding":
1653 * http://www.boost.org/doc/libs/1_64_0/libs/locale/doc/html/conversions.html
1654 *
1655 * Characters are made uppercase one by one. This is because, in
1656 * static builds, we are using iconv, which is visibly not
1657 * supported correctly (TODO: Understand why). Case folding seems
1658 * to be working correctly if using the default backend under
1659 * Linux (ICU or POSIX?). If one wishes to use case folding, one
1660 * would use:
1661 *
1662 * boost::locale::generator gen;
1663 * std::locale::global(gen(DEFAULT_LOCALE));
1664 * return boost::locale::to_upper(source);
1665 *
1666 * (2) The function "boost::algorithm::to_upper_copy" does not
1667 * make use of the "std::locale::global()". We therefore create a
1668 * global variable "globalLocale_".
1669 *
1670 * (3) The variant of "boost::algorithm::to_upper_copy()" that
1671 * uses std::string does not work properly. We need to apply it
1672 * one wide strings (std::wstring). This explains the two calls to
1673 * "utf_to_utf" in order to convert to/from std::wstring.
1674 **/
1675
1676 std::wstring w = boost::locale::conv::utf_to_utf<wchar_t>(source, boost::locale::conv::skip);
1677 w = boost::algorithm::to_upper_copy<std::wstring>(w, *globalLocale_);
1678 return boost::locale::conv::utf_to_utf<char>(w, boost::locale::conv::skip);
1679 }
1680 #endif
1681
1682
1683
1684 #if ORTHANC_ENABLE_SSL == 0
1685 /**
1686 * OpenSSL is disabled
1687 **/
1688 void Toolbox::InitializeOpenSsl()
1689 {
1690 }
1691
1692 void Toolbox::FinalizeOpenSsl()
1693 {
1694 }
1695
1696
1697 #elif (ORTHANC_ENABLE_SSL == 1 && \
1698 OPENSSL_VERSION_NUMBER < 0x10100000L)
1699 /**
1700 * OpenSSL < 1.1.0
1701 **/
1702 void Toolbox::InitializeOpenSsl()
1703 {
1704 // https://wiki.openssl.org/index.php/Library_Initialization
1705 SSL_library_init();
1706 SSL_load_error_strings();
1707 OpenSSL_add_all_algorithms();
1708 ERR_load_crypto_strings();
1709 }
1710
1711 void Toolbox::FinalizeOpenSsl()
1712 {
1713 // Finalize OpenSSL
1714 // https://wiki.openssl.org/index.php/Library_Initialization#Cleanup
1715 #ifdef FIPS_mode_set
1716 FIPS_mode_set(0);
1717 #endif
1718
1719 #if !defined(OPENSSL_NO_ENGINE)
1720 ENGINE_cleanup();
1721 #endif
1722
1723 CONF_modules_unload(1);
1724 EVP_cleanup();
1725 CRYPTO_cleanup_all_ex_data();
1726 ERR_remove_state(0);
1727 ERR_free_strings();
1728 }
1729
1730
1731 #elif (ORTHANC_ENABLE_SSL == 1 && \
1732 OPENSSL_VERSION_NUMBER >= 0x10100000L)
1733 /**
1734 * OpenSSL >= 1.1.0. In this case, the initialization is
1735 * automatically done by the functions of OpenSSL.
1736 * https://wiki.openssl.org/index.php/Library_Initialization
1737 **/
1738 void Toolbox::InitializeOpenSsl()
1739 {
1740 }
1741
1742 void Toolbox::FinalizeOpenSsl()
1743 {
1744 }
1745
1746 #else
1747 # error "Support your platform here"
1748 #endif
1749
1750
1751
1752 std::string Toolbox::GenerateUuid()
1753 {
1754 #ifdef WIN32
1755 UUID uuid;
1756 UuidCreate ( &uuid );
1757
1758 unsigned char * str;
1759 UuidToStringA ( &uuid, &str );
1760
1761 std::string s( ( char* ) str );
1762
1763 RpcStringFreeA ( &str );
1764 #else
1765 uuid_t uuid;
1766 uuid_generate_random ( uuid );
1767 char s[37];
1768 uuid_unparse ( uuid, s );
1769 #endif
1770 return s;
1771 }
1772
1773
1774 namespace
1775 {
1776 // Anonymous namespace to avoid clashes between compilation modules
1777
1778 class VariableFormatter
1779 {
1780 public:
1781 typedef std::map<std::string, std::string> Dictionary;
1782
1783 private:
1784 const Dictionary& dictionary_;
1785
1786 public:
1787 VariableFormatter(const Dictionary& dictionary) :
1788 dictionary_(dictionary)
1789 {
1790 }
1791
1792 template<typename Out>
1793 Out operator()(const boost::smatch& what,
1794 Out out) const
1795 {
1796 if (!what[1].str().empty())
1797 {
1798 // Variable without a default value
1799 Dictionary::const_iterator found = dictionary_.find(what[1]);
1800
1801 if (found != dictionary_.end())
1802 {
1803 const std::string& value = found->second;
1804 out = std::copy(value.begin(), value.end(), out);
1805 }
1806 }
1807 else
1808 {
1809 // Variable with a default value
1810 std::string key;
1811 std::string defaultValue;
1812
1813 if (!what[2].str().empty())
1814 {
1815 key = what[2].str();
1816 defaultValue = what[3].str();
1817 }
1818 else if (!what[4].str().empty())
1819 {
1820 key = what[4].str();
1821 defaultValue = what[5].str();
1822 }
1823 else if (!what[6].str().empty())
1824 {
1825 key = what[6].str();
1826 defaultValue = what[7].str();
1827 }
1828 else
1829 {
1830 throw OrthancException(ErrorCode_InternalError);
1831 }
1832
1833 Dictionary::const_iterator found = dictionary_.find(key);
1834
1835 if (found == dictionary_.end())
1836 {
1837 out = std::copy(defaultValue.begin(), defaultValue.end(), out);
1838 }
1839 else
1840 {
1841 const std::string& value = found->second;
1842 out = std::copy(value.begin(), value.end(), out);
1843 }
1844 }
1845
1846 return out;
1847 }
1848 };
1849 }
1850
1851
1852 std::string Toolbox::SubstituteVariables(const std::string& source,
1853 const std::map<std::string, std::string>& dictionary)
1854 {
1855 const boost::regex pattern("\\$\\{([^:]*?)\\}|" // ${what[1]}
1856 "\\$\\{([^:]*?):-([^'\"]*?)\\}|" // ${what[2]:-what[3]}
1857 "\\$\\{([^:]*?):-\"([^\"]*?)\"\\}|" // ${what[4]:-"what[5]"}
1858 "\\$\\{([^:]*?):-'([^']*?)'\\}"); // ${what[6]:-'what[7]'}
1859
1860 VariableFormatter formatter(dictionary);
1861
1862 return boost::regex_replace(source, pattern, formatter);
1863 }
1864
1865
namespace Iso2022
{
  /**
     Tells whether a single-byte control message is located at
     index i of the string s (false if i is out of range)
  **/
  static inline bool IsControlMessage1(const std::string& s, size_t i)
  {
    if (i >= s.size())
    {
      return false;
    }

    switch (s[i])
    {
      case '\x0f':  // Locking shift zero
      case '\x0e':  // Locking shift one
        return true;

      default:
        return false;
    }
  }

  /**
     Tells whether a double-byte control message (ESC followed by a
     shift code) is located at index i of the string s. The result
     is 1 or 0, the return type being size_t.
  **/
  static inline size_t IsControlMessage2(const std::string& s, size_t i)
  {
    if (i + 1 >= s.size() ||
        s[i] != 0x1b)
    {
      return false;
    }

    switch (s[i + 1])
    {
      case '\x6e':  // Locking shift two
      case '\x6f':  // Locking shift three
      case '\x4e':  // Single shift two (alt)
      case '\x4f':  // Single shift three (alt)
      case '\x7c':  // Locking shift three right
      case '\x7d':  // Locking shift two right
      case '\x7e':  // Locking shift one right
        return true;

      default:
        return false;
    }
  }

  /**
     Tells whether a triple-byte control message is located at index
     i of the string s. The result is 1 or 0, the return type being
     size_t.
  **/
  static inline size_t IsControlMessage3(const std::string& s, size_t i)
  {
    if (i + 2 >= s.size())
    {
      return false;
    }

    const char first = s[i];
    const char second = s[i + 1];
    const char third = s[i + 2];

    if (second != 0x1b)
    {
      return false;
    }

    return ((first == '\x8e' && third == '\x4e') ||
            (first == '\x8f' && third == '\x4f'));
  }

  /**
     Returns true iff i is a valid index in the string s, and iff the
     character at this index equals c.
  **/
  static inline bool TestCharValue(
    const std::string& s, size_t i, char c)
  {
    return (i < s.size() && s[i] == c);
  }

  /**
     Returns true iff i is a valid index in the string s, and iff the
     character at this index lies in the range [cMin, cMax] (bounds
     included).
  **/
  static inline bool TestCharRange(
    const std::string& s, size_t i, char cMin, char cMax)
  {
    return (i < s.size() && s[i] >= cMin && s[i] <= cMax);
  }

  /**
     Returns the total number of bytes of the escape sequence that
     starts at index i of the string s, or 0 if there is no escape
     sequence at this location.
  **/
  static inline size_t GetEscapeSequenceLength(const std::string& s, size_t i)
  {
    if (!TestCharValue(s, i, 0x1b))
    {
      return 0;
    }

    // Skip the intermediate bytes (0x20 to 0x2f) that follow ESC
    size_t cursor = i + 1;
    while (TestCharRange(s, cursor, '\x20', '\x2f'))
    {
      cursor++;
    }

    // A sequence is only valid if it contains at least one
    // intermediate byte, AND if it ends with a termination byte
    // (0x30 to 0x7f)
    const bool hasIntermediate = (cursor - i >= 2);

    if (hasIntermediate &&
        TestCharRange(s, cursor, '\x30', '\x7f'))
    {
      return cursor - i + 1;
    }
    else
    {
      return 0;
    }
  }
}
1988
1989
1990
1991 /**
1992 This function will strip all ISO/IEC 2022 control codes and escape
1993 sequences.
1994 Please see https://en.wikipedia.org/wiki/ISO/IEC_2022 (as of 2019-02)
1995 for a list of those.
1996
1997 Please note that this operation is potentially destructive, because
1998 it removes the character set information from the byte stream.
1999
2000 However, in the case where the encoding is unique, then suppressing
2001 the escape sequences allows one to provide us with a clean string after
2002 conversion to utf-8 with boost.
2003 **/
2004 void Toolbox::RemoveIso2022EscapeSequences(std::string& dest, const std::string& src)
2005 {
2006 // we need AT MOST the same size as the source string in the output
2007 dest.clear();
2008 if (dest.capacity() < src.size())
2009 dest.reserve(src.size());
2010
2011 size_t i = 0;
2012
2013 // uint8_t view to the string
2014 while (i < src.size())
2015 {
2016 size_t j = i;
2017
2018 // The i index will only be incremented if a message is detected
2019 // in that case, the message is skipped and the index is set to the
2020 // next position to read
2021 if (Iso2022::IsControlMessage1(src, i))
2022 i += 1;
2023 else if (Iso2022::IsControlMessage2(src, i))
2024 i += 2;
2025 else if (Iso2022::IsControlMessage3(src, i))
2026 i += 3;
2027 else
2028 i += Iso2022::GetEscapeSequenceLength(src, i);
2029
2030 // if the index was NOT incremented, this means there was no message at
2031 // this location: we then may copy the character at this index and
2032 // increment the index to point to the next read position
2033 if (j == i)
2034 {
2035 dest.push_back(src[i]);
2036 i++;
2037 }
2038 }
2039 }
2040
2041
2042 void Toolbox::Utf8ToUnicodeCharacter(uint32_t& unicode,
2043 size_t& length,
2044 const std::string& utf8,
2045 size_t position)
2046 {
2047 // https://en.wikipedia.org/wiki/UTF-8
2048
2049 static const uint8_t MASK_IS_1_BYTE = 0x80; // printf '0x%x\n' "$((2#10000000))"
2050 static const uint8_t TEST_IS_1_BYTE = 0x00;
2051
2052 static const uint8_t MASK_IS_2_BYTES = 0xe0; // printf '0x%x\n' "$((2#11100000))"
2053 static const uint8_t TEST_IS_2_BYTES = 0xc0; // printf '0x%x\n' "$((2#11000000))"
2054
2055 static const uint8_t MASK_IS_3_BYTES = 0xf0; // printf '0x%x\n' "$((2#11110000))"
2056 static const uint8_t TEST_IS_3_BYTES = 0xe0; // printf '0x%x\n' "$((2#11100000))"
2057
2058 static const uint8_t MASK_IS_4_BYTES = 0xf8; // printf '0x%x\n' "$((2#11111000))"
2059 static const uint8_t TEST_IS_4_BYTES = 0xf0; // printf '0x%x\n' "$((2#11110000))"
2060
2061 static const uint8_t MASK_CONTINUATION = 0xc0; // printf '0x%x\n' "$((2#11000000))"
2062 static const uint8_t TEST_CONTINUATION = 0x80; // printf '0x%x\n' "$((2#10000000))"
2063
2064 if (position >= utf8.size())
2065 {
2066 throw OrthancException(ErrorCode_ParameterOutOfRange);
2067 }
2068
2069 assert(sizeof(uint8_t) == sizeof(char));
2070 const uint8_t* buffer = reinterpret_cast<const uint8_t*>(utf8.c_str()) + position;
2071
2072 if ((buffer[0] & MASK_IS_1_BYTE) == TEST_IS_1_BYTE)
2073 {
2074 length = 1;
2075 unicode = buffer[0] & ~MASK_IS_1_BYTE;
2076 }
2077 else if ((buffer[0] & MASK_IS_2_BYTES) == TEST_IS_2_BYTES &&
2078 position + 1 < utf8.size() &&
2079 (buffer[1] & MASK_CONTINUATION) == TEST_CONTINUATION)
2080 {
2081 length = 2;
2082 uint32_t a = buffer[0] & ~MASK_IS_2_BYTES;
2083 uint32_t b = buffer[1] & ~MASK_CONTINUATION;
2084 unicode = (a << 6) | b;
2085 }
2086 else if ((buffer[0] & MASK_IS_3_BYTES) == TEST_IS_3_BYTES &&
2087 position + 2 < utf8.size() &&
2088 (buffer[1] & MASK_CONTINUATION) == TEST_CONTINUATION &&
2089 (buffer[2] & MASK_CONTINUATION) == TEST_CONTINUATION)
2090 {
2091 length = 3;
2092 uint32_t a = buffer[0] & ~MASK_IS_3_BYTES;
2093 uint32_t b = buffer[1] & ~MASK_CONTINUATION;
2094 uint32_t c = buffer[2] & ~MASK_CONTINUATION;
2095 unicode = (a << 12) | (b << 6) | c;
2096 }
2097 else if ((buffer[0] & MASK_IS_4_BYTES) == TEST_IS_4_BYTES &&
2098 position + 3 < utf8.size() &&
2099 (buffer[1] & MASK_CONTINUATION) == TEST_CONTINUATION &&
2100 (buffer[2] & MASK_CONTINUATION) == TEST_CONTINUATION &&
2101 (buffer[3] & MASK_CONTINUATION) == TEST_CONTINUATION)
2102 {
2103 length = 4;
2104 uint32_t a = buffer[0] & ~MASK_IS_4_BYTES;
2105 uint32_t b = buffer[1] & ~MASK_CONTINUATION;
2106 uint32_t c = buffer[2] & ~MASK_CONTINUATION;
2107 uint32_t d = buffer[3] & ~MASK_CONTINUATION;
2108 unicode = (a << 18) | (b << 12) | (c << 6) | d;
2109 }
2110 else
2111 {
2112 // This is not a valid UTF-8 encoding
2113 throw OrthancException(ErrorCode_BadFileFormat, "Invalid UTF-8 string");
2114 }
2115 }
2116
2117
2118 std::string Toolbox::LargeHexadecimalToDecimal(const std::string& hex)
2119 {
2120 /**
2121 * NB: Focus of the code below is *not* efficiency, but
2122 * readability!
2123 **/
2124
2125 for (size_t i = 0; i < hex.size(); i++)
2126 {
2127 const char c = hex[i];
2128 if (!((c >= 'A' && c <= 'F') ||
2129 (c >= 'a' && c <= 'f') ||
2130 (c >= '0' && c <= '9')))
2131 {
2132 throw Orthanc::OrthancException(Orthanc::ErrorCode_ParameterOutOfRange,
2133 "Not an hexadecimal number");
2134 }
2135 }
2136
2137 std::vector<uint8_t> decimal;
2138 decimal.push_back(0);
2139
2140 for (size_t i = 0; i < hex.size(); i++)
2141 {
2142 uint8_t hexDigit = static_cast<uint8_t>(Hex2Dec(hex[i]));
2143 assert(hexDigit <= 15);
2144
2145 for (size_t j = 0; j < decimal.size(); j++)
2146 {
2147 uint8_t val = static_cast<uint8_t>(decimal[j]) * 16 + hexDigit; // Maximum: 9 * 16 + 15
2148 assert(val <= 159 /* == 9 * 16 + 15 */);
2149
2150 decimal[j] = val % 10;
2151 hexDigit = val / 10;
2152 assert(hexDigit <= 15 /* == 159 / 10 */);
2153 }
2154
2155 while (hexDigit > 0)
2156 {
2157 decimal.push_back(hexDigit % 10);
2158 hexDigit /= 10;
2159 }
2160 }
2161
2162 size_t start = 0;
2163 while (start < decimal.size() &&
2164 decimal[start] == '0')
2165 {
2166 start++;
2167 }
2168
2169 std::string s;
2170 s.reserve(decimal.size() - start);
2171
2172 for (size_t i = decimal.size(); i > start; i--)
2173 {
2174 s.push_back(decimal[i - 1] + '0');
2175 }
2176
2177 return s;
2178 }
2179
2180
2181 std::string Toolbox::GenerateDicomPrivateUniqueIdentifier()
2182 {
2183 /**
2184 * REFERENCE: "Creating a Privately Defined Unique Identifier
2185 * (Informative)" / "UUID Derived UID"
2186 * http://dicom.nema.org/medical/dicom/2019a/output/chtml/part05/sect_B.2.html
2187 * https://stackoverflow.com/a/46316162/881731
2188 **/
2189
2190 std::string uuid = GenerateUuid();
2191 assert(IsUuid(uuid) && uuid.size() == 36);
2192
2193 /**
2194 * After removing the four dashes ("-") out of the 36-character
2195 * UUID, we get a large hexadecimal number with 32 characters,
2196 * each of those characters lying in the range [0,16[. The large
2197 * number is thus in the [0,16^32[ = [0,256^16[ range. This number
2198 * has a maximum of 39 decimal digits, as can be seen in Python:
2199 *
2200 * # python -c 'import math; print(math.log(16**32))/math.log(10))'
2201 * 38.531839445
2202 *
2203 * We now to convert the large hexadecimal number to a decimal
2204 * number with up to 39 digits, remove the leading zeros, then
2205 * prefix it with "2.25."
2206 **/
2207
2208 // Remove the dashes
2209 std::string hex = (uuid.substr(0, 8) +
2210 uuid.substr(9, 4) +
2211 uuid.substr(14, 4) +
2212 uuid.substr(19, 4) +
2213 uuid.substr(24, 12));
2214 assert(hex.size() == 32);
2215
2216 return "2.25." + LargeHexadecimalToDecimal(hex);
2217 }
2218 }
2219
2220
2221
2222 OrthancLinesIterator* OrthancLinesIterator_Create(const std::string& content)
2223 {
2224 return reinterpret_cast<OrthancLinesIterator*>(new Orthanc::Toolbox::LinesIterator(content));
2225 }
2226
2227
2228 bool OrthancLinesIterator_GetLine(std::string& target,
2229 const OrthancLinesIterator* iterator)
2230 {
2231 if (iterator != NULL)
2232 {
2233 return reinterpret_cast<const Orthanc::Toolbox::LinesIterator*>(iterator)->GetLine(target);
2234 }
2235 else
2236 {
2237 return false;
2238 }
2239 }
2240
2241
2242 void OrthancLinesIterator_Next(OrthancLinesIterator* iterator)
2243 {
2244 if (iterator != NULL)
2245 {
2246 reinterpret_cast<Orthanc::Toolbox::LinesIterator*>(iterator)->Next();
2247 }
2248 }
2249
2250
2251 void OrthancLinesIterator_Free(OrthancLinesIterator* iterator)
2252 {
2253 if (iterator != NULL)
2254 {
2255 delete reinterpret_cast<const Orthanc::Toolbox::LinesIterator*>(iterator);
2256 }
2257 }