Mercurial > hg > orthanc
view OrthancFramework/Sources/Toolbox.cpp @ 4200:7112a8af0b63
cppcheck
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Wed, 16 Sep 2020 20:53:31 +0200 |
parents | bf7b9edf6b81 |
children | 4d42408da117 |
line wrap: on
line source
/**
 * Orthanc - A Lightweight, RESTful DICOM Store
 * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics
 * Department, University Hospital of Liege, Belgium
 * Copyright (C) 2017-2020 Osimis S.A., Belgium
 *
 * This program is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/>.
 **/


#include "PrecompiledHeaders.h"
#include "Toolbox.h"

#include "Compatibility.h"
#include "OrthancException.h"
#include "Logging.h"

#include <boost/algorithm/string/case_conv.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/regex.hpp>

#if BOOST_VERSION >= 106600
#  include <boost/uuid/detail/sha1.hpp>
#else
#  include <boost/uuid/sha1.hpp>
#endif

#include <string>
#include <stdint.h>
#include <string.h>
#include <algorithm>
#include <ctype.h>


#if ORTHANC_ENABLE_MD5 == 1
// TODO - Could be replaced by <boost/uuid/detail/md5.hpp> starting
// with Boost >= 1.66.0
#  include "../Resources/ThirdParty/md5/md5.h"
#endif

#if ORTHANC_ENABLE_BASE64 == 1
#  include "../Resources/ThirdParty/base64/base64.h"
#endif

#if ORTHANC_ENABLE_LOCALE == 1
#  include <boost/locale.hpp>
#endif

#if ORTHANC_ENABLE_SSL == 1
// For OpenSSL initialization and finalization
#  include <openssl/conf.h>
#  include <openssl/engine.h>
#  include <openssl/err.h>
#  include <openssl/evp.h>
#  include <openssl/ssl.h>
#endif


#if defined(_MSC_VER) && (_MSC_VER < 1800)
// Patch for the missing "_strtoll" symbol when compiling with Visual
// Studio < 2013
extern "C"
{
  int64_t _strtoi64(const char *nptr, char **endptr, int base);
  int64_t strtoll(const char *nptr, char **endptr, int base)
  {
    return _strtoi64(nptr, endptr, base);
  }
}
#endif


#if defined(_WIN32)
#  include <windows.h>   // For ::Sleep
#endif


#if ORTHANC_ENABLE_PUGIXML == 1
#  include "ChunkedBuffer.h"
#endif


// Inclusions for UUID
// http://stackoverflow.com/a/1626302

extern "C"
{
#if defined(_WIN32)
#  include <rpc.h>
#else
#  include <uuid/uuid.h>
#endif
}


#if defined(ORTHANC_STATIC_ICU)

#  if (ORTHANC_STATIC_ICU == 1 && ORTHANC_ENABLE_LOCALE == 1)
#    include <OrthancFrameworkResources.h>
#    include <unicode/udata.h>
#    include <unicode/uloc.h>
#    include "Compression/GzipCompressor.h"

static std::string  globalIcuData_;

extern "C"
{
  // This is dummy content for the "icudt58_dat" (resp. "icudt63_dat")
  // global variable from the autogenerated "icudt58l_dat.c"
  // (resp. "icudt63l_dat.c") file that contains a huge C array. In
  // Orthanc, this array is compressed using gzip and attached as a
  // resource, then uncompressed during the launch of Orthanc by
  // static function "InitializeIcu()".
  struct
  {
    double bogus;
    uint8_t *bytes;
  } U_ICUDATA_ENTRY_POINT = { 0.0, NULL };
}

#    if defined(__LSB_VERSION__)
extern "C"
{
  /**
   * The "tzname" global variable is declared as "extern" but is not
   * defined in any compilation module, if using Linux Standard Base,
   * as soon as OpenSSL or cURL is in use on Ubuntu >= 18.04 (glibc >=
   * 2.27). The variable "__tzname" is always properly declared *and*
   * defined. The reason is unclear, and is maybe a bug in the gcc 4.8
   * linker that is used by LSB if facing a weak symbol (as "tzname").
   * This makes Orthanc crash if the timezone is set to UTC.
   * https://groups.google.com/d/msg/orthanc-users/0m8sxxwSm1E/2p8du_89CAAJ
   **/
  char *tzname[2] = { (char *) "GMT", (char *) "GMT" };
}
#    endif

#  endif
#endif


#if defined(__unix__) && ORTHANC_SANDBOXED != 1
#  include "SystemToolbox.h"  // Check out "InitializeGlobalLocale()"
#endif


namespace Orthanc
{
  /**
   * The character functions of <ctype.h> take an "int" whose value
   * must be EOF or representable as "unsigned char": passing a plain
   * "char" that is negative (any byte >= 0x80 if "char" is signed) is
   * undefined behavior. The wrappers below do the conversion through
   * "unsigned char" once and for all.
   **/
  static bool SafeIsSpace(char c)
  {
    return isspace(static_cast<unsigned char>(c)) != 0;
  }

  static bool SafeIsAlnum(char c)
  {
    return isalnum(static_cast<unsigned char>(c)) != 0;
  }

  static bool SafeIsDigit(char c)
  {
    return isdigit(static_cast<unsigned char>(c)) != 0;
  }

  static bool SafeIsXDigit(char c)
  {
    return isxdigit(static_cast<unsigned char>(c)) != 0;
  }

  static char SafeToUpper(char c)
  {
    return static_cast<char>(toupper(static_cast<unsigned char>(c)));
  }

  static char SafeToLower(char c)
  {
    return static_cast<char>(tolower(static_cast<unsigned char>(c)));
  }


  // Moves "lineEnd_" from "lineStart_" to the next '\n' or '\r'
  // character, or to the end of the content if there is none
  void Toolbox::LinesIterator::FindEndOfLine()
  {
    lineEnd_ = lineStart_;

    while (lineEnd_ < content_.size() &&
           content_[lineEnd_] != '\n' &&
           content_[lineEnd_] != '\r')
    {
      lineEnd_ += 1;
    }
  }


  // Starts the iteration on the first line of "content"
  Toolbox::LinesIterator::LinesIterator(const std::string& content) :
    content_(content),
    lineStart_(0)
  {
    FindEndOfLine();
  }


  // Copies the current line (without its end-of-line characters) to
  // "target". Returns "false" iff. the end of the content was reached,
  // in which case "target" is left untouched.
  bool Toolbox::LinesIterator::GetLine(std::string& target) const
  {
    assert(lineStart_ <= content_.size() &&
           lineEnd_ <= content_.size() &&
           lineStart_ <= lineEnd_);

    if (lineStart_ == content_.size())
    {
      return false;
    }
    else
    {
      target = content_.substr(lineStart_, lineEnd_ - lineStart_);
      return true;
    }
  }


  // Moves to the next line. The pairs "\r\n" and "\n\r" are both
  // consumed as one single end-of-line.
  void Toolbox::LinesIterator::Next()
  {
    lineStart_ = lineEnd_;

    if (lineStart_ != content_.size())
    {
      assert(content_[lineStart_] == '\r' ||
             content_[lineStart_] == '\n');

      // The character that, if following the current one, belongs to
      // the same end-of-line
      char second;

      if (content_[lineStart_] == '\r')
      {
        second = '\n';
      }
      else
      {
        second = '\r';
      }

      lineStart_ += 1;

      if (lineStart_ < content_.size() &&
          content_[lineStart_] == second)
      {
        lineStart_ += 1;
      }

      FindEndOfLine();
    }
  }


  // In-place conversion to upper case, byte by byte, using "toupper()"
  void Toolbox::ToUpperCase(std::string& s)
  {
    std::transform(s.begin(), s.end(), s.begin(), SafeToUpper);
  }


  // In-place conversion to lower case, byte by byte, using "tolower()"
  void Toolbox::ToLowerCase(std::string& s)
  {
    std::transform(s.begin(), s.end(), s.begin(), SafeToLower);
  }


  void Toolbox::ToUpperCase(std::string& result,
                            const std::string& source)
  {
    result = source;
    ToUpperCase(result);
  }

  void Toolbox::ToLowerCase(std::string& result,
                            const std::string& source)
  {
    result = source;
    ToLowerCase(result);
  }


  // Splits an URI that starts with '/' into its components. Throws
  // "ErrorCode_UriSyntax" if the URI is empty, does not start with
  // '/', or contains an empty component (such as in "/a//b").
  void Toolbox::SplitUriComponents(UriComponents& components,
                                   const std::string& uri)
  {
    static const char URI_SEPARATOR = '/';

    components.clear();

    if (uri.size() == 0 ||
        uri[0] != URI_SEPARATOR)
    {
      throw OrthancException(ErrorCode_UriSyntax);
    }

    // Count the number of slashes in the URI to make an assumption
    // about the number of components in the URI
    unsigned int estimatedSize = 0;
    for (unsigned int i = 0; i < uri.size(); i++)
    {
      if (uri[i] == URI_SEPARATOR)
        estimatedSize++;
    }

    // "estimatedSize >= 1" here, as the URI starts with a slash
    components.reserve(estimatedSize - 1);

    unsigned int start = 1;
    unsigned int end = 1;
    while (end < uri.size())
    {
      // This is the loop invariant
      assert(uri[start - 1] == '/' && (end >= start));

      if (uri[end] == '/')
      {
        components.push_back(std::string(&uri[start], end - start));
        end++;
        start = end;
      }
      else
      {
        end++;
      }
    }

    if (start < uri.size())
    {
      components.push_back(std::string(&uri[start], end - start));
    }

    for (size_t i = 0; i < components.size(); i++)
    {
      if (components[i].size() == 0)
      {
        // Empty component, as in: "/coucou//e"
        throw OrthancException(ErrorCode_UriSyntax);
      }
    }
  }


  // Copies to "target" the components of "source" starting at index
  // "fromLevel". "target" is empty if "fromLevel" is out of range.
  void Toolbox::TruncateUri(UriComponents& target,
                            const UriComponents& source,
                            size_t fromLevel)
  {
    target.clear();

    if (source.size() > fromLevel)
    {
      target.resize(source.size() - fromLevel);

      size_t j = 0;
      for (size_t i = fromLevel; i < source.size(); i++, j++)
      {
        target[j] = source[i];
      }

      assert(j == target.size());
    }
  }


  // Returns "true" iff. "baseUri" is a prefix of "testedUri"
  // (component-wise comparison)
  bool Toolbox::IsChildUri(const UriComponents& baseUri,
                           const UriComponents& testedUri)
  {
    if (testedUri.size() < baseUri.size())
    {
      return false;
    }

    for (size_t i = 0; i < baseUri.size(); i++)
    {
      if (baseUri[i] != testedUri[i])
        return false;
    }

    return true;
  }


  // Joins the components starting at index "fromLevel" with slashes.
  // Returns "/" if there is no such component.
  std::string Toolbox::FlattenUri(const UriComponents& components,
                                  size_t fromLevel)
  {
    if (components.size() <= fromLevel)
    {
      return "/";
    }
    else
    {
      std::string r;

      for (size_t i = fromLevel; i < components.size(); i++)
      {
        r += "/" + components[i];
      }

      return r;
    }
  }


#if ORTHANC_ENABLE_MD5 == 1
  // Maps a value in [0,15] to its lower-case hexadecimal digit
  static char GetHexadecimalCharacter(uint8_t value)
  {
    assert(value < 16);

    if (value < 10)
    {
      return value + '0';
    }
    else
    {
      return (value - 10) + 'a';
    }
  }


  void Toolbox::ComputeMD5(std::string& result,
                           const std::string& data)
  {
    if (data.size() > 0)
    {
      ComputeMD5(result, &data[0], data.size());
    }
    else
    {
      ComputeMD5(result, NULL, 0);
    }
  }


  // Writes to "result" the MD5 of the buffer, as 32 lower-case
  // hexadecimal characters. "data" is not read if "size" is zero.
  void Toolbox::ComputeMD5(std::string& result,
                           const void* data,
                           size_t size)
  {
    md5_state_s state;
    md5_init(&state);

    // The size argument of "md5_append()" is an "int": The buffer is
    // provided by chunks whose size fits in an "int", so that buffers
    // larger than INT_MAX bytes are not truncated by the cast
    static const size_t MAX_CHUNK = 1024 * 1024 * 1024;

    const md5_byte_t* p = reinterpret_cast<const md5_byte_t*>(data);

    while (size > 0)
    {
      const size_t chunk = std::min(size, MAX_CHUNK);
      md5_append(&state, p, static_cast<int>(chunk));
      p += chunk;
      size -= chunk;
    }

    md5_byte_t actualHash[16];
    md5_finish(&state, actualHash);

    result.resize(32);
    for (unsigned int i = 0; i < 16; i++)
    {
      result[2 * i] = GetHexadecimalCharacter(static_cast<uint8_t>(actualHash[i] / 16));
      result[2 * i + 1] = GetHexadecimalCharacter(static_cast<uint8_t>(actualHash[i] % 16));
    }
  }
#endif


#if ORTHANC_ENABLE_BASE64 == 1
  void Toolbox::EncodeBase64(std::string& result,
                             const std::string& data)
  {
    result.clear();
    base64_encode(result, data);
  }

  // Throws "ErrorCode_BadFileFormat" if "data" contains a character
  // outside of the Base64 alphabet (alphanumeric, '+', '/' and '=')
  void Toolbox::DecodeBase64(std::string& result,
                             const std::string& data)
  {
    for (size_t i = 0; i < data.length(); i++)
    {
      if (!SafeIsAlnum(data[i]) &&
          data[i] != '+' &&
          data[i] != '/' &&
          data[i] != '=')
      {
        // This is not a valid character for a Base64 string
        throw OrthancException(ErrorCode_BadFileFormat);
      }
    }

    result.clear();
    base64_decode(result, data);
  }


  // Parses a string of the form "data:<mime>;base64,<content>".
  // Returns "false" (leaving the outputs untouched) if "source" does
  // not match this pattern.
  bool Toolbox::DecodeDataUriScheme(std::string& mime,
                                    std::string& content,
                                    const std::string& source)
  {
    boost::regex pattern("data:([^;]+);base64,([a-zA-Z0-9=+/]*)",
                         boost::regex::icase /* case insensitive search */);

    boost::cmatch what;
    if (regex_match(source.c_str(), what, pattern))
    {
      mime = what[1];
      DecodeBase64(content, what[2]);
      return true;
    }
    else
    {
      return false;
    }
  }


  void Toolbox::EncodeDataUriScheme(std::string& result,
                                    const std::string& mime,
                                    const std::string& content)
  {
    result = "data:" + mime + ";base64,";
    base64_encode(result, content);
  }

#endif


#if ORTHANC_ENABLE_LOCALE == 1
  // Returns the name of the character set that is given to
  // boost::locale for the encoding. Throws "ErrorCode_NotImplemented"
  // for the encodings that are not listed.
  static const char* GetBoostLocaleEncoding(const Encoding sourceEncoding)
  {
    switch (sourceEncoding)
    {
      case Encoding_Utf8:
        return "UTF-8";

      case Encoding_Ascii:
        return "ASCII";

      case Encoding_Latin1:
        return "ISO-8859-1";

      case Encoding_Latin2:
        return "ISO-8859-2";

      case Encoding_Latin3:
        return "ISO-8859-3";

      case Encoding_Latin4:
        return "ISO-8859-4";

      case Encoding_Latin5:
        return "ISO-8859-9";

      case Encoding_Cyrillic:
        return "ISO-8859-5";

      case Encoding_Windows1251:
        return "WINDOWS-1251";

      case Encoding_Arabic:
        return "ISO-8859-6";

      case Encoding_Greek:
        return "ISO-8859-7";

      case Encoding_Hebrew:
        return "ISO-8859-8";

      case Encoding_Japanese:
        return "SHIFT-JIS";

      case Encoding_Chinese:
        return "GB18030";

      case Encoding_Thai:
#if BOOST_LOCALE_WITH_ICU == 1
        return "tis620.2533";
#else
        return "TIS620.2533-0";
#endif

      case Encoding_Korean:
        return "ISO-IR-149";

      case Encoding_JapaneseKanji:
        return "JIS";

      case Encoding_SimplifiedChinese:
        return "GB2312";

      default:
        throw OrthancException(ErrorCode_NotImplemented);
    }
  }
#endif


#if ORTHANC_ENABLE_LOCALE == 1
  // http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.12.html#sect_C.12.1.1.2
  std::string Toolbox::ConvertToUtf8(const std::string& source,
                                     Encoding sourceEncoding,
                                     bool hasCodeExtensions)
  {
#if ORTHANC_STATIC_ICU == 1
    if (globalIcuData_.empty())
    {
      throw OrthancException(ErrorCode_BadSequenceOfCalls,
                             "Call Toolbox::InitializeGlobalLocale()");
    }
#endif

    // The "::skip" flag makes boost skip invalid UTF-8
    // characters. This can occur in badly-encoded DICOM files.

    try
    {
      if (sourceEncoding == Encoding_Ascii)
      {
        return ConvertToAscii(source);
      }
      else
      {
        std::string s;

        if (sourceEncoding == Encoding_Utf8)
        {
          // Already in UTF-8: No conversion is required, but we ensure
          // the output is correctly encoded
          s = boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip);
        }
        else
        {
          const char* encoding = GetBoostLocaleEncoding(sourceEncoding);
          s = boost::locale::conv::to_utf<char>(source, encoding, boost::locale::conv::skip);
        }

        if (hasCodeExtensions)
        {
          std::string t;
          RemoveIso2022EscapeSequences(t, s);
          return t;
        }
        else
        {
          return s;
        }
      }
    }
    catch (std::runtime_error& e)
    {
      // Bad input string or bad encoding
      LOG(INFO) << e.what();
      return ConvertToAscii(source);
    }
  }
#endif


#if ORTHANC_ENABLE_LOCALE == 1
  std::string Toolbox::ConvertFromUtf8(const std::string& source,
                                       Encoding targetEncoding)
  {
#if ORTHANC_STATIC_ICU == 1
    if (globalIcuData_.empty())
    {
      throw OrthancException(ErrorCode_BadSequenceOfCalls,
                             "Call Toolbox::InitializeGlobalLocale()");
    }
#endif

    // The "::skip" flag makes boost skip invalid UTF-8
    // characters. This can occur in badly-encoded DICOM files.

    try
    {
      if (targetEncoding == Encoding_Utf8)
      {
        // Already in UTF-8: No conversion is required.
        return boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip);
      }
      else if (targetEncoding == Encoding_Ascii)
      {
        return ConvertToAscii(source);
      }
      else
      {
        const char* encoding = GetBoostLocaleEncoding(targetEncoding);
        return boost::locale::conv::from_utf<char>(source, encoding, boost::locale::conv::skip);
      }
    }
    catch (std::runtime_error&)
    {
      // Bad input string or bad encoding
      return ConvertToAscii(source);
    }
  }
#endif


  // A byte is accepted iff. it is in the range [1,127], and is either
  // the newline character or not a control character
  static bool IsAsciiCharacter(uint8_t c)
  {
    return (c != 0 &&
            c <= 127 &&
            (c == '\n' || !iscntrl(c)));
  }


  bool Toolbox::IsAsciiString(const void* data,
                              size_t size)
  {
    const uint8_t* p = reinterpret_cast<const uint8_t*>(data);

    for (size_t i = 0; i < size; i++, p++)
    {
      if (!IsAsciiCharacter(*p))
      {
        return false;
      }
    }

    return true;
  }


  bool Toolbox::IsAsciiString(const std::string& s)
  {
    return IsAsciiString(s.c_str(), s.size());
  }


  // Returns a copy of "source" from which all the bytes rejected by
  // "IsAsciiCharacter()" are removed
  std::string Toolbox::ConvertToAscii(const std::string& source)
  {
    std::string result;

    result.reserve(source.size() + 1);
    for (size_t i = 0; i < source.size(); i++)
    {
      if (IsAsciiCharacter(source[i]))
      {
        result.push_back(source[i]);
      }
    }

    return result;
  }


  // Writes to "result" the SHA-1 of the buffer, formatted as 5 groups
  // of 8 lower-case hexadecimal characters separated by dashes (44
  // characters). "data" is not read if "size" is zero.
  void Toolbox::ComputeSHA1(std::string& result,
                            const void* data,
                            size_t size)
  {
    boost::uuids::detail::sha1 sha1;

    if (size > 0)
    {
      sha1.process_bytes(data, size);
    }

    unsigned int digest[5];

    // Sanity check for the memory layout: A SHA-1 digest is 160 bits wide
    assert(sizeof(unsigned int) == 4 && sizeof(digest) == (160 / 8));

    sha1.get_digest(digest);

    // "sprintf()" writes 44 characters *plus* a terminating NUL: The
    // formatting is done in a local buffer that has room for this
    // terminator, instead of directly in the storage of "result"
    char buffer[8 * 5 + 4 + 1];
    sprintf(buffer, "%08x-%08x-%08x-%08x-%08x",
            digest[0],
            digest[1],
            digest[2],
            digest[3],
            digest[4]);

    result.assign(buffer, 8 * 5 + 4);
  }

  void Toolbox::ComputeSHA1(std::string& result,
                            const std::string& data)
  {
    if (data.size() > 0)
    {
      ComputeSHA1(result, data.c_str(), data.size());
    }
    else
    {
      ComputeSHA1(result, NULL, 0);
    }
  }


  // Checks whether the buffer, once stripped from its leading/trailing
  // spaces and NUL characters, has the layout produced by
  // "ComputeSHA1()": 44 characters, with dashes at positions 8, 17, 26
  // and 35, and alphanumeric characters elsewhere.
  bool Toolbox::IsSHA1(const void* str,
                       size_t size)
  {
    if (size == 0)
    {
      return false;
    }

    const char* start = reinterpret_cast<const char*>(str);
    const char* end = start + size;

    // Trim the beginning of the string
    while (start < end)
    {
      if (*start == '\0' ||
          SafeIsSpace(*start))
      {
        start++;
      }
      else
      {
        break;
      }
    }

    // Trim the trailing of the string
    while (start < end)
    {
      if (*(end - 1) == '\0' ||
          SafeIsSpace(*(end - 1)))
      {
        end--;
      }
      else
      {
        break;
      }
    }

    if (end - start != 44)
    {
      return false;
    }

    for (unsigned int i = 0; i < 44; i++)
    {
      if (i == 8 ||
          i == 17 ||
          i == 26 ||
          i == 35)
      {
        if (start[i] != '-')
          return false;
      }
      else
      {
        if (!SafeIsAlnum(start[i]))
          return false;
      }
    }

    return true;
  }


  bool Toolbox::IsSHA1(const std::string& s)
  {
    if (s.size() == 0)
    {
      return false;
    }
    else
    {
      return IsSHA1(s.c_str(), s.size());
    }
  }


  // Returns a copy of "source" without its leading and trailing
  // whitespace characters (as defined by "isspace()")
  std::string Toolbox::StripSpaces(const std::string& source)
  {
    size_t first = 0;

    while (first < source.length() &&
           SafeIsSpace(source[first]))
    {
      first++;
    }

    if (first == source.length())
    {
      // String containing only spaces
      return "";
    }

    size_t last = source.length();
    while (last > first &&
           SafeIsSpace(source[last - 1]))
    {
      last--;
    }

    assert(first <= last);
    return source.substr(first, last - first);
  }


  // Value of one hexadecimal digit. The caller must make sure that
  // "c" is a hexadecimal digit.
  static char Hex2Dec(char c)
  {
    return ((c >= '0' && c <= '9') ? c - '0' :
            ((c >= 'a' && c <= 'f') ? c - 'a' + 10 : c - 'A' + 10));
  }


  // In-place decoding of a percent-encoded string: "%XY" is replaced
  // by the byte whose hexadecimal value is XY, and '+' by a space. A
  // '%' that is not followed by two hexadecimal digits is copied as is.
  void Toolbox::UrlDecode(std::string& s)
  {
    // http://en.wikipedia.org/wiki/Percent-encoding
    // http://www.w3schools.com/tags/ref_urlencode.asp
    // http://stackoverflow.com/questions/154536/encode-decode-urls-in-c

    if (s.size() == 0)
    {
      return;
    }

    size_t source = 0;
    size_t target = 0;

    while (source < s.size())
    {
      // Only true hexadecimal digits are accepted: "Hex2Dec()" would
      // return garbage for other alphanumeric characters
      if (s[source] == '%' &&
          source + 2 < s.size() &&
          SafeIsXDigit(s[source + 1]) &&
          SafeIsXDigit(s[source + 2]))
      {
        s[target] = static_cast<char>((Hex2Dec(s[source + 1]) << 4) | Hex2Dec(s[source + 2]));
        source += 3;
        target += 1;
      }
      else
      {
        if (s[source] == '+')
          s[target] = ' ';
        else
          s[target] = s[source];

        source++;
        target++;
      }
    }

    s.resize(target);
  }


  // Detects the byte order by writing 4 bytes, then reading them back
  // as one 32-bit integer. Throws "ErrorCode_NotImplemented" if the
  // order is neither big-endian nor little-endian.
  Endianness Toolbox::DetectEndianness()
  {
    // http://sourceforge.net/p/predef/wiki/Endianness/

    uint32_t bufferView;

    uint8_t* buffer = reinterpret_cast<uint8_t*>(&bufferView);

    buffer[0] = 0x00;
    buffer[1] = 0x01;
    buffer[2] = 0x02;
    buffer[3] = 0x03;

    switch (bufferView)
    {
      case 0x00010203:
        return Endianness_Big;

      case 0x03020100:
        return Endianness_Little;

      default:
        throw OrthancException(ErrorCode_NotImplemented);
    }
  }


  // Converts a pattern with the wildcards '*' and '?' into a regular
  // expression. The backslash must be escaped first, as the other
  // replacements introduce backslashes.
  std::string Toolbox::WildcardToRegularExpression(const std::string& source)
  {
    // TODO - Speed up this with a regular expression

    std::string result = source;

    // Escape all special characters
    boost::replace_all(result, "\\", "\\\\");
    boost::replace_all(result, "^", "\\^");
    boost::replace_all(result, ".", "\\.");
    boost::replace_all(result, "$", "\\$");
    boost::replace_all(result, "|", "\\|");
    boost::replace_all(result, "(", "\\(");
    boost::replace_all(result, ")", "\\)");
    boost::replace_all(result, "[", "\\[");
    boost::replace_all(result, "]", "\\]");
    boost::replace_all(result, "+", "\\+");
    boost::replace_all(result, "/", "\\/");
    boost::replace_all(result, "{", "\\{");
    boost::replace_all(result, "}", "\\}");

    // Convert wildcards '*' and '?' to their regex equivalents
    boost::replace_all(result, "?", ".");
    boost::replace_all(result, "*", ".*");

    return result;
  }


  // Splits "value" at each occurrence of "separator". Empty items are
  // kept: the result always contains "number of separators + 1" items.
  void Toolbox::TokenizeString(std::vector<std::string>& result,
                               const std::string& value,
                               char separator)
  {
    size_t countSeparators = 0;

    for (size_t i = 0; i < value.size(); i++)
    {
      if (value[i] == separator)
      {
        countSeparators++;
      }
    }

    result.clear();
    result.reserve(countSeparators + 1);

    std::string currentItem;

    for (size_t i = 0; i < value.size(); i++)
    {
      if (value[i] == separator)
      {
        result.push_back(currentItem);
        currentItem.clear();
      }
      else
      {
        currentItem.push_back(value[i]);
      }
    }

    result.push_back(currentItem);
  }


#if ORTHANC_ENABLE_PUGIXML == 1
  // pugixml writer that accumulates the serialized document into a
  // "ChunkedBuffer"
  class ChunkedBufferWriter : public pugi::xml_writer
  {
  private:
    ChunkedBuffer buffer_;

  public:
    virtual void write(const void *data, size_t size)
    {
      if (size > 0)
      {
        buffer_.AddChunk(reinterpret_cast<const char*>(data), size);
      }
    }

    void Flatten(std::string& s)
    {
      buffer_.Flatten(s);
    }
  };


  // Recursively appends the content of "source" below the XML node
  // "target". The items of JSON arrays are stored in child nodes named
  // "arrayElement", the members of JSON objects in child nodes named
  // after their key.
  static void JsonToXmlInternal(pugi::xml_node& target,
                                const Json::Value& source,
                                const std::string& arrayElement)
  {
    // http://jsoncpp.sourceforge.net/value_8h_source.html#l00030

    switch (source.type())
    {
      case Json::nullValue:
      {
        target.append_child(pugi::node_pcdata).set_value("null");
        break;
      }

      case Json::intValue:
      {
        std::string s = boost::lexical_cast<std::string>(source.asInt());
        target.append_child(pugi::node_pcdata).set_value(s.c_str());
        break;
      }

      case Json::uintValue:
      {
        std::string s = boost::lexical_cast<std::string>(source.asUInt());
        target.append_child(pugi::node_pcdata).set_value(s.c_str());
        break;
      }

      case Json::realValue:
      {
        std::string s = boost::lexical_cast<std::string>(source.asFloat());
        target.append_child(pugi::node_pcdata).set_value(s.c_str());
        break;
      }

      case Json::stringValue:
      {
        target.append_child(pugi::node_pcdata).set_value(source.asString().c_str());
        break;
      }

      case Json::booleanValue:
      {
        target.append_child(pugi::node_pcdata).set_value(source.asBool() ? "true" : "false");
        break;
      }

      case Json::arrayValue:
      {
        for (Json::Value::ArrayIndex i = 0; i < source.size(); i++)
        {
          pugi::xml_node node = target.append_child();
          node.set_name(arrayElement.c_str());
          JsonToXmlInternal(node, source[i], arrayElement);
        }
        break;
      }

      case Json::objectValue:
      {
        Json::Value::Members members = source.getMemberNames();

        for (size_t i = 0; i < members.size(); i++)
        {
          pugi::xml_node node = target.append_child();
          node.set_name(members[i].c_str());
          JsonToXmlInternal(node, source[members[i]], arrayElement);
        }

        break;
      }

      default:
        throw OrthancException(ErrorCode_NotImplemented);
    }
  }


  // Serializes "source" as an XML document whose root node is named
  // "rootElement", with an XML declaration (version 1.0, utf-8)
  void Toolbox::JsonToXml(std::string& target,
                          const Json::Value& source,
                          const std::string& rootElement,
                          const std::string& arrayElement)
  {
    pugi::xml_document doc;

    pugi::xml_node n = doc.append_child(rootElement.c_str());
    JsonToXmlInternal(n, source, arrayElement);

    pugi::xml_node decl = doc.prepend_child(pugi::node_declaration);
    decl.append_attribute("version").set_value("1.0");
    decl.append_attribute("encoding").set_value("utf-8");

    XmlToString(target, doc);
  }

  void Toolbox::XmlToString(std::string& target,
                            const pugi::xml_document& source)
  {
    ChunkedBufferWriter writer;
    source.save(writer, " ", pugi::format_default, pugi::encoding_utf8);
    writer.Flatten(target);
  }

#endif


  // Returns "true" iff. "str", once stripped from its surrounding
  // spaces, is an optional '-' followed by at least one decimal digit
  bool Toolbox::IsInteger(const std::string& str)
  {
    std::string s = StripSpaces(str);

    if (s.size() == 0)
    {
      return false;
    }

    size_t pos = 0;
    if (s[0] == '-')
    {
      if (s.size() == 1)
      {
        return false;
      }

      pos = 1;
    }

    while (pos < s.size())
    {
      if (!SafeIsDigit(s[pos]))
      {
        return false;
      }

      pos++;
    }

    return true;
  }


  // Recursively rebuilds "source" into "target" from its values only.
  // A source whose type is not handled leaves "target" untouched.
  void Toolbox::CopyJsonWithoutComments(Json::Value& target,
                                        const Json::Value& source)
  {
    switch (source.type())
    {
      case Json::nullValue:
        target = Json::nullValue;
        break;

      case Json::intValue:
        target = source.asInt64();
        break;

      case Json::uintValue:
        target = source.asUInt64();
        break;

      case Json::realValue:
        target = source.asDouble();
        break;

      case Json::stringValue:
        target = source.asString();
        break;

      case Json::booleanValue:
        target = source.asBool();
        break;

      case Json::arrayValue:
      {
        target = Json::arrayValue;
        for (Json::Value::ArrayIndex i = 0; i < source.size(); i++)
        {
          Json::Value& item = target.append(Json::nullValue);
          CopyJsonWithoutComments(item, source[i]);
        }

        break;
      }

      case Json::objectValue:
      {
        target = Json::objectValue;
        Json::Value::Members members = source.getMemberNames();
        for (Json::Value::ArrayIndex i = 0; i < members.size(); i++)
        {
          const std::string item = members[i];
          CopyJsonWithoutComments(target[item], source[item]);
        }

        break;
      }

      default:
        break;
    }
  }


  bool Toolbox::StartsWith(const std::string& str,
                           const std::string& prefix)
  {
    if (str.size() < prefix.size())
    {
      return false;
    }
    else
    {
      return str.compare(0, prefix.size(), prefix) == 0;
    }
  }


  static bool IsUnreservedCharacter(char c)
  {
    // This function checks whether "c" is an unreserved character
    // wrt. an URI percent-encoding
    // https://en.wikipedia.org/wiki/Percent-encoding#Percent-encoding%5Fin%5Fa%5FURI

    return ((c >= 'A' && c <= 'Z') ||
            (c >= 'a' && c <= 'z') ||
            (c >= '0' && c <= '9') ||
            c == '-' ||
            c == '_' ||
            c == '.' ||
            c == '~');
  }


  // Percent-encodes all the bytes of "source" that are not unreserved
  // characters, using upper-case hexadecimal digits
  void Toolbox::UriEncode(std::string& target,
                          const std::string& source)
  {
    // Estimate the length of the percent-encoded URI
    size_t length = 0;

    for (size_t i = 0; i < source.size(); i++)
    {
      if (IsUnreservedCharacter(source[i]))
      {
        length += 1;
      }
      else
      {
        // This character must be percent-encoded
        length += 3;
      }
    }

    target.clear();
    target.reserve(length);

    for (size_t i = 0; i < source.size(); i++)
    {
      if (IsUnreservedCharacter(source[i]))
      {
        target.push_back(source[i]);
      }
      else
      {
        // This character must be percent-encoded
        uint8_t byte = static_cast<uint8_t>(source[i]);
        uint8_t a = byte >> 4;
        uint8_t b = byte & 0x0f;

        target.push_back('%');
        target.push_back(a < 10 ? a + '0' : a - 10 + 'A');
        target.push_back(b < 10 ? b + '0' : b - 10 + 'A');
      }
    }
  }


  // Returns "false" if "json" is not an object or has no member "key",
  // "true" if the member has the expected type, and throws
  // "ErrorCode_BadParameterType" if the member has another type
  static bool HasField(const Json::Value& json,
                       const std::string& key,
                       Json::ValueType expectedType)
  {
    if (json.type() != Json::objectValue ||
        !json.isMember(key))
    {
      return false;
    }
    else if (json[key].type() == expectedType)
    {
      return true;
    }
    else
    {
      throw OrthancException(ErrorCode_BadParameterType);
    }
  }


  std::string Toolbox::GetJsonStringField(const Json::Value& json,
                                          const std::string& key,
                                          const std::string& defaultValue)
  {
    if (HasField(json, key, Json::stringValue))
    {
      return json[key].asString();
    }
    else
    {
      return defaultValue;
    }
  }


  bool Toolbox::GetJsonBooleanField(const ::Json::Value& json,
                                    const std::string& key,
                                    bool defaultValue)
  {
    if (HasField(json, key, Json::booleanValue))
    {
      return json[key].asBool();
    }
    else
    {
      return defaultValue;
    }
  }


  int Toolbox::GetJsonIntegerField(const ::Json::Value& json,
                                   const std::string& key,
                                   int defaultValue)
  {
    if (HasField(json, key, Json::intValue))
    {
      return json[key].asInt();
    }
    else
    {
      return defaultValue;
    }
  }


  // Same as "GetJsonIntegerField()", but throws
  // "ErrorCode_ParameterOutOfRange" if the value is negative
  unsigned int Toolbox::GetJsonUnsignedIntegerField(const ::Json::Value& json,
                                                    const std::string& key,
                                                    unsigned int defaultValue)
  {
    int v = GetJsonIntegerField(json, key, defaultValue);

    if (v < 0)
    {
      throw OrthancException(ErrorCode_ParameterOutOfRange);
    }
    else
    {
      return static_cast<unsigned int>(v);
    }
  }


  // Checks the layout of an UUID: 36 characters, with dashes at
  // positions 8, 13, 18 and 23, and alphanumeric characters elsewhere
  bool Toolbox::IsUuid(const std::string& str)
  {
    if (str.size() != 36)
    {
      return false;
    }

    for (size_t i = 0; i < str.length(); i++)
    {
      if (i == 8 || i == 13 || i == 18 || i == 23)
      {
        if (str[i] != '-')
          return false;
      }
      else
      {
        if (!SafeIsAlnum(str[i]))
          return false;
      }
    }

    return true;
  }


  // Returns "true" iff. "str" is an UUID, or starts with an UUID that
  // is immediately followed by a whitespace character
  bool Toolbox::StartsWithUuid(const std::string& str)
  {
    if (str.size() < 36)
    {
      return false;
    }

    if (str.size() == 36)
    {
      return IsUuid(str);
    }

    assert(str.size() > 36);
    if (!SafeIsSpace(str[36]))
    {
      return false;
    }

    return IsUuid(str.substr(0, 36));
  }


#if ORTHANC_ENABLE_LOCALE == 1
  static std::unique_ptr<std::locale>  globalLocale_;

  // Replaces "globalLocale_" by the locale with the given name, or by
  // a default-constructed "std::locale" if "locale" is NULL. Returns
  // "false" (and resets "globalLocale_") if the locale cannot be
  // constructed.
  static bool SetGlobalLocale(const char* locale)
  {
    try
    {
      if (locale == NULL)
      {
        LOG(WARNING) << "Falling back to system-wide default locale";
        globalLocale_.reset(new std::locale());
      }
      else
      {
        LOG(INFO) << "Using locale: \"" << locale << "\" for case-insensitive comparison of strings";
        globalLocale_.reset(new std::locale(locale));
      }
    }
    catch (std::runtime_error& e)
    {
      LOG(ERROR) << "Cannot set globale locale to "
                 << (locale ? std::string(locale) : "(null)")
                 << ": " << e.what();
      globalLocale_.reset(NULL);
    }

    return (globalLocale_.get() != NULL);
  }


  // In static builds of ICU, uncompresses the embedded ICU common
  // data, checks its MD5, and registers it into ICU on a 16-byte
  // aligned address. Does nothing if the data is already loaded, or if
  // ICU is not statically linked.
  static void InitializeIcu()
  {
#if ORTHANC_STATIC_ICU == 1
    if (globalIcuData_.empty())
    {
      LOG(INFO) << "Setting up the ICU common data";

      GzipCompressor compressor;
      compressor.Uncompress(globalIcuData_,
                            FrameworkResources::GetFileResourceBuffer(FrameworkResources::LIBICU_DATA),
                            FrameworkResources::GetFileResourceSize(FrameworkResources::LIBICU_DATA));

      std::string md5;
      Toolbox::ComputeMD5(md5, globalIcuData_);

      if (md5 != ORTHANC_ICU_DATA_MD5 ||
          globalIcuData_.empty())
      {
        throw OrthancException(ErrorCode_InternalError,
                               "Cannot decode the ICU common data");
      }

      // "ICU data is designed to be 16-aligned"
      // http://userguide.icu-project.org/icudata#TOC-Alignment

      {
        static const size_t ALIGN = 16;

        UErrorCode status = U_ZERO_ERROR;

        if (reinterpret_cast<intptr_t>(globalIcuData_.c_str()) % ALIGN == 0)
        {
          // Data is already properly aligned
          udata_setCommonData(globalIcuData_.c_str(), &status);
        }
        else
        {
          // Copy the data into a larger buffer, at the first offset
          // that is aligned
          std::string aligned;
          aligned.resize(globalIcuData_.size() + ALIGN - 1);

          intptr_t offset = reinterpret_cast<intptr_t>(aligned.c_str()) % ALIGN;
          if (offset != 0)
          {
            offset = ALIGN - offset;
          }

          if (offset + globalIcuData_.size() > aligned.size())
          {
            throw OrthancException(ErrorCode_InternalError, "Cannot align on 16-bytes boundary");
          }

          // We don't use "memcpy()", as it expects its data to be aligned
          const uint8_t* p = reinterpret_cast<uint8_t*>(&globalIcuData_[0]);
          uint8_t* q = reinterpret_cast<uint8_t*>(&aligned[0]) + offset;
          for (size_t i = 0; i < globalIcuData_.size(); i++, p++, q++)
          {
            *q = *p;
          }

          globalIcuData_.swap(aligned);

          const uint8_t* data = reinterpret_cast<const uint8_t*>(globalIcuData_.c_str()) + offset;

          if (reinterpret_cast<intptr_t>(data) % ALIGN != 0)
          {
            throw OrthancException(ErrorCode_InternalError, "Cannot align on 16-bytes boundary");
          }
          else
          {
            udata_setCommonData(data, &status);
          }
        }

        if (status != U_ZERO_ERROR)
        {
          throw OrthancException(ErrorCode_InternalError, "Cannot initialize ICU");
        }
      }

      if (Toolbox::DetectEndianness() != Endianness_Little)
      {
        // TODO - The data table must be swapped (uint16_t)
        throw OrthancException(ErrorCode_NotImplemented);
      }

      // "First-use of ICU from a single thread before the
      // multi-threaded use of ICU begins", to make sure everything is
      // properly initialized (should not be mandatory in our
      // case). We let boost handle calls to "u_init()" and "u_cleanup()".
      // http://userguide.icu-project.org/design#TOC-ICU-Initialization-and-Termination
      uloc_getDefault();
    }
#endif
  }


  // Sets up ICU (if need be) and the global locale. If "locale" is
  // NULL, a platform-dependent default locale is used. If the locale
  // cannot be set, a default-constructed "std::locale" is tried before
  // throwing "ErrorCode_InternalError".
  void Toolbox::InitializeGlobalLocale(const char* locale)
  {
    InitializeIcu();

#if defined(__unix__) && ORTHANC_SANDBOXED != 1
    static const char* LOCALTIME = "/etc/localtime";

    if (!SystemToolbox::IsExistingFile(LOCALTIME))
    {
      // Check out file
      // "boost_1_69_0/libs/locale/src/icu/time_zone.cpp": Direct
      // access is made to this file if ICU is not used. Crash arises
      // in Boost if the file is a symbolic link to a non-existing
      // file (such as in Ubuntu 16.04 base Docker image).
      throw OrthancException(
        ErrorCode_InternalError,
        "On UNIX-like systems, the file " + std::string(LOCALTIME) +
        " must be present on the filesystem (install \"tzdata\" package on Debian)");
    }
#endif

    // Make Orthanc use English, United States locale
    // Linux: use "en_US.UTF-8"
    // Windows: use ""
    // Wine: use NULL

#if defined(__MINGW32__)
    // Visibly, there is no support of locales in MinGW yet
    // http://mingw.5.n7.nabble.com/How-to-use-std-locale-global-with-MinGW-correct-td33048.html
    static const char* DEFAULT_LOCALE = NULL;
#elif defined(_WIN32)
    // For Windows: use default locale (using "en_US" does not work)
    static const char* DEFAULT_LOCALE = "";
#else
    // For Linux & cie
    static const char* DEFAULT_LOCALE = "en_US.UTF-8";
#endif

    bool ok;

    if (locale == NULL)
    {
      ok = SetGlobalLocale(DEFAULT_LOCALE);

#if defined(__MINGW32__)
      LOG(WARNING) << "This is a MinGW build, case-insensitive comparison of "
                   << "strings with accents will not work outside of Wine";
#endif
    }
    else
    {
      ok = SetGlobalLocale(locale);
    }

    if (!ok &&
        !SetGlobalLocale(NULL))
    {
      throw OrthancException(ErrorCode_InternalError,
                             "Cannot initialize global locale");
    }

  }


  void Toolbox::FinalizeGlobalLocale()
  {
    globalLocale_.reset();
  }


  // Converts an UTF-8 string to upper case, using the global locale.
  // Throws "ErrorCode_BadSequenceOfCalls" if the global locale (or the
  // ICU data in static builds of ICU) is not initialized.
  std::string Toolbox::ToUpperCaseWithAccents(const std::string& source)
  {
    bool error = (globalLocale_.get() == NULL);

#if ORTHANC_STATIC_ICU == 1
    if (globalIcuData_.empty())
    {
      error = true;
    }
#endif

    if (error)
    {
      throw OrthancException(ErrorCode_BadSequenceOfCalls,
                             "No global locale was set, call Toolbox::InitializeGlobalLocale()");
    }

    /**
     * A few notes about locales:
     *
     * (1) We don't use "case folding":
     * http://www.boost.org/doc/libs/1_64_0/libs/locale/doc/html/conversions.html
     *
     * Characters are made uppercase one by one. This is because, in
     * static builds, we are using iconv, which is visibly not
     * supported correctly (TODO: Understand why). Case folding seems
     * to be working correctly if using the default backend under
     * Linux (ICU or POSIX?). If one wishes to use case folding, one
     * would use:
     *
     *   boost::locale::generator gen;
     *   std::locale::global(gen(DEFAULT_LOCALE));
     *   return boost::locale::to_upper(source);
     *
     * (2) The function "boost::algorithm::to_upper_copy" does not
     * make use of the "std::locale::global()". We therefore create a
     * global variable "globalLocale_".
     *
     * (3) The variant of "boost::algorithm::to_upper_copy()" that
     * uses std::string does not work properly. We need to apply it
     * on wide strings (std::wstring). This explains the two calls to
     * "utf_to_utf" in order to convert to/from std::wstring.
     **/

    std::wstring w = boost::locale::conv::utf_to_utf<wchar_t>(source, boost::locale::conv::skip);

    w = boost::algorithm::to_upper_copy<std::wstring>(w, *globalLocale_);

    return boost::locale::conv::utf_to_utf<char>(w, boost::locale::conv::skip);
  }
#endif



#if ORTHANC_ENABLE_SSL == 0
  /**
   * OpenSSL is disabled
   **/
  void Toolbox::InitializeOpenSsl()
  {
  }

  void Toolbox::FinalizeOpenSsl()
  {
  }


#elif (ORTHANC_ENABLE_SSL == 1 &&               \
       OPENSSL_VERSION_NUMBER < 0x10100000L)
  /**
   * OpenSSL < 1.1.0
   **/
  void Toolbox::InitializeOpenSsl()
  {
    // https://wiki.openssl.org/index.php/Library_Initialization
    SSL_library_init();
    SSL_load_error_strings();
    OpenSSL_add_all_algorithms();
    ERR_load_crypto_strings();
  }

  void Toolbox::FinalizeOpenSsl()
  {
    // Finalize OpenSSL
    // https://wiki.openssl.org/index.php/Library_Initialization#Cleanup
#ifdef FIPS_mode_set
    FIPS_mode_set(0);
#endif

#if !defined(OPENSSL_NO_ENGINE)
    ENGINE_cleanup();
#endif

    CONF_modules_unload(1);
    EVP_cleanup();
    CRYPTO_cleanup_all_ex_data();
    ERR_remove_state(0);
    ERR_free_strings();
  }


#elif (ORTHANC_ENABLE_SSL == 1 &&               \
       OPENSSL_VERSION_NUMBER >= 0x10100000L)
  /**
   * OpenSSL >= 1.1.0. In this case, the initialization is
   * automatically done by the functions of OpenSSL.
   * https://wiki.openssl.org/index.php/Library_Initialization
   **/
  void Toolbox::InitializeOpenSsl()
  {
  }

  void Toolbox::FinalizeOpenSsl()
  {
  }

#else
#  error "Support your platform here"
#endif



  // Generates a random UUID, formatted as a string. The test uses
  // "_WIN32", i.e. the very same macro as the one that selects
  // <rpc.h> vs. <uuid/uuid.h> at the top of this file.
  std::string Toolbox::GenerateUuid()
  {
#if defined(_WIN32)
    UUID uuid;
    UuidCreate ( &uuid );

    unsigned char * str;
    UuidToStringA ( &uuid, &str );

    std::string s( reinterpret_cast<char*>(str) );

    RpcStringFreeA ( &str );
#else
    uuid_t uuid;
    uuid_generate_random ( uuid );
    char s[37];
    uuid_unparse ( uuid, s );
#endif
    return s;
  }


  namespace
  {
    // Anonymous namespace to avoid clashes between compilation modules

    // Formatter for "boost::regex_replace()", that replaces each match
    // of the pattern of "SubstituteVariables()" by the value of the
    // variable in the dictionary, or by its default value (if any)
    class VariableFormatter
    {
    public:
      typedef std::map<std::string, std::string>   Dictionary;

    private:
      const Dictionary& dictionary_;

    public:
      explicit VariableFormatter(const Dictionary& dictionary) :
        dictionary_(dictionary)
      {
      }

      template<typename Out>
      Out operator()(const boost::smatch& what,
                     Out out) const
      {
        if (!what[1].str().empty())
        {
          // Variable without a default value: Nothing is written if
          // the variable is absent from the dictionary
          Dictionary::const_iterator found = dictionary_.find(what[1]);

          if (found != dictionary_.end())
          {
            const std::string& value = found->second;
            out = std::copy(value.begin(), value.end(), out);
          }
        }
        else
        {
          // Variable with a default value
          std::string key;
          std::string defaultValue;

          if (!what[2].str().empty())
          {
            key = what[2].str();
            defaultValue = what[3].str();
          }
          else if (!what[4].str().empty())
          {
            key = what[4].str();
            defaultValue = what[5].str();
          }
          else if (!what[6].str().empty())
          {
            key = what[6].str();
            defaultValue = what[7].str();
          }
          else
          {
            throw OrthancException(ErrorCode_InternalError);
          }

          Dictionary::const_iterator found = dictionary_.find(key);

          if (found == dictionary_.end())
          {
            out = std::copy(defaultValue.begin(), defaultValue.end(), out);
          }
          else
          {
            const std::string& value = found->second;
            out = std::copy(value.begin(), value.end(), out);
          }
        }

        return out;
      }
    };
  }


  // Replaces the "${key}", "${key:-default}", "${key:-"default"}" and
  // "${key:-'default'}" patterns in "source", using "dictionary"
  std::string Toolbox::SubstituteVariables(const std::string& source,
                                           const std::map<std::string, std::string>& dictionary)
  {
    const boost::regex pattern("\\$\\{([^:]*?)\\}|"                 // ${what[1]}
                               "\\$\\{([^:]*?):-([^'\"]*?)\\}|"     // ${what[2]:-what[3]}
                               "\\$\\{([^:]*?):-\"([^\"]*?)\"\\}|"  // ${what[4]:-"what[5]"}
                               "\\$\\{([^:]*?):-'([^']*?)'\\}");    // ${what[6]:-'what[7]'}

    VariableFormatter formatter(dictionary);

    return boost::regex_replace(source, pattern, formatter);
  }


  namespace Iso2022
  {
    /**
       Returns whether the string s contains a single-byte control message
       at index i
    **/
    static inline bool IsControlMessage1(const std::string& s, size_t i)
    {
      if (i < s.size())
      {
        char c = s[i];
        return
          (c == '\x0f') || // Locking shift zero
          (c == '\x0e');   // Locking shift one
      }
      else
      {
        return false;
      }
    }

    /**
       Returns whether the string s contains a double-byte control message
       at index i
    **/
    static inline size_t IsControlMessage2(const std::string& s, size_t i)
    {
      if (i + 1 < s.size())
      {
        char c1 = s[i];
        char c2 = s[i + 1];
        return (c1 == 0x1b) && (
          (c2 == '\x6e') || // Locking shift two
          (c2 == '\x6f') || // Locking shift three
          (c2 == '\x4e') || // Single shift two (alt)
          (c2 == '\x4f') || // Single shift three (alt)
          (c2 == '\x7c') || // Locking shift three right
          (c2 == '\x7d') || // Locking shift two right
          (c2 == '\x7e')    // Locking shift one right
          );
      }
      else
      {
        return false;
      }
    }

    /**
       Returns whether the string s contains a triple-byte control message
       at index i
    **/
    static inline size_t IsControlMessage3(const std::string& s, size_t i)
    {
      if (i + 2 < s.size())
      {
        char c1 = s[i];
        char c2 = s[i + 1];
        char c3 = s[i + 2];
        return ((c1 == '\x8e' && c2 == 0x1b && c3 == '\x4e') ||
                (c1 == '\x8f' && c2 == 0x1b && c3 == '\x4f'));
      }
      else
      {
        return false;
      }
    }

    /**
       This function returns true if the index i in the supplied string s:
       - is valid
       - contains the c character
       This function returns false otherwise.
**/ static inline bool TestCharValue( const std::string& s, size_t i, char c) { if (i < s.size()) return s[i] == c; else return false; } /** This function returns true if the index i in the supplied string s: - is valid - has a c character that is >= cMin and <= cMax (included) This function returns false otherwise. **/ static inline bool TestCharRange( const std::string& s, size_t i, char cMin, char cMax) { if (i < s.size()) return (s[i] >= cMin) && (s[i] <= cMax); else return false; } /** This function returns the total length in bytes of the escape sequence located in string s at index i, if there is one, or 0 otherwise. **/ static inline size_t GetEscapeSequenceLength(const std::string& s, size_t i) { if (TestCharValue(s, i, 0x1b)) { size_t j = i+1; // advance reading cursor while we are in a sequence while (TestCharRange(s, j, '\x20', '\x2f')) ++j; // check there is a valid termination byte AND we're long enough (there // must be at least one byte between 0x20 and 0x2f if (TestCharRange(s, j, '\x30', '\x7f') && (j - i) >= 2) return j - i + 1; else return 0; } else return 0; } } /** This function will strip all ISO/IEC 2022 control codes and escape sequences. Please see https://en.wikipedia.org/wiki/ISO/IEC_2022 (as of 2019-02) for a list of those. Please note that this operation is potentially destructive, because it removes the character set information from the byte stream. However, in the case where the encoding is unique, then suppressing the escape sequences allows one to provide us with a clean string after conversion to utf-8 with boost. 
**/ void Toolbox::RemoveIso2022EscapeSequences(std::string& dest, const std::string& src) { // we need AT MOST the same size as the source string in the output dest.clear(); if (dest.capacity() < src.size()) dest.reserve(src.size()); size_t i = 0; // uint8_t view to the string while (i < src.size()) { size_t j = i; // The i index will only be incremented if a message is detected // in that case, the message is skipped and the index is set to the // next position to read if (Iso2022::IsControlMessage1(src, i)) i += 1; else if (Iso2022::IsControlMessage2(src, i)) i += 2; else if (Iso2022::IsControlMessage3(src, i)) i += 3; else i += Iso2022::GetEscapeSequenceLength(src, i); // if the index was NOT incremented, this means there was no message at // this location: we then may copy the character at this index and // increment the index to point to the next read position if (j == i) { dest.push_back(src[i]); i++; } } } void Toolbox::Utf8ToUnicodeCharacter(uint32_t& unicode, size_t& length, const std::string& utf8, size_t position) { // https://en.wikipedia.org/wiki/UTF-8 static const uint8_t MASK_IS_1_BYTE = 0x80; // printf '0x%x\n' "$((2#10000000))" static const uint8_t TEST_IS_1_BYTE = 0x00; static const uint8_t MASK_IS_2_BYTES = 0xe0; // printf '0x%x\n' "$((2#11100000))" static const uint8_t TEST_IS_2_BYTES = 0xc0; // printf '0x%x\n' "$((2#11000000))" static const uint8_t MASK_IS_3_BYTES = 0xf0; // printf '0x%x\n' "$((2#11110000))" static const uint8_t TEST_IS_3_BYTES = 0xe0; // printf '0x%x\n' "$((2#11100000))" static const uint8_t MASK_IS_4_BYTES = 0xf8; // printf '0x%x\n' "$((2#11111000))" static const uint8_t TEST_IS_4_BYTES = 0xf0; // printf '0x%x\n' "$((2#11110000))" static const uint8_t MASK_CONTINUATION = 0xc0; // printf '0x%x\n' "$((2#11000000))" static const uint8_t TEST_CONTINUATION = 0x80; // printf '0x%x\n' "$((2#10000000))" if (position >= utf8.size()) { throw OrthancException(ErrorCode_ParameterOutOfRange); } assert(sizeof(uint8_t) == sizeof(char)); 
const uint8_t* buffer = reinterpret_cast<const uint8_t*>(utf8.c_str()) + position; if ((buffer[0] & MASK_IS_1_BYTE) == TEST_IS_1_BYTE) { length = 1; unicode = buffer[0] & ~MASK_IS_1_BYTE; } else if ((buffer[0] & MASK_IS_2_BYTES) == TEST_IS_2_BYTES && position + 1 < utf8.size() && (buffer[1] & MASK_CONTINUATION) == TEST_CONTINUATION) { length = 2; uint32_t a = buffer[0] & ~MASK_IS_2_BYTES; uint32_t b = buffer[1] & ~MASK_CONTINUATION; unicode = (a << 6) | b; } else if ((buffer[0] & MASK_IS_3_BYTES) == TEST_IS_3_BYTES && position + 2 < utf8.size() && (buffer[1] & MASK_CONTINUATION) == TEST_CONTINUATION && (buffer[2] & MASK_CONTINUATION) == TEST_CONTINUATION) { length = 3; uint32_t a = buffer[0] & ~MASK_IS_3_BYTES; uint32_t b = buffer[1] & ~MASK_CONTINUATION; uint32_t c = buffer[2] & ~MASK_CONTINUATION; unicode = (a << 12) | (b << 6) | c; } else if ((buffer[0] & MASK_IS_4_BYTES) == TEST_IS_4_BYTES && position + 3 < utf8.size() && (buffer[1] & MASK_CONTINUATION) == TEST_CONTINUATION && (buffer[2] & MASK_CONTINUATION) == TEST_CONTINUATION && (buffer[3] & MASK_CONTINUATION) == TEST_CONTINUATION) { length = 4; uint32_t a = buffer[0] & ~MASK_IS_4_BYTES; uint32_t b = buffer[1] & ~MASK_CONTINUATION; uint32_t c = buffer[2] & ~MASK_CONTINUATION; uint32_t d = buffer[3] & ~MASK_CONTINUATION; unicode = (a << 18) | (b << 12) | (c << 6) | d; } else { // This is not a valid UTF-8 encoding throw OrthancException(ErrorCode_BadFileFormat, "Invalid UTF-8 string"); } } std::string Toolbox::LargeHexadecimalToDecimal(const std::string& hex) { /** * NB: Focus of the code below is *not* efficiency, but * readability! 
**/ for (size_t i = 0; i < hex.size(); i++) { const char c = hex[i]; if (!((c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f') || (c >= '0' && c <= '9'))) { throw Orthanc::OrthancException(Orthanc::ErrorCode_ParameterOutOfRange, "Not an hexadecimal number"); } } std::vector<uint8_t> decimal; decimal.push_back(0); for (size_t i = 0; i < hex.size(); i++) { uint8_t hexDigit = static_cast<uint8_t>(Hex2Dec(hex[i])); assert(hexDigit <= 15); for (size_t j = 0; j < decimal.size(); j++) { uint8_t val = static_cast<uint8_t>(decimal[j]) * 16 + hexDigit; // Maximum: 9 * 16 + 15 assert(val <= 159 /* == 9 * 16 + 15 */); decimal[j] = val % 10; hexDigit = val / 10; assert(hexDigit <= 15 /* == 159 / 10 */); } while (hexDigit > 0) { decimal.push_back(hexDigit % 10); hexDigit /= 10; } } size_t start = 0; while (start < decimal.size() && decimal[start] == '0') { start++; } std::string s; s.reserve(decimal.size() - start); for (size_t i = decimal.size(); i > start; i--) { s.push_back(decimal[i - 1] + '0'); } return s; } std::string Toolbox::GenerateDicomPrivateUniqueIdentifier() { /** * REFERENCE: "Creating a Privately Defined Unique Identifier * (Informative)" / "UUID Derived UID" * http://dicom.nema.org/medical/dicom/2019a/output/chtml/part05/sect_B.2.html * https://stackoverflow.com/a/46316162/881731 **/ std::string uuid = GenerateUuid(); assert(IsUuid(uuid) && uuid.size() == 36); /** * After removing the four dashes ("-") out of the 36-character * UUID, we get a large hexadecimal number with 32 characters, * each of those characters lying in the range [0,16[. The large * number is thus in the [0,16^32[ = [0,256^16[ range. This number * has a maximum of 39 decimal digits, as can be seen in Python: * * # python -c 'import math; print(math.log(16**32))/math.log(10))' * 38.531839445 * * We now to convert the large hexadecimal number to a decimal * number with up to 39 digits, remove the leading zeros, then * prefix it with "2.25." 
**/ // Remove the dashes std::string hex = (uuid.substr(0, 8) + uuid.substr(9, 4) + uuid.substr(14, 4) + uuid.substr(19, 4) + uuid.substr(24, 12)); assert(hex.size() == 32); return "2.25." + LargeHexadecimalToDecimal(hex); } void Toolbox::SimplifyDicomAsJson(Json::Value& target, const Json::Value& source, DicomToJsonFormat format) { if (!source.isObject()) { throw Orthanc::OrthancException(Orthanc::ErrorCode_BadFileFormat); } target = Json::objectValue; Json::Value::Members members = source.getMemberNames(); for (size_t i = 0; i < members.size(); i++) { const Json::Value& v = source[members[i]]; const std::string& type = v["Type"].asString(); std::string name; switch (format) { case DicomToJsonFormat_Human: name = v["Name"].asString(); break; case DicomToJsonFormat_Short: name = members[i]; break; default: throw OrthancException(ErrorCode_ParameterOutOfRange); } if (type == "String") { target[name] = v["Value"].asString(); } else if (type == "TooLong" || type == "Null") { target[name] = Json::nullValue; } else if (type == "Sequence") { const Json::Value& array = v["Value"]; assert(array.isArray()); Json::Value children = Json::arrayValue; for (Json::Value::ArrayIndex i = 0; i < array.size(); i++) { Json::Value c; SimplifyDicomAsJson(c, array[i], format); children.append(c); } target[name] = children; } else { assert(0); } } } } OrthancLinesIterator* OrthancLinesIterator_Create(const std::string& content) { return reinterpret_cast<OrthancLinesIterator*>(new Orthanc::Toolbox::LinesIterator(content)); } bool OrthancLinesIterator_GetLine(std::string& target, const OrthancLinesIterator* iterator) { if (iterator != NULL) { return reinterpret_cast<const Orthanc::Toolbox::LinesIterator*>(iterator)->GetLine(target); } else { return false; } } void OrthancLinesIterator_Next(OrthancLinesIterator* iterator) { if (iterator != NULL) { reinterpret_cast<Orthanc::Toolbox::LinesIterator*>(iterator)->Next(); } } void OrthancLinesIterator_Free(OrthancLinesIterator* iterator) { if 
(iterator != NULL) { delete reinterpret_cast<const Orthanc::Toolbox::LinesIterator*>(iterator); } }