view OrthancFramework/Sources/Toolbox.cpp @ 5873:c8788f8f5322 default tip

todo
author Alain Mazy <am@orthanc.team>
date Mon, 18 Nov 2024 15:16:16 +0100
parents 122fd5f97d39
children
line wrap: on
line source

/**
 * Orthanc - A Lightweight, RESTful DICOM Store
 * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics
 * Department, University Hospital of Liege, Belgium
 * Copyright (C) 2017-2023 Osimis S.A., Belgium
 * Copyright (C) 2024-2024 Orthanc Team SRL, Belgium
 * Copyright (C) 2021-2024 Sebastien Jodogne, ICTEAM UCLouvain, Belgium
 *
 * This program is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/>.
 **/


#include "PrecompiledHeaders.h"
#include "Toolbox.h"

#include "Compatibility.h"
#include "OrthancException.h"
#include "Logging.h"

#include <json/reader.h>
#include <json/version.h>
#include <json/writer.h>

#if !defined(JSONCPP_VERSION_MAJOR) || !defined(JSONCPP_VERSION_MINOR)
#  error Cannot access the version of JsonCpp
#endif

#if !defined(ORTHANC_ENABLE_ICU)
#  define ORTHANC_ENABLE_ICU 1
#endif


/**
 * We use deprecated "Json::Reader", "Json::StyledWriter" and
 * "Json::FastWriter" if JsonCpp < 1.8.0. This choice is rather
 * arbitrary, but if JsonCpp >= 1.9.0, gcc generates explicit
 * deprecation warnings (clang was warning in earlier versions). For
 * reference, these classes seem to have been deprecated since JsonCpp
 * 1.4.0 (in February 2015) by the following changeset:
 * https://github.com/open-source-parsers/jsoncpp/commit/8df98f6112890d6272734975dd6d70cf8999bb22
 **/
#if (JSONCPP_VERSION_MAJOR >= 2 ||                                      \
     (JSONCPP_VERSION_MAJOR == 1 && JSONCPP_VERSION_MINOR >= 8))
#  define JSONCPP_USE_DEPRECATED 0
#else
#  define JSONCPP_USE_DEPRECATED 1
#endif


#include <boost/algorithm/string/case_conv.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/algorithm/string/join.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/regex.hpp>

#if BOOST_VERSION >= 106600
#  include <boost/uuid/detail/sha1.hpp>
#else
#  include <boost/uuid/sha1.hpp>
#endif
 
#include <string>
#include <stdint.h>
#include <string.h>
#include <algorithm>
#include <ctype.h>


#if ORTHANC_ENABLE_MD5 == 1
// TODO - Could be replaced by <boost/uuid/detail/md5.hpp> starting
// with Boost >= 1.66.0
#  include "../Resources/ThirdParty/md5/md5.h"
#endif

#if ORTHANC_ENABLE_BASE64 == 1
#  include "../Resources/ThirdParty/base64/base64.h"
#endif

#if ORTHANC_ENABLE_LOCALE == 1
#  include <boost/locale.hpp>
#endif

#if ORTHANC_ENABLE_SSL == 1
// For OpenSSL initialization and finalization
#  include <openssl/conf.h>
#  include <openssl/engine.h>
#  include <openssl/err.h>
#  include <openssl/evp.h>
#  include <openssl/ssl.h>

#  if OPENSSL_VERSION_NUMBER < 0x30000000L
#    if defined(_MSC_VER)
#      pragma message("You are linking Orthanc against OpenSSL 1.x, whose license is incompatible with the GPLv3+ used by Orthanc >= 1.10.0. Please update to OpenSSL 3.x, that uses the Apache 2 license.")
#    else
#      warning You are linking Orthanc against OpenSSL 1.x, whose license is incompatible with the GPLv3+ used by Orthanc >= 1.10.0. Please update to OpenSSL 3.x, that uses the Apache 2 license.
#    endif
#  endif

#endif


#if defined(_MSC_VER) && (_MSC_VER < 1800)
// Patch for the missing "_strtoll" symbol when compiling with Visual Studio < 2013
extern "C"
{
  // Declaration of the routine to which the "strtoll()" shim below
  // forwards its arguments
  int64_t _strtoi64(const char *nptr, char **endptr, int base);
  // Provides "strtoll()" by forwarding the three arguments, unchanged,
  // to "_strtoi64()" and returning its result
  int64_t strtoll(const char *nptr, char **endptr, int base)
  {
    return _strtoi64(nptr, endptr, base);
  } 
}
#endif


#if defined(_WIN32)
#  include <windows.h>   // For ::Sleep
#endif


#if ORTHANC_ENABLE_PUGIXML == 1
#  include "ChunkedBuffer.h"
#endif


// Inclusions for UUID
// http://stackoverflow.com/a/1626302

extern "C"
{
#if defined(_WIN32)
#  include <rpc.h>
#else
#  include <uuid/uuid.h>
#endif
}


#if defined(ORTHANC_STATIC_ICU)

#  if (ORTHANC_STATIC_ICU == 1) && (ORTHANC_ENABLE_ICU == 1)
#    if !defined(ORTHANC_FRAMEWORK_INCLUDE_RESOURCES) || (ORTHANC_FRAMEWORK_INCLUDE_RESOURCES == 1)
#      include <OrthancFrameworkResources.h>
#    endif
#  endif

#  if (ORTHANC_STATIC_ICU == 1 && ORTHANC_ENABLE_LOCALE == 1)
#    include <unicode/udata.h>
#    include <unicode/uloc.h>
#    include "Compression/GzipCompressor.h"

// Buffer for the ICU data. The charset conversion functions of this
// file throw "ErrorCode_BadSequenceOfCalls" as long as it is empty.
// NOTE(review): presumably filled by "InitializeIcu()" - to be confirmed
static std::string  globalIcuData_;

extern "C"
{
  // This is dummy content for the "icudt58_dat" (resp. "icudt63_dat")
  // global variable from the autogenerated "icudt58l_dat.c"
  // (resp. "icudt63l_dat.c") file that contains a huge C array. In
  // Orthanc, this array is compressed using gzip and attached as a
  // resource, then uncompressed during the launch of Orthanc by
  // static function "InitializeIcu()".
  struct
  {
    double bogus;
    uint8_t *bytes;
  } U_ICUDATA_ENTRY_POINT = { 0.0, NULL };
}

#    if defined(__LSB_VERSION__)
extern "C"
{
  /**
   * The "tzname" global variable is declared as "extern" but is not
   * defined in any compilation module, if using Linux Standard Base,
   * as soon as OpenSSL or cURL is in use on Ubuntu >= 18.04 (glibc >=
   * 2.27). The variable "__tzname" is always properly declared *and*
   * defined. The reason is unclear, and is maybe a bug in the gcc 4.8
   * linker that is used by LSB if facing a weak symbol (as "tzname").
   * This makes Orthanc crash if the timezone is set to UTC.
   * https://groups.google.com/d/msg/orthanc-users/0m8sxxwSm1E/2p8du_89CAAJ
   **/
  // Explicit definition of the symbol, with both entries set to "GMT"
  char *tzname[2] = { (char *) "GMT", (char *) "GMT" };
}
#    endif

#  endif
#endif
 


#if defined(__unix__) && ORTHANC_SANDBOXED != 1
#  include "SystemToolbox.h"  // Check out "InitializeGlobalLocale()"
#endif



namespace Orthanc
{
  // Starting from "lineStart_", moves "lineEnd_" forward up to the
  // first '\n' or '\r' character, or up to the end of the content if
  // no such character is found.
  void Toolbox::LinesIterator::FindEndOfLine()
  {
    for (lineEnd_ = lineStart_; lineEnd_ < content_.size(); lineEnd_++)
    {
      const char c = content_[lineEnd_];

      if (c == '\n' ||
          c == '\r')
      {
        break;
      }
    }
  }
  

  // Creates an iterator over the lines of "content". The iterator is
  // positioned on the first line: "lineStart_" is set to 0, then
  // "FindEndOfLine()" computes the matching "lineEnd_".
  Toolbox::LinesIterator::LinesIterator(const std::string& content) :
    content_(content),
    lineStart_(0)
  {
    FindEndOfLine();
  }

    
  // Copies the current line (without its end-of-line characters) into
  // "target" and returns "true". Returns "false", leaving "target"
  // untouched, once the end of the content has been reached.
  bool Toolbox::LinesIterator::GetLine(std::string& target) const
  {
    assert(lineStart_ <= content_.size() &&
           lineEnd_ <= content_.size() &&
           lineStart_ <= lineEnd_);

    const bool hasLine = (lineStart_ != content_.size());

    if (hasLine)
    {
      target = content_.substr(lineStart_, lineEnd_ - lineStart_);
    }

    return hasLine;
  }

    
  // Moves to the next line. An end-of-line is one '\r' or '\n'
  // character, optionally followed by the other one of the two (so
  // that both "\r\n" and "\n\r" count as a single line break).
  void Toolbox::LinesIterator::Next()
  {
    lineStart_ = lineEnd_;

    if (lineStart_ == content_.size())
    {
      // End of the content: Nothing left to skip
      return;
    }

    const char first = content_[lineStart_];
    assert(first == '\r' ||
           first == '\n');

    // The character that would complete a two-characters line break
    const char partner = (first == '\r' ? '\n' : '\r');

    lineStart_ += 1;

    if (lineStart_ < content_.size() &&
        content_[lineStart_] == partner)
    {
      lineStart_ += 1;
    }

    FindEndOfLine();
  }

  
  // Converts one character to upper case. The character is first cast
  // to "unsigned char", because "toupper()" has undefined behavior if
  // it is given a negative value, which happens for non-ASCII bytes
  // on platforms where "char" is signed.
  static char ToUpperCaseCharacter(char c)
  {
    return static_cast<char>(toupper(static_cast<unsigned char>(c)));
  }


  // Converts the string "s" to upper case, in place
  void Toolbox::ToUpperCase(std::string& s)
  {
    std::transform(s.begin(), s.end(), s.begin(), ToUpperCaseCharacter);
  }


  // Converts one character to lower case. The character is first cast
  // to "unsigned char", because "tolower()" has undefined behavior if
  // it is given a negative value, which happens for non-ASCII bytes
  // on platforms where "char" is signed.
  static char ToLowerCaseCharacter(char c)
  {
    return static_cast<char>(tolower(static_cast<unsigned char>(c)));
  }


  // Converts the string "s" to lower case, in place
  void Toolbox::ToLowerCase(std::string& s)
  {
    std::transform(s.begin(), s.end(), s.begin(), ToLowerCaseCharacter);
  }


  // Stores into "result" an upper-case copy of "source"
  void Toolbox::ToUpperCase(std::string& result,
                            const std::string& source)
  {
    result.assign(source);
    ToUpperCase(result);
  }

  // Stores into "result" a lower-case copy of "source"
  void Toolbox::ToLowerCase(std::string& result,
                            const std::string& source)
  {
    result.assign(source);
    ToLowerCase(result);
  }


  // Splits an URI such as "/a/b/c" into its components ("a", "b",
  // "c"). The URI must start with a slash, and a single trailing
  // slash is tolerated. Throws "ErrorCode_UriSyntax" if the URI is
  // empty, does not start with a slash, or has an empty component.
  void Toolbox::SplitUriComponents(UriComponents& components,
                                   const std::string& uri)
  {
    static const char URI_SEPARATOR = '/';

    components.clear();

    if (uri.empty() ||
        uri[0] != URI_SEPARATOR)
    {
      throw OrthancException(ErrorCode_UriSyntax);
    }

    // The number of slashes gives an upper bound on the number of
    // components (there is at least one slash at this point)
    const size_t countSeparators = static_cast<size_t>(
      std::count(uri.begin(), uri.end(), URI_SEPARATOR));
    components.reserve(countSeparators - 1);

    // "start" is always the position right after a slash
    size_t start = 1;

    while (start < uri.size())
    {
      const size_t separator = uri.find(URI_SEPARATOR, start);

      if (separator == std::string::npos)
      {
        // Last component, not followed by a slash
        components.push_back(uri.substr(start));
        break;
      }

      components.push_back(uri.substr(start, separator - start));
      start = separator + 1;
    }

    for (size_t i = 0; i < components.size(); i++)
    {
      if (components[i].empty())
      {
        // Empty component, as in: "/coucou//e"
        throw OrthancException(ErrorCode_UriSyntax);
      }
    }
  }


  // Copies into "target" the components of "source" whose index is
  // greater than or equal to "fromLevel". The target is left empty if
  // "source" has no such component.
  void Toolbox::TruncateUri(UriComponents& target,
                            const UriComponents& source,
                            size_t fromLevel)
  {
    target.clear();

    if (fromLevel >= source.size())
    {
      return;
    }

    target.reserve(source.size() - fromLevel);

    for (size_t level = fromLevel; level < source.size(); level++)
    {
      target.push_back(source[level]);
    }

    assert(target.size() == source.size() - fromLevel);
  }
  


  bool Toolbox::IsChildUri(const UriComponents& baseUri,
                           const UriComponents& testedUri)
  {
    if (testedUri.size() < baseUri.size())
    {
      return false;
    }

    for (size_t i = 0; i < baseUri.size(); i++)
    {
      if (baseUri[i] != testedUri[i])
        return false;
    }

    return true;
  }


  // Builds the string "/c1/c2/..." out of the components whose index
  // is greater than or equal to "fromLevel". Returns "/" if there is
  // no such component.
  std::string Toolbox::FlattenUri(const UriComponents& components,
                                  size_t fromLevel)
  {
    if (fromLevel >= components.size())
    {
      return "/";
    }

    std::string flattened;

    for (size_t level = fromLevel; level < components.size(); level++)
    {
      flattened.push_back('/');
      flattened.append(components[level]);
    }

    return flattened;
  }

  // Concatenates "base" and "uri", making sure that exactly one slash
  // separates them if both are non-empty. If either is empty, this is
  // a plain concatenation.
  std::string Toolbox::JoinUri(const std::string& base, const std::string& uri)
  {
    if (base.empty() ||
        uri.empty())
    {
      return base + uri;
    }

    const bool baseEndsWithSlash = (base[base.size() - 1] == '/');
    const bool uriStartsWithSlash = (uri[0] == '/');

    if (baseEndsWithSlash && uriStartsWithSlash)
    {
      // Two slashes: Drop the one from "uri"
      return base + uri.substr(1);
    }

    if (!baseEndsWithSlash && !uriStartsWithSlash)
    {
      // No slash: Insert one
      return base + "/" + uri;
    }

    return base + uri;
  }


#if ORTHANC_ENABLE_MD5 == 1
  // Returns the lower-case hexadecimal digit ('0'-'9', 'a'-'f') that
  // represents "value", which must lie in the range [0, 15].
  static char GetHexadecimalCharacter(uint8_t value)
  {
    assert(value < 16);

    const bool isDecimalDigit = (value < 10);
    return (isDecimalDigit ? value + '0' : (value - 10) + 'a');
  }


  // Computes the MD5 of the content of the string "data". The empty
  // string is forwarded as a NULL buffer of size 0.
  void Toolbox::ComputeMD5(std::string& result,
                           const std::string& data)
  {
    const char* buffer = (data.empty() ? NULL : data.c_str());
    ComputeMD5(result, buffer, data.size());
  }


  void Toolbox::ComputeMD5(std::string& result,
                           const void* data,
                           size_t size)
  {
    md5_state_s state;
    md5_init(&state);

    if (size > 0)
    {
      md5_append(&state, 
                 reinterpret_cast<const md5_byte_t*>(data), 
                 static_cast<int>(size));
    }

    md5_byte_t actualHash[16];
    md5_finish(&state, actualHash);

    result.resize(32);
    for (unsigned int i = 0; i < 16; i++)
    {
      result[2 * i] = GetHexadecimalCharacter(static_cast<uint8_t>(actualHash[i] / 16));
      result[2 * i + 1] = GetHexadecimalCharacter(static_cast<uint8_t>(actualHash[i] % 16));
    }
  }

  // Computes the MD5 of the concatenation of all the strings of the
  // set, taken in the iteration order of the set.
  void Toolbox::ComputeMD5(std::string& result,
                           const std::set<std::string>& data)
  {
    std::string concatenated;

    std::set<std::string>::const_iterator item = data.begin();
    while (item != data.end())
    {
      concatenated.append(*item);
      ++item;
    }

    ComputeMD5(result, concatenated);
  }

#endif


#if ORTHANC_ENABLE_BASE64 == 1
  // Replaces the content of "result" by the output of
  // "base64_encode()" applied to "data"
  void Toolbox::EncodeBase64(std::string& result, 
                             const std::string& data)
  {
    // Start from an empty string before handing it to the encoder
    result.erase();
    base64_encode(result, data);
  }

  // Decodes the Base64 string "data" into "result". Throws
  // "ErrorCode_BadFileFormat" if "data" contains a character other
  // than alphanumeric characters, '+', '/' and '='.
  void Toolbox::DecodeBase64(std::string& result, 
                             const std::string& data)
  {
    for (size_t i = 0; i < data.length(); i++)
    {
      // The cast to "unsigned char" is mandatory: "isalnum()" has
      // undefined behavior on negative values, which occur for
      // non-ASCII bytes if "char" is signed
      const unsigned char c = static_cast<unsigned char>(data[i]);

      if (!isalnum(c) &&
          c != '+' &&
          c != '/' &&
          c != '=')
      {
        // This is not a valid character for a Base64 string
        throw OrthancException(ErrorCode_BadFileFormat);
      }
    }

    result.clear();
    base64_decode(result, data);
  }


  // Parses a string of the form "data:<mime>;base64,<payload>". On
  // success, stores the MIME type into "mime", the decoded payload
  // into "content", and returns "true". Returns "false" if "source"
  // does not match this form.
  bool Toolbox::DecodeDataUriScheme(std::string& mime,
                                    std::string& content,
                                    const std::string& source)
  {
    boost::regex pattern("data:([^;]+);base64,([a-zA-Z0-9=+/]*)",
                         boost::regex::icase /* case insensitive search */);

    boost::cmatch what;
    if (!regex_match(source.c_str(), what, pattern))
    {
      return false;
    }

    // what[1] is the MIME type, what[2] is the Base64 payload
    mime = what[1].str();
    DecodeBase64(content, what[2].str());
    return true;
  }


  // Builds the string "data:<mime>;base64," followed by the output of
  // "base64_encode()" applied to "content"
  void Toolbox::EncodeDataUriScheme(std::string& result,
                                    const std::string& mime,
                                    const std::string& content)
  {
    // The header is built in a separate string before being assigned
    std::string header("data:");
    header += mime;
    header += ";base64,";

    result = header;
    base64_encode(result, content);
  }

#endif


#if ORTHANC_ENABLE_LOCALE == 1
  // Maps a value of the "Encoding" enumeration to the name of the
  // charset that is given to the conversion functions of
  // Boost.Locale. Throws "ErrorCode_NotImplemented" for the values
  // that are not listed below.
  static const char* GetBoostLocaleEncoding(const Encoding sourceEncoding)
  {
    const char* name = NULL;

    switch (sourceEncoding)
    {
      case Encoding_Utf8:
        name = "UTF-8";
        break;

      case Encoding_Ascii:
        name = "ASCII";
        break;

      case Encoding_Latin1:
        name = "ISO-8859-1";
        break;

      case Encoding_Latin2:
        name = "ISO-8859-2";
        break;

      case Encoding_Latin3:
        name = "ISO-8859-3";
        break;

      case Encoding_Latin4:
        name = "ISO-8859-4";
        break;

      case Encoding_Latin5:
        name = "ISO-8859-9";
        break;

      case Encoding_Cyrillic:
        name = "ISO-8859-5";
        break;

      case Encoding_Windows1251:
        name = "WINDOWS-1251";
        break;

      case Encoding_Arabic:
        name = "ISO-8859-6";
        break;

      case Encoding_Greek:
        name = "ISO-8859-7";
        break;

      case Encoding_Hebrew:
        name = "ISO-8859-8";
        break;
        
      case Encoding_Japanese:
        name = "SHIFT-JIS";
        break;

      case Encoding_Chinese:
        name = "GB18030";
        break;

      case Encoding_Thai:
        // The name of this charset depends on the Boost.Locale backend
#if BOOST_LOCALE_WITH_ICU == 1
        name = "tis620.2533";
#else
        name = "TIS620.2533-0";
#endif
        break;

      case Encoding_Korean:
        name = "ISO-IR-149";
        break;

      case Encoding_JapaneseKanji:
        name = "JIS";
        break;

      case Encoding_SimplifiedChinese:
        name = "GB2312";
        break;

      default:
        throw OrthancException(ErrorCode_NotImplemented);
    }

    return name;
  }
#endif


#if ORTHANC_ENABLE_LOCALE == 1
  // Converts "source", expressed in "sourceEncoding", to UTF-8. If
  // "hasCodeExtensions" is "true", the converted string is further
  // processed by "RemoveIso2022EscapeSequences()". If the conversion
  // raises a "std::runtime_error", the error is logged and the result
  // of "ConvertToAscii()" on the source is returned instead.
  // http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.12.html#sect_C.12.1.1.2
  std::string Toolbox::ConvertToUtf8(const std::string& source,
                                     Encoding sourceEncoding,
                                     bool hasCodeExtensions)
  {
#if ORTHANC_STATIC_ICU == 1
#  if ORTHANC_ENABLE_ICU == 0
    throw OrthancException(ErrorCode_NotImplemented, "ICU is disabled for this target");
#  else
    if (globalIcuData_.empty())
    {
      throw OrthancException(ErrorCode_BadSequenceOfCalls,
                             "Call Toolbox::InitializeGlobalLocale()");
    }
#  endif
#endif

    // The "::skip" flag makes boost skip invalid UTF-8
    // characters. This can occur in badly-encoded DICOM files.
    
    try
    {
      if (sourceEncoding == Encoding_Ascii)
      {
        return ConvertToAscii(source);
      }

      std::string utf8;

      if (sourceEncoding == Encoding_Utf8)
      {
        // Already in UTF-8: No conversion is required, but we ensure
        // the output is correctly encoded
        utf8 = boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip);
      }
      else
      {
        utf8 = boost::locale::conv::to_utf<char>(
          source, GetBoostLocaleEncoding(sourceEncoding), boost::locale::conv::skip);
      }

      if (!hasCodeExtensions)
      {
        return utf8;
      }

      std::string withoutEscapes;
      RemoveIso2022EscapeSequences(withoutEscapes, utf8);
      return withoutEscapes;
    }
    catch (std::runtime_error& e)
    {
      // Bad input string or bad encoding
      LOG(INFO) << e.what();
      return ConvertToAscii(source);
    }
  }
#endif
  

#if ORTHANC_ENABLE_LOCALE == 1
  // Converts the UTF-8 string "source" to "targetEncoding". If the
  // conversion raises a "std::runtime_error", the result of
  // "ConvertToAscii()" on the source is returned instead.
  std::string Toolbox::ConvertFromUtf8(const std::string& source,
                                       Encoding targetEncoding)
  {
#if ORTHANC_STATIC_ICU == 1
#  if ORTHANC_ENABLE_ICU == 0
    throw OrthancException(ErrorCode_NotImplemented, "ICU is disabled for this target");
#  else
    if (globalIcuData_.empty())
    {
      throw OrthancException(ErrorCode_BadSequenceOfCalls,
                             "Call Toolbox::InitializeGlobalLocale()");
    }
#  endif
#endif

    // The "::skip" flag makes boost skip invalid UTF-8
    // characters. This can occur in badly-encoded DICOM files.
    
    try
    {
      switch (targetEncoding)
      {
        case Encoding_Utf8:
          // Already in UTF-8: No conversion is required.
          return boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip);

        case Encoding_Ascii:
          return ConvertToAscii(source);

        default:
        {
          const char* charset = GetBoostLocaleEncoding(targetEncoding);
          return boost::locale::conv::from_utf<char>(source, charset, boost::locale::conv::skip);
        }
      }
    }
    catch (std::runtime_error&)
    {
      // Bad input string or bad encoding
      return ConvertToAscii(source);
    }
  }
#endif


  // Tells whether "c" is accepted as an ASCII character: It must be
  // non-null, at most 127, and it must not be a control character,
  // with the exception of the line feed '\n' that is accepted.
  static bool IsAsciiCharacter(uint8_t c)
  {
    if (c == 0 ||
        c > 127)
    {
      return false;
    }

    if (c == '\n')
    {
      return true;
    }

    return !iscntrl(c);
  }


  // Tells whether each of the "size" bytes starting at "data" is
  // accepted by "IsAsciiCharacter()". An empty buffer is accepted.
  bool Toolbox::IsAsciiString(const void* data,
                              size_t size)
  {
    const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);

    // Stop on the first byte that is rejected
    size_t pos = 0;
    while (pos < size &&
           IsAsciiCharacter(bytes[pos]))
    {
      pos++;
    }

    return (pos == size);
  }


  // Same test as above, applied to all the characters of the string
  // (including possible embedded null characters, which are rejected)
  bool Toolbox::IsAsciiString(const std::string& s)
  {
    const char* buffer = s.c_str();
    return IsAsciiString(buffer, s.size());
  }
  

  // Returns a copy of "source" from which all the characters that are
  // rejected by "IsAsciiCharacter()" have been removed
  std::string Toolbox::ConvertToAscii(const std::string& source)
  {
    std::string ascii;
    ascii.reserve(source.size() + 1);

    for (std::string::const_iterator it = source.begin(); it != source.end(); ++it)
    {
      if (IsAsciiCharacter(*it))
      {
        ascii.push_back(*it);
      }
    }

    return ascii;
  }

  // Computes the SHA-1 of the "size" bytes starting at "data". The
  // result is formatted as 5 groups of 8 lower-case hexadecimal
  // characters separated by dashes (44 characters). "data" is not
  // accessed if "size" is zero.
  void Toolbox::ComputeSHA1(std::string& result,
                            const void* data,
                            size_t size)
  {
    boost::uuids::detail::sha1 sha1;

    if (size > 0)
    {
      sha1.process_bytes(data, size);
    }

    // "sprintf()" appends a terminating null character after the 44
    // formatted characters: The formatting is done in a local buffer
    // that has room for this terminator, instead of writing one byte
    // past the size of the target string
    static const size_t FORMATTED_SIZE = 8 * 5 + 4;
    char formatted[8 * 5 + 4 + 1];

#if BOOST_VERSION >= 108600
    // From Boost 1.86, "digest_type" is "unsigned char[20]", while it
    // was "unsigned int[5]" in previous versions
    unsigned char digest[20];

    // Sanity check for the memory layout: A SHA-1 digest is 160 bits wide
    assert(sizeof(digest) == (160 / 8));
    assert(sizeof(boost::uuids::detail::sha1::digest_type) == 20);

    sha1.get_digest(digest);

    sprintf(formatted, "%02x%02x%02x%02x-%02x%02x%02x%02x-%02x%02x%02x%02x-%02x%02x%02x%02x-%02x%02x%02x%02x",
            digest[0], digest[1], digest[2], digest[3],
            digest[4], digest[5], digest[6], digest[7],
            digest[8], digest[9], digest[10], digest[11],
            digest[12], digest[13], digest[14], digest[15],
            digest[16], digest[17], digest[18], digest[19]);

#else
    unsigned int digest[5];
    // Sanity check for the memory layout: A SHA-1 digest is 160 bits wide
    assert(sizeof(unsigned int) == 4 && sizeof(digest) == (160 / 8));
    assert(sizeof(boost::uuids::detail::sha1::digest_type) == 20);

    sha1.get_digest(digest);

    sprintf(formatted, "%08x-%08x-%08x-%08x-%08x",
            digest[0],
            digest[1],
            digest[2],
            digest[3],
            digest[4]);

#endif

    result.assign(formatted, FORMATTED_SIZE);
  }

  // Computes the SHA-1 of the content of the string "data". The empty
  // string is forwarded as a NULL buffer of size 0.
  void Toolbox::ComputeSHA1(std::string& result,
                            const std::string& data)
  {
    const char* buffer = (data.empty() ? NULL : data.c_str());
    ComputeSHA1(result, buffer, data.size());
  }


  // Tells whether the buffer has the layout of a formatted SHA-1:
  // After removal of the leading and trailing spaces and null
  // characters, it must contain exactly 44 characters, made of 5
  // groups of 8 alphanumeric characters separated by dashes.
  bool Toolbox::IsSHA1(const void* str,
                       size_t size)
  {
    if (size == 0)
    {
      return false;
    }

    // The buffer is read as "unsigned char", because "isspace()" and
    // "isalnum()" have undefined behavior on negative values, which
    // occur for non-ASCII bytes if "char" is signed
    const unsigned char* start = reinterpret_cast<const unsigned char*>(str);
    const unsigned char* end = start + size;

    // Trim the beginning of the string
    while (start < end &&
           (*start == '\0' ||
            isspace(*start)))
    {
      start++;
    }

    // Trim the trailing of the string
    while (start < end &&
           (*(end - 1) == '\0' ||
            isspace(*(end - 1))))
    {
      end--;
    }

    if (end - start != 44)
    {
      return false;
    }

    for (unsigned int i = 0; i < 44; i++)
    {
      if (i == 8 ||
          i == 17 ||
          i == 26 ||
          i == 35)
      {
        // Position of a separator between two groups
        if (start[i] != '-')
          return false;
      }
      else
      {
        if (!isalnum(start[i]))
          return false;
      }
    }

    return true;
  }


  // Same test as above, applied to the content of a string. The
  // empty string is never a SHA-1.
  bool Toolbox::IsSHA1(const std::string& s)
  {
    return (!s.empty() &&
            IsSHA1(s.c_str(), s.size()));
  }


  // Returns a copy of "source" without its leading and trailing
  // whitespace characters (as defined by "isspace()"). The result is
  // empty if the source only contains whitespace.
  std::string Toolbox::StripSpaces(const std::string& source)
  {
    // NB: The characters are cast to "unsigned char", because
    // "isspace()" has undefined behavior on negative values, which
    // occur for non-ASCII bytes if "char" is signed

    size_t first = 0;

    while (first < source.length() &&
           isspace(static_cast<unsigned char>(source[first])))
    {
      first++;
    }

    if (first == source.length())
    {
      // String containing only spaces
      return "";
    }

    size_t last = source.length();
    while (last > first &&
           isspace(static_cast<unsigned char>(source[last - 1])))
    {
      last--;
    }          
    
    assert(first <= last);
    return source.substr(first, last - first);
  }


  // Returns the value of the hexadecimal digit "c". Any character
  // outside of '0'-'9' and 'a'-'f' is handled as if it were an
  // upper-case digit: The caller is expected to validate the input.
  static char Hex2Dec(char c)
  {
    if (c >= '0' && c <= '9')
    {
      return c - '0';
    }

    if (c >= 'a' && c <= 'f')
    {
      return c - 'a' + 10;
    }

    return c - 'A' + 10;
  }

  // Decodes, in place, an URL-encoded string: Each "%XY" sequence
  // where X and Y are hexadecimal digits is replaced by the byte of
  // value 0xXY, and each '+' is replaced by a space. A '%' that is
  // not followed by two hexadecimal digits is kept as such.
  void Toolbox::UrlDecode(std::string& s)
  {
    // http://en.wikipedia.org/wiki/Percent-encoding
    // http://www.w3schools.com/tags/ref_urlencode.asp
    // http://stackoverflow.com/questions/154536/encode-decode-urls-in-c

    if (s.size() == 0)
    {
      return;
    }

    // The decoded string is never longer than the encoded one, so
    // the decoding can be done in place: "target" <= "source"
    size_t source = 0;
    size_t target = 0;

    while (source < s.size())
    {
      // The two characters after '%' must be hexadecimal digits
      // (testing "isalnum()" would let sequences such as "%zz" be
      // decoded as garbage). The cast to "unsigned char" is required
      // by "isxdigit()", that is undefined on negative values.
      if (s[source] == '%' &&
          source + 2 < s.size() &&
          isxdigit(static_cast<unsigned char>(s[source + 1])) &&
          isxdigit(static_cast<unsigned char>(s[source + 2])))
      {
        s[target] = (Hex2Dec(s[source + 1]) << 4) | Hex2Dec(s[source + 2]);
        source += 3;
        target += 1;
      }
      else
      {
        if (s[source] == '+')
          s[target] = ' ';
        else
          s[target] = s[source];

        source++;
        target++;
      }
    }

    s.resize(target);
  }


  // Detects the byte order of the platform, by looking at how the
  // bytes 0x00, 0x01, 0x02, 0x03 stored at increasing addresses are
  // read back as a 32-bit integer. Throws "ErrorCode_NotImplemented"
  // if the order is neither big-endian nor little-endian.
  Endianness Toolbox::DetectEndianness()
  {
    // http://sourceforge.net/p/predef/wiki/Endianness/

    static const uint8_t BYTES[4] = { 0x00, 0x01, 0x02, 0x03 };

    uint32_t value = 0;
    memcpy(&value, BYTES, sizeof(value));

    if (value == 0x00010203)
    {
      return Endianness_Big;
    }
    else if (value == 0x03020100)
    {
      return Endianness_Little;
    }
    else
    {
      throw OrthancException(ErrorCode_NotImplemented);
    }
  }

  // Converts a pattern with wildcards into a regular expression: The
  // special characters of regular expressions are escaped with a
  // backslash, then '?' becomes "." and '*' becomes ".*".
  std::string Toolbox::WildcardToRegularExpression(const std::string& source)
  {
    // TODO - Speed up this with a regular expression

    // The backslash must come first, as escaping the other characters
    // introduces new backslashes
    static const char* const SPECIAL_CHARACTERS = "\\^.$|()[]+/{}";

    std::string result = source;

    // Escape all special characters
    for (const char* c = SPECIAL_CHARACTERS; *c != '\0'; c++)
    {
      const std::string plain(1, *c);
      const std::string escaped = "\\" + plain;
      boost::replace_all(result, plain, escaped);
    }

    // Convert wildcards '*' and '?' to their regex equivalents
    boost::replace_all(result, "?", ".");
    boost::replace_all(result, "*", ".*");

    return result;
  }

  // Splits "value" at each occurrence of "separator", and stores the
  // resulting tokens into "result" (whose previous content is
  // discarded). If "includeEmptyStrings" is "true", every token is
  // kept, so that the result always contains one more item than the
  // number of separators. If it is "false", all the empty tokens are
  // discarded, wherever they are located in the string (leading,
  // trailing, or between two consecutive separators).
  static void TokenizeStringInternal(std::vector<std::string>& result,
                               const std::string& value,
                               char separator,
                               bool includeEmptyStrings)
  {
    size_t countSeparators = 0;
    
    for (size_t i = 0; i < value.size(); i++)
    {
      if (value[i] == separator)
      {
        countSeparators++;
      }
    }
    
    result.clear();
    result.reserve(countSeparators + 1);

    std::string currentItem;

    for (size_t i = 0; i < value.size(); i++)
    {
      if (value[i] == separator)
      {
        // The filtering of empty tokens must also be applied here,
        // not only to the last token
        if (includeEmptyStrings || !currentItem.empty())
        {
          result.push_back(currentItem);
        }

        currentItem.clear();
      }
      else
      {
        currentItem.push_back(value[i]);
      }
    }

    // Last token, that is not followed by a separator
    if (includeEmptyStrings || !currentItem.empty())
    {
      result.push_back(currentItem);
    }
  }


  // Splits "value" at each occurrence of "separator". The empty
  // tokens are requested to be kept.
  void Toolbox::TokenizeString(std::vector<std::string>& result,
                               const std::string& value,
                               char separator)
  {
    const bool includeEmptyStrings = true;
    TokenizeStringInternal(result, value, separator, includeEmptyStrings);
  }


  // Splits "value" at each occurrence of "separator", without
  // requesting the empty tokens, and stores the tokens as a set.
  void Toolbox::SplitString(std::set<std::string>& result,
                            const std::string& value,
                            char separator)
  {
    result.clear();

    std::vector<std::string> tokens;
    TokenizeStringInternal(tokens, value, separator, false);

    result.insert(tokens.begin(), tokens.end());
  }


  // Splits "value" at each occurrence of "separator", without
  // requesting the empty tokens, and stores the tokens as a vector.
  void Toolbox::SplitString(std::vector<std::string>& result,
                            const std::string& value,
                            char separator)
  {
    const bool includeEmptyStrings = false;
    TokenizeStringInternal(result, value, separator, includeEmptyStrings);
  }


  // Concatenates the strings of the set, in the iteration order of
  // the set, inserting "separator" between two consecutive items
  void Toolbox::JoinStrings(std::string& result,
                            const std::set<std::string>& source,
                            const char* separator)
  {
    const std::string joined = boost::algorithm::join(source, separator);
    result = joined;
  }

  // Concatenates the strings of the vector, in order, inserting
  // "separator" between two consecutive items
  void Toolbox::JoinStrings(std::string& result,
                            const std::vector<std::string>& source,
                            const char* separator)
  {
    const std::string joined = boost::algorithm::join(source, separator);
    result = joined;
  }


#if ORTHANC_ENABLE_PUGIXML == 1
  // Implementation of "pugi::xml_writer" that accumulates the data it
  // receives into a "ChunkedBuffer", whose content can then be
  // retrieved as a string using "Flatten()".
  class ChunkedBufferWriter : public pugi::xml_writer
  {
  private:
    ChunkedBuffer buffer_;

  public:
    // Appends the "size" bytes starting at "data" to the buffer.
    // Empty writes are ignored.
    virtual void write(const void *data, size_t size)
    {
      if (size == 0)
      {
        return;
      }

      const char* chunk = reinterpret_cast<const char*>(data);
      buffer_.AddChunk(chunk, size);
    }

    // Forwards to "ChunkedBuffer::Flatten()" to fill the string "s"
    void Flatten(std::string& s)
    {
      buffer_.Flatten(s);
    }
  };


  // Recursively appends the content of the JSON value "source" as
  // children of the XML node "target":
  // - Scalars become a text node ("null", "true" and "false" being
  //   used for the null and Boolean values).
  // - Each item of an array becomes an element named "arrayElement".
  // - Each member of an object becomes an element named after it.
  // Throws "ErrorCode_NotImplemented" on an unknown type of value.
  static void JsonToXmlInternal(pugi::xml_node& target,
                                const Json::Value& source,
                                const std::string& arrayElement)
  {
    // http://jsoncpp.sourceforge.net/value_8h_source.html#l00030

    switch (source.type())
    {
      case Json::nullValue:
      {
        target.append_child(pugi::node_pcdata).set_value("null");
        break;
      }

      case Json::intValue:
      {
        // Use the 64-bit accessor, as in "CopyJsonWithoutComments()":
        // "asInt()" cannot represent values beyond the 32-bit range
        std::string s = boost::lexical_cast<std::string>(source.asInt64());
        target.append_child(pugi::node_pcdata).set_value(s.c_str());
        break;
      }

      case Json::uintValue:
      {
        // Same remark as for signed integers
        std::string s = boost::lexical_cast<std::string>(source.asUInt64());
        target.append_child(pugi::node_pcdata).set_value(s.c_str());
        break;
      }

      case Json::realValue:
      {
        // Use double precision, as in "CopyJsonWithoutComments()":
        // Going through "asFloat()" would drop significant digits
        std::string s = boost::lexical_cast<std::string>(source.asDouble());
        target.append_child(pugi::node_pcdata).set_value(s.c_str());
        break;
      }

      case Json::stringValue:
      {
        target.append_child(pugi::node_pcdata).set_value(source.asString().c_str());
        break;
      }

      case Json::booleanValue:
      {
        target.append_child(pugi::node_pcdata).set_value(source.asBool() ? "true" : "false");
        break;
      }

      case Json::arrayValue:
      {
        for (Json::Value::ArrayIndex i = 0; i < source.size(); i++)
        {
          pugi::xml_node node = target.append_child();
          node.set_name(arrayElement.c_str());
          JsonToXmlInternal(node, source[i], arrayElement);
        }
        break;
      }
        
      case Json::objectValue:
      {
        Json::Value::Members members = source.getMemberNames();

        for (size_t i = 0; i < members.size(); i++)
        {
          pugi::xml_node node = target.append_child();
          node.set_name(members[i].c_str());
          JsonToXmlInternal(node, source[members[i]], arrayElement);          
        }

        break;
      }

      default:
        throw OrthancException(ErrorCode_NotImplemented);
    }
  }


  // Serializes the JSON value "source" as an XML document whose root
  // element is named "rootElement", the items of the JSON arrays
  // being stored in elements named "arrayElement". The document
  // starts with a declaration <?xml version="1.0" encoding="utf-8"?>.
  void Toolbox::JsonToXml(std::string& target,
                          const Json::Value& source,
                          const std::string& rootElement,
                          const std::string& arrayElement)
  {
    pugi::xml_document document;

    // Root element, with the converted JSON content
    pugi::xml_node root = document.append_child(rootElement.c_str());
    JsonToXmlInternal(root, source, arrayElement);

    // XML declaration, inserted before the root element
    pugi::xml_node declaration = document.prepend_child(pugi::node_declaration);
    declaration.append_attribute("version").set_value("1.0");
    declaration.append_attribute("encoding").set_value("utf-8");

    XmlToString(target, document);
  }

  // Writes the XML document into the string "target", as UTF-8, using
  // the default formatting of pugixml and two spaces for indentation
  void Toolbox::XmlToString(std::string& target,
                            const pugi::xml_document& source)
  {
    static const char* const INDENTATION = "  ";

    ChunkedBufferWriter buffer;
    source.save(buffer, INDENTATION, pugi::format_default, pugi::encoding_utf8);
    buffer.Flatten(target);
  }
#endif


  
  /**
   * Tell whether "str", once processed by "StripSpaces()", is a
   * non-empty sequence of decimal digits that is optionally preceded
   * by one minus sign.
   **/
  bool Toolbox::IsInteger(const std::string& str)
  {
    const std::string s = StripSpaces(str);

    // Skip the optional leading minus sign
    size_t pos = (!s.empty() && s[0] == '-') ? 1 : 0;

    if (pos == s.size())
    {
      // Empty string, or a lone minus sign
      return false;
    }

    for (; pos < s.size(); pos++)
    {
      // The cast is mandatory: Giving a negative "char" (i.e. any
      // non-ASCII byte if "char" is signed) to "isdigit()" is an
      // undefined behavior
      if (!isdigit(static_cast<unsigned char>(s[pos])))
      {
        return false;
      }
    }

    return true;
  }


  /**
   * Recursively rebuild "source" into "target". Only the values are
   * copied: Each node of the target is created from scratch out of
   * the scalar content of the corresponding source node. The target
   * is left untouched if the type of the source is unknown.
   **/
  void Toolbox::CopyJsonWithoutComments(Json::Value& target,
                                        const Json::Value& source)
  {
    const Json::ValueType type = source.type();

    if (type == Json::nullValue)
    {
      target = Json::nullValue;
    }
    else if (type == Json::intValue)
    {
      target = source.asInt64();
    }
    else if (type == Json::uintValue)
    {
      target = source.asUInt64();
    }
    else if (type == Json::realValue)
    {
      target = source.asDouble();
    }
    else if (type == Json::stringValue)
    {
      target = source.asString();
    }
    else if (type == Json::booleanValue)
    {
      target = source.asBool();
    }
    else if (type == Json::arrayValue)
    {
      target = Json::arrayValue;

      const Json::Value::ArrayIndex count = source.size();
      for (Json::Value::ArrayIndex i = 0; i < count; i++)
      {
        // Append a placeholder, then fill it in place
        Json::Value& child = target.append(Json::nullValue);
        CopyJsonWithoutComments(child, source[i]);
      }
    }
    else if (type == Json::objectValue)
    {
      target = Json::objectValue;

      const Json::Value::Members names = source.getMemberNames();
      for (size_t i = 0; i < names.size(); i++)
      {
        const std::string& name = names[i];
        CopyJsonWithoutComments(target[name], source[name]);
      }
    }
  }


  /**
   * Tell whether the string "str" begins with "prefix". An empty
   * prefix matches any string.
   **/
  bool Toolbox::StartsWith(const std::string& str,
                           const std::string& prefix)
  {
    // A string that is shorter than the prefix cannot match
    return (str.size() >= prefix.size() &&
            str.compare(0, prefix.size(), prefix) == 0);
  }
  

  // Tell whether "c" is an unreserved character wrt. the URI
  // percent-encoding, i.e. whether it can be copied as such into an
  // URI: ASCII letters, ASCII digits, and "-", "_", ".", "~", "/"
  // https://en.wikipedia.org/wiki/Percent-encoding#Percent-encoding%5Fin%5Fa%5FURI
  static bool IsUnreservedCharacter(char c)
  {
    const bool isLetter = ((c >= 'a' && c <= 'z') ||
                           (c >= 'A' && c <= 'Z'));
    const bool isDigit = (c >= '0' && c <= '9');

    if (isLetter || isDigit)
    {
      return true;
    }

    switch (c)
    {
      case '-':
      case '_':
      case '.':
      case '~':
      case '/':
        return true;

      default:
        return false;
    }
  }

  /**
   * Percent-encode "source" into "target". The unreserved characters
   * (cf. "IsUnreservedCharacter()") are copied as such, whereas any
   * other byte is replaced by "%XY", where "XY" is its value written
   * with two uppercase hexadecimal digits.
   **/
  void Toolbox::UriEncode(std::string& target,
                          const std::string& source)
  {
    static const char* const HEXADECIMAL = "0123456789ABCDEF";

    // First pass: Compute the length of the percent-encoded URI, so
    // that the target is allocated only once
    size_t encodedLength = 0;

    for (size_t i = 0; i < source.size(); i++)
    {
      encodedLength += (IsUnreservedCharacter(source[i]) ? 1 : 3);
    }

    target.clear();
    target.reserve(encodedLength);

    // Second pass: Do the actual encoding
    for (size_t i = 0; i < source.size(); i++)
    {
      const char c = source[i];

      if (IsUnreservedCharacter(c))
      {
        target.push_back(c);
      }
      else
      {
        const uint8_t byte = static_cast<uint8_t>(c);

        target.push_back('%');
        target.push_back(HEXADECIMAL[byte >> 4]);    // High nibble
        target.push_back(HEXADECIMAL[byte & 0x0f]);  // Low nibble
      }
    }
  }


  // Returns "false" if "json" is not an object, or if it has no member
  // "key". Returns "true" if the member exists with the expected type.
  // Throws "ErrorCode_BadParameterType" if the member exists, but with
  // another type.
  static bool HasField(const Json::Value& json,
                       const std::string& key,
                       Json::ValueType expectedType)
  {
    const bool isPresent = (json.type() == Json::objectValue &&
                            json.isMember(key));

    if (!isPresent)
    {
      return false;
    }

    if (json[key].type() != expectedType)
    {
      throw OrthancException(ErrorCode_BadParameterType);
    }

    return true;
  }


  /**
   * Return the string member "key" of the JSON object "json", or
   * "defaultValue" if this member is absent. An exception is thrown
   * by "HasField()" if the member exists without being a string.
   **/
  std::string Toolbox::GetJsonStringField(const Json::Value& json,
                                          const std::string& key,
                                          const std::string& defaultValue)
  {
    if (!HasField(json, key, Json::stringValue))
    {
      return defaultValue;
    }

    return json[key].asString();
  }


  /**
   * Return the Boolean member "key" of the JSON object "json", or
   * "defaultValue" if this member is absent. An exception is thrown
   * by "HasField()" if the member exists without being a Boolean.
   **/
  bool Toolbox::GetJsonBooleanField(const ::Json::Value& json,
                                    const std::string& key,
                                    bool defaultValue)
  {
    if (!HasField(json, key, Json::booleanValue))
    {
      return defaultValue;
    }

    return json[key].asBool();
  }


  /**
   * Return the integer member "key" of the JSON object "json", or
   * "defaultValue" if this member is absent. An exception is thrown
   * by "HasField()" if the member exists without having the
   * "Json::intValue" type.
   **/
  int Toolbox::GetJsonIntegerField(const ::Json::Value& json,
                                   const std::string& key,
                                   int defaultValue)
  {
    if (!HasField(json, key, Json::intValue))
    {
      return defaultValue;
    }

    return json[key].asInt();
  }


  /**
   * Return the member "key" of the JSON object "json" as an unsigned
   * integer, or "defaultValue" if this member is absent.
   *
   * The member must have the "Json::intValue" type (otherwise,
   * "HasField()" throws "ErrorCode_BadParameterType"), and it must
   * not be negative (otherwise, "ErrorCode_ParameterOutOfRange" is
   * thrown).
   *
   * The default value is returned as such, without transiting through
   * a signed "int": A default value above "INT_MAX" would otherwise
   * be seen as a negative number and be rejected.
   **/
  unsigned int Toolbox::GetJsonUnsignedIntegerField(const ::Json::Value& json,
                                                    const std::string& key,
                                                    unsigned int defaultValue)
  {
    if (!HasField(json, key, Json::intValue))
    {
      return defaultValue;
    }

    const int v = json[key].asInt();

    if (v < 0)
    {
      throw OrthancException(ErrorCode_ParameterOutOfRange);
    }
    else
    {
      return static_cast<unsigned int>(v);
    }
  }


  /**
   * Tell whether "str" has the layout of an UUID: 36 characters, with
   * dashes at the offsets 8, 13, 18 and 23, and alphanumeric
   * characters (as reported by "isalnum()") everywhere else.
   **/
  bool Toolbox::IsUuid(const std::string& str)
  {
    if (str.size() != 36)
    {
      return false;
    }

    for (size_t i = 0; i < str.size(); i++)
    {
      const bool isDashPosition = (i == 8 || i == 13 || i == 18 || i == 23);

      if (isDashPosition)
      {
        if (str[i] != '-')
        {
          return false;
        }
      }
      else
      {
        // The cast is mandatory: Giving a negative "char" (i.e. any
        // non-ASCII byte if "char" is signed) to "isalnum()" is an
        // undefined behavior
        if (!isalnum(static_cast<unsigned char>(str[i])))
        {
          return false;
        }
      }
    }

    return true;
  }


  /**
   * Tell whether "str" is an UUID (cf. "IsUuid()"), or begins with an
   * UUID that is immediately followed by a whitespace character.
   **/
  bool Toolbox::StartsWithUuid(const std::string& str)
  {
    static const size_t UUID_LENGTH = 36;

    if (str.size() < UUID_LENGTH)
    {
      return false;
    }

    // If there are more characters after the candidate UUID, the first
    // one must be a whitespace. The cast is mandatory: Giving a
    // negative "char" (i.e. any non-ASCII byte if "char" is signed) to
    // "isspace()" is an undefined behavior
    if (str.size() > UUID_LENGTH &&
        !isspace(static_cast<unsigned char>(str[UUID_LENGTH])))
    {
      return false;
    }

    return IsUuid(str.substr(0, UUID_LENGTH));
  }


#if ORTHANC_ENABLE_LOCALE == 1
  // Locale that is applied by "Toolbox::ToUpperCaseWithAccents()". It
  // is set by "SetGlobalLocale()" and released by
  // "Toolbox::FinalizeGlobalLocale()".
  static std::unique_ptr<std::locale>  globalLocale_;

  // Replace the global locale by the locale whose name is "locale", or
  // by a default-constructed "std::locale" if "locale" is NULL.
  // Returns "true" iff a locale is available once the call is over: If
  // the locale cannot be constructed, the error is logged and the
  // global locale is cleared.
  static bool SetGlobalLocale(const char* locale)
  {
    try
    {
      if (locale != NULL)
      {
        LOG(INFO) << "Using locale: \"" << locale << "\" for case-insensitive comparison of strings";
        globalLocale_.reset(new std::locale(locale));
      }
      else
      {
        LOG(WARNING) << "Falling back to system-wide default locale";
        globalLocale_.reset(new std::locale());
      }
    }
    catch (const std::runtime_error& e)
    {
      const std::string name(locale != NULL ? locale : "(null)");

      LOG(ERROR) << "Cannot set globale locale to "
                 << name
                 << ": " << e.what();
      globalLocale_.reset(NULL);
    }

    return (globalLocale_.get() != NULL);
  }

  
  // Set up the ICU common data out of the resource that is embedded
  // in the binaries. This function does nothing unless both
  // ORTHANC_STATIC_ICU and ORTHANC_ENABLE_ICU equal 1, and it only
  // does its work as long as "globalIcuData_" is empty.
  //
  // Throws "ErrorCode_InternalError" if the embedded data cannot be
  // decoded, cannot be aligned, or is rejected by ICU, and
  // "ErrorCode_NotImplemented" on platforms that are not little-endian.
  //
  // NOTE(review): "globalIcuData_" is filled before the checks below
  // are run, so a call that follows a failed call looks like it would
  // skip the whole initialization - to be confirmed.
  static void InitializeIcu()
  {
#if (ORTHANC_STATIC_ICU == 1) && (ORTHANC_ENABLE_ICU == 1)
    if (globalIcuData_.empty())
    {
      LOG(INFO) << "Setting up the ICU common data";

      // The ICU data is stored as a gzip-compressed embedded resource
      GzipCompressor compressor;
      compressor.Uncompress(globalIcuData_,
                            FrameworkResources::GetFileResourceBuffer(FrameworkResources::LIBICU_DATA),
                            FrameworkResources::GetFileResourceSize(FrameworkResources::LIBICU_DATA));

      // Check the integrity of the uncompressed data against the
      // MD5 that is provided by the build (ORTHANC_ICU_DATA_MD5)
      std::string md5;
      Toolbox::ComputeMD5(md5, globalIcuData_);

      if (md5 != ORTHANC_ICU_DATA_MD5 ||
          globalIcuData_.empty())
      {
        throw OrthancException(ErrorCode_InternalError,
                               "Cannot decode the ICU common data");
      }

      // "ICU data is designed to be 16-aligned"
      // http://userguide.icu-project.org/icudata#TOC-Alignment

      {
        static const size_t ALIGN = 16;

        UErrorCode status = U_ZERO_ERROR;

        if (reinterpret_cast<intptr_t>(globalIcuData_.c_str()) % ALIGN == 0)
        {
          // Data is already properly aligned
          udata_setCommonData(globalIcuData_.c_str(), &status);  
        }
        else
        {
          // Allocate a larger buffer, so that it necessarily contains
          // an aligned address that is followed by enough room for
          // the whole ICU data
          std::string aligned;
          aligned.resize(globalIcuData_.size() + ALIGN - 1);

          // Number of bytes to skip from the beginning of "aligned"
          // in order to reach the first aligned address
          intptr_t offset = reinterpret_cast<intptr_t>(aligned.c_str()) % ALIGN;
          if (offset != 0)
          {
            offset = ALIGN - offset;
          }

          if (offset + globalIcuData_.size() > aligned.size())
          {
            throw OrthancException(ErrorCode_InternalError, "Cannot align on 16-bytes boundary");
          }

          // We don't use "memcpy()", as it expects its data to be aligned
          const uint8_t* p = reinterpret_cast<uint8_t*>(&globalIcuData_[0]);
          uint8_t* q = reinterpret_cast<uint8_t*>(&aligned[0]) + offset;
          for (size_t i = 0; i < globalIcuData_.size(); i++, p++, q++)
          {
            *q = *p;
          }
        
          // From now on, "globalIcuData_" holds the padded buffer, and
          // the actual ICU data starts at "offset" in this buffer
          globalIcuData_.swap(aligned);

          const uint8_t* data = reinterpret_cast<const uint8_t*>(globalIcuData_.c_str()) + offset;
        
          // Sanity check, once the buffers have been swapped
          if (reinterpret_cast<intptr_t>(data) % ALIGN != 0)
          {
            throw OrthancException(ErrorCode_InternalError, "Cannot align on 16-bytes boundary");
          }
          else
          {
            udata_setCommonData(data, &status);  
          }
        }

        // Any status that differs from "U_ZERO_ERROR" is handled as a failure
        if (status != U_ZERO_ERROR)
        {
          throw OrthancException(ErrorCode_InternalError, "Cannot initialize ICU");
        }
      }

      if (Toolbox::DetectEndianness() != Endianness_Little)
      {
        // TODO - The data table must be swapped (uint16_t)
        throw OrthancException(ErrorCode_NotImplemented);
      }

      // "First-use of ICU from a single thread before the
      // multi-threaded use of ICU begins", to make sure everything is
      // properly initialized (should not be mandatory in our
      // case). We let boost handle calls to "u_init()" and "u_cleanup()".
      // http://userguide.icu-project.org/design#TOC-ICU-Initialization-and-Termination
      uloc_getDefault();
    }
#endif
  }
  
  /**
   * Initialize the global locale that is used by
   * "ToUpperCaseWithAccents()", after having called "InitializeIcu()".
   *
   * If "locale" is NULL, a platform-specific default locale is
   * selected. If the selected locale cannot be set, the function
   * falls back to "SetGlobalLocale(NULL)", and throws
   * "ErrorCode_InternalError" if this fallback fails as well.
   *
   * On UNIX-like systems (unless ORTHANC_SANDBOXED is 1),
   * "ErrorCode_InternalError" is also thrown if "/etc/localtime" is
   * not an existing file.
   **/
  void Toolbox::InitializeGlobalLocale(const char* locale)
  {
    InitializeIcu();

#if defined(__unix__) && ORTHANC_SANDBOXED != 1
    static const char* LOCALTIME = "/etc/localtime";
    
    if (!SystemToolbox::IsExistingFile(LOCALTIME))
    {
      // Check out file
      // "boost_1_69_0/libs/locale/src/icu/time_zone.cpp": Direct
      // access is made to this file if ICU is not used. Crash arises
      // in Boost if the file is a symbolic link to a non-existing
      // file (such as in Ubuntu 16.04 base Docker image).
      throw OrthancException(
        ErrorCode_InternalError,
        "On UNIX-like systems, the file " + std::string(LOCALTIME) +
        " must be present on the filesystem (install \"tzdata\" package on Debian)");
    }
#endif

    // Whether the requested (or default) locale could be set
    bool ok;
    
    if (locale == NULL)
    {
      // Make Orthanc use English, United States locale
      // Linux: use "en_US.UTF-8"
      // Windows: use ""
      // Wine: use NULL
    
#if defined(__MINGW32__)
      // Visibly, there is no support of locales in MinGW yet
      // http://mingw.5.n7.nabble.com/How-to-use-std-locale-global-with-MinGW-correct-td33048.html
      static const char* DEFAULT_LOCALE = NULL;
#elif defined(_WIN32)
      // For Windows: use default locale (using "en_US" does not work)
      static const char* DEFAULT_LOCALE = "";
#else
      // For Linux & cie
      static const char* DEFAULT_LOCALE = "en_US.UTF-8";
#endif

      ok = SetGlobalLocale(DEFAULT_LOCALE);

#if defined(__MINGW32__)
      LOG(WARNING) << "This is a MinGW build, case-insensitive comparison of "
                   << "strings with accents will not work outside of Wine";
#endif
    }
    else
    {
      ok = SetGlobalLocale(locale);
    }

    // Last resort: Try the default-constructed locale
    if (!ok &&
        !SetGlobalLocale(NULL))
    {
      throw OrthancException(ErrorCode_InternalError,
                             "Cannot initialize global locale");
    }

  }


  /**
   * Release the global locale. "ToUpperCaseWithAccents()" then throws
   * "ErrorCode_BadSequenceOfCalls" until the locale is initialized again.
   **/
  void Toolbox::FinalizeGlobalLocale()
  {
    globalLocale_.reset();
  }


  /**
   * Convert the UTF-8 string "source" to upper case using the global
   * locale, and return the result as an UTF-8 string.
   *
   * Throws "ErrorCode_BadSequenceOfCalls" if no global locale is set
   * (or, in static ICU builds, if the ICU data is not loaded), and
   * "ErrorCode_NotImplemented" if ORTHANC_STATIC_ICU is 1 while
   * ORTHANC_ENABLE_ICU is 0.
   **/
  std::string Toolbox::ToUpperCaseWithAccents(const std::string& source)
  {
    bool error = (globalLocale_.get() == NULL);

#if ORTHANC_STATIC_ICU == 1
#  if ORTHANC_ENABLE_ICU == 0
    throw OrthancException(ErrorCode_NotImplemented, "ICU is disabled for this target");
#  else
    // In static builds, the ICU data must have been loaded as well
    if (globalIcuData_.empty())
    {
      error = true;
    }
#  endif
#endif
    
    if (error)
    {
      throw OrthancException(ErrorCode_BadSequenceOfCalls,
                             "No global locale was set, call Toolbox::InitializeGlobalLocale()");
    }

    /**
     * A few notes about locales:
     *
     * (1) We don't use "case folding":
     * http://www.boost.org/doc/libs/1_64_0/libs/locale/doc/html/conversions.html
     *
     * Characters are made uppercase one by one. This is because, in
     * static builds, we are using iconv, which is visibly not
     * supported correctly (TODO: Understand why). Case folding seems
     * to be working correctly if using the default backend under
     * Linux (ICU or POSIX?). If one wishes to use case folding, one
     * would use:
     *
     *   boost::locale::generator gen;
     *   std::locale::global(gen(DEFAULT_LOCALE));
     *   return boost::locale::to_upper(source);
     *
     * (2) The function "boost::algorithm::to_upper_copy" does not
     * make use of the "std::locale::global()". We therefore create a
     * global variable "globalLocale_".
     * 
     * (3) The variant of "boost::algorithm::to_upper_copy()" that
     * uses std::string does not work properly. We need to apply it
     * on wide strings (std::wstring). This explains the two calls to
     * "utf_to_utf" in order to convert to/from std::wstring.
     **/

    // UTF-8 => wide string => upper case => UTF-8. The "skip" method
    // is given to both conversions.
    std::wstring w = boost::locale::conv::utf_to_utf<wchar_t>(source, boost::locale::conv::skip);
    w = boost::algorithm::to_upper_copy<std::wstring>(w, *globalLocale_);
    return boost::locale::conv::utf_to_utf<char>(w, boost::locale::conv::skip);
  }
#endif



#if ORTHANC_ENABLE_SSL == 0
  /**
   * OpenSSL is disabled
   **/
  void Toolbox::InitializeOpenSsl()
  {
    // Variant for the builds where ORTHANC_ENABLE_SSL is 0: There is
    // nothing to initialize, the information is simply logged
    LOG(INFO) << "OpenSSL is disabled";
  }
  
  void Toolbox::FinalizeOpenSsl()
  {
    // Variant for the builds where ORTHANC_ENABLE_SSL is 0: There is
    // nothing to release
  }  


#elif (ORTHANC_ENABLE_SSL == 1 &&               \
       OPENSSL_VERSION_NUMBER < 0x10100000L) 
  /**
   * OpenSSL < 1.1.0
   **/
  void Toolbox::InitializeOpenSsl()
  {
    // Variant for OpenSSL < 1.1.0: The library is initialized by
    // explicit calls, whose counterpart is "FinalizeOpenSsl()"
    LOG(INFO) << "OpenSSL version: " << OPENSSL_VERSION_TEXT;
    
    // https://wiki.openssl.org/index.php/Library_Initialization
    SSL_library_init();
    SSL_load_error_strings();
    OpenSSL_add_all_algorithms();
    ERR_load_crypto_strings();
  }

  void Toolbox::FinalizeOpenSsl()
  {
    // Variant for OpenSSL < 1.1.0: Explicit cleanup of the library,
    // that is the counterpart of "InitializeOpenSsl()"

    // Finalize OpenSSL
    // https://wiki.openssl.org/index.php/Library_Initialization#Cleanup

    // NOTE(review): This call is only compiled if "FIPS_mode_set" is
    // a preprocessor macro - to be confirmed against the OpenSSL headers
#ifdef FIPS_mode_set
    FIPS_mode_set(0);
#endif

    // Skipped if OpenSSL was built without the support of engines
#if !defined(OPENSSL_NO_ENGINE)
    ENGINE_cleanup();
#endif
    
    CONF_modules_unload(1);
    EVP_cleanup();
    CRYPTO_cleanup_all_ex_data();
    ERR_remove_state(0);
    ERR_free_strings();
  }

  
#elif (ORTHANC_ENABLE_SSL == 1 &&               \
       OPENSSL_VERSION_NUMBER >= 0x10100000L) 
  /**
   * OpenSSL >= 1.1.0. In this case, the initialization is
   * automatically done by the functions of OpenSSL.
   * https://wiki.openssl.org/index.php/Library_Initialization
   **/
  void Toolbox::InitializeOpenSsl()
  {
    // Variant for OpenSSL >= 1.1.0: No explicit initialization call
    // is made, only the version of the library is logged
    LOG(INFO) << "OpenSSL version: " << OPENSSL_VERSION_TEXT;
  }

  void Toolbox::FinalizeOpenSsl()
  {
    // Variant for OpenSSL >= 1.1.0: No explicit cleanup call is made
  }

#else
#  error "Support your platform here"
#endif
  


  /**
   * Generate a new UUID and return its textual representation, using
   * the RPC API on Windows and the "uuid" library elsewhere.
   **/
  std::string Toolbox::GenerateUuid()
  {
#ifdef _WIN32
    UUID uuid;
    UuidCreate(&uuid);

    // The string is allocated by the RPC runtime: It must be copied,
    // then released by "RpcStringFreeA()"
    unsigned char* buffer;
    UuidToStringA(&uuid, &buffer);

    const std::string result(reinterpret_cast<char*>(buffer));

    RpcStringFreeA(&buffer);
    return result;
#else
    uuid_t uuid;
    uuid_generate_random(uuid);

    // 36 characters, plus the null terminator
    char buffer[37];
    uuid_unparse(uuid, buffer);

    return std::string(buffer);
#endif
  }


  namespace
  {
    // Anonymous namespace to avoid clashes between compilation modules

    /**
     * Formatter for "boost::regex_replace()", that replaces one
     * "${key}", "${key:-default}", "${key:-"default"}" or
     * "${key:-'default'}" expression by the value that is associated
     * with "key" in the dictionary. If the key is absent from the
     * dictionary, the default value is written, which is the empty
     * string for "${key}".
     *
     * The capture groups are those of the regular expression that is
     * built by "Toolbox::SubstituteVariables()". The alternative that
     * was selected is identified using "sub_match::matched", not by
     * testing whether the captured text is empty: An empty variable
     * name (as in "${}" or "${:-default}") is then handled as a key
     * that is absent from the dictionary, instead of raising an
     * internal error.
     *
     * Only a reference to the dictionary is stored: The dictionary
     * must outlive the formatter.
     **/
    class VariableFormatter
    {
    public:
      typedef std::map<std::string, std::string>   Dictionary;

    private:
      const Dictionary& dictionary_;

    public:
      explicit VariableFormatter(const Dictionary& dictionary) :
        dictionary_(dictionary)
      {
      }
  
      template<typename Out>
      Out operator()(const boost::smatch& what,
                     Out out) const
      {
        std::string key;
        std::string defaultValue;  // Stays empty for "${key}"

        if (what[1].matched)
        {
          // ${what[1]}: Variable without a default value
          key = what[1].str();
        }
        else if (what[2].matched)
        {
          // ${what[2]:-what[3]}
          key = what[2].str();
          defaultValue = what[3].str();
        }
        else if (what[4].matched)
        {
          // ${what[4]:-"what[5]"}
          key = what[4].str();
          defaultValue = what[5].str();
        }
        else if (what[6].matched)
        {
          // ${what[6]:-'what[7]'}
          key = what[6].str();
          defaultValue = what[7].str();
        }
        else
        {
          // Should never happen: One alternative must have matched
          throw OrthancException(ErrorCode_InternalError);
        }

        Dictionary::const_iterator found = dictionary_.find(key);
    
        if (found == dictionary_.end())
        {
          return std::copy(defaultValue.begin(), defaultValue.end(), out);
        }
        else
        {
          const std::string& value = found->second;
          return std::copy(value.begin(), value.end(), out);
        }
      }
    };
  }

  
  /**
   * Return a copy of "source" in which the "${key}",
   * "${key:-default}", "${key:-"default"}" and "${key:-'default'}"
   * expressions are replaced by "VariableFormatter", according to the
   * content of "dictionary".
   **/
  std::string Toolbox::SubstituteVariables(const std::string& source,
                                           const std::map<std::string, std::string>& dictionary)
  {
    static const char* const EXPRESSION =
      "\\$\\{([^:]*?)\\}|"                 // ${what[1]}
      "\\$\\{([^:]*?):-([^'\"]*?)\\}|"     // ${what[2]:-what[3]}
      "\\$\\{([^:]*?):-\"([^\"]*?)\"\\}|"  // ${what[4]:-"what[5]"}
      "\\$\\{([^:]*?):-'([^']*?)'\\}";     // ${what[6]:-'what[7]'}

    const boost::regex pattern(EXPRESSION);

    return boost::regex_replace(source, pattern, VariableFormatter(dictionary));
  }


  namespace Iso2022
  {
    /**
       Returns whether the string s contains a single-byte control message
       at index i (locking shift zero or locking shift one)
    **/
    static inline bool IsControlMessage1(const std::string& s, size_t i)
    {
      if (i >= s.size())
      {
        return false;
      }

      const char c = s[i];
      return
        (c == '\x0f') || // Locking shift zero
        (c == '\x0e');   // Locking shift one
    }

    /**
       Returns whether the string s contains a double-byte control message
       at index i, i.e. the ESC byte (0x1b) followed by one of the shift
       codes listed below
    **/
    static inline bool IsControlMessage2(const std::string& s, size_t i)
    {
      if (i + 1 >= s.size() ||
          s[i] != '\x1b')
      {
        return false;
      }

      switch (s[i + 1])
      {
        case '\x6e':  // Locking shift two
        case '\x6f':  // Locking shift three
        case '\x4e':  // Single shift two (alt)
        case '\x4f':  // Single shift three (alt)
        case '\x7c':  // Locking shift three right
        case '\x7d':  // Locking shift two right
        case '\x7e':  // Locking shift one right
          return true;

        default:
          return false;
      }
    }

    /**
       Returns whether the string s contains a triple-byte control message
       at index i, i.e. "0x8e ESC 0x4e" or "0x8f ESC 0x4f"
    **/
    static inline bool IsControlMessage3(const std::string& s, size_t i)
    {
      if (i + 2 >= s.size())
      {
        return false;
      }

      const char c1 = s[i];
      const char c2 = s[i + 1];
      const char c3 = s[i + 2];

      return ((c1 == '\x8e' && c2 == '\x1b' && c3 == '\x4e') ||
              (c1 == '\x8f' && c2 == '\x1b' && c3 == '\x4f'));
    }

    /**
       This function returns true if the index i in the supplied string s:
       - is valid
       - contains the c character
       This function returns false otherwise.
    **/
    static inline bool TestCharValue(
      const std::string& s, size_t i, char c)
    {
      return (i < s.size() && s[i] == c);
    }

    /**
       This function returns true if the index i in the supplied string s:
       - is valid
       - has a c character that is >= cMin and <= cMax (included)
       This function returns false otherwise.
    **/
    static inline bool TestCharRange(
      const std::string& s, size_t i, char cMin, char cMax)
    {
      return (i < s.size() && s[i] >= cMin && s[i] <= cMax);
    }

    /**
       This function returns the total length in bytes of the escape sequence
       located in string s at index i, if there is one, or 0 otherwise.

       An escape sequence is made of the ESC byte (0x1b), followed by at
       least one byte between 0x20 and 0x2f, followed by one termination
       byte between 0x30 and 0x7f.
    **/
    static inline size_t GetEscapeSequenceLength(const std::string& s, size_t i)
    {
      if (!TestCharValue(s, i, '\x1b'))
      {
        return 0;
      }

      // advance reading cursor while we are in a sequence
      size_t j = i + 1;
      while (TestCharRange(s, j, '\x20', '\x2f'))
      {
        ++j;
      }

      // check there is a valid termination byte AND we're long enough (there
      // must be at least one byte between 0x20 and 0x2f)
      if (TestCharRange(s, j, '\x30', '\x7f') && (j - i) >= 2)
      {
        return j - i + 1;
      }
      else
      {
        return 0;
      }
    }
  }

  

  /**
     This function will strip all ISO/IEC 2022 control codes and escape
     sequences.
     Please see https://en.wikipedia.org/wiki/ISO/IEC_2022 (as of 2019-02)
     for a list of those.

     Please note that this operation is potentially destructive, because
     it removes the character set information from the byte stream.

     However, in the case where the encoding is unique, then suppressing
     the escape sequences allows one to provide us with a clean string after
     conversion to utf-8 with boost.
  **/
  void Toolbox::RemoveIso2022EscapeSequences(std::string& dest, const std::string& src)
  {
    // The output cannot be longer than the source string
    dest.clear();
    if (dest.capacity() < src.size())
    {
      dest.reserve(src.size());
    }

    size_t pos = 0;

    while (pos < src.size())
    {
      // Number of bytes of the control message or escape sequence
      // that starts at "pos", or 0 if there is none
      size_t skipped;

      if (Iso2022::IsControlMessage1(src, pos))
      {
        skipped = 1;
      }
      else if (Iso2022::IsControlMessage2(src, pos))
      {
        skipped = 2;
      }
      else if (Iso2022::IsControlMessage3(src, pos))
      {
        skipped = 3;
      }
      else
      {
        skipped = Iso2022::GetEscapeSequenceLength(src, pos);
      }

      if (skipped == 0)
      {
        // Regular character: copy it, then move to the next one
        dest.push_back(src[pos]);
        pos += 1;
      }
      else
      {
        // Jump over the message without copying it
        pos += skipped;
      }
    }
  }


  /**
   * Decode the UTF-8 character that starts at offset "position" of
   * the string "utf8". On success, "unicode" receives the decoded
   * code point, and "length" receives the number of bytes of its
   * encoding (between 1 and 4).
   *
   * Throws "ErrorCode_ParameterOutOfRange" if "position" is not
   * inside the string, and "ErrorCode_BadFileFormat" if the bytes do
   * not follow the layout of UTF-8 (bad leading byte, truncated
   * sequence, or bad continuation byte). The outputs are left
   * untouched if an exception is thrown.
   **/
  void Toolbox::Utf8ToUnicodeCharacter(uint32_t& unicode,
                                       size_t& length,
                                       const std::string& utf8,
                                       size_t position)
  {
    // https://en.wikipedia.org/wiki/UTF-8

    // A continuation byte looks like "10xxxxxx", with 6 bits of payload
    static const uint8_t CONTINUATION_MASK = 0xc0;
    static const uint8_t CONTINUATION_TAG = 0x80;
    static const uint8_t CONTINUATION_PAYLOAD = 0x3f;

    if (position >= utf8.size())
    {
      throw OrthancException(ErrorCode_ParameterOutOfRange);
    }

    assert(sizeof(uint8_t) == sizeof(char));
    const uint8_t* bytes = reinterpret_cast<const uint8_t*>(utf8.c_str()) + position;
    const uint8_t lead = bytes[0];

    // The leading byte provides both the number of bytes in the
    // sequence, and the most significant bits of the code point
    size_t count;
    uint32_t value;

    if ((lead & 0x80) == 0x00)       // 0xxxxxxx
    {
      count = 1;
      value = lead & 0x7f;
    }
    else if ((lead & 0xe0) == 0xc0)  // 110xxxxx
    {
      count = 2;
      value = lead & 0x1f;
    }
    else if ((lead & 0xf0) == 0xe0)  // 1110xxxx
    {
      count = 3;
      value = lead & 0x0f;
    }
    else if ((lead & 0xf8) == 0xf0)  // 11110xxx
    {
      count = 4;
      value = lead & 0x07;
    }
    else
    {
      // Not a valid leading byte
      count = 0;
      value = 0;
    }

    // The whole sequence must fit inside the string
    bool valid = (count != 0 &&
                  position + count - 1 < utf8.size());

    for (size_t i = 1; valid && i < count; i++)
    {
      if ((bytes[i] & CONTINUATION_MASK) == CONTINUATION_TAG)
      {
        value = (value << 6) | static_cast<uint32_t>(bytes[i] & CONTINUATION_PAYLOAD);
      }
      else
      {
        valid = false;
      }
    }

    if (!valid)
    {
      // This is not a valid UTF-8 encoding
      throw OrthancException(ErrorCode_BadFileFormat, "Invalid UTF-8 string");
    }

    length = count;
    unicode = value;
  }


  /**
   * Convert an hexadecimal number of arbitrary length (without any
   * prefix, lower and upper cases being both accepted) into its
   * decimal representation. The result has no leading zero, and it
   * is "0" if the value is zero, including if "hex" is empty.
   *
   * Throws "ErrorCode_ParameterOutOfRange" if "hex" contains a
   * character that is not an hexadecimal digit.
   **/
  std::string Toolbox::LargeHexadecimalToDecimal(const std::string& hex)
  {
    /**
     * NB: Focus of the code below is *not* efficiency, but
     * readability!
     **/
    
    for (size_t i = 0; i < hex.size(); i++)
    {
      const char c = hex[i];
      if (!((c >= 'A' && c <= 'F') ||
            (c >= 'a' && c <= 'f') ||
            (c >= '0' && c <= '9')))
      {
        throw OrthancException(ErrorCode_ParameterOutOfRange,
                               "Not an hexadecimal number");
      }
    }
    
    // Decimal digits of the number read so far, the least significant
    // digit being stored first
    std::vector<uint8_t> decimal;
    decimal.push_back(0);

    for (size_t i = 0; i < hex.size(); i++)
    {
      // Compute "decimal = decimal * 16 + hexDigit" using the
      // schoolbook method: "carry" starts as the incoming hexadecimal
      // digit, then propagates from one decimal digit to the next
      uint8_t carry = static_cast<uint8_t>(Hex2Dec(hex[i]));
      assert(carry <= 15);

      for (size_t j = 0; j < decimal.size(); j++)
      {
        const uint8_t val = static_cast<uint8_t>(decimal[j]) * 16 + carry;  // Maximum: 9 * 16 + 15
        assert(val <= 159 /* == 9 * 16 + 15 */);
      
        decimal[j] = val % 10;
        carry = val / 10;
        assert(carry <= 15 /* == 159 / 10 */);
      }

      // The number gets new most significant digits
      while (carry > 0)
      {
        decimal.push_back(carry % 10);
        carry /= 10;
      }
    }

    /**
     * There is no leading zero to remove: A most significant digit is
     * only appended out of a non-zero carry, and multiplying by 16
     * cannot bring it back to zero without producing a new carry. The
     * only exception is the initial digit, which gives "0" for the
     * value zero.
     **/

    std::string s;
    s.reserve(decimal.size());

    // Write the digits starting with the most significant one
    for (size_t i = decimal.size(); i > 0; i--)
    {
      s.push_back(decimal[i - 1] + '0');
    }

    return s;
  }


  /**
   * Generate a new DICOM unique identifier that is derived from a
   * random UUID, i.e. "2.25." followed by the decimal value of the
   * UUID.
   **/
  std::string Toolbox::GenerateDicomPrivateUniqueIdentifier()
  {
    /**
     * REFERENCE: "Creating a Privately Defined Unique Identifier
     * (Informative)" / "UUID Derived UID"
     * http://dicom.nema.org/medical/dicom/2019a/output/chtml/part05/sect_B.2.html
     * https://stackoverflow.com/a/46316162/881731
     **/

    const std::string uuid = GenerateUuid();
    assert(IsUuid(uuid) && uuid.size() == 36);

    /**
     * Once the four dashes ("-") are removed from the 36-character
     * UUID, we get a large hexadecimal number with 32 digits, that
     * lies in the [0,16^32[ = [0,256^16[ range. This number has at
     * most 39 decimal digits, as can be seen in Python:
     *
     * # python -c 'import math; print(math.log(16**32)/math.log(10))'
     * 38.531839445
     *
     * We now convert this large hexadecimal number to a decimal
     * number with up to 39 digits, without leading zeros, then we
     * prefix it with "2.25."
     **/

    // Concatenate the five groups of hexadecimal digits, skipping the
    // dashes that are located at offsets 8, 13, 18 and 23
    std::string hex;
    hex.reserve(32);
    hex.append(uuid, 0, 8);
    hex.append(uuid, 9, 4);
    hex.append(uuid, 14, 4);
    hex.append(uuid, 19, 4);
    hex.append(uuid, 24, 12);
    assert(hex.size() == 32);

    return "2.25." + LargeHexadecimalToDecimal(hex);
  }


  /**
   * Convert a JSON object whose members are objects with the "Type",
   * "Name" and "Value" fields, into a flat JSON object that
   * associates each member with its value. The members of the target
   * are indexed by the "Name" field (DicomToJsonFormat_Human) or by
   * the key of the source member (DicomToJsonFormat_Short).
   *
   * The "String" type gives a string, the "TooLong", "Null" and
   * "Binary" types give a null value, and the "Sequence" type gives
   * an array whose items are simplified recursively.
   *
   * Throws "ErrorCode_BadFileFormat" if "source" is not an object,
   * and "ErrorCode_ParameterOutOfRange" if "format" is another format
   * while the source has at least one member.
   **/
  void Toolbox::SimplifyDicomAsJson(Json::Value& target,
                                    const Json::Value& source,
                                    DicomToJsonFormat format)
  {
    if (!source.isObject())
    {
      throw OrthancException(ErrorCode_BadFileFormat);
    }

    target = Json::objectValue;

    const Json::Value::Members tags = source.getMemberNames();

    for (size_t i = 0; i < tags.size(); i++)
    {
      const std::string& tag = tags[i];
      const Json::Value& element = source[tag];
      const std::string type = element["Type"].asString();

      // Select the key of the member in the target
      std::string name;

      if (format == DicomToJsonFormat_Human)
      {
        name = element["Name"].asString();
      }
      else if (format == DicomToJsonFormat_Short)
      {
        name = tag;
      }
      else
      {
        throw OrthancException(ErrorCode_ParameterOutOfRange);
      }

      if (type == "Sequence")
      {
        const Json::Value& items = element["Value"];
        assert(items.isArray());

        // Simplify each item of the sequence
        Json::Value children = Json::arrayValue;

        for (Json::Value::ArrayIndex j = 0; j < items.size(); j++)
        {
          Json::Value child;
          SimplifyDicomAsJson(child, items[j], format);
          children.append(child);
        }

        target[name] = children;
      }
      else if (type == "String")
      {
        target[name] = element["Value"].asString();
      }
      else if (type == "TooLong" ||
               type == "Null" ||
               type == "Binary")
      {
        // There is no value to report
        target[name] = Json::nullValue;
      }
      else
      {
        // Unknown type
        assert(0);
      }
    }
  }


  // Parse the "size" bytes starting at "buffer" as JSON into "target",
  // using either the deprecated or the current API of JsonCpp. Returns
  // "true" iff the parsing succeeds. With the current API, the parsing
  // errors are logged.
  static bool ReadJsonInternal(Json::Value& target,
                               const void* buffer,
                               size_t size,
                               bool collectComments)
  {
    const char* const begin = reinterpret_cast<const char*>(buffer);
    const char* const end = begin + size;

#if JSONCPP_USE_DEPRECATED == 1
    Json::Reader reader;
    return reader.parse(begin, end, target, collectComments);
#else
    Json::CharReaderBuilder builder;
    builder.settings_["collectComments"] = collectComments;
    
    const std::unique_ptr<Json::CharReader> reader(builder.newCharReader());
    assert(reader.get() != NULL);
    
    JSONCPP_STRING err;
    const bool success = reader->parse(begin, end, &target, &err);

    if (!success)
    {
      LOG(ERROR) << "Cannot parse JSON: " << err;
    }

    return success;
#endif
  }


  /**
   * Parse a JSON document stored in a string, with comments
   * collection enabled. An empty string is forwarded as a NULL
   * buffer of size zero. Returns "true" iff the parsing succeeded.
   **/
  bool Toolbox::ReadJson(Json::Value& target,
                         const std::string& source)
  {
    const char* content = NULL;

    if (!source.empty())
    {
      content = source.c_str();
    }

    return ReadJson(target, content, source.size());
  }
  

  /**
   * Parse the "size" bytes located at "buffer" as a JSON document,
   * asking JsonCpp to collect the comments. Returns "true" iff the
   * parsing succeeded.
   **/
  bool Toolbox::ReadJson(Json::Value& target,
                         const void* buffer,
                         size_t size)
  {
    const bool collectComments = true;
    return ReadJsonInternal(target, buffer, size, collectComments);
  }
  

  /**
   * Parse a JSON document stored in a string, without collecting the
   * comments. An empty string is forwarded as a NULL buffer of size
   * zero. Returns "true" iff the parsing succeeded.
   **/
  bool Toolbox::ReadJsonWithoutComments(Json::Value& target,
                                        const std::string& source)
  {
    const char* content = NULL;

    if (!source.empty())
    {
      content = source.c_str();
    }

    return ReadJsonWithoutComments(target, content, source.size());
  }
  

  /**
   * Parse the "size" bytes located at "buffer" as a JSON document,
   * without asking JsonCpp to collect the comments. Returns "true"
   * iff the parsing succeeded.
   **/
  bool Toolbox::ReadJsonWithoutComments(Json::Value& target,
                                        const void* buffer,
                                        size_t size)
  {
    const bool collectComments = false;
    return ReadJsonInternal(target, buffer, size, collectComments);
  }
  

  /**
   * Serialize "source" into the string "target". Depending on the
   * version of JsonCpp, this uses either the deprecated
   * "Json::FastWriter", or a "Json::StreamWriterBuilder" configured
   * with an empty indentation.
   **/
  void Toolbox::WriteFastJson(std::string& target,
                              const Json::Value& source)
  {
#if JSONCPP_USE_DEPRECATED == 1
    Json::FastWriter serializer;
    target = serializer.write(source);
#else
    Json::StreamWriterBuilder options;
    options.settings_["indentation"] = "";
    target = Json::writeString(options, source);
#endif
  }
  

  /**
   * Serialize "source" into the string "target" in an indented
   * form. Depending on the version of JsonCpp, this uses either the
   * deprecated "Json::StyledWriter", or a "Json::StreamWriterBuilder"
   * configured with an indentation of 3 spaces.
   **/
  void Toolbox::WriteStyledJson(std::string& target,
                                const Json::Value& source)
  {
#if JSONCPP_USE_DEPRECATED == 1
    Json::StyledWriter serializer;
    target = serializer.write(source);
#else
    Json::StreamWriterBuilder options;
    options.settings_["indentation"] = "   ";
    target = Json::writeString(options, source);
#endif
  }


  /**
   * If "value" both starts and ends with a double quote, remove
   * these two characters in place. Otherwise, "value" is left
   * untouched.
   *
   * At least 2 characters are required: A string that only contains
   * one single double quote is not surrounded by quotes (the same
   * character would be counted as both the opening and the closing
   * quote), and is therefore left unchanged.
   **/
  void Toolbox::RemoveSurroundingQuotes(std::string& value)
  {
    const size_t length = value.size();

    if (length >= 2 &&
        value[0] == '\"' &&
        value[length - 1] == '\"')
    {
      value = value.substr(1, length - 2);
    }
  }

  /**
   * Create a timer and immediately record its starting time through
   * "Restart()".
   **/
  Toolbox::ElapsedTimer::ElapsedTimer()
  {
    this->Restart();
  }

  void Toolbox::ElapsedTimer::Restart()
  {
    start_ = boost::posix_time::microsec_clock::universal_time();
  }

  /**
   * Return the time elapsed since the last call to "Restart()", in
   * milliseconds (integer division of the nanoseconds count).
   **/
  uint64_t Toolbox::ElapsedTimer::GetElapsedMilliseconds()
  {
    const uint64_t nanoseconds = GetElapsedNanoseconds();
    return nanoseconds / 1000000;
  }
  
  /**
   * Return the time elapsed since the last call to "Restart()", in
   * microseconds (integer division of the nanoseconds count).
   **/
  uint64_t Toolbox::ElapsedTimer::GetElapsedMicroseconds()
  {
    const uint64_t nanoseconds = GetElapsedNanoseconds();
    return nanoseconds / 1000;
  }

  uint64_t Toolbox::ElapsedTimer::GetElapsedNanoseconds()
  {
    boost::posix_time::ptime now = boost::posix_time::microsec_clock::universal_time();
    boost::posix_time::time_duration diff = now - start_;
    return static_cast<uint64_t>(diff.total_nanoseconds());
  }

  /**
   * Return the elapsed time of the timer, formatted by
   * "Toolbox::GetHumanDuration()".
   **/
  std::string Toolbox::ElapsedTimer::GetHumanElapsedDuration()
  {
    const uint64_t elapsed = GetElapsedNanoseconds();
    return Toolbox::GetHumanDuration(elapsed);
  }

  /**
   * Format the speed of a transfer of "sizeInBytes" bytes that took
   * the time elapsed on this timer. In "full" mode, the result looks
   * like "26.45MB in 2.25s = 94.04Mbps". Otherwise, only the speed
   * is returned, e.g. "94.04Mbps".
   **/
  std::string Toolbox::ElapsedTimer::GetHumanTransferSpeed(bool full, uint64_t sizeInBytes)
  {
    const uint64_t elapsed = GetElapsedNanoseconds();
    return Toolbox::GetHumanTransferSpeed(full, sizeInBytes, elapsed);
  }

  /**
   * Create a logger that remembers "message", that is flagged as
   * "not logged yet", and whose internal timer is restarted.
   **/
  Toolbox::ElapsedTimeLogger::ElapsedTimeLogger(const std::string& message) :
    message_(message),
    logged_(false)
  {
    this->Restart();
  }

  /**
   * On destruction, log the elapsed time, unless "StopAndLog()" has
   * already been invoked.
   **/
  Toolbox::ElapsedTimeLogger::~ElapsedTimeLogger()
  {
    if (logged_)
    {
      return;  // Nothing to do, the elapsed time was already reported
    }

    StopAndLog();
  }

  void Toolbox::ElapsedTimeLogger::Restart()
  {
    timer_.Restart();
  }

  /**
   * Write the message and the elapsed time (in microseconds) to the
   * log with the WARNING level, then flag this logger as "logged" so
   * that the destructor does not report the time a second time.
   **/
  void Toolbox::ElapsedTimeLogger::StopAndLog()
  {
    LOG(WARNING) << "ELAPSED TIMER: "
                 << message_
                 << " ("
                 << timer_.GetElapsedMicroseconds()
                 << " us)";

    logged_ = true;
  }

  /**
   * Format a size expressed in bytes as a human-readable string.
   * Sizes below 1024 are written as an integer followed by "bytes".
   * Larger sizes are repeatedly divided by 1024 and written with 2
   * decimals, followed by "KB", "MB", "GB" or "TB" ("TB" being used
   * for anything larger).
   **/
  std::string Toolbox::GetHumanFileSize(uint64_t sizeInBytes)
  {
    std::ostringstream out;

    if (sizeInBytes < 1024)
    {
      out << sizeInBytes << "bytes";
      return out.str();
    }

    static const char* const UNITS[] = { "KB", "MB", "GB", "TB" };
    static const int LAST_UNIT = static_cast<int>(sizeof(UNITS) / sizeof(UNITS[0])) - 1;

    double scaled = static_cast<double>(sizeInBytes) / 1024.0;

    // Move to the next unit as long as the value is too large and
    // as a larger unit is available
    int unit = 0;
    for (; unit < LAST_UNIT && scaled >= 1024.0; unit++)
    {
      scaled /= 1024.0;
    }

    out << std::fixed << std::setprecision(2) << scaled << UNITS[unit];
    return out.str();
  }

  /**
   * Format a duration expressed in nanoseconds as a human-readable
   * string. Durations below 1000ns are written as an integer
   * followed by "ns". Larger durations are repeatedly divided by
   * 1000 and written with 2 decimals, followed by "us", "ms" or "s"
   * ("s" being used for anything larger).
   **/
  std::string Toolbox::GetHumanDuration(uint64_t durationInNanoseconds)
  {
    // The threshold must use the same base (1000) as the scaling loop
    // below: With a threshold of 1024, the durations from 1000ns to
    // 1023ns would be reported as "1000ns" instead of "1.00us"
    if (durationInNanoseconds < 1000)
    {
      std::ostringstream oss;
      oss << durationInNanoseconds << "ns";
      return oss.str();
    }
    else
    {
      static const char* suffixes[] = {"ns", "us", "ms", "s"};
      static const int suffixesCount = sizeof(suffixes) / sizeof(suffixes[0]);

      int i = 0;
      double duration = static_cast<double>(durationInNanoseconds);

      // Switch to the next unit while the value is too large and
      // while a larger unit is available
      while (duration >= 1000.0 && i < suffixesCount - 1) 
      {
        duration /= 1000.0;
        i++;
      }

      std::ostringstream oss;
      oss << std::fixed << std::setprecision(2) << duration <<  suffixes[i];
      return oss.str();
    }
  }

  /**
   * Format the speed of a transfer of "sizeInBytes" bytes that took
   * "durationInNanoseconds" nanoseconds. In "full" mode, the result
   * looks like "26.45MB in 2.25s = 94.04Mbps". Otherwise, only the
   * speed is returned, e.g. "94.04Mbps".
   *
   * The speed is computed in bits per second, and is scaled by
   * powers of 1000 up to "Gbps".
   *
   * NOTE(review): There is no guard against a zero duration, in
   * which case the floating-point division yields inf or NaN.
   **/
  std::string Toolbox::GetHumanTransferSpeed(bool full, uint64_t sizeInBytes, uint64_t durationInNanoseconds)
  {
    if (full)
    {
      const std::string speed = GetHumanTransferSpeed(false, sizeInBytes, durationInNanoseconds);

      return (Toolbox::GetHumanFileSize(sizeInBytes) + " in " +
              Toolbox::GetHumanDuration(durationInNanoseconds) + " = " + speed);
    }

    double rate = 8.0 * 1000000000.0 * static_cast<double>(sizeInBytes) / static_cast<double>(durationInNanoseconds);

    std::ostringstream out;

    if (rate < 1000.0)
    {
      // Below 1kbps, the value is written with the default formatting
      out << rate << "bps";
      return out.str();
    }

    static const char* const UNITS[] = { "kbps", "Mbps", "Gbps" };
    static const int LAST_UNIT = static_cast<int>(sizeof(UNITS) / sizeof(UNITS[0])) - 1;

    rate /= 1000.0;

    int unit = 0;
    for (; unit < LAST_UNIT && rate >= 1000.0; unit++)
    {
      rate /= 1000.0;
    }

    out << std::fixed << std::setprecision(2) << rate << UNITS[unit];
    return out.str();
  }


  /**
   * Parse a version string of the form "MAJOR", "MAJOR.MINOR" or
   * "MAJOR.MINOR.REVISION", each component being read as an integer
   * of at most 4 characters. The components that are absent from
   * the string are set to zero.
   *
   * Returns "true" iff at least the major number could be read and
   * iff none of the components that were read is negative. On
   * failure, the output arguments are left untouched. Throws an
   * "OrthancException" with "ErrorCode_NullPointer" if "version" is
   * NULL.
   **/
  bool Toolbox::ParseVersion(unsigned int& major,
                             unsigned int& minor,
                             unsigned int& revision,
                             const char* version)
  {
    if (version == NULL)
    {
      throw OrthancException(ErrorCode_NullPointer);
    }

#ifdef _MSC_VER
#define ORTHANC_SCANF sscanf_s
#else
#define ORTHANC_SCANF sscanf
#endif

    /**
     * One single scan is sufficient: The formats "%4d.%4d" and "%4d"
     * are prefixes of "%4d.%4d.%4d", so the number of successful
     * conversions directly tells which of the 3 forms was provided.
     **/
    int a = 0, b = 0, c = 0;
    const int count = ORTHANC_SCANF(version, "%4d.%4d.%4d", &a, &b, &c);

    if (count < 1 ||               // Also covers EOF
        a < 0 ||
        (count >= 2 && b < 0) ||
        (count >= 3 && c < 0))
    {
      return false;
    }

    major = static_cast<unsigned int>(a);
    minor = (count >= 2 ? static_cast<unsigned int>(b) : 0);
    revision = (count >= 3 ? static_cast<unsigned int>(c) : 0);
    return true;
  }


  /**
   * Check whether the version string "version" is greater than or
   * equal to "major.minor.revision". The special version "mainline"
   * is always considered as compatible.
   *
   * Throws an "OrthancException" with "ErrorCode_NullPointer" if
   * "version" is NULL, and with "ErrorCode_ParameterOutOfRange" if
   * "version" cannot be parsed by "ParseVersion()".
   **/
  bool Toolbox::IsVersionAbove(const char* version,
                               unsigned int major,
                               unsigned int minor,
                               unsigned int revision)
  {
    /**
     * Note: Similar standalone functions are implemented in
     * "OrthancCPlugin.h" and "OrthancPluginCppWrapper.cpp".
     **/

    unsigned int actualMajor, actualMinor, actualRevision;

    if (version == NULL)
    {
      throw OrthancException(ErrorCode_NullPointer);
    }
    else if (!strcmp(version, "mainline"))
    {
      // Assume compatibility with the mainline
      return true;
    }
    else if (ParseVersion(actualMajor, actualMinor, actualRevision, version))
    {
      if (actualMajor > major)
      {
        return true;
      }

      if (actualMajor < major)
      {
        return false;
      }

      // Check the minor version number
      assert(actualMajor == major);

      if (actualMinor > minor)
      {
        return true;
      }

      if (actualMinor < minor)
      {
        return false;
      }

      // Check the patch level version number: At this point, both
      // the major and the minor numbers are known to be equal
      assert(actualMajor == major &&
             actualMinor == minor);

      return (actualRevision >= revision);
    }
    else
    {
      throw OrthancException(ErrorCode_ParameterOutOfRange, "Not a valid version: " + std::string(version));
    }
  }
}



/**
 * Allocate a new "Orthanc::Toolbox::LinesIterator" over "content",
 * and return it as an opaque "OrthancLinesIterator" pointer. The
 * object is released by "OrthancLinesIterator_Free()".
 **/
OrthancLinesIterator* OrthancLinesIterator_Create(const std::string& content)
{
  Orthanc::Toolbox::LinesIterator* const lines = new Orthanc::Toolbox::LinesIterator(content);
  return reinterpret_cast<OrthancLinesIterator*>(lines);
}


bool OrthancLinesIterator_GetLine(std::string& target,
                                  const OrthancLinesIterator* iterator)
{
  if (iterator != NULL)
  {
    return reinterpret_cast<const Orthanc::Toolbox::LinesIterator*>(iterator)->GetLine(target);
  }
  else
  {
    return false;
  }
}


/**
 * Forward the call to "LinesIterator::Next()" on the underlying
 * iterator. Does nothing if "iterator" is NULL.
 **/
void OrthancLinesIterator_Next(OrthancLinesIterator* iterator)
{
  if (iterator == NULL)
  {
    return;
  }

  Orthanc::Toolbox::LinesIterator* const lines =
    reinterpret_cast<Orthanc::Toolbox::LinesIterator*>(iterator);

  lines->Next();
}


/**
 * Delete the "Orthanc::Toolbox::LinesIterator" hidden behind the
 * opaque pointer. Does nothing if "iterator" is NULL.
 **/
void OrthancLinesIterator_Free(OrthancLinesIterator* iterator)
{
  if (iterator == NULL)
  {
    return;
  }

  delete reinterpret_cast<const Orthanc::Toolbox::LinesIterator*>(iterator);
}