// Mercurial > hg > orthanc

/**
 * Orthanc - A Lightweight, RESTful DICOM Store
 * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics
 * Department, University Hospital of Liege, Belgium
 * Copyright (C) 2017-2019 Osimis S.A., Belgium
 *
 * This program is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * In addition, as a special exception, the copyright holders of this
 * program give permission to link the code of its release with the
 * OpenSSL project's "OpenSSL" library (or with modified versions of it
 * that use the same license as the "OpenSSL" library), and distribute
 * the linked executables. You must obey the GNU General Public License
 * in all respects for all of the code used other than "OpenSSL". If you
 * modify file(s) with this exception, you may extend this exception to
 * your version of the file(s), but you are not obligated to do so. If
 * you do not wish to do so, delete this exception statement from your
 * version. If you delete this exception statement from all source files
 * in the program, then also delete it here.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 **/


#include "PrecompiledHeaders.h"
#include "Toolbox.h"

#include "OrthancException.h"
#include "Logging.h"

#include <boost/algorithm/string/case_conv.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/regex.hpp>

#if BOOST_VERSION >= 106600
#  include <boost/uuid/detail/sha1.hpp>
#else
#  include <boost/uuid/sha1.hpp>
#endif

#include <string>
#include <stdint.h>
#include <string.h>
#include <algorithm>
#include <ctype.h>


#if ORTHANC_ENABLE_MD5 == 1
// TODO - Could be replaced by <boost/uuid/detail/md5.hpp> starting
// with Boost >= 1.66.0
#  include "../Resources/ThirdParty/md5/md5.h"
#endif

#if ORTHANC_ENABLE_BASE64 == 1
#  include "../Resources/ThirdParty/base64/base64.h"
#endif

#if ORTHANC_ENABLE_LOCALE == 1
#  include <boost/locale.hpp>
#endif

#if ORTHANC_ENABLE_SSL == 1
// For OpenSSL initialization and finalization
#  include <openssl/conf.h>
#  include <openssl/engine.h>
#  include <openssl/err.h>
#  include <openssl/evp.h>
#  include <openssl/ssl.h>
#endif


#if defined(_MSC_VER) && (_MSC_VER < 1800)
// Patch for the missing "_strtoll" symbol when compiling with Visual Studio < 2013
extern "C"
{
  // Declaration of the 64-bit string-to-integer conversion routine
  // that the wrapper below forwards to
  int64_t _strtoi64(const char *nptr, char **endptr, int base);

  // Defines the missing "strtoll()" symbol by forwarding all of its
  // arguments, unchanged, to "_strtoi64()"
  int64_t strtoll(const char *nptr, char **endptr, int base)
  {
    return _strtoi64(nptr, endptr, base);
  }
}
#endif


#if defined(_WIN32)
#  include <windows.h>   // For ::Sleep
#endif


#if ORTHANC_ENABLE_PUGIXML == 1
#  include "ChunkedBuffer.h"
#endif


// Inclusions for UUID
// http://stackoverflow.com/a/1626302

extern "C"
{
#if defined(_WIN32)
#  include <rpc.h>
#else
#  include <uuid/uuid.h>
#endif
}


namespace Orthanc
{
  // Positions "lineEnd_" on the first '\n' or '\r' character found at
  // or after "lineStart_", or at the end of the content if the
  // current line is not terminated by an end-of-line character.
  void Toolbox::LinesIterator::FindEndOfLine()
  {
    for (lineEnd_ = lineStart_; lineEnd_ < content_.size(); lineEnd_++)
    {
      const char c = content_[lineEnd_];

      if (c == '\n' ||
          c == '\r')
      {
        break;
      }
    }
  }


  // Creates an iterator over the lines of "content". The iterator
  // starts at offset 0, and the end of the first line is located
  // immediately so that "GetLine()" can be called right away.
  Toolbox::LinesIterator::LinesIterator(const std::string& content) :
    content_(content),
    lineStart_(0)
  {
    FindEndOfLine();
  }


  // Copies the current line (without its end-of-line characters) into
  // "target" and returns "true". Returns "false", leaving "target"
  // untouched, once the iterator has reached the end of the content.
  bool Toolbox::LinesIterator::GetLine(std::string& target) const
  {
    assert(lineStart_ <= content_.size() &&
           lineEnd_ <= content_.size() &&
           lineStart_ <= lineEnd_);

    if (lineStart_ != content_.size())
    {
      target = content_.substr(lineStart_, lineEnd_ - lineStart_);
      return true;
    }

    // No more line is available
    return false;
  }


  // Moves the iterator to the next line. A line terminator is either
  // a single '\r' or '\n', or one of the two-character sequences
  // "\r\n" and "\n\r". Does nothing but pin "lineStart_" at the end
  // of the content once the last line has been consumed.
  void Toolbox::LinesIterator::Next()
  {
    lineStart_ = lineEnd_;

    if (lineStart_ == content_.size())
    {
      // The end of the content has been reached
      return;
    }

    const char first = content_[lineStart_];
    assert(first == '\r' ||
           first == '\n');

    // The character that, if it directly follows "first", belongs to
    // the same line terminator
    const char complement = (first == '\r' ? '\n' : '\r');

    // Skip the first character of the terminator...
    lineStart_ += 1;

    // ...and the second one, if any
    if (lineStart_ < content_.size() &&
        content_[lineStart_] == complement)
    {
      lineStart_ += 1;
    }

    FindEndOfLine();
  }


  // Converts "s" to upper case in place, one byte at a time, using
  // the C function "toupper()".
  void Toolbox::ToUpperCase(std::string& s)
  {
    for (std::string::iterator it = s.begin(); it != s.end(); ++it)
    {
      // "toupper()" has undefined behavior if its argument is neither
      // EOF nor representable as "unsigned char": Bytes above 127
      // (e.g. from UTF-8 strings) must therefore not be passed as a
      // possibly negative "char"
      *it = static_cast<char>(toupper(static_cast<unsigned char>(*it)));
    }
  }


  // Converts "s" to lower case in place, one byte at a time, using
  // the C function "tolower()".
  void Toolbox::ToLowerCase(std::string& s)
  {
    for (std::string::iterator it = s.begin(); it != s.end(); ++it)
    {
      // "tolower()" has undefined behavior if its argument is neither
      // EOF nor representable as "unsigned char": Bytes above 127
      // (e.g. from UTF-8 strings) must therefore not be passed as a
      // possibly negative "char"
      *it = static_cast<char>(tolower(static_cast<unsigned char>(*it)));
    }
  }


  // Stores an upper-case copy of "source" into "result"
  void Toolbox::ToUpperCase(std::string& result,
                            const std::string& source)
  {
    result.assign(source);
    ToUpperCase(result);  // In-place conversion of the copy
  }

  // Stores a lower-case copy of "source" into "result"
  void Toolbox::ToLowerCase(std::string& result,
                            const std::string& source)
  {
    result.assign(source);
    ToLowerCase(result);  // In-place conversion of the copy
  }


  // Splits an absolute URI (e.g. "/a/b/c") into its components
  // ("a", "b" and "c"). A single trailing slash is tolerated ("/a/"
  // gives "a"), and "/" gives an empty list of components.
  //
  // Throws OrthancException(ErrorCode_UriSyntax) if the URI is empty,
  // if it does not start with a slash, or if it contains an empty
  // component, as in "/coucou//e".
  void Toolbox::SplitUriComponents(UriComponents& components,
                                   const std::string& uri)
  {
    static const char URI_SEPARATOR = '/';

    components.clear();

    if (uri.empty() ||
        uri[0] != URI_SEPARATOR)
    {
      throw OrthancException(ErrorCode_UriSyntax);
    }

    // The number of slashes gives an upper bound on the number of
    // components. There is at least one slash (the leading one), so
    // the subtraction cannot underflow.
    components.reserve(static_cast<size_t>(
                         std::count(uri.begin(), uri.end(), URI_SEPARATOR)) - 1);

    // "size_t" is used for all the offsets, in order to match the
    // type of "uri.size()"
    size_t start = 1;

    while (start < uri.size())
    {
      // This is the loop invariant
      assert(uri[start - 1] == URI_SEPARATOR);

      size_t end = uri.find(URI_SEPARATOR, start);
      if (end == std::string::npos)
      {
        // Last component, without a trailing slash
        end = uri.size();
      }

      if (end == start)
      {
        // Empty component, as in: "/coucou//e"
        throw OrthancException(ErrorCode_UriSyntax);
      }

      components.push_back(uri.substr(start, end - start));
      start = end + 1;
    }
  }


  // Copies into "target" the components of "source" starting at index
  // "fromLevel". "target" is left empty if "fromLevel" is beyond the
  // last component of "source".
  void Toolbox::TruncateUri(UriComponents& target,
                            const UriComponents& source,
                            size_t fromLevel)
  {
    target.clear();

    if (fromLevel >= source.size())
    {
      return;
    }

    const size_t count = source.size() - fromLevel;
    target.reserve(count);

    for (size_t level = fromLevel; level < source.size(); level++)
    {
      target.push_back(source[level]);
    }

    assert(target.size() == count);
  }


  bool Toolbox::IsChildUri(const UriComponents& baseUri,
                           const UriComponents& testedUri)
  {
    if (testedUri.size() < baseUri.size())
    {
      return false;
    }

    for (size_t i = 0; i < baseUri.size(); i++)
    {
      if (baseUri[i] != testedUri[i])
        return false;
    }

    return true;
  }


  // Builds the string representation of the components starting at
  // index "fromLevel", each of them being prefixed by a slash.
  // Returns "/" if there is no component at or after "fromLevel".
  std::string Toolbox::FlattenUri(const UriComponents& components,
                                  size_t fromLevel)
  {
    if (fromLevel >= components.size())
    {
      return "/";
    }

    std::string flattened;

    for (size_t level = fromLevel; level < components.size(); level++)
    {
      flattened.push_back('/');
      flattened.append(components[level]);
    }

    return flattened;
  }


#if ORTHANC_ENABLE_MD5 == 1
  // Returns the lower-case hexadecimal digit ('0'-'9', 'a'-'f')
  // that represents "value", which must be in the range [0, 15].
  static char GetHexadecimalCharacter(uint8_t value)
  {
    assert(value < 16);

    // Offset between the numeric value and the code of its digit
    const int offset = (value < 10 ? '0' : 'a' - 10);

    return static_cast<char>(value + offset);
  }


  // Computes the MD5 hash of the content of "data", by forwarding to
  // the overload that takes a raw memory buffer.
  void Toolbox::ComputeMD5(std::string& result,
                           const std::string& data)
  {
    if (data.empty())
    {
      // Never take the address of the first character of an empty string
      ComputeMD5(result, NULL, 0);
    }
    else
    {
      ComputeMD5(result, &data[0], data.size());
    }
  }


  void Toolbox::ComputeMD5(std::string& result,
                           const void* data,
                           size_t size)
  {
    md5_state_s state;
    md5_init(&state);

    if (size > 0)
    {
      md5_append(&state,
                 reinterpret_cast<const md5_byte_t*>(data),
                 static_cast<int>(size));
    }

    md5_byte_t actualHash[16];
    md5_finish(&state, actualHash);

    result.resize(32);
    for (unsigned int i = 0; i < 16; i++)
    {
      result[2 * i] = GetHexadecimalCharacter(static_cast<uint8_t>(actualHash[i] / 16));
      result[2 * i + 1] = GetHexadecimalCharacter(static_cast<uint8_t>(actualHash[i] % 16));
    }
  }
#endif


#if ORTHANC_ENABLE_BASE64 == 1
  // Stores the Base64 encoding of "data" into "result". The previous
  // content of "result" is discarded before the encoding takes place.
  void Toolbox::EncodeBase64(std::string& result,
                             const std::string& data)
  {
    result.clear();
    base64_encode(result, data);
  }

  // Decodes the Base64 string "data" into "result". Throws
  // OrthancException(ErrorCode_BadFileFormat) if "data" contains a
  // character that does not belong to the Base64 alphabet (ASCII
  // letters, digits, '+', '/' and the padding character '=').
  void Toolbox::DecodeBase64(std::string& result,
                             const std::string& data)
  {
    for (size_t i = 0; i < data.length(); i++)
    {
      const char c = data[i];

      // The alphabet is tested explicitly instead of using
      // "isalnum()": The latter depends on the current locale, and
      // has undefined behavior on negative "char" values
      const bool isValid = ((c >= 'A' && c <= 'Z') ||
                            (c >= 'a' && c <= 'z') ||
                            (c >= '0' && c <= '9') ||
                            c == '+' ||
                            c == '/' ||
                            c == '=');

      if (!isValid)
      {
        // This is not a valid character for a Base64 string
        throw OrthancException(ErrorCode_BadFileFormat);
      }
    }

    result.clear();
    base64_decode(result, data);
  }


  bool Toolbox::DecodeDataUriScheme(std::string& mime,
                                    std::string& content,
                                    const std::string& source)
  {
    boost::regex pattern("data:([^;]+);base64,([a-zA-Z0-9=+/]*)",
                         boost::regex::icase /* case insensitive search */);

    boost::cmatch what;
    if (regex_match(source.c_str(), what, pattern))
    {
      mime = what[1];
      DecodeBase64(content, what[2]);
      return true;
    }
    else
    {
      return false;
    }
  }


  // Builds the string "data:<mime>;base64,<payload>" into "result",
  // where the payload is the Base64 encoding of "content".
  void Toolbox::EncodeDataUriScheme(std::string& result,
                                    const std::string& mime,
                                    const std::string& content)
  {
    // Write the header first, then let "base64_encode()" complete
    // "result" with the encoded payload
    result = "data:" + mime + ";base64,";
    base64_encode(result, content);
  }

#endif


#if ORTHANC_ENABLE_LOCALE == 1
  // Returns the name of the character set that is given to the
  // conversion functions of boost::locale ("to_utf()" and
  // "from_utf()") for the given Orthanc encoding. Throws
  // OrthancException(ErrorCode_NotImplemented) for the encodings
  // that are not listed below.
  static const char* GetBoostLocaleEncoding(const Encoding sourceEncoding)
  {
    switch (sourceEncoding)
    {
      case Encoding_Utf8:
        return "UTF-8";

      case Encoding_Ascii:
        return "ASCII";

      case Encoding_Latin1:
        return "ISO-8859-1";

      case Encoding_Latin2:
        return "ISO-8859-2";

      case Encoding_Latin3:
        return "ISO-8859-3";

      case Encoding_Latin4:
        return "ISO-8859-4";

      case Encoding_Latin5:
        // Not a typo: Latin alphabet No. 5 is part 9 of ISO 8859
        return "ISO-8859-9";

      case Encoding_Cyrillic:
        return "ISO-8859-5";

      case Encoding_Windows1251:
        return "WINDOWS-1251";

      case Encoding_Arabic:
        return "ISO-8859-6";

      case Encoding_Greek:
        return "ISO-8859-7";

      case Encoding_Hebrew:
        return "ISO-8859-8";

      case Encoding_Japanese:
        return "SHIFT-JIS";

      case Encoding_Chinese:
        return "GB18030";

      case Encoding_Thai:
        // The name of this character set depends on whether
        // boost::locale was built with ICU
#if BOOST_LOCALE_WITH_ICU == 1
        return "tis620.2533";
#else
        return "TIS620.2533-0";
#endif

      case Encoding_Korean:
        return "ISO-IR-149";

      case Encoding_JapaneseKanji:
        return "JIS";

      case Encoding_SimplifiedChinese:
        return "GB2312";

      default:
        throw OrthancException(ErrorCode_NotImplemented);
    }
  }
#endif


#if ORTHANC_ENABLE_LOCALE == 1
  // http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.12.html#sect_C.12.1.1.2
  // Converts "source", which is expressed using "sourceEncoding", to
  // UTF-8. If "hasCodeExtensions" is "true", the ISO 2022 escape
  // sequences are removed from the converted string. If the
  // conversion fails, the error is logged and the ASCII characters of
  // "source" are returned instead.
  std::string Toolbox::ConvertToUtf8(const std::string& source,
                                     Encoding sourceEncoding,
                                     bool hasCodeExtensions)
  {
    // The "::skip" flag makes boost skip invalid UTF-8
    // characters. This can occur in badly-encoded DICOM files.

    try
    {
      if (sourceEncoding == Encoding_Ascii)
      {
        return ConvertToAscii(source);
      }

      std::string converted;

      if (sourceEncoding != Encoding_Utf8)
      {
        const char* encoding = GetBoostLocaleEncoding(sourceEncoding);
        converted = boost::locale::conv::to_utf<char>(source, encoding, boost::locale::conv::skip);
      }
      else
      {
        // Already in UTF-8: No conversion is required, but we ensure
        // the output is correctly encoded
        converted = boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip);
      }

      if (!hasCodeExtensions)
      {
        return converted;
      }

      std::string withoutEscapes;
      RemoveIso2022EscapeSequences(withoutEscapes, converted);
      return withoutEscapes;
    }
    catch (std::runtime_error& e)
    {
      // Bad input string or bad encoding
      LOG(INFO) << e.what();
      return ConvertToAscii(source);
    }
  }
#endif


#if ORTHANC_ENABLE_LOCALE == 1
  // Converts the UTF-8 string "source" to "targetEncoding". If the
  // conversion fails, the ASCII characters of "source" are returned
  // instead.
  std::string Toolbox::ConvertFromUtf8(const std::string& source,
                                       Encoding targetEncoding)
  {
    // The "::skip" flag makes boost skip invalid UTF-8
    // characters. This can occur in badly-encoded DICOM files.

    try
    {
      switch (targetEncoding)
      {
        case Encoding_Utf8:
          // The target is UTF-8 as well: The string is only
          // re-encoded, which skips its invalid characters
          return boost::locale::conv::utf_to_utf<char>(source, boost::locale::conv::skip);

        case Encoding_Ascii:
          return ConvertToAscii(source);

        default:
        {
          const char* encoding = GetBoostLocaleEncoding(targetEncoding);
          return boost::locale::conv::from_utf<char>(source, encoding, boost::locale::conv::skip);
        }
      }
    }
    catch (std::runtime_error&)
    {
      // Bad input string or bad encoding
      return ConvertToAscii(source);
    }
  }
#endif


  // Tells whether "c" is kept in ASCII strings: It must be a non-null
  // 7-bit character that is either the line feed '\n', or not a
  // control character.
  static bool IsAsciiCharacter(uint8_t c)
  {
    if (c == 0 ||
        c > 127)
    {
      return false;
    }

    if (c == '\n')
    {
      // The only control character that is accepted
      return true;
    }

    return iscntrl(c) == 0;
  }


  // Returns "true" iff each of the "size" bytes located at "data" is
  // accepted by "IsAsciiCharacter()". An empty buffer is considered
  // as an ASCII string.
  bool Toolbox::IsAsciiString(const void* data,
                              size_t size)
  {
    const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);

    for (size_t i = 0; i < size; i++)
    {
      if (!IsAsciiCharacter(bytes[i]))
      {
        return false;
      }
    }

    return true;
  }


  // Same as the overload that takes a raw memory buffer, applied to
  // all the bytes of the string "s"
  bool Toolbox::IsAsciiString(const std::string& s)
  {
    return IsAsciiString(s.c_str(), s.size());
  }


  // Returns a copy of "source" from which all the bytes that are
  // rejected by "IsAsciiCharacter()" have been removed.
  std::string Toolbox::ConvertToAscii(const std::string& source)
  {
    std::string result;
    result.reserve(source.size() + 1);

    for (std::string::const_iterator it = source.begin(); it != source.end(); ++it)
    {
      if (IsAsciiCharacter(*it))
      {
        result.push_back(*it);
      }
    }

    return result;
  }


  // Computes the SHA-1 hash of the "size" bytes located at "data",
  // and stores it into "result" as 5 groups of 8 lower-case
  // hexadecimal characters separated by dashes (44 characters).
  // "data" is not accessed if "size" is zero.
  void Toolbox::ComputeSHA1(std::string& result,
                            const void* data,
                            size_t size)
  {
    // Length of the formatted hash, without the null terminator
    static const size_t FORMATTED_SIZE = 8 * 5 + 4;

    boost::uuids::detail::sha1 sha1;

    if (size > 0)
    {
      sha1.process_bytes(data, size);
    }

    unsigned int digest[5];

    // Sanity check for the memory layout: A SHA-1 digest is 160 bits wide
    assert(sizeof(unsigned int) == 4 && sizeof(digest) == (160 / 8));

    sha1.get_digest(digest);

    // "sprintf()" appends a null terminator after the 44 characters:
    // The hash is thus formatted into a buffer that has room for this
    // terminator, instead of writing one byte beyond the size of
    // "result"
    char formatted[FORMATTED_SIZE + 1];
    sprintf(formatted, "%08x-%08x-%08x-%08x-%08x",
            digest[0],
            digest[1],
            digest[2],
            digest[3],
            digest[4]);

    result.assign(formatted, FORMATTED_SIZE);
  }

  // Computes the SHA-1 hash of the content of "data", by forwarding
  // to the overload that takes a raw memory buffer.
  void Toolbox::ComputeSHA1(std::string& result,
                            const std::string& data)
  {
    if (data.empty())
    {
      // No buffer is provided for empty strings
      ComputeSHA1(result, NULL, 0);
    }
    else
    {
      ComputeSHA1(result, data.c_str(), data.size());
    }
  }


  // Tells whether the "size" characters located at "str" look like a
  // SHA-1 hash as formatted by "ComputeSHA1()", i.e. 5 groups of 8
  // alphanumeric characters separated by dashes. Null characters and
  // spaces are ignored at the beginning and at the end of the buffer.
  bool Toolbox::IsSHA1(const char* str,
                       size_t size)
  {
    if (size == 0)
    {
      return false;
    }

    const char* start = str;
    const char* end = str + size;

    // NB: The characters are cast to "unsigned char" before being
    // given to the functions of <ctype.h>, as the latter have
    // undefined behavior on negative values

    // Trim the beginning of the string
    while (start < end &&
           (*start == '\0' ||
            isspace(static_cast<unsigned char>(*start))))
    {
      start++;
    }

    // Trim the end of the string
    while (start < end &&
           (*(end - 1) == '\0' ||
            isspace(static_cast<unsigned char>(*(end - 1)))))
    {
      end--;
    }

    if (end - start != 44)
    {
      return false;
    }

    for (unsigned int i = 0; i < 44; i++)
    {
      if (i == 8 ||
          i == 17 ||
          i == 26 ||
          i == 35)
      {
        // Separator between two groups
        if (start[i] != '-')
          return false;
      }
      else
      {
        if (!isalnum(static_cast<unsigned char>(start[i])))
          return false;
      }
    }

    return true;
  }


  // Same as the overload that takes a raw memory buffer, applied to
  // the content of "s". An empty string is never a SHA-1 hash.
  bool Toolbox::IsSHA1(const std::string& s)
  {
    return (!s.empty() &&
            IsSHA1(s.c_str(), s.size()));
  }


  // Returns a copy of "source" without its leading and trailing
  // whitespace characters (as defined by "isspace()"). The result is
  // empty if "source" only contains such characters.
  std::string Toolbox::StripSpaces(const std::string& source)
  {
    // NB: The characters are cast to "unsigned char" before being
    // given to "isspace()", as the latter has undefined behavior on
    // negative values (which arise with non-ASCII bytes)

    size_t first = 0;

    while (first < source.length() &&
           isspace(static_cast<unsigned char>(source[first])))
    {
      first++;
    }

    if (first == source.length())
    {
      // String containing only spaces
      return "";
    }

    size_t last = source.length();
    while (last > first &&
           isspace(static_cast<unsigned char>(source[last - 1])))
    {
      last--;
    }

    assert(first <= last);
    return source.substr(first, last - first);
  }


  // Returns the numeric value (0 to 15) of the hexadecimal digit "c",
  // which can be in lower case or in upper case. The caller must
  // ensure that "c" is a hexadecimal digit.
  static char Hex2Dec(char c)
  {
    if (c >= '0' && c <= '9')
    {
      return c - '0';
    }

    if (c >= 'a' && c <= 'f')
    {
      return c - 'a' + 10;
    }

    // Any other character is handled as an upper-case letter
    return c - 'A' + 10;
  }

  // Decodes, in place, a percent-encoded string: Each "%XY" sequence
  // where "X" and "Y" are hexadecimal digits is replaced by the byte
  // of value 0xXY, and each '+' is replaced by a space. A '%' that
  // is not followed by two hexadecimal digits is left untouched.
  void Toolbox::UrlDecode(std::string& s)
  {
    // http://en.wikipedia.org/wiki/Percent-encoding
    // http://www.w3schools.com/tags/ref_urlencode.asp
    // http://stackoverflow.com/questions/154536/encode-decode-urls-in-c

    if (s.size() == 0)
    {
      return;
    }

    // The decoded string is never longer than the encoded one, which
    // allows decoding in place: "target" never overtakes "source"
    size_t source = 0;
    size_t target = 0;

    while (source < s.size())
    {
      // "isxdigit()" is used so that only true hexadecimal digits
      // reach "Hex2Dec()": "isalnum()" would also accept letters such
      // as 'z', which would produce a meaningless byte. The cast to
      // "unsigned char" avoids undefined behavior on non-ASCII bytes.
      if (s[source] == '%' &&
          source + 2 < s.size() &&
          isxdigit(static_cast<unsigned char>(s[source + 1])) &&
          isxdigit(static_cast<unsigned char>(s[source + 2])))
      {
        s[target] = static_cast<char>((Hex2Dec(s[source + 1]) << 4) | Hex2Dec(s[source + 2]));
        source += 3;
        target += 1;
      }
      else
      {
        if (s[source] == '+')
          s[target] = ' ';
        else
          s[target] = s[source];

        source++;
        target++;
      }
    }

    s.resize(target);
  }


  // Detects the endianness of the platform at runtime, by looking at
  // how a sequence of 4 known bytes is interpreted as a 32-bit
  // integer. Throws OrthancException(ErrorCode_NotImplemented) if the
  // platform is neither big-endian, nor little-endian.
  Endianness Toolbox::DetectEndianness()
  {
    // http://sourceforge.net/p/predef/wiki/Endianness/

    const uint8_t buffer[4] = { 0x00, 0x01, 0x02, 0x03 };

    // The bytes are copied using "memcpy()" instead of casting
    // "buffer" to "uint32_t*": Such a cast breaks the strict aliasing
    // rule, and "buffer" might not be properly aligned
    uint32_t value;
    memcpy(&value, buffer, sizeof(value));

    switch (value)
    {
      case 0x00010203:
        return Endianness_Big;

      case 0x03020100:
        return Endianness_Little;

      default:
        throw OrthancException(ErrorCode_NotImplemented);
    }
  }


  // Converts a string containing the wildcards '*' (any sequence of
  // characters) and '?' (any single character) into the equivalent
  // regular expression. The characters that have a special meaning
  // in regular expressions are escaped with a backslash.
  std::string Toolbox::WildcardToRegularExpression(const std::string& source)
  {
    // The conversion is done in a single pass over "source", which
    // gives the same result as escaping each special character in
    // turn, without rescanning the string once per character

    std::string result;
    result.reserve(2 * source.size());  // Upper bound on the output size

    for (size_t i = 0; i < source.size(); i++)
    {
      const char c = source[i];

      switch (c)
      {
        // Escape all special characters
        case '\\':
        case '^':
        case '.':
        case '$':
        case '|':
        case '(':
        case ')':
        case '[':
        case ']':
        case '+':
        case '/':
        case '{':
        case '}':
          result.push_back('\\');
          result.push_back(c);
          break;

        // Convert wildcards '*' and '?' to their regex equivalents
        case '?':
          result.push_back('.');
          break;

        case '*':
          result.append(".*");
          break;

        default:
          result.push_back(c);
          break;
      }
    }

    return result;
  }


  // Splits "value" at each occurrence of "separator". The items are
  // not trimmed and empty items are kept: "result" always contains
  // one more item than the number of separators in "value" (in
  // particular, an empty "value" gives one empty item).
  void Toolbox::TokenizeString(std::vector<std::string>& result,
                               const std::string& value,
                               char separator)
  {
    result.clear();

    size_t start = 0;

    for (;;)
    {
      const size_t pos = value.find(separator, start);

      if (pos == std::string::npos)
      {
        // Last item: It spans up to the end of the string
        result.push_back(value.substr(start));
        return;
      }

      result.push_back(value.substr(start, pos - start));
      start = pos + 1;
    }
  }


#if ORTHANC_ENABLE_PUGIXML == 1
  // Writer for pugixml that accumulates the chunks of data it
  // receives into a "ChunkedBuffer", so that the whole output can
  // be retrieved as a single string afterwards.
  class ChunkedBufferWriter : public pugi::xml_writer
  {
  private:
    ChunkedBuffer buffer_;  // The chunks received so far

  public:
    // Implementation of the "pugi::xml_writer" interface: Appends the
    // "size" bytes located at "data" to the buffer (empty chunks are
    // ignored)
    virtual void write(const void *data, size_t size)
    {
      if (size > 0)
      {
        buffer_.AddChunk(reinterpret_cast<const char*>(data), size);
      }
    }

    // Stores into "s" the flattened content of the buffer
    void Flatten(std::string& s)
    {
      buffer_.Flatten(s);
    }
  };


  // Recursively appends the XML representation of the JSON value
  // "source" as children of the XML node "target":
  //  - Scalars become a text node ("null", "true" and "false" are
  //    used for the null and Boolean values).
  //  - Each item of an array becomes an element named "arrayElement".
  //  - Each member of an object becomes an element that is named
  //    after the member.
  // Throws OrthancException(ErrorCode_NotImplemented) if the type of
  // "source" is unknown.
  static void JsonToXmlInternal(pugi::xml_node& target,
                                const Json::Value& source,
                                const std::string& arrayElement)
  {
    // http://jsoncpp.sourceforge.net/value_8h_source.html#l00030

    switch (source.type())
    {
      case Json::nullValue:
      {
        target.append_child(pugi::node_pcdata).set_value("null");
        break;
      }

      case Json::intValue:
      {
        // Use the largest integer type, as "asInt()" fails on the
        // values that do not fit in an "int"
        std::string s = boost::lexical_cast<std::string>(source.asLargestInt());
        target.append_child(pugi::node_pcdata).set_value(s.c_str());
        break;
      }

      case Json::uintValue:
      {
        // Same remark as for signed integers
        std::string s = boost::lexical_cast<std::string>(source.asLargestUInt());
        target.append_child(pugi::node_pcdata).set_value(s.c_str());
        break;
      }

      case Json::realValue:
      {
        // Use "asDouble()" instead of "asFloat()", in order not to
        // lose the precision of the number
        std::string s = boost::lexical_cast<std::string>(source.asDouble());
        target.append_child(pugi::node_pcdata).set_value(s.c_str());
        break;
      }

      case Json::stringValue:
      {
        target.append_child(pugi::node_pcdata).set_value(source.asString().c_str());
        break;
      }

      case Json::booleanValue:
      {
        target.append_child(pugi::node_pcdata).set_value(source.asBool() ? "true" : "false");
        break;
      }

      case Json::arrayValue:
      {
        for (Json::Value::ArrayIndex i = 0; i < source.size(); i++)
        {
          pugi::xml_node node = target.append_child();
          node.set_name(arrayElement.c_str());
          JsonToXmlInternal(node, source[i], arrayElement);
        }
        break;
      }

      case Json::objectValue:
      {
        Json::Value::Members members = source.getMemberNames();

        for (size_t i = 0; i < members.size(); i++)
        {
          pugi::xml_node node = target.append_child();
          node.set_name(members[i].c_str());
          JsonToXmlInternal(node, source[members[i]], arrayElement);
        }

        break;
      }

      default:
        throw OrthancException(ErrorCode_NotImplemented);
    }
  }


  // Converts the JSON value "source" to an XML document that is
  // serialized into "target". The root of the document is an element
  // named "rootElement", and the items of the JSON arrays are
  // elements named "arrayElement".
  void Toolbox::JsonToXml(std::string& target,
                          const Json::Value& source,
                          const std::string& rootElement,
                          const std::string& arrayElement)
  {
    pugi::xml_document doc;

    pugi::xml_node n = doc.append_child(rootElement.c_str());
    JsonToXmlInternal(n, source, arrayElement);

    // Insert the XML declaration before the root element
    pugi::xml_node decl = doc.prepend_child(pugi::node_declaration);
    decl.append_attribute("version").set_value("1.0");
    decl.append_attribute("encoding").set_value("utf-8");

    XmlToString(target, doc);
  }

  // Serializes the XML document "source" into the string "target",
  // using UTF-8, the default formatting flags of pugixml, and two
  // spaces as the indentation string.
  void Toolbox::XmlToString(std::string& target,
                            const pugi::xml_document& source)
  {
    ChunkedBufferWriter writer;
    source.save(writer, "  ", pugi::format_default, pugi::encoding_utf8);
    writer.Flatten(target);
  }
#endif


  // Tells whether "str" is the decimal representation of an integer:
  // Once the surrounding spaces are removed, it must consist of an
  // optional minus sign followed by at least one digit.
  bool Toolbox::IsInteger(const std::string& str)
  {
    const std::string s = StripSpaces(str);

    if (s.empty())
    {
      return false;
    }

    size_t pos = 0;
    if (s[0] == '-')
    {
      if (s.size() == 1)
      {
        // A minus sign alone is not an integer
        return false;
      }

      pos = 1;
    }

    for (; pos < s.size(); pos++)
    {
      // The digits are tested explicitly instead of using
      // "isdigit()": The latter has undefined behavior on negative
      // "char" values, and might depend on the locale
      if (s[pos] < '0' ||
          s[pos] > '9')
      {
        return false;
      }
    }

    return true;
  }


  // Recursively copies the JSON value "source" into "target", by
  // rebuilding each value from its content only. "target" is left
  // untouched if the type of "source" is unknown.
  void Toolbox::CopyJsonWithoutComments(Json::Value& target,
                                        const Json::Value& source)
  {
    switch (source.type())
    {
      case Json::nullValue:
        target = Json::nullValue;
        break;

      case Json::booleanValue:
        target = source.asBool();
        break;

      case Json::intValue:
        target = source.asInt64();
        break;

      case Json::uintValue:
        target = source.asUInt64();
        break;

      case Json::realValue:
        target = source.asDouble();
        break;

      case Json::stringValue:
        target = source.asString();
        break;

      case Json::arrayValue:
      {
        target = Json::arrayValue;

        const Json::Value::ArrayIndex count = source.size();
        for (Json::Value::ArrayIndex index = 0; index < count; index++)
        {
          // Append a placeholder, then fill it with the copy of the item
          Json::Value& copy = target.append(Json::nullValue);
          CopyJsonWithoutComments(copy, source[index]);
        }

        break;
      }

      case Json::objectValue:
      {
        target = Json::objectValue;

        const Json::Value::Members names = source.getMemberNames();
        for (size_t index = 0; index < names.size(); index++)
        {
          const std::string& name = names[index];
          CopyJsonWithoutComments(target[name], source[name]);
        }

        break;
      }

      default:
        break;
    }
  }


  // Returns "true" iff the first characters of "str" are those of
  // "prefix" (any string starts with the empty prefix)
  bool Toolbox::StartsWith(const std::string& str,
                           const std::string& prefix)
  {
    return (str.size() >= prefix.size() &&
            str.compare(0, prefix.size(), prefix) == 0);
  }


  // This function checks whether "c" is an unreserved character
  // wrt. an URI percent-encoding, i.e. an ASCII letter, a digit, or
  // one of '-', '_', '.' and '~'
  // https://en.wikipedia.org/wiki/Percent-encoding#Percent-encoding%5Fin%5Fa%5FURI
  static bool IsUnreservedCharacter(char c)
  {
    if ((c >= 'A' && c <= 'Z') ||
        (c >= 'a' && c <= 'z') ||
        (c >= '0' && c <= '9'))
    {
      return true;
    }

    switch (c)
    {
      case '-':
      case '_':
      case '.':
      case '~':
        return true;

      default:
        return false;
    }
  }

  // Stores into "target" the percent-encoding of "source": The
  // unreserved characters are copied as such, whereas any other byte
  // is replaced by '%' followed by its value written as two
  // upper-case hexadecimal digits.
  void Toolbox::UriEncode(std::string& target,
                          const std::string& source)
  {
    static const char* const HEXADECIMAL = "0123456789ABCDEF";

    // First pass: Compute the length of the percent-encoded URI, so
    // as to allocate the target string only once
    size_t length = 0;

    for (size_t i = 0; i < source.size(); i++)
    {
      // One character if unreserved, three characters ("%XY") otherwise
      length += (IsUnreservedCharacter(source[i]) ? 1 : 3);
    }

    target.clear();
    target.reserve(length);

    // Second pass: Actual encoding
    for (size_t i = 0; i < source.size(); i++)
    {
      const char c = source[i];

      if (IsUnreservedCharacter(c))
      {
        target.push_back(c);
        continue;
      }

      // This character must be percent-encoded
      const uint8_t byte = static_cast<uint8_t>(c);

      target.push_back('%');
      target.push_back(HEXADECIMAL[byte >> 4]);
      target.push_back(HEXADECIMAL[byte & 0x0f]);
    }
  }


  // Tells whether "json" is an object that contains the member "key".
  // Throws OrthancException(ErrorCode_BadParameterType) if the member
  // exists, but is not of type "expectedType".
  static bool HasField(const Json::Value& json,
                       const std::string& key,
                       Json::ValueType expectedType)
  {
    const bool isPresent = (json.type() == Json::objectValue &&
                            json.isMember(key));

    if (!isPresent)
    {
      return false;
    }

    if (json[key].type() != expectedType)
    {
      throw OrthancException(ErrorCode_BadParameterType);
    }

    return true;
  }


  // Returns the string stored in member "key" of the JSON object
  // "json", or "defaultValue" if there is no such member. An
  // exception is thrown by "HasField()" if the member is not a string.
  std::string Toolbox::GetJsonStringField(const Json::Value& json,
                                          const std::string& key,
                                          const std::string& defaultValue)
  {
    return (HasField(json, key, Json::stringValue) ?
            json[key].asString() :
            defaultValue);
  }


  // Returns the Boolean stored in member "key" of the JSON object
  // "json", or "defaultValue" if there is no such member. An
  // exception is thrown by "HasField()" if the member is not a Boolean.
  bool Toolbox::GetJsonBooleanField(const ::Json::Value& json,
                                    const std::string& key,
                                    bool defaultValue)
  {
    return (HasField(json, key, Json::booleanValue) ?
            json[key].asBool() :
            defaultValue);
  }


  // Returns the integer stored in member "key" of the JSON object
  // "json", or "defaultValue" if there is no such member. An
  // exception is thrown by "HasField()" if the type of the member is
  // not "Json::intValue".
  int Toolbox::GetJsonIntegerField(const ::Json::Value& json,
                                   const std::string& key,
                                   int defaultValue)
  {
    return (HasField(json, key, Json::intValue) ?
            json[key].asInt() :
            defaultValue);
  }


  // Returns the non-negative integer stored in member "key" of the
  // JSON object "json", or "defaultValue" if there is no such member.
  // Throws OrthancException(ErrorCode_ParameterOutOfRange) if the
  // member is a negative integer. An exception is thrown by
  // "HasField()" if the type of the member is not "Json::intValue".
  unsigned int Toolbox::GetJsonUnsignedIntegerField(const ::Json::Value& json,
                                                    const std::string& key,
                                                    unsigned int defaultValue)
  {
    if (!HasField(json, key, Json::intValue))
    {
      // The default value is returned as such: Routing it through a
      // signed "int" would make the large default values negative,
      // and would wrongly report them as out of range
      return defaultValue;
    }

    const int v = json[key].asInt();

    if (v < 0)
    {
      throw OrthancException(ErrorCode_ParameterOutOfRange);
    }
    else
    {
      return static_cast<unsigned int>(v);
    }
  }


  // Tells whether "str" is formatted as a UUID: It must contain
  // exactly 36 characters, with dashes at offsets 8, 13, 18 and 23,
  // and alphanumeric characters elsewhere.
  bool Toolbox::IsUuid(const std::string& str)
  {
    if (str.size() != 36)
    {
      return false;
    }

    for (size_t i = 0; i < str.length(); i++)
    {
      if (i == 8 || i == 13 || i == 18 || i == 23)
      {
        // Separator between two groups
        if (str[i] != '-')
          return false;
      }
      else
      {
        // The cast to "unsigned char" avoids the undefined behavior
        // of "isalnum()" on negative values (non-ASCII bytes)
        if (!isalnum(static_cast<unsigned char>(str[i])))
          return false;
      }
    }

    return true;
  }


  // Tells whether "str" starts with a UUID (cf. "IsUuid()") that is
  // either the whole string, or that is immediately followed by a
  // whitespace character.
  bool Toolbox::StartsWithUuid(const std::string& str)
  {
    if (str.size() < 36)
    {
      return false;
    }

    if (str.size() == 36)
    {
      return IsUuid(str);
    }

    assert(str.size() > 36);

    // The cast to "unsigned char" avoids the undefined behavior of
    // "isspace()" on negative values (non-ASCII bytes)
    if (!isspace(static_cast<unsigned char>(str[36])))
    {
      return false;
    }

    return IsUuid(str.substr(0, 36));
  }


#if ORTHANC_ENABLE_LOCALE == 1
  // The locale used by "ToUpperCaseWithAccents()". It is NULL until
  // "InitializeGlobalLocale()" succeeds, and it is reset to NULL by
  // "FinalizeGlobalLocale()".
  static std::auto_ptr<std::locale>  globalLocale_;

  // Replaces the global locale by the locale whose name is "locale",
  // or by a default-constructed "std::locale" if "locale" is NULL.
  // Returns "true" on success. On failure, "false" is returned and
  // the global locale is left unset.
  static bool SetGlobalLocale(const char* locale)
  {
    // Drop the previous locale first, so that the global locale is
    // unset if the construction below fails
    globalLocale_.reset(NULL);

    try
    {
      if (locale == NULL)
      {
        LOG(WARNING) << "Falling back to system-wide default locale";
        globalLocale_.reset(new std::locale());
      }
      else
      {
        LOG(INFO) << "Using locale: \"" << locale << "\" for case-insensitive comparison of strings";
        globalLocale_.reset(new std::locale(locale));
      }
    }
    catch (std::runtime_error&)
    {
      // Deliberately ignored: The failure to construct the locale is
      // reported through the return value
    }

    return (globalLocale_.get() != NULL);
  }

  // Initializes the global locale used by "ToUpperCaseWithAccents()".
  // If "locale" is NULL, a platform-specific default locale is used.
  // If the requested locale cannot be set, a default-constructed
  // "std::locale" is tried as a fallback. Throws
  // OrthancException(ErrorCode_InternalError) if this fallback fails
  // as well.
  void Toolbox::InitializeGlobalLocale(const char* locale)
  {
    // Make Orthanc use English, United States locale
    // Linux: use "en_US.UTF-8"
    // Windows: use ""
    // Wine: use NULL

#if defined(__MINGW32__)
    // Visibly, there is no support of locales in MinGW yet
    // http://mingw.5.n7.nabble.com/How-to-use-std-locale-global-with-MinGW-correct-td33048.html
    static const char* DEFAULT_LOCALE = NULL;
#elif defined(_WIN32)
    // For Windows: use default locale (using "en_US" does not work)
    static const char* DEFAULT_LOCALE = "";
#else
    // For Linux & cie
    static const char* DEFAULT_LOCALE = "en_US.UTF-8";
#endif

    bool ok;

    if (locale == NULL)
    {
      // No locale was explicitly requested by the caller
      ok = SetGlobalLocale(DEFAULT_LOCALE);

#if defined(__MINGW32__)
      LOG(WARNING) << "This is a MinGW build, case-insensitive comparison of "
                   << "strings with accents will not work outside of Wine";
#endif
    }
    else
    {
      ok = SetGlobalLocale(locale);
    }

    // Last resort: Try a default-constructed locale
    if (!ok &&
        !SetGlobalLocale(NULL))
    {
      throw OrthancException(ErrorCode_InternalError,
                             "Cannot initialize global locale");
    }

  }


  // Releases the global locale. "ToUpperCaseWithAccents()" throws
  // after this call, until "InitializeGlobalLocale()" is invoked again.
  void Toolbox::FinalizeGlobalLocale()
  {
    globalLocale_.reset();
  }


  // Returns the upper-case version of the UTF-8 string "source",
  // according to the global locale. Throws
  // OrthancException(ErrorCode_BadSequenceOfCalls) if the global
  // locale has not been initialized.
  std::string Toolbox::ToUpperCaseWithAccents(const std::string& source)
  {
    if (globalLocale_.get() == NULL)
    {
      throw OrthancException(ErrorCode_BadSequenceOfCalls,
                             "No global locale was set, call Toolbox::InitializeGlobalLocale()");
    }

    /**
     * A few notes about locales:
     *
     * (1) We don't use "case folding":
     * http://www.boost.org/doc/libs/1_64_0/libs/locale/doc/html/conversions.html
     *
     * Characters are made uppercase one by one. This is because, in
     * static builds, we are using iconv, which is visibly not
     * supported correctly (TODO: Understand why). Case folding seems
     * to be working correctly if using the default backend under
     * Linux (ICU or POSIX?). If one wishes to use case folding, one
     * would use:
     *
     *   boost::locale::generator gen;
     *   std::locale::global(gen(DEFAULT_LOCALE));
     *   return boost::locale::to_upper(source);
     *
     * (2) The function "boost::algorithm::to_upper_copy" does not
     * make use of the "std::locale::global()". We therefore create a
     * global variable "globalLocale_".
     *
     * (3) The variant of "boost::algorithm::to_upper_copy()" that
     * uses std::string does not work properly. We need to apply it
     * on wide strings (std::wstring). This explains the two calls to
     * "utf_to_utf" in order to convert to/from std::wstring.
     **/

    // UTF-8 => wide string => upper case => UTF-8
    std::wstring w = boost::locale::conv::utf_to_utf<wchar_t>(source, boost::locale::conv::skip);
    w = boost::algorithm::to_upper_copy<std::wstring>(w, *globalLocale_);
    return boost::locale::conv::utf_to_utf<char>(w, boost::locale::conv::skip);
  }
#endif


  /**
   * Runs the global initialization calls of the OpenSSL library. The
   * body is only compiled if ORTHANC_ENABLE_SSL equals 1: Otherwise,
   * this function does nothing.
   **/
  void Toolbox::InitializeOpenSsl()
  {
#if ORTHANC_ENABLE_SSL == 1
    // The sequence of calls below follows the recipe from this page:
    // https://wiki.openssl.org/index.php/Library_Initialization
    SSL_library_init();
    SSL_load_error_strings();
    OpenSSL_add_all_algorithms();
    ERR_load_crypto_strings();
#endif
  }


  /**
   * Runs the global cleanup calls of the OpenSSL library. The body is
   * only compiled if ORTHANC_ENABLE_SSL equals 1: Otherwise, this
   * function does nothing.
   **/
  void Toolbox::FinalizeOpenSsl()
  {
#if ORTHANC_ENABLE_SSL == 1
    // Finalize OpenSSL
    // https://wiki.openssl.org/index.php/Library_Initialization#Cleanup
    // The call below is only compiled if "FIPS_mode_set" is defined as
    // a preprocessor macro ("#ifdef" does not see plain functions)
#ifdef FIPS_mode_set
    FIPS_mode_set(0);
#endif
    ENGINE_cleanup();
    CONF_modules_unload(1);
    EVP_cleanup();
    CRYPTO_cleanup_all_ex_data();
    // NOTE(review): ERR_remove_state() is reportedly deprecated in
    // recent OpenSSL releases - confirm against the targeted version
    ERR_remove_state(0);
    ERR_free_strings();
#endif
  }


  /**
   * Generates a new UUID and returns its textual representation.
   *
   * On Windows (WIN32 defined), the RPC runtime is used ("UuidCreate"
   * and "UuidToStringA"). On the other platforms, "uuid_generate_random"
   * and "uuid_unparse" are used.
   *
   * Throws ErrorCode_InternalError if, on Windows, the UUID cannot be
   * converted to a string.
   **/
  std::string Toolbox::GenerateUuid()
  {
#ifdef WIN32
    UUID uuid;
    UuidCreate ( &uuid );

    // "UuidToStringA" can fail (e.g. out of memory), in which case the
    // output pointer must not be read: Initialize it and check the
    // status before using the string
    unsigned char * str = NULL;
    if ( UuidToStringA ( &uuid, &str ) != RPC_S_OK ||
         str == NULL )
    {
      throw OrthancException(ErrorCode_InternalError,
                             "Cannot convert a UUID to a string");
    }

    std::string s( ( char* ) str );

    // The string was allocated by the RPC runtime and must be released by it
    RpcStringFreeA ( &str );
#else
    uuid_t uuid;
    uuid_generate_random ( uuid );

    // 36 characters for the textual UUID, plus the terminating null byte
    char s[37];
    uuid_unparse ( uuid, s );
#endif
    return s;
  }


  namespace
  {
    // Anonymous namespace to avoid clashes between compilation modules

    /**
     * Functor that writes the replacement text of one regular
     * expression match describing a variable reference. The capture
     * groups of the match are interpreted as follows:
     *
     * - Group 1: Name of a variable without a default value. If the
     *   variable is absent from the dictionary, nothing is written.
     * - Groups (2,3), (4,5) and (6,7): Name of a variable, together
     *   with the default value that is written if the variable is
     *   absent from the dictionary.
     *
     * ErrorCode_InternalError is thrown if none of the groups 1, 2, 4
     * and 6 is non-empty. The dictionary is stored by reference, and
     * must thus outlive the formatter.
     **/
    class VariableFormatter
    {
    public:
      typedef std::map<std::string, std::string>   Dictionary;

    private:
      const Dictionary& dictionary_;

      // Appends "text" to the output iterator, returns the new position
      template<typename Out>
      static Out Append(const std::string& text,
                        Out out)
      {
        return std::copy(text.begin(), text.end(), out);
      }

    public:
      VariableFormatter(const Dictionary& dictionary) :
        dictionary_(dictionary)
      {
      }

      template<typename Out>
      Out operator()(const boost::smatch& what,
                     Out out) const
      {
        const std::string plain = what[1].str();

        if (!plain.empty())
        {
          // Variable without a default value: Unknown variables are
          // replaced by nothing
          Dictionary::const_iterator entry = dictionary_.find(plain);
          return (entry == dictionary_.end() ? out : Append(entry->second, out));
        }

        // Variable with a default value: The name is in group 2, 4 or
        // 6, and the associated default value is in the next group
        for (int group = 2; group <= 6; group += 2)
        {
          const std::string name = what[group].str();

          if (!name.empty())
          {
            Dictionary::const_iterator entry = dictionary_.find(name);

            if (entry != dictionary_.end())
            {
              return Append(entry->second, out);
            }
            else
            {
              return Append(what[group + 1].str(), out);
            }
          }
        }

        // No group contains a variable name
        throw OrthancException(ErrorCode_InternalError);
      }
    };
  }


  /**
   * Returns a copy of "source" in which the references to variables
   * are replaced using "dictionary". Four syntaxes are recognized:
   *
   *   ${NAME}             Variable without a default value
   *   ${NAME:-default}    Unquoted default value
   *   ${NAME:-"default"}  Default value between double quotes
   *   ${NAME:-'default'}  Default value between single quotes
   *
   * The replacement text is produced by "VariableFormatter".
   **/
  std::string Toolbox::SubstituteVariables(const std::string& source,
                                           const std::map<std::string, std::string>& dictionary)
  {
    // The numbering of the capture groups must match the expectations
    // of "VariableFormatter"
    const std::string expression =
      "\\$\\{([^:]*?)\\}|"                 // ${what[1]}
      "\\$\\{([^:]*?):-([^'\"]*?)\\}|"     // ${what[2]:-what[3]}
      "\\$\\{([^:]*?):-\"([^\"]*?)\"\\}|"  // ${what[4]:-"what[5]"}
      "\\$\\{([^:]*?):-'([^']*?)'\\}";     // ${what[6]:-'what[7]'}

    const boost::regex pattern(expression);

    return boost::regex_replace(source, pattern, VariableFormatter(dictionary));
  }


  namespace Iso2022
  {
    /**
       Returns whether the string s contains a single-byte control message
       at index i. Returns false if i is out of the string.
    **/
    static inline bool IsControlMessage1(const std::string& s, size_t i)
    {
      if (i < s.size())
      {
        const char c = s[i];
        return
          (c == '\x0f') || // Locking shift zero
          (c == '\x0e');   // Locking shift one
      }
      else
      {
        return false;
      }
    }

    /**
       Returns whether the string s contains a double-byte control message
       (the ESC byte 0x1b followed by one of the bytes listed below) at
       index i. Returns false if fewer than 2 bytes are available at i.
    **/
    static inline bool IsControlMessage2(const std::string& s, size_t i)
    {
      if (i + 1 < s.size())
      {
        const char c1 = s[i];
        const char c2 = s[i + 1];
        return (c1 == 0x1b) && (
          (c2 == '\x6e') || // Locking shift two
          (c2 == '\x6f') || // Locking shift three
          (c2 == '\x4e') || // Single shift two (alt)
          (c2 == '\x4f') || // Single shift three (alt)
          (c2 == '\x7c') || // Locking shift three right
          (c2 == '\x7d') || // Locking shift two right
          (c2 == '\x7e')    // Locking shift one right
          );
      }
      else
      {
        return false;
      }
    }

    /**
       Returns whether the string s contains a triple-byte control message
       (either "0x8e 0x1b 0x4e" or "0x8f 0x1b 0x4f") at index i. Returns
       false if fewer than 3 bytes are available at i.
    **/
    static inline bool IsControlMessage3(const std::string& s, size_t i)
    {
      if (i + 2 < s.size())
      {
        const char c1 = s[i];
        const char c2 = s[i + 1];
        const char c3 = s[i + 2];
        return ((c1 == '\x8e' && c2 == 0x1b && c3 == '\x4e') ||
                (c1 == '\x8f' && c2 == 0x1b && c3 == '\x4f'));
      }
      else
      {
        return false;
      }
    }

    /**
       This function returns true if the index i in the supplied string s:
       - is valid
       - contains the c character
       This function returns false otherwise.
    **/
    static inline bool TestCharValue(
      const std::string& s, size_t i, char c)
    {
      return (i < s.size()) && (s[i] == c);
    }

    /**
       This function returns true if the index i in the supplied string s:
       - is valid
       - has a c character that is >= cMin and <= cMax (included)
       This function returns false otherwise.
    **/
    static inline bool TestCharRange(
      const std::string& s, size_t i, char cMin, char cMax)
    {
      return (i < s.size()) && (s[i] >= cMin) && (s[i] <= cMax);
    }

    /**
       This function returns the total length in bytes of the escape sequence
       located in string s at index i, if there is one, or 0 otherwise.

       The recognized sequences are made of the ESC byte (0x1b), followed
       by at least one byte between 0x20 and 0x2f, followed by one
       termination byte between 0x30 and 0x7f.

       NOTE(review): ECMA-35 restricts the termination byte to the range
       0x30-0x7e, whereas 0x7f is also accepted here - confirm whether
       this is intentional.
    **/
    static inline size_t GetEscapeSequenceLength(const std::string& s, size_t i)
    {
      if (!TestCharValue(s, i, 0x1b))
      {
        return 0;
      }

      size_t j = i + 1;

      // advance reading cursor while we are in a sequence
      while (TestCharRange(s, j, '\x20', '\x2f'))
      {
        ++j;
      }

      // check there is a valid termination byte AND we're long enough (there
      // must be at least one byte between 0x20 and 0x2f)
      if (TestCharRange(s, j, '\x30', '\x7f') && (j - i) >= 2)
      {
        return j - i + 1;
      }
      else
      {
        return 0;
      }
    }
  }


  /**
     This function strips all the ISO/IEC 2022 control codes and escape
     sequences from "src", and stores the result in "dest" (whose
     previous content is discarded).
     Please see https://en.wikipedia.org/wiki/ISO/IEC_2022 (as of 2019-02)
     for a list of those.

     Please note that this operation is potentially destructive, because
     it removes the character set information from the byte stream.

     However, in the case where the encoding is unique, then suppressing
     the escape sequences allows to provide us with a clean string after
     conversion to utf-8 with boost.

     It is allowed for "dest" and "src" to refer to the same string.
  **/
  void Toolbox::RemoveIso2022EscapeSequences(std::string& dest, const std::string& src)
  {
    if (&dest == &src)
    {
      // Clearing "dest" below would also wipe the input if both
      // arguments are the same object: work on a copy of the source
      const std::string copy(src);
      Toolbox::RemoveIso2022EscapeSequences(dest, copy);
      return;
    }

    // we need AT MOST the same size as the source string in the output
    dest.clear();
    if (dest.capacity() < src.size())
      dest.reserve(src.size());

    size_t i = 0;

    while (i < src.size())
    {
      // Number of bytes of the control message or escape sequence that
      // starts at index i, or 0 if there is none at this location
      size_t skipped;

      if (Iso2022::IsControlMessage1(src, i))
        skipped = 1;
      else if (Iso2022::IsControlMessage2(src, i))
        skipped = 2;
      else if (Iso2022::IsControlMessage3(src, i))
        skipped = 3;
      else
        skipped = Iso2022::GetEscapeSequenceLength(src, i);

      if (skipped == 0)
      {
        // no message at this location: copy the character at this index
        // and move to the next read position
        dest.push_back(src[i]);
        i++;
      }
      else
      {
        // the message is skipped: jump to the first byte that follows it
        i += skipped;
      }
    }
  }


  /**
   * Decodes the UTF-8 sequence that starts at the byte offset
   * "position" of the string "utf8".
   *
   * On success, "unicode" receives the decoded code point, and
   * "length" receives the number of bytes (between 1 and 4) of the
   * sequence. Both output arguments are left untouched if an
   * exception is thrown.
   *
   * Throws ErrorCode_ParameterOutOfRange if "position" is outside of
   * the string. Throws ErrorCode_BadFileFormat if the bytes do not
   * form a valid UTF-8 sequence: bad leading byte, truncated
   * sequence, bad continuation byte, overlong encoding, UTF-16
   * surrogate (U+D800 to U+DFFF), or value above U+10FFFF.
   **/
  void Toolbox::Utf8ToUnicodeCharacter(uint32_t& unicode,
                                       size_t& length,
                                       const std::string& utf8,
                                       size_t position)
  {
    // https://en.wikipedia.org/wiki/UTF-8

    static const uint8_t MASK_IS_1_BYTE = 0x80;     // printf '0x%x\n' "$((2#10000000))"
    static const uint8_t TEST_IS_1_BYTE = 0x00;

    static const uint8_t MASK_IS_2_BYTES = 0xe0;    // printf '0x%x\n' "$((2#11100000))"
    static const uint8_t TEST_IS_2_BYTES = 0xc0;    // printf '0x%x\n' "$((2#11000000))"

    static const uint8_t MASK_IS_3_BYTES = 0xf0;    // printf '0x%x\n' "$((2#11110000))"
    static const uint8_t TEST_IS_3_BYTES = 0xe0;    // printf '0x%x\n' "$((2#11100000))"

    static const uint8_t MASK_IS_4_BYTES = 0xf8;    // printf '0x%x\n' "$((2#11111000))"
    static const uint8_t TEST_IS_4_BYTES = 0xf0;    // printf '0x%x\n' "$((2#11110000))"

    static const uint8_t MASK_CONTINUATION = 0xc0;  // printf '0x%x\n' "$((2#11000000))"
    static const uint8_t TEST_CONTINUATION = 0x80;  // printf '0x%x\n' "$((2#10000000))"

    if (position >= utf8.size())
    {
      throw OrthancException(ErrorCode_ParameterOutOfRange);
    }

    assert(sizeof(uint8_t) == sizeof(char));
    const uint8_t* buffer = reinterpret_cast<const uint8_t*>(utf8.c_str()) + position;

    // The result is first decoded into local variables, so that the
    // output arguments are only modified once the sequence is validated
    uint32_t value = 0;
    size_t size = 0;

    // Smallest code point that legitimately requires "size" bytes:
    // anything below is an overlong encoding
    uint32_t smallest = 0;

    if ((buffer[0] & MASK_IS_1_BYTE) == TEST_IS_1_BYTE)
    {
      size = 1;
      smallest = 0;
      value = buffer[0] & ~MASK_IS_1_BYTE;
    }
    else if ((buffer[0] & MASK_IS_2_BYTES) == TEST_IS_2_BYTES &&
             position + 1 < utf8.size() &&
             (buffer[1] & MASK_CONTINUATION) == TEST_CONTINUATION)
    {
      size = 2;
      smallest = 0x80;
      uint32_t a = buffer[0] & ~MASK_IS_2_BYTES;
      uint32_t b = buffer[1] & ~MASK_CONTINUATION;
      value = (a << 6) | b;
    }
    else if ((buffer[0] & MASK_IS_3_BYTES) == TEST_IS_3_BYTES &&
             position + 2 < utf8.size() &&
             (buffer[1] & MASK_CONTINUATION) == TEST_CONTINUATION &&
             (buffer[2] & MASK_CONTINUATION) == TEST_CONTINUATION)
    {
      size = 3;
      smallest = 0x800;
      uint32_t a = buffer[0] & ~MASK_IS_3_BYTES;
      uint32_t b = buffer[1] & ~MASK_CONTINUATION;
      uint32_t c = buffer[2] & ~MASK_CONTINUATION;
      value = (a << 12) | (b << 6) | c;
    }
    else if ((buffer[0] & MASK_IS_4_BYTES) == TEST_IS_4_BYTES &&
             position + 3 < utf8.size() &&
             (buffer[1] & MASK_CONTINUATION) == TEST_CONTINUATION &&
             (buffer[2] & MASK_CONTINUATION) == TEST_CONTINUATION &&
             (buffer[3] & MASK_CONTINUATION) == TEST_CONTINUATION)
    {
      size = 4;
      smallest = 0x10000;
      uint32_t a = buffer[0] & ~MASK_IS_4_BYTES;
      uint32_t b = buffer[1] & ~MASK_CONTINUATION;
      uint32_t c = buffer[2] & ~MASK_CONTINUATION;
      uint32_t d = buffer[3] & ~MASK_CONTINUATION;
      value = (a << 18) | (b << 12) | (c << 6) | d;
    }
    else
    {
      // This is not a valid UTF-8 encoding
      throw OrthancException(ErrorCode_BadFileFormat, "Invalid UTF-8 string");
    }

    // RFC 3629 forbids overlong encodings, the UTF-16 surrogates and
    // the values that are above U+10FFFF
    if (value < smallest ||
        value > 0x10ffffu ||
        (value >= 0xd800u && value <= 0xdfffu))
    {
      throw OrthancException(ErrorCode_BadFileFormat, "Invalid UTF-8 string");
    }

    length = size;
    unicode = value;
  }
}


/**
 * Allocates a new "Orthanc::Toolbox::LinesIterator" over "content",
 * and returns its address as an opaque "OrthancLinesIterator" handle.
 * The handle is to be released using OrthancLinesIterator_Free().
 **/
OrthancLinesIterator* OrthancLinesIterator_Create(const std::string& content)
{
  Orthanc::Toolbox::LinesIterator* const iterator =
    new Orthanc::Toolbox::LinesIterator(content);

  return reinterpret_cast<OrthancLinesIterator*>(iterator);
}


bool OrthancLinesIterator_GetLine(std::string& target,
                                         const OrthancLinesIterator* iterator)
{
  if (iterator != NULL)
  {
    return reinterpret_cast<const Orthanc::Toolbox::LinesIterator*>(iterator)->GetLine(target);
  }
  else
  {
    return false;
  }
}


/**
 * Forwards to "Orthanc::Toolbox::LinesIterator::Next()" on the
 * iterator behind the opaque handle. Does nothing if the handle is
 * NULL.
 **/
void OrthancLinesIterator_Next(OrthancLinesIterator* iterator)
{
  if (iterator == NULL)
  {
    return;
  }

  Orthanc::Toolbox::LinesIterator* const that =
    reinterpret_cast<Orthanc::Toolbox::LinesIterator*>(iterator);

  that->Next();
}


/**
 * Deletes the "Orthanc::Toolbox::LinesIterator" behind the opaque
 * handle. Does nothing if the handle is NULL.
 **/
void OrthancLinesIterator_Free(OrthancLinesIterator* iterator)
{
  if (iterator == NULL)
  {
    return;
  }

  const Orthanc::Toolbox::LinesIterator* const that =
    reinterpret_cast<const Orthanc::Toolbox::LinesIterator*>(iterator);

  delete that;
}
author	Sebastien Jodogne <s.jodogne@gmail.com>
date	Fri, 22 Mar 2019 14:06:38 +0100
parents	f0c92ecd09c8
children	2a38e00a0638