view OrthancFramework/Sources/Compression/GzipCompressor.cpp @ 4924:b5994fb2a4d4

NEWS
author Sebastien Jodogne <s.jodogne@gmail.com>
date Wed, 23 Feb 2022 16:08:54 +0100
parents 43e613a7756b
children 0ea402b4d901
line wrap: on
line source

/**
 * Orthanc - A Lightweight, RESTful DICOM Store
 * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics
 * Department, University Hospital of Liege, Belgium
 * Copyright (C) 2017-2022 Osimis S.A., Belgium
 * Copyright (C) 2021-2022 Sebastien Jodogne, ICTEAM UCLouvain, Belgium
 *
 * This program is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/>.
 **/


#include "../PrecompiledHeaders.h"
#include "GzipCompressor.h"

#include <stdio.h>
#include <string.h>
#include <zlib.h>

#include "../OrthancException.h"
#include "../Logging.h"

namespace Orthanc
{
  uint64_t GzipCompressor::GuessUncompressedSize(const void* compressed,
                                                 size_t compressedSize)
  {
    /**
     * "Is there a way to find out the size of the original file which
     * is inside a GZIP file? [...] There is no truly reliable way,
     * other than gunzipping the stream. You do not need to save the
     * result of the decompression, so you can determine the size by
     * simply reading and decoding the entire file without taking up
     * space with the decompressed result.
     *
     * There is an unreliable way to determine the uncompressed size,
     * which is to look at the last four bytes of the gzip file, which
     * is the uncompressed length of that entry modulo 232 in little
     * endian order.
     * 
     * It is unreliable because a) the uncompressed data may be longer
     * than 2^32 bytes, and b) the gzip file may consist of multiple
     * gzip streams, in which case you would find the length of only
     * the last of those streams.
     * 
     * If you are in control of the source of the gzip files, you know
     * that they consist of single gzip streams, and you know that
     * they are less than 2^32 bytes uncompressed, then and only then
     * can you use those last four bytes with confidence."
     *
     * http://stackoverflow.com/a/9727599/881731
     **/

    if (compressedSize < 4)
    {
      throw OrthancException(ErrorCode_BadFileFormat);
    }

    const uint8_t* p = reinterpret_cast<const uint8_t*>(compressed) + compressedSize - 4;

    return ((static_cast<uint32_t>(p[0]) << 0) +
            (static_cast<uint32_t>(p[1]) << 8) +
            (static_cast<uint32_t>(p[2]) << 16) +
            (static_cast<uint32_t>(p[3]) << 24));            
  }


  GzipCompressor::GzipCompressor()
  {
    SetPrefixWithUncompressedSize(false);
  }


  void GzipCompressor::Compress(std::string& compressed,
                                const void* uncompressed,
                                size_t uncompressedSize)
  {
    uLongf compressedSize = compressBound(static_cast<uLong>(uncompressedSize))
      + 1024 /* security margin */;
    
    if (compressedSize == 0)
    {
      compressedSize = 1;
    }

    uint8_t* target;
    if (HasPrefixWithUncompressedSize())
    {
      compressed.resize(compressedSize + sizeof(uint64_t));
      target = reinterpret_cast<uint8_t*>(&compressed[0]) + sizeof(uint64_t);
    }
    else
    {
      compressed.resize(compressedSize);
      target = reinterpret_cast<uint8_t*>(&compressed[0]);
    }

    z_stream stream;
    memset(&stream, 0, sizeof(stream));

    stream.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(uncompressed));
    stream.next_out = reinterpret_cast<Bytef*>(target);

    stream.avail_in = static_cast<uInt>(uncompressedSize);
    stream.avail_out = static_cast<uInt>(compressedSize);

    // Ensure no overflow (if the buffer is too large for the current archicture)
    if (static_cast<size_t>(stream.avail_in) != uncompressedSize ||
        static_cast<size_t>(stream.avail_out) != compressedSize)
    {
      throw OrthancException(ErrorCode_NotEnoughMemory);
    }
    
    // Initialize the compression engine
    int error = deflateInit2(&stream, 
                             GetCompressionLevel(), 
                             Z_DEFLATED,
                             MAX_WBITS + 16,      // ask for gzip output
                             8,                   // default memory level
                             Z_DEFAULT_STRATEGY);

    if (error != Z_OK)
    {
      // Cannot initialize zlib
      compressed.clear();
      throw OrthancException(ErrorCode_InternalError);
    }

    // Compress the input buffer
    error = deflate(&stream, Z_FINISH);

    if (error != Z_STREAM_END)
    {
      deflateEnd(&stream);
      compressed.clear();

      switch (error)
      {
      case Z_MEM_ERROR:
        throw OrthancException(ErrorCode_NotEnoughMemory);

      default:
        throw OrthancException(ErrorCode_InternalError);
      }  
    }

    size_t size = stream.total_out;

    if (deflateEnd(&stream) != Z_OK)
    {
      throw OrthancException(ErrorCode_InternalError);
    }

    // The compression was successful
    if (HasPrefixWithUncompressedSize())
    {
      uint64_t s = static_cast<uint64_t>(uncompressedSize);
      memcpy(&compressed[0], &s, sizeof(uint64_t));
      compressed.resize(size + sizeof(uint64_t));
    }
    else
    {
      compressed.resize(size);
    }
  }


  void GzipCompressor::Uncompress(std::string& uncompressed,
                                  const void* compressed,
                                  size_t compressedSize)
  {
    uint64_t uncompressedSize;
    const uint8_t* source = reinterpret_cast<const uint8_t*>(compressed);

    if (HasPrefixWithUncompressedSize())
    {
      uncompressedSize = ReadUncompressedSizePrefix(compressed, compressedSize);
      source += sizeof(uint64_t);
      compressedSize -= sizeof(uint64_t);
    }
    else
    {
      uncompressedSize = GuessUncompressedSize(compressed, compressedSize);
    }

    try
    {
      uncompressed.resize(static_cast<size_t>(uncompressedSize));
    }
    catch (...)
    {
      throw OrthancException(ErrorCode_NotEnoughMemory);
    }

    z_stream stream;
    memset(&stream, 0, sizeof(stream));

    char dummy = '\0';  // zlib does not like NULL output buffers (even if the uncompressed data is empty)
    stream.next_in = const_cast<Bytef*>(source);
    stream.next_out = reinterpret_cast<Bytef*>(uncompressedSize == 0 ? &dummy : &uncompressed[0]);

    stream.avail_in = static_cast<uInt>(compressedSize);
    stream.avail_out = static_cast<uInt>(uncompressedSize);

    // Ensure no overflow (if the buffer is too large for the current archicture)
    if (static_cast<size_t>(stream.avail_in) != compressedSize ||
        static_cast<size_t>(stream.avail_out) != uncompressedSize)
    {
      throw OrthancException(ErrorCode_NotEnoughMemory);
    }

    // Initialize the compression engine
    int error = inflateInit2(&stream, 
                             MAX_WBITS + 16);  // this is a gzip input

    if (error != Z_OK)
    {
      // Cannot initialize zlib
      uncompressed.clear();
      throw OrthancException(ErrorCode_InternalError);
    }

    // Uncompress the input buffer
    error = inflate(&stream, Z_FINISH);

    if (error != Z_STREAM_END)
    {
      inflateEnd(&stream);
      uncompressed.clear();

      switch (error)
      {
        case Z_MEM_ERROR:
          throw OrthancException(ErrorCode_NotEnoughMemory);
          
        case Z_BUF_ERROR:
        case Z_NEED_DICT:
          throw OrthancException(ErrorCode_BadFileFormat);
          
        default:
          throw OrthancException(ErrorCode_InternalError);
      }
    }

    size_t size = stream.total_out;

    if (inflateEnd(&stream) != Z_OK)
    {
      uncompressed.clear();
      throw OrthancException(ErrorCode_InternalError);
    }

    if (size != uncompressedSize)
    {
      uncompressed.clear();

      // The uncompressed size was not that properly guess, presumably
      // because of a file size over 4GB. Should fallback to
      // stream-based decompression.
      throw OrthancException(ErrorCode_NotImplemented,
                             "The uncompressed size of a gzip-encoded buffer was not properly guessed");
    }
  }
}