changeset 1513:fe07f82d83d3

gzip compression
author Sebastien Jodogne <s.jodogne@gmail.com>
date Mon, 10 Aug 2015 16:01:37 +0200
parents 52dc56bcec7d
children d73a2178b319
files CMakeLists.txt Core/Compression/DeflateBaseCompressor.cpp Core/Compression/DeflateBaseCompressor.h Core/Compression/GzipCompressor.cpp Core/Compression/GzipCompressor.h Core/Compression/ZlibCompressor.cpp UnitTestsSources/UnitTestsMain.cpp
diffstat 7 files changed, 441 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/CMakeLists.txt	Mon Aug 10 14:52:10 2015 +0200
+++ b/CMakeLists.txt	Mon Aug 10 16:01:37 2015 +0200
@@ -83,6 +83,7 @@
   Core/Cache/SharedArchive.cpp
   Core/ChunkedBuffer.cpp
   Core/Compression/DeflateBaseCompressor.cpp
+  Core/Compression/GzipCompressor.cpp
   Core/Compression/HierarchicalZipWriter.cpp
   Core/Compression/ZipWriter.cpp
   Core/Compression/ZlibCompressor.cpp
--- a/Core/Compression/DeflateBaseCompressor.cpp	Mon Aug 10 14:52:10 2015 +0200
+++ b/Core/Compression/DeflateBaseCompressor.cpp	Mon Aug 10 16:01:37 2015 +0200
@@ -35,6 +35,8 @@
 
 #include "../OrthancException.h"
 
+#include <string.h>
+
 namespace Orthanc
 {
   void DeflateBaseCompressor::SetCompressionLevel(uint8_t level)
@@ -46,4 +48,25 @@
 
     compressionLevel_ = level;
   }
+
+
+  uint64_t DeflateBaseCompressor::ReadUncompressedSizePrefix(const void* compressed,
+                                                             size_t compressedSize)
+  {
+    if (compressedSize == 0)
+    {
+      return 0;
+    }
+
+    if (compressedSize < sizeof(uint64_t))
+    {
+      throw OrthancException("The compressed buffer is ill-formed");
+    }
+
+    uint64_t size;
+    memcpy(&size, compressed, sizeof(uint64_t));
+
+    return size;
+  }
+
 }
--- a/Core/Compression/DeflateBaseCompressor.h	Mon Aug 10 14:52:10 2015 +0200
+++ b/Core/Compression/DeflateBaseCompressor.h	Mon Aug 10 16:01:37 2015 +0200
@@ -44,6 +44,10 @@
     uint8_t compressionLevel_;
     bool    prefixWithUncompressedSize_;
 
+  protected:
+    uint64_t ReadUncompressedSizePrefix(const void* compressed,
+                                        size_t compressedSize);
+
   public:
     DeflateBaseCompressor() : 
       compressionLevel_(6),
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Core/Compression/GzipCompressor.cpp	Mon Aug 10 16:01:37 2015 +0200
@@ -0,0 +1,277 @@
+/**
+ * Orthanc - A Lightweight, RESTful DICOM Store
+ * Copyright (C) 2012-2015 Sebastien Jodogne, Medical Physics
+ * Department, University Hospital of Liege, Belgium
+ *
+ * This program is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * In addition, as a special exception, the copyright holders of this
+ * program give permission to link the code of its release with the
+ * OpenSSL project's "OpenSSL" library (or with modified versions of it
+ * that use the same license as the "OpenSSL" library), and distribute
+ * the linked executables. You must obey the GNU General Public License
+ * in all respects for all of the code used other than "OpenSSL". If you
+ * modify file(s) with this exception, you may extend this exception to
+ * your version of the file(s), but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files
+ * in the program, then also delete it here.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ **/
+
+
+#include "../PrecompiledHeaders.h"
+#include "GzipCompressor.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <zlib.h>
+
+#include "../OrthancException.h"
+#include "../Logging.h"
+
+namespace Orthanc
+{
+  uint64_t GzipCompressor::GuessUncompressedSize(const void* compressed,
+                                                 size_t compressedSize)
+  {
+    /**
+     * "Is there a way to find out the size of the original file which
+     * is inside a GZIP file? [...] There is no truly reliable way,
+     * other than gunzipping the stream. You do not need to save the
+     * result of the decompression, so you can determine the size by
+     * simply reading and decoding the entire file without taking up
+     * space with the decompressed result.
+     *
+     * There is an unreliable way to determine the uncompressed size,
+     * which is to look at the last four bytes of the gzip file, which
+     * is the uncompressed length of that entry modulo 232 in little
+     * endian order.
+     * 
+     * It is unreliable because a) the uncompressed data may be longer
+     * than 2^32 bytes, and b) the gzip file may consist of multiple
+     * gzip streams, in which case you would find the length of only
+     * the last of those streams.
+     * 
+     * If you are in control of the source of the gzip files, you know
+     * that they consist of single gzip streams, and you know that
+     * they are less than 2^32 bytes uncompressed, then and only then
+     * can you use those last four bytes with confidence."
+     *
+     * http://stackoverflow.com/a/9727599/881731
+     **/
+
+    if (compressedSize < 4)
+    {
+      throw OrthancException(ErrorCode_BadFileFormat);
+    }
+
+    const uint8_t* p = reinterpret_cast<const uint8_t*>(compressed) + compressedSize - 4;
+
+    return ((static_cast<uint32_t>(p[0]) << 0) +
+            (static_cast<uint32_t>(p[1]) << 8) +
+            (static_cast<uint32_t>(p[2]) << 16) +
+            (static_cast<uint32_t>(p[3]) << 24));            
+  }
+
+
+
+  void GzipCompressor::Compress(std::string& compressed,
+                                const void* uncompressed,
+                                size_t uncompressedSize)
+  {
+    uLongf compressedSize = compressBound(uncompressedSize) + 1024 /* security margin */;
+    if (compressedSize == 0)
+    {
+      compressedSize = 1;
+    }
+
+    uint8_t* target;
+    if (HasPrefixWithUncompressedSize())
+    {
+      compressed.resize(compressedSize + sizeof(uint64_t));
+      target = reinterpret_cast<uint8_t*>(&compressed[0]) + sizeof(uint64_t);
+    }
+    else
+    {
+      compressed.resize(compressedSize);
+      target = reinterpret_cast<uint8_t*>(&compressed[0]);
+    }
+
+    z_stream stream;
+    memset(&stream, 0, sizeof(stream));
+
+    stream.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(uncompressed));
+    stream.next_out = reinterpret_cast<Bytef*>(target);
+
+    stream.avail_in = static_cast<uInt>(uncompressedSize);
+    stream.avail_out = static_cast<uInt>(compressedSize);
+
+    // Ensure no overflow (if the buffer is too large for the current archicture)
+    if (static_cast<size_t>(stream.avail_in) != uncompressedSize ||
+        static_cast<size_t>(stream.avail_out) != compressedSize)
+    {
+      throw OrthancException(ErrorCode_NotEnoughMemory);
+    }
+    
+    // Initialize the compression engine
+    int error = deflateInit2(&stream, 
+                             GetCompressionLevel(), 
+                             Z_DEFLATED,
+                             MAX_WBITS + 16,      // ask for gzip output
+                             8,                   // default memory level
+                             Z_DEFAULT_STRATEGY);
+
+    if (error != Z_OK)
+    {
+      // Cannot initialize zlib
+      compressed.clear();
+      throw OrthancException(ErrorCode_InternalError);
+    }
+
+    // Compress the input buffer
+    error = deflate(&stream, Z_FINISH);
+
+    if (error != Z_STREAM_END)
+    {
+      deflateEnd(&stream);
+      compressed.clear();
+
+      switch (error)
+      {
+      case Z_MEM_ERROR:
+        throw OrthancException(ErrorCode_NotEnoughMemory);
+
+      default:
+        throw OrthancException(ErrorCode_InternalError);
+      }  
+    }
+
+    size_t size = stream.total_out;
+
+    if (deflateEnd(&stream) != Z_OK)
+    {
+      throw OrthancException(ErrorCode_InternalError);
+    }
+
+    // The compression was successful
+    if (HasPrefixWithUncompressedSize())
+    {
+      uint64_t s = static_cast<uint64_t>(uncompressedSize);
+      memcpy(&compressed[0], &s, sizeof(uint64_t));
+      compressed.resize(size + sizeof(uint64_t));
+    }
+    else
+    {
+      compressed.resize(size);
+    }
+  }
+
+
+  void GzipCompressor::Uncompress(std::string& uncompressed,
+                                  const void* compressed,
+                                  size_t compressedSize)
+  {
+    uint64_t uncompressedSize;
+    const uint8_t* source = reinterpret_cast<const uint8_t*>(compressed);
+
+    if (HasPrefixWithUncompressedSize())
+    {
+      uncompressedSize = ReadUncompressedSizePrefix(compressed, compressedSize);
+      source += sizeof(uint64_t);
+      compressedSize -= sizeof(uint64_t);
+    }
+    else
+    {
+      uncompressedSize = GuessUncompressedSize(compressed, compressedSize);
+    }
+
+    try
+    {
+      uncompressed.resize(uncompressedSize);
+    }
+    catch (...)
+    {
+      throw OrthancException(ErrorCode_NotEnoughMemory);
+    }
+
+    z_stream stream;
+    memset(&stream, 0, sizeof(stream));
+
+    char dummy = '\0';  // zlib does not like NULL output buffers (even if the uncompressed data is empty)
+    stream.next_in = const_cast<Bytef*>(source);
+    stream.next_out = reinterpret_cast<Bytef*>(uncompressedSize == 0 ? &dummy : &uncompressed[0]);
+
+    stream.avail_in = static_cast<uInt>(compressedSize);
+    stream.avail_out = static_cast<uInt>(uncompressedSize);
+
+    // Ensure no overflow (if the buffer is too large for the current archicture)
+    if (static_cast<size_t>(stream.avail_in) != compressedSize ||
+        static_cast<size_t>(stream.avail_out) != uncompressedSize)
+    {
+      throw OrthancException(ErrorCode_NotEnoughMemory);
+    }
+
+    // Initialize the compression engine
+    int error = inflateInit2(&stream, 
+                             MAX_WBITS + 16);  // this is a gzip input
+
+    if (error != Z_OK)
+    {
+      // Cannot initialize zlib
+      uncompressed.clear();
+      throw OrthancException(ErrorCode_InternalError);
+    }
+
+    // Uncompress the input buffer
+    error = inflate(&stream, Z_FINISH);
+
+    if (error != Z_STREAM_END)
+    {
+      inflateEnd(&stream);
+      uncompressed.clear();
+
+      switch (error)
+      {
+        case Z_MEM_ERROR:
+          throw OrthancException(ErrorCode_NotEnoughMemory);
+          
+        case Z_BUF_ERROR:
+        case Z_NEED_DICT:
+          throw OrthancException(ErrorCode_BadFileFormat);
+          
+        default:
+          throw OrthancException(ErrorCode_InternalError);
+      }
+    }
+
+    size_t size = stream.total_out;
+
+    if (inflateEnd(&stream) != Z_OK)
+    {
+      uncompressed.clear();
+      throw OrthancException(ErrorCode_InternalError);
+    }
+
+    if (size != uncompressedSize)
+    {
+      uncompressed.clear();
+
+      // The uncompressed size was not that properly guess, presumably
+      // because of a file size over 4GB. Should fallback to
+      // stream-based decompression.
+      LOG(ERROR) << "The uncompressed size of a gzip-encoded buffer was not properly guessed";
+      throw OrthancException(ErrorCode_NotImplemented);
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Core/Compression/GzipCompressor.h	Mon Aug 10 16:01:37 2015 +0200
@@ -0,0 +1,59 @@
+/**
+ * Orthanc - A Lightweight, RESTful DICOM Store
+ * Copyright (C) 2012-2015 Sebastien Jodogne, Medical Physics
+ * Department, University Hospital of Liege, Belgium
+ *
+ * This program is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * In addition, as a special exception, the copyright holders of this
+ * program give permission to link the code of its release with the
+ * OpenSSL project's "OpenSSL" library (or with modified versions of it
+ * that use the same license as the "OpenSSL" library), and distribute
+ * the linked executables. You must obey the GNU General Public License
+ * in all respects for all of the code used other than "OpenSSL". If you
+ * modify file(s) with this exception, you may extend this exception to
+ * your version of the file(s), but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files
+ * in the program, then also delete it here.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ **/
+
+
+#pragma once
+
+#include "DeflateBaseCompressor.h"
+
+namespace Orthanc
+{
+  class GzipCompressor : public DeflateBaseCompressor
+  {
+  private:
+    uint64_t GuessUncompressedSize(const void* compressed,
+                                   size_t compressedSize);
+
+  public:
+    GzipCompressor()
+    {
+      SetPrefixWithUncompressedSize(false);
+    }
+
+    virtual void Compress(std::string& compressed,
+                          const void* uncompressed,
+                          size_t uncompressedSize);
+
+    virtual void Uncompress(std::string& uncompressed,
+                            const void* compressed,
+                            size_t compressedSize);
+  };
+}
--- a/Core/Compression/ZlibCompressor.cpp	Mon Aug 10 14:52:10 2015 +0200
+++ b/Core/Compression/ZlibCompressor.cpp	Mon Aug 10 16:01:37 2015 +0200
@@ -33,10 +33,12 @@
 #include "../PrecompiledHeaders.h"
 #include "ZlibCompressor.h"
 
+#include "../OrthancException.h"
+#include "../Logging.h"
+
 #include <stdio.h>
 #include <string.h>
 #include <zlib.h>
-#include "../OrthancException.h"
 
 namespace Orthanc
 {
@@ -112,13 +114,13 @@
       return;
     }
 
-    if (compressedSize < sizeof(uint64_t))
+    if (!HasPrefixWithUncompressedSize())
     {
-      throw OrthancException("Zlib: The compressed buffer is ill-formed");
+      LOG(ERROR) << "Cannot guess the uncompressed size of a zlib-encoded buffer";
+      throw OrthancException(ErrorCode_InternalError);
     }
 
-    uint64_t uncompressedSize;
-    memcpy(&uncompressedSize, compressed, sizeof(uint64_t));
+    uint64_t uncompressedSize = ReadUncompressedSizePrefix(compressed, compressedSize);
     
     try
     {
@@ -126,7 +128,7 @@
     }
     catch (...)
     {
-      throw OrthancException("Zlib: Corrupted compressed buffer");
+      throw OrthancException(ErrorCode_NotEnoughMemory);
     }
 
     uLongf tmp = uncompressedSize;
--- a/UnitTestsSources/UnitTestsMain.cpp	Mon Aug 10 14:52:10 2015 +0200
+++ b/UnitTestsSources/UnitTestsMain.cpp	Mon Aug 10 16:01:37 2015 +0200
@@ -38,6 +38,7 @@
 #include <ctype.h>
 
 #include "../Core/Compression/ZlibCompressor.h"
+#include "../Core/Compression/GzipCompressor.h"
 #include "../Core/DicomFormat/DicomTag.h"
 #include "../Core/HttpServer/HttpToolbox.h"
 #include "../Core/Logging.h"
@@ -99,6 +100,73 @@
 }
 
 
+TEST(Gzip, Basic)
+{
+  std::string s = "Hello world";
+ 
+  std::string compressed;
+  GzipCompressor c;
+  ASSERT_FALSE(c.HasPrefixWithUncompressedSize());
+  IBufferCompressor::Compress(compressed, c, s);
+
+  std::string uncompressed;
+  IBufferCompressor::Uncompress(uncompressed, c, compressed);
+  ASSERT_EQ(s.size(), uncompressed.size());
+  ASSERT_EQ(0, memcmp(&s[0], &uncompressed[0], s.size()));
+}
+
+
+TEST(Gzip, Empty)
+{
+  std::string s;
+ 
+  std::string compressed;
+  GzipCompressor c;
+  ASSERT_FALSE(c.HasPrefixWithUncompressedSize());
+  c.SetPrefixWithUncompressedSize(false);
+  IBufferCompressor::Compress(compressed, c, s);
+
+  Toolbox::WriteFile(compressed, "/tmp/toto.gz");
+
+  std::string uncompressed;
+  IBufferCompressor::Uncompress(uncompressed, c, compressed);
+  ASSERT_EQ(0, uncompressed.size());
+}
+
+
+TEST(Gzip, BasicWithPrefix)
+{
+  std::string s = "Hello world";
+ 
+  std::string compressed;
+  GzipCompressor c;
+  c.SetPrefixWithUncompressedSize(true);
+  ASSERT_TRUE(c.HasPrefixWithUncompressedSize());
+  IBufferCompressor::Compress(compressed, c, s);
+
+  std::string uncompressed;
+  IBufferCompressor::Uncompress(uncompressed, c, compressed);
+  ASSERT_EQ(s.size(), uncompressed.size());
+  ASSERT_EQ(0, memcmp(&s[0], &uncompressed[0], s.size()));
+}
+
+
+TEST(Gzip, EmptyWithPrefix)
+{
+  std::string s;
+ 
+  std::string compressed;
+  GzipCompressor c;
+  c.SetPrefixWithUncompressedSize(true);
+  ASSERT_TRUE(c.HasPrefixWithUncompressedSize());
+  IBufferCompressor::Compress(compressed, c, s);
+
+  std::string uncompressed;
+  IBufferCompressor::Uncompress(uncompressed, c, compressed);
+  ASSERT_EQ(0, uncompressed.size());
+}
+
+
 TEST(Zlib, Basic)
 {
   std::string s = Toolbox::GenerateUuid();
@@ -106,6 +174,7 @@
  
   std::string compressed, compressed2;
   ZlibCompressor c;
+  ASSERT_TRUE(c.HasPrefixWithUncompressedSize());
   IBufferCompressor::Compress(compressed, c, s);
 
   std::string uncompressed;