Mercurial > hg > orthanc
changeset 4670:b12faca76a52
support of output streams in ZipWriter
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Fri, 28 May 2021 18:26:40 +0200 |
parents | b14989f9ff8b |
children | 42e1f5bde40b |
files | OrthancFramework/Sources/Compression/HierarchicalZipWriter.cpp OrthancFramework/Sources/Compression/HierarchicalZipWriter.h OrthancFramework/Sources/Compression/ZipWriter.cpp OrthancFramework/Sources/Compression/ZipWriter.h OrthancFramework/UnitTestsSources/ZipTests.cpp OrthancServer/UnitTestsSources/SizeOfTests.impl.h |
diffstat | 6 files changed, 696 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/OrthancFramework/Sources/Compression/HierarchicalZipWriter.cpp Fri May 28 11:40:47 2021 +0200 +++ b/OrthancFramework/Sources/Compression/HierarchicalZipWriter.cpp Fri May 28 18:26:40 2021 +0200 @@ -153,6 +153,14 @@ writer_.Open(); } + + HierarchicalZipWriter::HierarchicalZipWriter(ZipWriter::IOutputStream* stream) + { + writer_.AcquireOutputStream(stream); + writer_.Open(); + } + + HierarchicalZipWriter::~HierarchicalZipWriter() { writer_.Close(); @@ -218,4 +226,9 @@ { writer_.Write(data); } + + HierarchicalZipWriter* HierarchicalZipWriter::CreateToMemory(std::string& target) + { + return new HierarchicalZipWriter(new ZipWriter::MemoryStream(target)); + } }
--- a/OrthancFramework/Sources/Compression/HierarchicalZipWriter.h Fri May 28 11:40:47 2021 +0200 +++ b/OrthancFramework/Sources/Compression/HierarchicalZipWriter.h Fri May 28 18:26:40 2021 +0200 @@ -83,6 +83,8 @@ public: explicit HierarchicalZipWriter(const char* path); + explicit HierarchicalZipWriter(ZipWriter::IOutputStream* stream); // transfers ownership + ~HierarchicalZipWriter(); void SetZip64(bool isZip64); @@ -108,5 +110,8 @@ void Write(const void* data, size_t length); void Write(const std::string& data); + + // The lifetime of the "target" buffer must be longer than that of HierarchicalZipWriter + static HierarchicalZipWriter* CreateToMemory(std::string& target); }; }
--- a/OrthancFramework/Sources/Compression/ZipWriter.cpp Fri May 28 11:40:47 2021 +0200 +++ b/OrthancFramework/Sources/Compression/ZipWriter.cpp Fri May 28 18:26:40 2021 +0200 @@ -66,9 +66,342 @@ namespace Orthanc { + ZipWriter::MemoryStream::MemoryStream(std::string& target) : + target_(target) + { + } + + + void ZipWriter::MemoryStream::Write(const std::string& chunk) + { + chunked_.AddChunk(chunk); + } + + + void ZipWriter::MemoryStream::Close() + { + chunked_.Flatten(target_); + } + + + void ZipWriter::BufferWithSeek::CheckInvariants() const + { +#if !defined(NDEBUG) + assert(chunks_.GetNumBytes() == 0 || + flattened_.empty()); + + assert(currentPosition_ <= GetSize()); + + if (currentPosition_ < GetSize()) + { + assert(chunks_.GetNumBytes() == 0); + assert(!flattened_.empty()); + } +#endif + } + + + ZipWriter::BufferWithSeek::BufferWithSeek() : + currentPosition_(0) + { + CheckInvariants(); + } + + + ZipWriter::BufferWithSeek::~BufferWithSeek() + { + CheckInvariants(); + } + + + size_t ZipWriter::BufferWithSeek::GetPosition() const + { + return currentPosition_; + } + + + size_t ZipWriter::BufferWithSeek::GetSize() const + { + if (flattened_.empty()) + { + return chunks_.GetNumBytes(); + } + else + { + return flattened_.size(); + } + } + + + void ZipWriter::BufferWithSeek::Write(const void* data, + size_t size) + { + CheckInvariants(); + + if (size != 0) + { + if (currentPosition_ < GetSize()) + { + if (currentPosition_ + size > flattened_.size()) + { + throw OrthancException(ErrorCode_ParameterOutOfRange); + } + else + { + memcpy(&flattened_[currentPosition_], data, size); + } + } + else + { + if (!flattened_.empty()) + { + assert(chunks_.GetNumBytes() == 0); + chunks_.AddChunk(flattened_); + flattened_.clear(); + } + + chunks_.AddChunk(data, size); + } + + currentPosition_ += size; + } + + CheckInvariants(); + } + + + void ZipWriter::BufferWithSeek::Write(const std::string& data) + { + if (!data.empty()) + { + Write(data.c_str(), data.size()); + } + } + + + 
void ZipWriter::BufferWithSeek::Seek(size_t position) + { + CheckInvariants(); + + if (currentPosition_ != position) + { + if (position < GetSize()) + { + if (chunks_.GetNumBytes() != 0) + { + assert(flattened_.empty()); + chunks_.Flatten(flattened_); + } + + assert(chunks_.GetNumBytes() == 0); + } + else if (position > GetSize()) + { + throw OrthancException(ErrorCode_ParameterOutOfRange); + } + + currentPosition_ = position; + } + + CheckInvariants(); + } + + + void ZipWriter::BufferWithSeek::Flush(std::string& target) + { + CheckInvariants(); + + if (flattened_.empty()) + { + chunks_.Flatten(target); + } + else + { + flattened_.swap(target); + flattened_.clear(); + } + + currentPosition_ = 0; + + CheckInvariants(); + } + + + /** + * Inside a ZIP archive, compressed files are concatenated, each + * file being prefixed by its "Local file header". The ZIP archive + * ends with the "central directory" structure. + * https://en.wikipedia.org/wiki/ZIP_(file_format) + * + * When writing one file, the minizip implementation first TELLS to + * know the current size of the archive, then WRITES the header and + * data bytes, then SEEKS backward to update the "local file header" + * with info about the compressed data (at offset 14, containing + * CRC-32, compressed size and uncompressed size), and finally SEEKS + * to get back to the end of the stream in order to continue adding + * files. + * + * The minizip implementation will *never* SEEK *before* the "local + * file header" of the current file. However, the current file must + * *not* be immediately sent to the stream as new bytes are written, + * because the "local file header" will be updated. + * + * Consequently, this buffer class only sends the pending bytes to + * the output stream once it receives a SEEK command that moves the + * cursor to the end of the archive. In the minizip implementation, + * such a SEEK indicates that the current file has been properly + * added to the archive. 
+ **/ + class ZipWriter::StreamBuffer : public boost::noncopyable + { + private: + IOutputStream& stream_; + bool success_; + ZPOS64_T startCurrentFile_; + BufferWithSeek buffer_; + + public: + StreamBuffer(IOutputStream& stream) : + stream_(stream), + success_(true), + startCurrentFile_(0) + { + } + + int Close() + { + try + { + std::string s; + buffer_.Flush(s); + stream_.Write(s); + return 0; + } + catch (...) + { + success_ = false; + return 1; + } + } + + ZPOS64_T Tell() const + { + return startCurrentFile_ + static_cast<ZPOS64_T>(buffer_.GetPosition()); + } + + uLong Write(const void* buf, + uLong size) + { + if (size == 0) + { + return 0; + } + else + { + try + { + buffer_.Write(buf, size); + return size; + } + catch (...) + { + return 0; + } + } + } + + + long Seek(ZPOS64_T offset, + int origin) + { + try + { + if (origin == ZLIB_FILEFUNC_SEEK_SET && + offset >= startCurrentFile_) + { + ZPOS64_T fullSize = startCurrentFile_ + static_cast<ZPOS64_T>(buffer_.GetSize()); + assert(offset <= fullSize); + + if (offset == fullSize) + { + // We can flush to the output stream + std::string s; + buffer_.Flush(s); + stream_.Write(s); + startCurrentFile_ = fullSize; + } + else + { + buffer_.Seek(offset - startCurrentFile_); + } + + return 0; // OK + } + else + { + return 1; // Should never occur + } + } + catch (...) 
+ { + return 1; + } + } + + + static int CloseWrapper(voidpf opaque, + voidpf stream) + { + assert(opaque != NULL); + return reinterpret_cast<StreamBuffer*>(opaque)->Close(); + } + + static voidpf OpenWrapper(voidpf opaque, + const void* filename, + int mode) + { + assert(opaque != NULL); + return opaque; + } + + static long SeekWrapper(voidpf opaque, + voidpf stream, + ZPOS64_T offset, + int origin) + { + assert(opaque != NULL); + return reinterpret_cast<StreamBuffer*>(opaque)->Seek(offset, origin); + } + + static ZPOS64_T TellWrapper(voidpf opaque, + voidpf stream) + { + assert(opaque != NULL); + return reinterpret_cast<StreamBuffer*>(opaque)->Tell(); + } + + static int TestErrorWrapper(voidpf opaque, + voidpf stream) + { + assert(opaque != NULL); + return reinterpret_cast<StreamBuffer*>(opaque)->success_ ? 0 : 1; + } + + static uLong WriteWrapper(voidpf opaque, + voidpf stream, + const void* buf, + uLong size) + { + assert(opaque != NULL); + return reinterpret_cast<StreamBuffer*>(opaque)->Write(buf, size); + } + }; + + struct ZipWriter::PImpl { zipFile file_; + std::unique_ptr<StreamBuffer> streamBuffer_; PImpl() : file_(NULL) { @@ -86,7 +419,14 @@ ZipWriter::~ZipWriter() { - Close(); + try + { + Close(); + } + catch (OrthancException& e) // Don't throw exceptions in destructors + { + LOG(ERROR) << "Catched exception in destructor: " << e.What(); + } } void ZipWriter::Close() @@ -96,6 +436,14 @@ zipClose(pimpl_->file_, "Created by Orthanc"); pimpl_->file_ = NULL; hasFileInZip_ = false; + + pimpl_->streamBuffer_.reset(NULL); + + if (outputStream_.get() != NULL) + { + outputStream_->Close(); + outputStream_.reset(NULL); + } } } @@ -110,35 +458,69 @@ { return; } + else if (outputStream_.get() != NULL) + { + // New in Orthanc 1.9.4 + hasFileInZip_ = false; - if (path_.size() == 0) + zlib_filefunc64_def funcs; + memset(&funcs, 0, sizeof(funcs)); + + pimpl_->streamBuffer_.reset(new StreamBuffer(*outputStream_)); + funcs.opaque = pimpl_->streamBuffer_.get(); + 
funcs.zclose_file = StreamBuffer::CloseWrapper; + funcs.zerror_file = StreamBuffer::TestErrorWrapper; + funcs.zopen64_file = StreamBuffer::OpenWrapper; + funcs.ztell64_file = StreamBuffer::TellWrapper; + funcs.zwrite_file = StreamBuffer::WriteWrapper; + funcs.zseek64_file = StreamBuffer::SeekWrapper; + + /** + * "funcs.zread_file" (ZREAD64) also appears in "minizip/zip.c", + * but is only needed by function "LoadCentralDirectoryRecord()" + * that is only used if appending new files to an already + * existing ZIP, which makes no sense for an output stream. + **/ + + pimpl_->file_ = zipOpen2_64(NULL /* no output path */, APPEND_STATUS_CREATE, + NULL /* global comment */, &funcs); + + if (!pimpl_->file_) + { + throw OrthancException(ErrorCode_CannotWriteFile, + "Cannot create new ZIP archive into an output stream"); + } + } + else if (path_.empty()) { throw OrthancException(ErrorCode_BadSequenceOfCalls, "Please call SetOutputPath() before creating the file"); } - - hasFileInZip_ = false; - - int mode = APPEND_STATUS_CREATE; - if (append_ && - boost::filesystem::exists(path_)) - { - mode = APPEND_STATUS_ADDINZIP; - } - - if (isZip64_) - { - pimpl_->file_ = zipOpen64(path_.c_str(), mode); - } else { - pimpl_->file_ = zipOpen(path_.c_str(), mode); - } + hasFileInZip_ = false; + + int mode = APPEND_STATUS_CREATE; + if (append_ && + boost::filesystem::exists(path_)) + { + mode = APPEND_STATUS_ADDINZIP; + } - if (!pimpl_->file_) - { - throw OrthancException(ErrorCode_CannotWriteFile, - "Cannot create new ZIP archive: " + path_); + if (isZip64_) + { + pimpl_->file_ = zipOpen64(path_.c_str(), mode); + } + else + { + pimpl_->file_ = zipOpen(path_.c_str(), mode); + } + + if (!pimpl_->file_) + { + throw OrthancException(ErrorCode_CannotWriteFile, + "Cannot create new ZIP archive: " + path_); + } } } @@ -268,4 +650,27 @@ { return isZip64_; } + + + void ZipWriter::AcquireOutputStream(IOutputStream* stream) + { + std::unique_ptr<IOutputStream> protection(stream); + + if (stream == 
NULL) + { + throw OrthancException(ErrorCode_NullPointer); + } + else + { + Close(); + path_.clear(); + outputStream_.reset(protection.release()); + } + } + + + void ZipWriter::SetMemoryOutput(std::string& target) + { + AcquireOutputStream(new MemoryStream(target)); + } }
--- a/OrthancFramework/Sources/Compression/ZipWriter.h Fri May 28 11:40:47 2021 +0200 +++ b/OrthancFramework/Sources/Compression/ZipWriter.h Fri May 28 18:26:40 2021 +0200 @@ -32,6 +32,13 @@ # error ZLIB support must be enabled to include this file #endif +#if ORTHANC_BUILD_UNIT_TESTS == 1 +# include <gtest/gtest_prod.h> +#endif + +#include "../ChunkedBuffer.h" +#include "../Compatibility.h" + #include <stdint.h> #include <string> @@ -42,7 +49,75 @@ { class ORTHANC_PUBLIC ZipWriter : public boost::noncopyable { +#if ORTHANC_BUILD_UNIT_TESTS == 1 + FRIEND_TEST(ZipWriter, BufferWithSeek); +#endif + + public: + // New in Orthanc 1.9.4 + class ORTHANC_PUBLIC IOutputStream : public boost::noncopyable + { + public: + virtual ~IOutputStream() + { + } + + virtual void Write(const std::string& chunk) = 0; + + virtual void Close() = 0; + }; + + + // The lifetime of the "target" buffer must be longer than that of ZipWriter + class ORTHANC_PUBLIC MemoryStream : public IOutputStream + { + private: + std::string& target_; + ChunkedBuffer chunked_; + + public: + MemoryStream(std::string& target); + + virtual void Write(const std::string& chunk) ORTHANC_OVERRIDE; + + virtual void Close() ORTHANC_OVERRIDE; + }; + + private: + // This class is only public for unit tests + class ORTHANC_PUBLIC BufferWithSeek : public boost::noncopyable + { + private: + size_t currentPosition_; + ChunkedBuffer chunks_; + std::string flattened_; + + void CheckInvariants() const; + + public: + BufferWithSeek(); + + ~BufferWithSeek(); + + size_t GetPosition() const; + + size_t GetSize() const; + + void Write(const void* data, + size_t size); + + void Write(const std::string& data); + + void Seek(size_t position); + + void Flush(std::string& target); + }; + + + private: + class StreamBuffer; + struct PImpl; boost::shared_ptr<PImpl> pimpl_; @@ -52,6 +127,8 @@ uint8_t compressionLevel_; std::string path_; + std::unique_ptr<IOutputStream> outputStream_; + public: ZipWriter(); @@ -84,5 +161,10 @@ void 
Write(const void* data, size_t length); void Write(const std::string& data); + + void AcquireOutputStream(IOutputStream* stream); // transfers ownership + + // The lifetime of the "target" buffer must be longer than that of ZipWriter + void SetMemoryOutput(std::string& target); }; }
--- a/OrthancFramework/UnitTestsSources/ZipTests.cpp Fri May 28 11:40:47 2021 +0200 +++ b/OrthancFramework/UnitTestsSources/ZipTests.cpp Fri May 28 18:26:40 2021 +0200 @@ -208,3 +208,170 @@ ASSERT_FALSE(reader->ReadNextFile(filename, content)); } + + +TEST(ZipWriter, Stream) +{ + std::string memory; + + std::string large; + large.resize(4 * 65536); + for (size_t i = 0; i < large.size(); i++) + { + large[i] = rand() % 256; + } + + for (int i = 0; i < 2; i++) + { + { + Orthanc::ZipWriter w; + w.SetZip64(i == 0); + w.SetMemoryOutput(memory); + w.Open(); + + w.OpenFile("world/hello"); + w.Write(large); + w.OpenFile("world/hello2"); + w.Write(large); + w.OpenFile("world/hello3"); + w.Write("Hello world"); + w.OpenFile("world/hello4"); + w.Write(large); + } + + std::unique_ptr<ZipReader> reader(ZipReader::CreateFromMemory(memory)); + + ASSERT_EQ(4u, reader->GetFilesCount()); + + { + std::string filename, content; + ASSERT_TRUE(reader->ReadNextFile(filename, content)); + ASSERT_EQ("world/hello", filename); + ASSERT_EQ(large.size(), content.size()); + ASSERT_TRUE(memcmp(large.c_str(), content.c_str(), large.size()) == 0); + } + + { + std::string filename, content; + ASSERT_TRUE(reader->ReadNextFile(filename, content)); + ASSERT_EQ("world/hello2", filename); + ASSERT_EQ(large.size(), content.size()); + ASSERT_TRUE(memcmp(large.c_str(), content.c_str(), large.size()) == 0); + } + + { + std::string filename, content; + ASSERT_TRUE(reader->ReadNextFile(filename, content)); + ASSERT_EQ("world/hello3", filename); + ASSERT_EQ("Hello world", content); + } + + { + std::string filename, content; + ASSERT_TRUE(reader->ReadNextFile(filename, content)); + ASSERT_EQ("world/hello4", filename); + ASSERT_EQ(large.size(), content.size()); + ASSERT_TRUE(memcmp(large.c_str(), content.c_str(), large.size()) == 0); + } + + { + std::string filename, content; + ASSERT_FALSE(reader->ReadNextFile(filename, content)); + } + } +} + + +namespace Orthanc +{ + // The namespace is necessary because of 
FRIEND_TEST + // http://code.google.com/p/googletest/wiki/AdvancedGuide#Private_Class_Members + + TEST(ZipWriter, BufferWithSeek) + { + ZipWriter::BufferWithSeek buffer; + ASSERT_EQ(0u, buffer.GetSize()); + + std::string s; + buffer.Flush(s); + ASSERT_TRUE(s.empty()); + + buffer.Write("hello"); + ASSERT_EQ(5u, buffer.GetSize()); + ASSERT_EQ(5u, buffer.GetPosition()); + buffer.Write("world"); + ASSERT_EQ(10u, buffer.GetSize()); + ASSERT_EQ(10u, buffer.GetPosition()); + buffer.Flush(s); + ASSERT_EQ("helloworld", s); + ASSERT_EQ(0u, buffer.GetSize()); + ASSERT_EQ(0u, buffer.GetPosition()); + + buffer.Write("hello world"); + buffer.Seek(4); + ASSERT_EQ(4u, buffer.GetPosition()); + buffer.Write("ab"); + ASSERT_EQ(6u, buffer.GetPosition()); + buffer.Flush(s); + ASSERT_EQ("hellabworld", s); + ASSERT_EQ(0u, buffer.GetPosition()); + + buffer.Seek(0); + ASSERT_EQ(0u, buffer.GetPosition()); + buffer.Write("abc"); + buffer.Write(""); + ASSERT_EQ(3u, buffer.GetPosition()); + buffer.Seek(3); + ASSERT_THROW(buffer.Seek(4), OrthancException); + ASSERT_EQ(3u, buffer.GetPosition()); + buffer.Write("de"); + buffer.Write(""); + ASSERT_EQ(5u, buffer.GetPosition()); + buffer.Seek(3); + buffer.Seek(3); + ASSERT_EQ(3u, buffer.GetPosition()); + ASSERT_THROW(buffer.Write("def"), OrthancException); + buffer.Write(""); + ASSERT_EQ(3u, buffer.GetPosition()); + buffer.Write("fg"); + ASSERT_EQ(5u, buffer.GetPosition()); + buffer.Write("hi"); + ASSERT_EQ(7u, buffer.GetPosition()); + buffer.Flush(s); + ASSERT_EQ("abcfghi", s); + ASSERT_EQ(0u, buffer.GetPosition()); + + buffer.Write("abc"); + ASSERT_EQ(3u, buffer.GetPosition()); + buffer.Seek(2); + ASSERT_EQ(2u, buffer.GetPosition()); + buffer.Write("z"); + ASSERT_EQ(3u, buffer.GetPosition()); + buffer.Seek(1); + ASSERT_EQ(1u, buffer.GetPosition()); + buffer.Write("y"); + ASSERT_EQ(2u, buffer.GetPosition()); + buffer.Flush(s); + ASSERT_EQ("ayz", s); + ASSERT_EQ(0u, buffer.GetPosition()); + + ASSERT_EQ(0u, buffer.GetPosition()); + 
buffer.Write("abc"); + ASSERT_EQ(3u, buffer.GetPosition()); + buffer.Seek(1); + ASSERT_EQ(1u, buffer.GetPosition()); + buffer.Write("z"); + ASSERT_EQ(2u, buffer.GetPosition()); + buffer.Seek(3); + ASSERT_EQ(3u, buffer.GetPosition()); + buffer.Write("y"); + ASSERT_EQ(4u, buffer.GetPosition()); + buffer.Flush(s); + ASSERT_EQ("azcy", s); + ASSERT_EQ(0u, buffer.GetPosition()); + + buffer.Flush(s); + ASSERT_TRUE(s.empty()); + ASSERT_EQ(0u, buffer.GetPosition()); + } +}
--- a/OrthancServer/UnitTestsSources/SizeOfTests.impl.h Fri May 28 11:40:47 2021 +0200 +++ b/OrthancServer/UnitTestsSources/SizeOfTests.impl.h Fri May 28 18:26:40 2021 +0200 @@ -111,4 +111,6 @@ printf("sizeof(::Orthanc::WebServiceParameters) == %d\n", static_cast<int>(sizeof(::Orthanc::WebServiceParameters))); printf("sizeof(::Orthanc::ZipReader) == %d\n", static_cast<int>(sizeof(::Orthanc::ZipReader))); printf("sizeof(::Orthanc::ZipWriter) == %d\n", static_cast<int>(sizeof(::Orthanc::ZipWriter))); + printf("sizeof(::Orthanc::ZipWriter::IOutputStream) == %d\n", static_cast<int>(sizeof(::Orthanc::ZipWriter::IOutputStream))); + printf("sizeof(::Orthanc::ZipWriter::MemoryStream) == %d\n", static_cast<int>(sizeof(::Orthanc::ZipWriter::MemoryStream))); printf("sizeof(::Orthanc::ZlibCompressor) == %d\n", static_cast<int>(sizeof(::Orthanc::ZlibCompressor)));