Mercurial > hg > orthanc
comparison Core/Compression/GzipCompressor.cpp @ 1513:fe07f82d83d3
gzip compression
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Mon, 10 Aug 2015 16:01:37 +0200 |
parents | |
children | 33d34bc4ac15 |
comparison
equal
deleted
inserted
replaced
1512:52dc56bcec7d | 1513:fe07f82d83d3 |
---|---|
1 /** | |
2 * Orthanc - A Lightweight, RESTful DICOM Store | |
3 * Copyright (C) 2012-2015 Sebastien Jodogne, Medical Physics | |
4 * Department, University Hospital of Liege, Belgium | |
5 * | |
6 * This program is free software: you can redistribute it and/or | |
7 * modify it under the terms of the GNU General Public License as | |
8 * published by the Free Software Foundation, either version 3 of the | |
9 * License, or (at your option) any later version. | |
10 * | |
11 * In addition, as a special exception, the copyright holders of this | |
12 * program give permission to link the code of its release with the | |
13 * OpenSSL project's "OpenSSL" library (or with modified versions of it | |
14 * that use the same license as the "OpenSSL" library), and distribute | |
15 * the linked executables. You must obey the GNU General Public License | |
16 * in all respects for all of the code used other than "OpenSSL". If you | |
17 * modify file(s) with this exception, you may extend this exception to | |
18 * your version of the file(s), but you are not obligated to do so. If | |
19 * you do not wish to do so, delete this exception statement from your | |
20 * version. If you delete this exception statement from all source files | |
21 * in the program, then also delete it here. | |
22 * | |
23 * This program is distributed in the hope that it will be useful, but | |
24 * WITHOUT ANY WARRANTY; without even the implied warranty of | |
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
26 * General Public License for more details. | |
27 * | |
28 * You should have received a copy of the GNU General Public License | |
29 * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
30 **/ | |
31 | |
32 | |
33 #include "../PrecompiledHeaders.h" | |
34 #include "GzipCompressor.h" | |
35 | |
36 #include <stdio.h> | |
37 #include <string.h> | |
38 #include <zlib.h> | |
39 | |
40 #include "../OrthancException.h" | |
41 #include "../Logging.h" | |
42 | |
43 namespace Orthanc | |
44 { | |
45 uint64_t GzipCompressor::GuessUncompressedSize(const void* compressed, | |
46 size_t compressedSize) | |
47 { | |
48 /** | |
49 * "Is there a way to find out the size of the original file which | |
50 * is inside a GZIP file? [...] There is no truly reliable way, | |
51 * other than gunzipping the stream. You do not need to save the | |
52 * result of the decompression, so you can determine the size by | |
53 * simply reading and decoding the entire file without taking up | |
54 * space with the decompressed result. | |
55 * | |
56 * There is an unreliable way to determine the uncompressed size, | |
57 * which is to look at the last four bytes of the gzip file, which | |
58 * is the uncompressed length of that entry modulo 232 in little | |
59 * endian order. | |
60 * | |
61 * It is unreliable because a) the uncompressed data may be longer | |
62 * than 2^32 bytes, and b) the gzip file may consist of multiple | |
63 * gzip streams, in which case you would find the length of only | |
64 * the last of those streams. | |
65 * | |
66 * If you are in control of the source of the gzip files, you know | |
67 * that they consist of single gzip streams, and you know that | |
68 * they are less than 2^32 bytes uncompressed, then and only then | |
69 * can you use those last four bytes with confidence." | |
70 * | |
71 * http://stackoverflow.com/a/9727599/881731 | |
72 **/ | |
73 | |
74 if (compressedSize < 4) | |
75 { | |
76 throw OrthancException(ErrorCode_BadFileFormat); | |
77 } | |
78 | |
79 const uint8_t* p = reinterpret_cast<const uint8_t*>(compressed) + compressedSize - 4; | |
80 | |
81 return ((static_cast<uint32_t>(p[0]) << 0) + | |
82 (static_cast<uint32_t>(p[1]) << 8) + | |
83 (static_cast<uint32_t>(p[2]) << 16) + | |
84 (static_cast<uint32_t>(p[3]) << 24)); | |
85 } | |
86 | |
87 | |
88 | |
89 void GzipCompressor::Compress(std::string& compressed, | |
90 const void* uncompressed, | |
91 size_t uncompressedSize) | |
92 { | |
93 uLongf compressedSize = compressBound(uncompressedSize) + 1024 /* security margin */; | |
94 if (compressedSize == 0) | |
95 { | |
96 compressedSize = 1; | |
97 } | |
98 | |
99 uint8_t* target; | |
100 if (HasPrefixWithUncompressedSize()) | |
101 { | |
102 compressed.resize(compressedSize + sizeof(uint64_t)); | |
103 target = reinterpret_cast<uint8_t*>(&compressed[0]) + sizeof(uint64_t); | |
104 } | |
105 else | |
106 { | |
107 compressed.resize(compressedSize); | |
108 target = reinterpret_cast<uint8_t*>(&compressed[0]); | |
109 } | |
110 | |
111 z_stream stream; | |
112 memset(&stream, 0, sizeof(stream)); | |
113 | |
114 stream.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(uncompressed)); | |
115 stream.next_out = reinterpret_cast<Bytef*>(target); | |
116 | |
117 stream.avail_in = static_cast<uInt>(uncompressedSize); | |
118 stream.avail_out = static_cast<uInt>(compressedSize); | |
119 | |
120 // Ensure no overflow (if the buffer is too large for the current archicture) | |
121 if (static_cast<size_t>(stream.avail_in) != uncompressedSize || | |
122 static_cast<size_t>(stream.avail_out) != compressedSize) | |
123 { | |
124 throw OrthancException(ErrorCode_NotEnoughMemory); | |
125 } | |
126 | |
127 // Initialize the compression engine | |
128 int error = deflateInit2(&stream, | |
129 GetCompressionLevel(), | |
130 Z_DEFLATED, | |
131 MAX_WBITS + 16, // ask for gzip output | |
132 8, // default memory level | |
133 Z_DEFAULT_STRATEGY); | |
134 | |
135 if (error != Z_OK) | |
136 { | |
137 // Cannot initialize zlib | |
138 compressed.clear(); | |
139 throw OrthancException(ErrorCode_InternalError); | |
140 } | |
141 | |
142 // Compress the input buffer | |
143 error = deflate(&stream, Z_FINISH); | |
144 | |
145 if (error != Z_STREAM_END) | |
146 { | |
147 deflateEnd(&stream); | |
148 compressed.clear(); | |
149 | |
150 switch (error) | |
151 { | |
152 case Z_MEM_ERROR: | |
153 throw OrthancException(ErrorCode_NotEnoughMemory); | |
154 | |
155 default: | |
156 throw OrthancException(ErrorCode_InternalError); | |
157 } | |
158 } | |
159 | |
160 size_t size = stream.total_out; | |
161 | |
162 if (deflateEnd(&stream) != Z_OK) | |
163 { | |
164 throw OrthancException(ErrorCode_InternalError); | |
165 } | |
166 | |
167 // The compression was successful | |
168 if (HasPrefixWithUncompressedSize()) | |
169 { | |
170 uint64_t s = static_cast<uint64_t>(uncompressedSize); | |
171 memcpy(&compressed[0], &s, sizeof(uint64_t)); | |
172 compressed.resize(size + sizeof(uint64_t)); | |
173 } | |
174 else | |
175 { | |
176 compressed.resize(size); | |
177 } | |
178 } | |
179 | |
180 | |
181 void GzipCompressor::Uncompress(std::string& uncompressed, | |
182 const void* compressed, | |
183 size_t compressedSize) | |
184 { | |
185 uint64_t uncompressedSize; | |
186 const uint8_t* source = reinterpret_cast<const uint8_t*>(compressed); | |
187 | |
188 if (HasPrefixWithUncompressedSize()) | |
189 { | |
190 uncompressedSize = ReadUncompressedSizePrefix(compressed, compressedSize); | |
191 source += sizeof(uint64_t); | |
192 compressedSize -= sizeof(uint64_t); | |
193 } | |
194 else | |
195 { | |
196 uncompressedSize = GuessUncompressedSize(compressed, compressedSize); | |
197 } | |
198 | |
199 try | |
200 { | |
201 uncompressed.resize(uncompressedSize); | |
202 } | |
203 catch (...) | |
204 { | |
205 throw OrthancException(ErrorCode_NotEnoughMemory); | |
206 } | |
207 | |
208 z_stream stream; | |
209 memset(&stream, 0, sizeof(stream)); | |
210 | |
211 char dummy = '\0'; // zlib does not like NULL output buffers (even if the uncompressed data is empty) | |
212 stream.next_in = const_cast<Bytef*>(source); | |
213 stream.next_out = reinterpret_cast<Bytef*>(uncompressedSize == 0 ? &dummy : &uncompressed[0]); | |
214 | |
215 stream.avail_in = static_cast<uInt>(compressedSize); | |
216 stream.avail_out = static_cast<uInt>(uncompressedSize); | |
217 | |
218 // Ensure no overflow (if the buffer is too large for the current archicture) | |
219 if (static_cast<size_t>(stream.avail_in) != compressedSize || | |
220 static_cast<size_t>(stream.avail_out) != uncompressedSize) | |
221 { | |
222 throw OrthancException(ErrorCode_NotEnoughMemory); | |
223 } | |
224 | |
225 // Initialize the compression engine | |
226 int error = inflateInit2(&stream, | |
227 MAX_WBITS + 16); // this is a gzip input | |
228 | |
229 if (error != Z_OK) | |
230 { | |
231 // Cannot initialize zlib | |
232 uncompressed.clear(); | |
233 throw OrthancException(ErrorCode_InternalError); | |
234 } | |
235 | |
236 // Uncompress the input buffer | |
237 error = inflate(&stream, Z_FINISH); | |
238 | |
239 if (error != Z_STREAM_END) | |
240 { | |
241 inflateEnd(&stream); | |
242 uncompressed.clear(); | |
243 | |
244 switch (error) | |
245 { | |
246 case Z_MEM_ERROR: | |
247 throw OrthancException(ErrorCode_NotEnoughMemory); | |
248 | |
249 case Z_BUF_ERROR: | |
250 case Z_NEED_DICT: | |
251 throw OrthancException(ErrorCode_BadFileFormat); | |
252 | |
253 default: | |
254 throw OrthancException(ErrorCode_InternalError); | |
255 } | |
256 } | |
257 | |
258 size_t size = stream.total_out; | |
259 | |
260 if (inflateEnd(&stream) != Z_OK) | |
261 { | |
262 uncompressed.clear(); | |
263 throw OrthancException(ErrorCode_InternalError); | |
264 } | |
265 | |
266 if (size != uncompressedSize) | |
267 { | |
268 uncompressed.clear(); | |
269 | |
270 // The uncompressed size was not that properly guess, presumably | |
271 // because of a file size over 4GB. Should fallback to | |
272 // stream-based decompression. | |
273 LOG(ERROR) << "The uncompressed size of a gzip-encoded buffer was not properly guessed"; | |
274 throw OrthancException(ErrorCode_NotImplemented); | |
275 } | |
276 } | |
277 } |