comparison OrthancFramework/Sources/HttpServer/MultipartStreamReader.cpp @ 4044:d25f4c0fa160 framework

splitting code into OrthancFramework and OrthancServer
author Sebastien Jodogne <s.jodogne@gmail.com>
date Wed, 10 Jun 2020 20:30:34 +0200
parents Core/HttpServer/MultipartStreamReader.cpp@94f4a18a79cc
children bf7b9edf6b81
comparison
equal deleted inserted replaced
4043:6c6239aec462 4044:d25f4c0fa160
1 /**
2 * Orthanc - A Lightweight, RESTful DICOM Store
3 * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics
4 * Department, University Hospital of Liege, Belgium
5 * Copyright (C) 2017-2020 Osimis S.A., Belgium
6 *
7 * This program is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation, either version 3 of the
10 * License, or (at your option) any later version.
11 *
12 * In addition, as a special exception, the copyright holders of this
13 * program give permission to link the code of its release with the
14 * OpenSSL project's "OpenSSL" library (or with modified versions of it
15 * that use the same license as the "OpenSSL" library), and distribute
16 * the linked executables. You must obey the GNU General Public License
17 * in all respects for all of the code used other than "OpenSSL". If you
18 * modify file(s) with this exception, you may extend this exception to
19 * your version of the file(s), but you are not obligated to do so. If
20 * you do not wish to do so, delete this exception statement from your
21 * version. If you delete this exception statement from all source files
22 * in the program, then also delete it here.
23 *
24 * This program is distributed in the hope that it will be useful, but
25 * WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 * General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program. If not, see <http://www.gnu.org/licenses/>.
31 **/
32
33
34 #include "../PrecompiledHeaders.h"
35 #include "MultipartStreamReader.h"
36
37 #include "../OrthancException.h"
38 #include "../Toolbox.h"
39
40 #include <boost/algorithm/string/predicate.hpp>
41
42 #if defined(_MSC_VER)
43 # include <BaseTsd.h> // Definition of ssize_t
44 #endif
45
46 namespace Orthanc
47 {
48 static void ParseHeaders(MultipartStreamReader::HttpHeaders& headers,
49 StringMatcher::Iterator start,
50 StringMatcher::Iterator end)
51 {
52 std::string tmp(start, end);
53
54 std::vector<std::string> lines;
55 Toolbox::TokenizeString(lines, tmp, '\n');
56
57 headers.clear();
58
59 for (size_t i = 0; i < lines.size(); i++)
60 {
61 size_t separator = lines[i].find(':');
62 if (separator != std::string::npos)
63 {
64 std::string key = Toolbox::StripSpaces(lines[i].substr(0, separator));
65 std::string value = Toolbox::StripSpaces(lines[i].substr(separator + 1));
66
67 Toolbox::ToLowerCase(key);
68 headers[key] = value;
69 }
70 }
71 }
72
73
74 static bool LookupHeaderSizeValue(size_t& target,
75 const MultipartStreamReader::HttpHeaders& headers,
76 const std::string& key)
77 {
78 MultipartStreamReader::HttpHeaders::const_iterator it = headers.find(key);
79 if (it == headers.end())
80 {
81 return false;
82 }
83 else
84 {
85 int64_t value;
86
87 try
88 {
89 value = boost::lexical_cast<int64_t>(it->second);
90 }
91 catch (boost::bad_lexical_cast&)
92 {
93 throw OrthancException(ErrorCode_ParameterOutOfRange);
94 }
95
96 if (value < 0)
97 {
98 throw OrthancException(ErrorCode_ParameterOutOfRange);
99 }
100 else
101 {
102 target = static_cast<size_t>(value);
103 return true;
104 }
105 }
106 }
107
108
109 void MultipartStreamReader::ParseStream()
110 {
111 if (handler_ == NULL ||
112 state_ == State_Done)
113 {
114 return;
115 }
116
117 std::string corpus;
118 buffer_.Flatten(corpus);
119
120 StringMatcher::Iterator current = corpus.begin();
121 StringMatcher::Iterator corpusEnd = corpus.end();
122
123 if (state_ == State_UnusedArea)
124 {
125 /**
126 * "Before the first boundary is an area that is ignored by
127 * MIME-compliant clients. This area is generally used to put
128 * a message to users of old non-MIME clients."
129 * https://en.wikipedia.org/wiki/MIME#Multipart_messages
130 **/
131
132 if (boundaryMatcher_.Apply(current, corpusEnd))
133 {
134 current = boundaryMatcher_.GetMatchBegin();
135 state_ = State_Content;
136 }
137 else
138 {
139 // We have not seen the end of the unused area yet
140 std::string reminder(current, corpusEnd);
141 buffer_.AddChunkDestructive(reminder);
142 return;
143 }
144 }
145
146 for (;;)
147 {
148 size_t patternSize = boundaryMatcher_.GetPattern().size();
149 size_t remainingSize = std::distance(current, corpusEnd);
150 if (remainingSize < patternSize + 2)
151 {
152 break; // Not enough data available
153 }
154
155 std::string boundary(current, current + patternSize + 2);
156 if (boundary == boundaryMatcher_.GetPattern() + "--")
157 {
158 state_ = State_Done;
159 return;
160 }
161
162 if (boundary != boundaryMatcher_.GetPattern() + "\r\n")
163 {
164 throw OrthancException(ErrorCode_NetworkProtocol,
165 "Garbage between two items in a multipart stream");
166 }
167
168 StringMatcher::Iterator start = current + patternSize + 2;
169
170 if (!headersMatcher_.Apply(start, corpusEnd))
171 {
172 break; // Not enough data available
173 }
174
175 HttpHeaders headers;
176 ParseHeaders(headers, start, headersMatcher_.GetMatchBegin());
177
178 size_t contentLength = 0;
179 if (!LookupHeaderSizeValue(contentLength, headers, "content-length"))
180 {
181 if (boundaryMatcher_.Apply(headersMatcher_.GetMatchEnd(), corpusEnd))
182 {
183 size_t d = std::distance(headersMatcher_.GetMatchEnd(), boundaryMatcher_.GetMatchBegin());
184 if (d <= 1)
185 {
186 throw OrthancException(ErrorCode_NetworkProtocol);
187 }
188 else
189 {
190 contentLength = d - 2;
191 }
192 }
193 else
194 {
195 break; // Not enough data available to have a full part
196 }
197 }
198
199 // Explicit conversion to avoid warning about signed vs. unsigned comparison
200 std::iterator_traits<StringMatcher::Iterator>::difference_type d = contentLength + 2;
201 if (d > std::distance(headersMatcher_.GetMatchEnd(), corpusEnd))
202 {
203 break; // Not enough data available to have a full part
204 }
205
206 const char* p = headersMatcher_.GetPointerEnd() + contentLength;
207 if (p[0] != '\r' ||
208 p[1] != '\n')
209 {
210 throw OrthancException(ErrorCode_NetworkProtocol,
211 "No endline at the end of a part");
212 }
213
214 handler_->HandlePart(headers, headersMatcher_.GetPointerEnd(), contentLength);
215 current = headersMatcher_.GetMatchEnd() + contentLength + 2;
216 }
217
218 if (current != corpusEnd)
219 {
220 std::string reminder(current, corpusEnd);
221 buffer_.AddChunkDestructive(reminder);
222 }
223 }
224
225
226 MultipartStreamReader::MultipartStreamReader(const std::string& boundary) :
227 state_(State_UnusedArea),
228 handler_(NULL),
229 headersMatcher_("\r\n\r\n"),
230 boundaryMatcher_("--" + boundary),
231 blockSize_(10 * 1024 * 1024)
232 {
233 }
234
235
236 void MultipartStreamReader::SetBlockSize(size_t size)
237 {
238 if (size == 0)
239 {
240 throw OrthancException(ErrorCode_ParameterOutOfRange);
241 }
242 else
243 {
244 blockSize_ = size;
245 }
246 }
247
248
249 void MultipartStreamReader::AddChunk(const void* chunk,
250 size_t size)
251 {
252 if (state_ != State_Done &&
253 size != 0)
254 {
255 size_t oldSize = buffer_.GetNumBytes();
256
257 buffer_.AddChunk(chunk, size);
258
259 if (oldSize / blockSize_ != buffer_.GetNumBytes() / blockSize_)
260 {
261 ParseStream();
262 }
263 }
264 }
265
266
267 void MultipartStreamReader::AddChunk(const std::string& chunk)
268 {
269 if (!chunk.empty())
270 {
271 AddChunk(chunk.c_str(), chunk.size());
272 }
273 }
274
275
276 void MultipartStreamReader::CloseStream()
277 {
278 if (buffer_.GetNumBytes() != 0)
279 {
280 ParseStream();
281 }
282 }
283
284
285 bool MultipartStreamReader::GetMainContentType(std::string& contentType,
286 const HttpHeaders& headers)
287 {
288 HttpHeaders::const_iterator it = headers.find("content-type");
289
290 if (it == headers.end())
291 {
292 return false;
293 }
294 else
295 {
296 contentType = it->second;
297 return true;
298 }
299 }
300
301
302 bool MultipartStreamReader::ParseMultipartContentType(std::string& contentType,
303 std::string& subType,
304 std::string& boundary,
305 const std::string& contentTypeHeader)
306 {
307 std::vector<std::string> tokens;
308 Orthanc::Toolbox::TokenizeString(tokens, contentTypeHeader, ';');
309
310 if (tokens.empty())
311 {
312 return false;
313 }
314
315 contentType = Orthanc::Toolbox::StripSpaces(tokens[0]);
316 Orthanc::Toolbox::ToLowerCase(contentType);
317
318 if (contentType.empty())
319 {
320 return false;
321 }
322
323 bool valid = false;
324 subType.clear();
325
326 for (size_t i = 0; i < tokens.size(); i++)
327 {
328 std::vector<std::string> items;
329 Orthanc::Toolbox::TokenizeString(items, tokens[i], '=');
330
331 if (items.size() == 2)
332 {
333 if (boost::iequals("boundary", Orthanc::Toolbox::StripSpaces(items[0])))
334 {
335 boundary = Orthanc::Toolbox::StripSpaces(items[1]);
336 valid = !boundary.empty();
337 }
338 else if (boost::iequals("type", Orthanc::Toolbox::StripSpaces(items[0])))
339 {
340 subType = Orthanc::Toolbox::StripSpaces(items[1]);
341 Orthanc::Toolbox::ToLowerCase(subType);
342
343 // https://bitbucket.org/sjodogne/orthanc/issues/54/decide-what-to-do-wrt-quoting-of-multipart
344 // https://tools.ietf.org/html/rfc7231#section-3.1.1.1
345 if (subType.size() >= 2 &&
346 subType[0] == '"' &&
347 subType[subType.size() - 1] == '"')
348 {
349 subType = subType.substr(1, subType.size() - 2);
350 }
351 }
352 }
353 }
354
355 return valid;
356 }
357 }