comparison Core/HttpServer/MultipartStreamReader.cpp @ 3398:4acd1431e603

new classes: StringMatcher and MultipartStreamReader
author Sebastien Jodogne <s.jodogne@gmail.com>
date Fri, 07 Jun 2019 13:36:43 +0200
parents
children 4e8205871967
comparison
equal deleted inserted replaced
3397:9019279dbfd7 3398:4acd1431e603
1 /**
2 * Orthanc - A Lightweight, RESTful DICOM Store
3 * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics
4 * Department, University Hospital of Liege, Belgium
5 * Copyright (C) 2017-2019 Osimis S.A., Belgium
6 *
7 * This program is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation, either version 3 of the
10 * License, or (at your option) any later version.
11 *
12 * In addition, as a special exception, the copyright holders of this
13 * program give permission to link the code of its release with the
14 * OpenSSL project's "OpenSSL" library (or with modified versions of it
15 * that use the same license as the "OpenSSL" library), and distribute
16 * the linked executables. You must obey the GNU General Public License
17 * in all respects for all of the code used other than "OpenSSL". If you
18 * modify file(s) with this exception, you may extend this exception to
19 * your version of the file(s), but you are not obligated to do so. If
20 * you do not wish to do so, delete this exception statement from your
21 * version. If you delete this exception statement from all source files
22 * in the program, then also delete it here.
23 *
24 * This program is distributed in the hope that it will be useful, but
25 * WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 * General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program. If not, see <http://www.gnu.org/licenses/>.
31 **/
32
33
34 #include "../PrecompiledHeaders.h"
35 #include "MultipartStreamReader.h"
36
37 #include "../OrthancException.h"
38 #include "../Toolbox.h"
39
40 #include <boost/algorithm/string/predicate.hpp>
41
42 namespace Orthanc
43 {
44 static void ParseHeaders(MultipartStreamReader::HttpHeaders& headers,
45 StringMatcher::Iterator start,
46 StringMatcher::Iterator end)
47 {
48 std::string tmp(start, end);
49
50 std::vector<std::string> lines;
51 Toolbox::TokenizeString(lines, tmp, '\n');
52
53 headers.clear();
54
55 for (size_t i = 0; i < lines.size(); i++)
56 {
57 size_t separator = lines[i].find(':');
58 if (separator != std::string::npos)
59 {
60 std::string key = Toolbox::StripSpaces(lines[i].substr(0, separator));
61 std::string value = Toolbox::StripSpaces(lines[i].substr(separator + 1));
62
63 Toolbox::ToLowerCase(key);
64 headers[key] = value;
65 }
66 }
67 }
68
69
70 static bool LookupHeaderSizeValue(size_t& target,
71 const MultipartStreamReader::HttpHeaders& headers,
72 const std::string& key)
73 {
74 MultipartStreamReader::HttpHeaders::const_iterator it = headers.find(key);
75 if (it == headers.end())
76 {
77 return false;
78 }
79 else
80 {
81 int64_t value;
82
83 try
84 {
85 value = boost::lexical_cast<int64_t>(it->second);
86 }
87 catch (boost::bad_lexical_cast&)
88 {
89 throw OrthancException(ErrorCode_ParameterOutOfRange);
90 }
91
92 if (value < 0)
93 {
94 throw OrthancException(ErrorCode_ParameterOutOfRange);
95 }
96 else
97 {
98 target = static_cast<size_t>(value);
99 return true;
100 }
101 }
102 }
103
104
105 void MultipartStreamReader::ParseStream()
106 {
107 if (handler_ == NULL ||
108 state_ == State_Done)
109 {
110 return;
111 }
112
113 std::string corpus;
114 buffer_.Flatten(corpus);
115
116 StringMatcher::Iterator current = corpus.begin();
117 StringMatcher::Iterator corpusEnd = corpus.end();
118
119 if (state_ == State_UnusedArea)
120 {
121 /**
122 * "Before the first boundary is an area that is ignored by
123 * MIME-compliant clients. This area is generally used to put
124 * a message to users of old non-MIME clients."
125 * https://en.wikipedia.org/wiki/MIME#Multipart_messages
126 **/
127
128 if (boundaryMatcher_.Apply(current, corpusEnd))
129 {
130 current = boundaryMatcher_.GetMatchBegin();
131 state_ = State_Content;
132 }
133 else
134 {
135 // We have not seen the end of the unused area yet
136 std::string reminder(current, corpusEnd);
137 buffer_.AddChunkDestructive(reminder);
138 return;
139 }
140 }
141
142 for (;;)
143 {
144 size_t patternSize = boundaryMatcher_.GetPattern().size();
145 size_t remainingSize = std::distance(current, corpusEnd);
146 if (remainingSize < patternSize + 2)
147 {
148 break; // Not enough data available
149 }
150
151 std::string boundary(current, current + patternSize + 2);
152 if (boundary == boundaryMatcher_.GetPattern() + "--")
153 {
154 state_ = State_Done;
155 return;
156 }
157
158 if (boundary != boundaryMatcher_.GetPattern() + "\r\n")
159 {
160 throw OrthancException(ErrorCode_NetworkProtocol,
161 "Garbage between two items in a multipart stream");
162 }
163
164 StringMatcher::Iterator start = current + patternSize + 2;
165
166 if (!headersMatcher_.Apply(start, corpusEnd))
167 {
168 break; // Not enough data available
169 }
170
171 HttpHeaders headers;
172 ParseHeaders(headers, start, headersMatcher_.GetMatchBegin());
173
174 size_t contentLength;
175 if (!LookupHeaderSizeValue(contentLength, headers, "content-length"))
176 {
177 if (boundaryMatcher_.Apply(headersMatcher_.GetMatchEnd(), corpusEnd))
178 {
179 size_t d = std::distance(headersMatcher_.GetMatchEnd(), boundaryMatcher_.GetMatchBegin());
180 if (d <= 1)
181 {
182 throw OrthancException(ErrorCode_NetworkProtocol);
183 }
184 else
185 {
186 contentLength = d - 2;
187 }
188 }
189 else
190 {
191 break; // Not enough data available to have a full part
192 }
193 }
194
195 if (headersMatcher_.GetMatchEnd() + contentLength + 2 > corpusEnd)
196 {
197 break; // Not enough data available to have a full part
198 }
199
200 const char* p = headersMatcher_.GetPointerEnd() + contentLength;
201 if (p[0] != '\r' ||
202 p[1] != '\n')
203 {
204 throw OrthancException(ErrorCode_NetworkProtocol,
205 "No endline at the end of a part");
206 }
207
208 handler_->Apply(headers, headersMatcher_.GetPointerEnd(), contentLength);
209 current = headersMatcher_.GetMatchEnd() + contentLength + 2;
210 }
211
212 if (current != corpusEnd)
213 {
214 std::string reminder(current, corpusEnd);
215 buffer_.AddChunkDestructive(reminder);
216 }
217 }
218
219
220 MultipartStreamReader::MultipartStreamReader(const std::string& boundary) :
221 state_(State_UnusedArea),
222 handler_(NULL),
223 headersMatcher_("\r\n\r\n"),
224 boundaryMatcher_("--" + boundary),
225 blockSize_(10 * 1024 * 1024)
226 {
227 }
228
229
230 void MultipartStreamReader::SetBlockSize(size_t size)
231 {
232 if (size == 0)
233 {
234 throw OrthancException(ErrorCode_ParameterOutOfRange);
235 }
236 else
237 {
238 blockSize_ = size;
239 }
240 }
241
242
243 void MultipartStreamReader::AddChunk(const void* chunk,
244 size_t size)
245 {
246 if (state_ != State_Done &&
247 size != 0)
248 {
249 size_t oldSize = buffer_.GetNumBytes();
250
251 buffer_.AddChunk(chunk, size);
252
253 if (oldSize / blockSize_ != buffer_.GetNumBytes() / blockSize_)
254 {
255 ParseStream();
256 }
257 }
258 }
259
260
261 void MultipartStreamReader::AddChunk(const std::string& chunk)
262 {
263 if (!chunk.empty())
264 {
265 AddChunk(chunk.c_str(), chunk.size());
266 }
267 }
268
269
270 void MultipartStreamReader::CloseStream()
271 {
272 if (buffer_.GetNumBytes() != 0)
273 {
274 ParseStream();
275 }
276 }
277
278
279 bool MultipartStreamReader::GetMainContentType(std::string& contentType,
280 const HttpHeaders& headers)
281 {
282 HttpHeaders::const_iterator it = headers.find("content-type");
283
284 if (it == headers.end())
285 {
286 return false;
287 }
288 else
289 {
290 contentType = it->second;
291 return true;
292 }
293 }
294
295
296 bool MultipartStreamReader::ParseMultipartHeaders(std::string& contentType,
297 std::string& subType,
298 std::string& boundary,
299 const HttpHeaders& headers)
300 {
301 std::string tmp;
302 if (!GetMainContentType(tmp, headers))
303 {
304 return false;
305 }
306
307 std::vector<std::string> tokens;
308 Orthanc::Toolbox::TokenizeString(tokens, tmp, ';');
309
310 if (tokens.empty())
311 {
312 return false;
313 }
314
315 contentType = Orthanc::Toolbox::StripSpaces(tokens[0]);
316 Orthanc::Toolbox::ToLowerCase(contentType);
317
318 if (contentType.empty())
319 {
320 return false;
321 }
322
323 bool valid = false;
324 subType.clear();
325
326 for (size_t i = 0; i < tokens.size(); i++)
327 {
328 std::vector<std::string> items;
329 Orthanc::Toolbox::TokenizeString(items, tokens[i], '=');
330
331 if (items.size() == 2)
332 {
333 if (boost::iequals("boundary", Orthanc::Toolbox::StripSpaces(items[0])))
334 {
335 boundary = Orthanc::Toolbox::StripSpaces(items[1]);
336 valid = !boundary.empty();
337 }
338 else if (boost::iequals("type", Orthanc::Toolbox::StripSpaces(items[0])))
339 {
340 subType = Orthanc::Toolbox::StripSpaces(items[1]);
341 Orthanc::Toolbox::ToLowerCase(subType);
342
343 // https://bitbucket.org/sjodogne/orthanc/issues/54/decide-what-to-do-wrt-quoting-of-multipart
344 // https://tools.ietf.org/html/rfc7231#section-3.1.1.1
345 if (subType.size() >= 2 &&
346 subType[0] == '"' &&
347 subType[subType.size() - 1] == '"')
348 {
349 subType = subType.substr(1, subType.size() - 2);
350 }
351 }
352 }
353 }
354
355 return valid;
356 }
357 }