Mercurial > hg > orthanc
comparison Core/HttpServer/MultipartStreamReader.cpp @ 3398:4acd1431e603
new classes: StringMatcher and MultipartStreamReader
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Fri, 07 Jun 2019 13:36:43 +0200 |
parents | |
children | 4e8205871967 |
comparison
equal
deleted
inserted
replaced
3397:9019279dbfd7 | 3398:4acd1431e603 |
---|---|
1 /** | |
2 * Orthanc - A Lightweight, RESTful DICOM Store | |
3 * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics | |
4 * Department, University Hospital of Liege, Belgium | |
5 * Copyright (C) 2017-2019 Osimis S.A., Belgium | |
6 * | |
7 * This program is free software: you can redistribute it and/or | |
8 * modify it under the terms of the GNU General Public License as | |
9 * published by the Free Software Foundation, either version 3 of the | |
10 * License, or (at your option) any later version. | |
11 * | |
12 * In addition, as a special exception, the copyright holders of this | |
13 * program give permission to link the code of its release with the | |
14 * OpenSSL project's "OpenSSL" library (or with modified versions of it | |
15 * that use the same license as the "OpenSSL" library), and distribute | |
16 * the linked executables. You must obey the GNU General Public License | |
17 * in all respects for all of the code used other than "OpenSSL". If you | |
18 * modify file(s) with this exception, you may extend this exception to | |
19 * your version of the file(s), but you are not obligated to do so. If | |
20 * you do not wish to do so, delete this exception statement from your | |
21 * version. If you delete this exception statement from all source files | |
22 * in the program, then also delete it here. | |
23 * | |
24 * This program is distributed in the hope that it will be useful, but | |
25 * WITHOUT ANY WARRANTY; without even the implied warranty of | |
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
27 * General Public License for more details. | |
28 * | |
29 * You should have received a copy of the GNU General Public License | |
30 * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
31 **/ | |
32 | |
33 | |
34 #include "../PrecompiledHeaders.h" | |
35 #include "MultipartStreamReader.h" | |
36 | |
37 #include "../OrthancException.h" | |
38 #include "../Toolbox.h" | |
39 | |
40 #include <boost/algorithm/string/predicate.hpp> | |
41 | |
42 namespace Orthanc | |
43 { | |
44 static void ParseHeaders(MultipartStreamReader::HttpHeaders& headers, | |
45 StringMatcher::Iterator start, | |
46 StringMatcher::Iterator end) | |
47 { | |
48 std::string tmp(start, end); | |
49 | |
50 std::vector<std::string> lines; | |
51 Toolbox::TokenizeString(lines, tmp, '\n'); | |
52 | |
53 headers.clear(); | |
54 | |
55 for (size_t i = 0; i < lines.size(); i++) | |
56 { | |
57 size_t separator = lines[i].find(':'); | |
58 if (separator != std::string::npos) | |
59 { | |
60 std::string key = Toolbox::StripSpaces(lines[i].substr(0, separator)); | |
61 std::string value = Toolbox::StripSpaces(lines[i].substr(separator + 1)); | |
62 | |
63 Toolbox::ToLowerCase(key); | |
64 headers[key] = value; | |
65 } | |
66 } | |
67 } | |
68 | |
69 | |
70 static bool LookupHeaderSizeValue(size_t& target, | |
71 const MultipartStreamReader::HttpHeaders& headers, | |
72 const std::string& key) | |
73 { | |
74 MultipartStreamReader::HttpHeaders::const_iterator it = headers.find(key); | |
75 if (it == headers.end()) | |
76 { | |
77 return false; | |
78 } | |
79 else | |
80 { | |
81 int64_t value; | |
82 | |
83 try | |
84 { | |
85 value = boost::lexical_cast<int64_t>(it->second); | |
86 } | |
87 catch (boost::bad_lexical_cast&) | |
88 { | |
89 throw OrthancException(ErrorCode_ParameterOutOfRange); | |
90 } | |
91 | |
92 if (value < 0) | |
93 { | |
94 throw OrthancException(ErrorCode_ParameterOutOfRange); | |
95 } | |
96 else | |
97 { | |
98 target = static_cast<size_t>(value); | |
99 return true; | |
100 } | |
101 } | |
102 } | |
103 | |
104 | |
105 void MultipartStreamReader::ParseStream() | |
106 { | |
107 if (handler_ == NULL || | |
108 state_ == State_Done) | |
109 { | |
110 return; | |
111 } | |
112 | |
113 std::string corpus; | |
114 buffer_.Flatten(corpus); | |
115 | |
116 StringMatcher::Iterator current = corpus.begin(); | |
117 StringMatcher::Iterator corpusEnd = corpus.end(); | |
118 | |
119 if (state_ == State_UnusedArea) | |
120 { | |
121 /** | |
122 * "Before the first boundary is an area that is ignored by | |
123 * MIME-compliant clients. This area is generally used to put | |
124 * a message to users of old non-MIME clients." | |
125 * https://en.wikipedia.org/wiki/MIME#Multipart_messages | |
126 **/ | |
127 | |
128 if (boundaryMatcher_.Apply(current, corpusEnd)) | |
129 { | |
130 current = boundaryMatcher_.GetMatchBegin(); | |
131 state_ = State_Content; | |
132 } | |
133 else | |
134 { | |
135 // We have not seen the end of the unused area yet | |
136 std::string reminder(current, corpusEnd); | |
137 buffer_.AddChunkDestructive(reminder); | |
138 return; | |
139 } | |
140 } | |
141 | |
142 for (;;) | |
143 { | |
144 size_t patternSize = boundaryMatcher_.GetPattern().size(); | |
145 size_t remainingSize = std::distance(current, corpusEnd); | |
146 if (remainingSize < patternSize + 2) | |
147 { | |
148 break; // Not enough data available | |
149 } | |
150 | |
151 std::string boundary(current, current + patternSize + 2); | |
152 if (boundary == boundaryMatcher_.GetPattern() + "--") | |
153 { | |
154 state_ = State_Done; | |
155 return; | |
156 } | |
157 | |
158 if (boundary != boundaryMatcher_.GetPattern() + "\r\n") | |
159 { | |
160 throw OrthancException(ErrorCode_NetworkProtocol, | |
161 "Garbage between two items in a multipart stream"); | |
162 } | |
163 | |
164 StringMatcher::Iterator start = current + patternSize + 2; | |
165 | |
166 if (!headersMatcher_.Apply(start, corpusEnd)) | |
167 { | |
168 break; // Not enough data available | |
169 } | |
170 | |
171 HttpHeaders headers; | |
172 ParseHeaders(headers, start, headersMatcher_.GetMatchBegin()); | |
173 | |
174 size_t contentLength; | |
175 if (!LookupHeaderSizeValue(contentLength, headers, "content-length")) | |
176 { | |
177 if (boundaryMatcher_.Apply(headersMatcher_.GetMatchEnd(), corpusEnd)) | |
178 { | |
179 size_t d = std::distance(headersMatcher_.GetMatchEnd(), boundaryMatcher_.GetMatchBegin()); | |
180 if (d <= 1) | |
181 { | |
182 throw OrthancException(ErrorCode_NetworkProtocol); | |
183 } | |
184 else | |
185 { | |
186 contentLength = d - 2; | |
187 } | |
188 } | |
189 else | |
190 { | |
191 break; // Not enough data available to have a full part | |
192 } | |
193 } | |
194 | |
195 if (headersMatcher_.GetMatchEnd() + contentLength + 2 > corpusEnd) | |
196 { | |
197 break; // Not enough data available to have a full part | |
198 } | |
199 | |
200 const char* p = headersMatcher_.GetPointerEnd() + contentLength; | |
201 if (p[0] != '\r' || | |
202 p[1] != '\n') | |
203 { | |
204 throw OrthancException(ErrorCode_NetworkProtocol, | |
205 "No endline at the end of a part"); | |
206 } | |
207 | |
208 handler_->Apply(headers, headersMatcher_.GetPointerEnd(), contentLength); | |
209 current = headersMatcher_.GetMatchEnd() + contentLength + 2; | |
210 } | |
211 | |
212 if (current != corpusEnd) | |
213 { | |
214 std::string reminder(current, corpusEnd); | |
215 buffer_.AddChunkDestructive(reminder); | |
216 } | |
217 } | |
218 | |
219 | |
220 MultipartStreamReader::MultipartStreamReader(const std::string& boundary) : | |
221 state_(State_UnusedArea), | |
222 handler_(NULL), | |
223 headersMatcher_("\r\n\r\n"), | |
224 boundaryMatcher_("--" + boundary), | |
225 blockSize_(10 * 1024 * 1024) | |
226 { | |
227 } | |
228 | |
229 | |
230 void MultipartStreamReader::SetBlockSize(size_t size) | |
231 { | |
232 if (size == 0) | |
233 { | |
234 throw OrthancException(ErrorCode_ParameterOutOfRange); | |
235 } | |
236 else | |
237 { | |
238 blockSize_ = size; | |
239 } | |
240 } | |
241 | |
242 | |
243 void MultipartStreamReader::AddChunk(const void* chunk, | |
244 size_t size) | |
245 { | |
246 if (state_ != State_Done && | |
247 size != 0) | |
248 { | |
249 size_t oldSize = buffer_.GetNumBytes(); | |
250 | |
251 buffer_.AddChunk(chunk, size); | |
252 | |
253 if (oldSize / blockSize_ != buffer_.GetNumBytes() / blockSize_) | |
254 { | |
255 ParseStream(); | |
256 } | |
257 } | |
258 } | |
259 | |
260 | |
261 void MultipartStreamReader::AddChunk(const std::string& chunk) | |
262 { | |
263 if (!chunk.empty()) | |
264 { | |
265 AddChunk(chunk.c_str(), chunk.size()); | |
266 } | |
267 } | |
268 | |
269 | |
270 void MultipartStreamReader::CloseStream() | |
271 { | |
272 if (buffer_.GetNumBytes() != 0) | |
273 { | |
274 ParseStream(); | |
275 } | |
276 } | |
277 | |
278 | |
279 bool MultipartStreamReader::GetMainContentType(std::string& contentType, | |
280 const HttpHeaders& headers) | |
281 { | |
282 HttpHeaders::const_iterator it = headers.find("content-type"); | |
283 | |
284 if (it == headers.end()) | |
285 { | |
286 return false; | |
287 } | |
288 else | |
289 { | |
290 contentType = it->second; | |
291 return true; | |
292 } | |
293 } | |
294 | |
295 | |
296 bool MultipartStreamReader::ParseMultipartHeaders(std::string& contentType, | |
297 std::string& subType, | |
298 std::string& boundary, | |
299 const HttpHeaders& headers) | |
300 { | |
301 std::string tmp; | |
302 if (!GetMainContentType(tmp, headers)) | |
303 { | |
304 return false; | |
305 } | |
306 | |
307 std::vector<std::string> tokens; | |
308 Orthanc::Toolbox::TokenizeString(tokens, tmp, ';'); | |
309 | |
310 if (tokens.empty()) | |
311 { | |
312 return false; | |
313 } | |
314 | |
315 contentType = Orthanc::Toolbox::StripSpaces(tokens[0]); | |
316 Orthanc::Toolbox::ToLowerCase(contentType); | |
317 | |
318 if (contentType.empty()) | |
319 { | |
320 return false; | |
321 } | |
322 | |
323 bool valid = false; | |
324 subType.clear(); | |
325 | |
326 for (size_t i = 0; i < tokens.size(); i++) | |
327 { | |
328 std::vector<std::string> items; | |
329 Orthanc::Toolbox::TokenizeString(items, tokens[i], '='); | |
330 | |
331 if (items.size() == 2) | |
332 { | |
333 if (boost::iequals("boundary", Orthanc::Toolbox::StripSpaces(items[0]))) | |
334 { | |
335 boundary = Orthanc::Toolbox::StripSpaces(items[1]); | |
336 valid = !boundary.empty(); | |
337 } | |
338 else if (boost::iequals("type", Orthanc::Toolbox::StripSpaces(items[0]))) | |
339 { | |
340 subType = Orthanc::Toolbox::StripSpaces(items[1]); | |
341 Orthanc::Toolbox::ToLowerCase(subType); | |
342 | |
343 // https://bitbucket.org/sjodogne/orthanc/issues/54/decide-what-to-do-wrt-quoting-of-multipart | |
344 // https://tools.ietf.org/html/rfc7231#section-3.1.1.1 | |
345 if (subType.size() >= 2 && | |
346 subType[0] == '"' && | |
347 subType[subType.size() - 1] == '"') | |
348 { | |
349 subType = subType.substr(1, subType.size() - 2); | |
350 } | |
351 } | |
352 } | |
353 } | |
354 | |
355 return valid; | |
356 } | |
357 } |