Mercurial > hg > orthanc
diff Core/HttpServer/MultipartStreamReader.cpp @ 3398:4acd1431e603
new classes: StringMatcher and MultipartStreamReader
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Fri, 07 Jun 2019 13:36:43 +0200 |
parents | |
children | 4e8205871967 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Core/HttpServer/MultipartStreamReader.cpp Fri Jun 07 13:36:43 2019 +0200 @@ -0,0 +1,357 @@ +/** + * Orthanc - A Lightweight, RESTful DICOM Store + * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics + * Department, University Hospital of Liege, Belgium + * Copyright (C) 2017-2019 Osimis S.A., Belgium + * + * This program is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * In addition, as a special exception, the copyright holders of this + * program give permission to link the code of its release with the + * OpenSSL project's "OpenSSL" library (or with modified versions of it + * that use the same license as the "OpenSSL" library), and distribute + * the linked executables. You must obey the GNU General Public License + * in all respects for all of the code used other than "OpenSSL". If you + * modify file(s) with this exception, you may extend this exception to + * your version of the file(s), but you are not obligated to do so. If + * you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files + * in the program, then also delete it here. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + **/ + + +#include "../PrecompiledHeaders.h" +#include "MultipartStreamReader.h" + +#include "../OrthancException.h" +#include "../Toolbox.h" + +#include <boost/algorithm/string/predicate.hpp> + +namespace Orthanc +{ + static void ParseHeaders(MultipartStreamReader::HttpHeaders& headers, + StringMatcher::Iterator start, + StringMatcher::Iterator end) + { + std::string tmp(start, end); + + std::vector<std::string> lines; + Toolbox::TokenizeString(lines, tmp, '\n'); + + headers.clear(); + + for (size_t i = 0; i < lines.size(); i++) + { + size_t separator = lines[i].find(':'); + if (separator != std::string::npos) + { + std::string key = Toolbox::StripSpaces(lines[i].substr(0, separator)); + std::string value = Toolbox::StripSpaces(lines[i].substr(separator + 1)); + + Toolbox::ToLowerCase(key); + headers[key] = value; + } + } + } + + + static bool LookupHeaderSizeValue(size_t& target, + const MultipartStreamReader::HttpHeaders& headers, + const std::string& key) + { + MultipartStreamReader::HttpHeaders::const_iterator it = headers.find(key); + if (it == headers.end()) + { + return false; + } + else + { + int64_t value; + + try + { + value = boost::lexical_cast<int64_t>(it->second); + } + catch (boost::bad_lexical_cast&) + { + throw OrthancException(ErrorCode_ParameterOutOfRange); + } + + if (value < 0) + { + throw OrthancException(ErrorCode_ParameterOutOfRange); + } + else + { + target = static_cast<size_t>(value); + return true; + } + } + } + + + void MultipartStreamReader::ParseStream() + { + if (handler_ == NULL || + state_ == State_Done) + { + return; + } + + std::string corpus; + buffer_.Flatten(corpus); + + StringMatcher::Iterator current = corpus.begin(); + StringMatcher::Iterator corpusEnd = corpus.end(); + + if (state_ == State_UnusedArea) + { + /** + * "Before the first boundary is an area that is ignored by + * MIME-compliant clients. This area is generally used to put + * a message to users of old non-MIME clients." + * https://en.wikipedia.org/wiki/MIME#Multipart_messages + **/ + + if (boundaryMatcher_.Apply(current, corpusEnd)) + { + current = boundaryMatcher_.GetMatchBegin(); + state_ = State_Content; + } + else + { + // We have not seen the end of the unused area yet + std::string reminder(current, corpusEnd); + buffer_.AddChunkDestructive(reminder); + return; + } + } + + for (;;) + { + size_t patternSize = boundaryMatcher_.GetPattern().size(); + size_t remainingSize = std::distance(current, corpusEnd); + if (remainingSize < patternSize + 2) + { + break; // Not enough data available + } + + std::string boundary(current, current + patternSize + 2); + if (boundary == boundaryMatcher_.GetPattern() + "--") + { + state_ = State_Done; + return; + } + + if (boundary != boundaryMatcher_.GetPattern() + "\r\n") + { + throw OrthancException(ErrorCode_NetworkProtocol, + "Garbage between two items in a multipart stream"); + } + + StringMatcher::Iterator start = current + patternSize + 2; + + if (!headersMatcher_.Apply(start, corpusEnd)) + { + break; // Not enough data available + } + + HttpHeaders headers; + ParseHeaders(headers, start, headersMatcher_.GetMatchBegin()); + + size_t contentLength; + if (!LookupHeaderSizeValue(contentLength, headers, "content-length")) + { + if (boundaryMatcher_.Apply(headersMatcher_.GetMatchEnd(), corpusEnd)) + { + size_t d = std::distance(headersMatcher_.GetMatchEnd(), boundaryMatcher_.GetMatchBegin()); + if (d <= 1) + { + throw OrthancException(ErrorCode_NetworkProtocol); + } + else + { + contentLength = d - 2; + } + } + else + { + break; // Not enough data available to have a full part + } + } + + if (headersMatcher_.GetMatchEnd() + contentLength + 2 > corpusEnd) + { + break; // Not enough data available to have a full part + } + + const char* p = headersMatcher_.GetPointerEnd() + contentLength; + if (p[0] != '\r' || + p[1] != '\n') + { + throw OrthancException(ErrorCode_NetworkProtocol, + "No endline at the end of a part"); + } + + handler_->Apply(headers, headersMatcher_.GetPointerEnd(), contentLength); + current = headersMatcher_.GetMatchEnd() + contentLength + 2; + } + + if (current != corpusEnd) + { + std::string reminder(current, corpusEnd); + buffer_.AddChunkDestructive(reminder); + } + } + + + MultipartStreamReader::MultipartStreamReader(const std::string& boundary) : + state_(State_UnusedArea), + handler_(NULL), + headersMatcher_("\r\n\r\n"), + boundaryMatcher_("--" + boundary), + blockSize_(10 * 1024 * 1024) + { + } + + + void MultipartStreamReader::SetBlockSize(size_t size) + { + if (size == 0) + { + throw OrthancException(ErrorCode_ParameterOutOfRange); + } + else + { + blockSize_ = size; + } + } + + + void MultipartStreamReader::AddChunk(const void* chunk, + size_t size) + { + if (state_ != State_Done && + size != 0) + { + size_t oldSize = buffer_.GetNumBytes(); + + buffer_.AddChunk(chunk, size); + + if (oldSize / blockSize_ != buffer_.GetNumBytes() / blockSize_) + { + ParseStream(); + } + } + } + + + void MultipartStreamReader::AddChunk(const std::string& chunk) + { + if (!chunk.empty()) + { + AddChunk(chunk.c_str(), chunk.size()); + } + } + + + void MultipartStreamReader::CloseStream() + { + if (buffer_.GetNumBytes() != 0) + { + ParseStream(); + } + } + + + bool MultipartStreamReader::GetMainContentType(std::string& contentType, + const HttpHeaders& headers) + { + HttpHeaders::const_iterator it = headers.find("content-type"); + + if (it == headers.end()) + { + return false; + } + else + { + contentType = it->second; + return true; + } + } + + + bool MultipartStreamReader::ParseMultipartHeaders(std::string& contentType, + std::string& subType, + std::string& boundary, + const HttpHeaders& headers) + { + std::string tmp; + if (!GetMainContentType(tmp, headers)) + { + return false; + } + + std::vector<std::string> tokens; + Orthanc::Toolbox::TokenizeString(tokens, tmp, ';'); + + if (tokens.empty()) + { + return false; + } + + contentType = Orthanc::Toolbox::StripSpaces(tokens[0]); + Orthanc::Toolbox::ToLowerCase(contentType); + + if (contentType.empty()) + { + return false; + } + + bool valid = false; + subType.clear(); + + for (size_t i = 0; i < tokens.size(); i++) + { + std::vector<std::string> items; + Orthanc::Toolbox::TokenizeString(items, tokens[i], '='); + + if (items.size() == 2) + { + if (boost::iequals("boundary", Orthanc::Toolbox::StripSpaces(items[0]))) + { + boundary = Orthanc::Toolbox::StripSpaces(items[1]); + valid = !boundary.empty(); + } + else if (boost::iequals("type", Orthanc::Toolbox::StripSpaces(items[0]))) + { + subType = Orthanc::Toolbox::StripSpaces(items[1]); + Orthanc::Toolbox::ToLowerCase(subType); + + // https://bitbucket.org/sjodogne/orthanc/issues/54/decide-what-to-do-wrt-quoting-of-multipart + // https://tools.ietf.org/html/rfc7231#section-3.1.1.1 + if (subType.size() >= 2 && + subType[0] == '"' && + subType[subType.size() - 1] == '"') + { + subType = subType.substr(1, subType.size() - 2); + } + } + } + } + + return valid; + } +}