Mercurial > hg > orthanc-dicomweb
changeset 275:0d1815ffe865
cont
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Mon, 03 Jun 2019 14:33:58 +0200 |
parents | c0e41608bed3 |
children | 14419134e05e |
files | UnitTestsSources/UnitTestsMain.cpp |
diffstat | 1 files changed, 278 insertions(+), 73 deletions(-) [+] |
line wrap: on
line diff
--- a/UnitTestsSources/UnitTestsMain.cpp Mon Jun 03 11:34:57 2019 +0200 +++ b/UnitTestsSources/UnitTestsMain.cpp Mon Jun 03 14:33:58 2019 +0200 @@ -99,9 +99,9 @@ { } - size_t GetPatternSize() const + const std::string& GetPattern() const { - return pattern_.size(); + return pattern_; } bool IsValid() const @@ -160,6 +160,16 @@ throw OrthancException(ErrorCode_BadSequenceOfCalls); } } + + const void* GetPointerBegin() const + { + return &GetMatchBegin()[0]; + } + + const void* GetPointerEnd() const + { + return &GetMatchEnd()[0]; + } }; @@ -173,39 +183,173 @@ { } - virtual void Handle(const std::map<std::string, std::string>& headers, - const std::string& part) = 0; + virtual void Apply(const std::map<std::string, std::string>& headers, + const void* part, + size_t size) = 0; }; private: + enum State + { + State_MainHeaders, + State_UnusedArea, + State_Content, + State_Done + }; + + + typedef std::map<std::string, std::string> Dictionary; + typedef boost::algorithm::boyer_moore<std::string::const_iterator> Search; //typedef boost::algorithm::boyer_moore_horspool<std::string::const_iterator> Search; //typedef boost::algorithm::knuth_morris_pratt<std::string::const_iterator> Search; + State state_; + Dictionary mainHeaders_; IHandler* handler_; - StringMatcher searchHeadersEnd_; - std::auto_ptr<StringMatcher> patternMatcher_; - //std::auto_ptr<Search> patternMatcher_; + StringMatcher headersMatcher_; + std::auto_ptr<StringMatcher> boundaryMatcher_; + //std::auto_ptr<Search> boundaryMatcher_; ChunkedBuffer buffer_; size_t blockSize_; + static void ParseHeaders(Dictionary& headers, + StringMatcher::Iterator start, + StringMatcher::Iterator end) + { + std::string tmp(start, end); + + std::vector<std::string> lines; + Toolbox::TokenizeString(lines, tmp, '\n'); + + headers.clear(); + + for (size_t i = 0; i < lines.size(); i++) + { + size_t separator = lines[i].find(':'); + if (separator != std::string::npos) + { + std::string key = Toolbox::StripSpaces(lines[i].substr(0, separator)); + std::string value = Toolbox::StripSpaces(lines[i].substr(separator + 1)); + + Toolbox::ToLowerCase(key); + headers[key] = value; + } + } + } + + + static bool LookupHeaderSizeValue(size_t& target, + const Dictionary& headers, + const std::string& key) + { + Dictionary::const_iterator it = headers.find(key); + if (it == headers.end()) + { + return false; + } + else + { + int64_t value; + + try + { + value = boost::lexical_cast<int64_t>(it->second); + } + catch (boost::bad_lexical_cast&) + { + throw OrthancException(ErrorCode_ParameterOutOfRange); + } + + if (value < 0) + { + throw OrthancException(ErrorCode_ParameterOutOfRange); + } + else + { + target = static_cast<size_t>(value); + return true; + } + } + } + + + static bool ParseHeaderValues(Dictionary& values, + const Dictionary& headers, + const std::string& key) + { + Dictionary::const_iterator it = headers.find(key); + + if (it == headers.end()) + { + return false; + } + else + { + values.clear(); + + std::vector<std::string> tokens; + Toolbox::TokenizeString(tokens, it->second, ';'); + + for (size_t i = 0; i < tokens.size(); i++) + { + size_t separator = tokens[i].find('='); + if (separator != std::string::npos) + { + std::string key = Toolbox::StripSpaces(tokens[i].substr(0, separator)); + std::string value = Toolbox::StripSpaces(tokens[i].substr(separator + 1)); + + if (!key.empty()) + { + Toolbox::ToLowerCase(key); + values[key] = value; + } + } + } + + return true; + } + } + + + void InitializeMultipart(const Dictionary& headers) + { + Dictionary values; + if (!ParseHeaderValues(values, headers, "content-type")) + { + throw OrthancException(ErrorCode_NetworkProtocol, + "Multipart stream without a Content-Type"); + } + + Dictionary::const_iterator boundary = values.find("boundary"); + if (boundary == values.end()) + { + throw OrthancException(ErrorCode_NetworkProtocol, + "Missing boundary in the Content-Type of a multipart stream"); + } + + LOG(INFO) << "Starting decoding of a multipart stream with boundary: " << boundary->second; + boundaryMatcher_.reset(new StringMatcher("--" + boundary->second)); + } + + void ParsePart(std::string::const_iterator start, std::string::const_iterator end) { - searchHeadersEnd_.Apply(start, end); + headersMatcher_.Apply(start, end); #if 0 - if (searchHeadersEnd_.GetIterator() != end) + if (headersMatcher_.GetIterator() != end) { - std::string s(start, searchHeadersEnd_.GetIterator()); + std::string s(start, headersMatcher_.GetIterator()); printf("[%s]\n", s.c_str()); //std::map<std::string, std::string> headers; - //std::string part(searchHeadersEnd_.GetIterator(), end); + //std::string part(headersMatcher_.GetIterator(), end); //std::string part; - //handler_->Handle(headers, part); + //handler_->Apply(headers, part); } //printf("%d \n", size); @@ -218,8 +362,7 @@ void ParseStream() { printf("."); fflush(stdout); - if (patternMatcher_.get() == NULL || - handler_ == NULL) + if (handler_ == NULL) { return; } @@ -227,75 +370,132 @@ std::string corpus; buffer_.Flatten(corpus); - printf("------------------------------\n"); fflush(stdout); + StringMatcher::Iterator current = corpus.begin(); + StringMatcher::Iterator corpusEnd = corpus.end(); - StringMatcher::Iterator start = corpus.begin(); + if (state_ == State_MainHeaders) + { + if (headersMatcher_.Apply(corpus)) + { + ParseHeaders(mainHeaders_, current, headersMatcher_.GetMatchBegin()); + InitializeMultipart(mainHeaders_); // (*) + state_ = State_UnusedArea; + current = headersMatcher_.GetMatchEnd(); + } + else + { + // The headers are not completely received yet, recycle the corpus for next iteration + buffer_.AddChunk(corpus); + return; + } + } - while (patternMatcher_->Apply(start, corpus.end())) + assert(boundaryMatcher_.get() != NULL); // This is initialized at (*) + + if (state_ == State_UnusedArea) { - std::string s(patternMatcher_->GetMatchBegin(), patternMatcher_->GetMatchEnd()); - printf("ICI [%s]\n", s.c_str()); + /** + * "Before the first boundary is an area that is ignored by + * MIME-compliant clients. This area is generally used to put + * a message to users of old non-MIME clients." + * https://en.wikipedia.org/wiki/MIME#Multipart_messages + **/ - start = patternMatcher_->GetMatchEnd(); + if (boundaryMatcher_->Apply(current, corpusEnd)) + { + current = boundaryMatcher_->GetMatchBegin(); + state_ = State_Content; + } + else + { + // We have not seen the end of the unused area yet + return; + } } -#if 0 - std::string::const_iterator previous = corpus.end(); + StringMatcher::Iterator processed = current; -#if BOOST_VERSION >= 106200 - std::string::const_iterator current = (*patternMatcher_) (corpus.begin(), corpus.end()).first; -#else - std::string::const_iterator current = (*patternMatcher_) (corpus.begin(), corpus.end()); -#endif - - while (current != corpus.end()) + for (;;) { - if (previous == corpus.end() && - std::distance(current, reinterpret_cast<const std::string&>(corpus).begin()) != 0) + size_t patternSize = boundaryMatcher_->GetPattern().size(); + size_t remainingSize = std::distance(current, corpusEnd); + if (remainingSize < patternSize + 2) { - // TODO - There is heading garbage! => Decide what to do! - throw OrthancException(ErrorCode_NetworkProtocol); + break; // Not enough data available + } + + std::string boundary(current, current + patternSize + 2); + if (boundary == boundaryMatcher_->GetPattern() + "--") + { + state_ = State_Done; + return; } - if (previous != corpus.end()) + if (boundary != boundaryMatcher_->GetPattern() + "\r\n") { - std::string::const_iterator start = previous + pattern_.size(); - size_t size = std::distance(start, current); + throw OrthancException(ErrorCode_NetworkProtocol, + "Garbage between two items in a multipart stream"); + } - if (size > 0) - { - ParsePart(start, current); - } + StringMatcher::Iterator start = processed + patternSize + 2; + + if (!headersMatcher_.Apply(start, corpusEnd)) + { + break; // Not enough data available } - previous = current; - current += pattern_.size(); - -#if BOOST_VERSION >= 106200 - current = (*patternMatcher_) (current, reinterpret_cast<const std::string&>(corpus).end()).first; -#else - current = (*patternMatcher_) (current, reinterpret_cast<const std::string&>(corpus).end()); -#endif + Dictionary headers; + ParseHeaders(headers, start, headersMatcher_.GetMatchBegin()); + + size_t contentLength; + if (LookupHeaderSizeValue(contentLength, headers, "content-length")) + { + if (headersMatcher_.GetMatchEnd() + contentLength <= corpusEnd) + { + printf("-"); + handler_->Apply(headers, headersMatcher_.GetPointerEnd(), contentLength); + processed = headersMatcher_.GetMatchEnd() + contentLength; + } + else + { + break; // Not enough data available + } + } + else + { + // No "Content-Length" header: Search for the next boundary in the stream + if (boundaryMatcher_->Apply(headersMatcher_.GetMatchEnd(), corpusEnd)) + { + printf("+"); + handler_->Apply(headers, headersMatcher_.GetPointerEnd(), + std::distance(headersMatcher_.GetMatchEnd(), boundaryMatcher_->GetMatchBegin())); + processed = boundaryMatcher_->GetMatchBegin(); + } + else + { + break; // Not enough data available + } + } } - if (previous == corpus.end()) + if (processed == corpusEnd) { // No part found, recycle the entire corpus for next iteration buffer_.AddChunkDestructive(corpus); } else { - std::string reminder(previous, reinterpret_cast<const std::string&>(corpus).end()); + std::string reminder(processed, corpusEnd); buffer_.AddChunkDestructive(reminder); } -#endif } public: MultipartStreamParser() : + state_(State_MainHeaders), handler_(NULL), - searchHeadersEnd_("\r\n\r\n"), + headersMatcher_("\r\n\r\n"), blockSize_(10 * 1024 * 1024) { } @@ -322,11 +522,6 @@ handler_ = &handler; } - void SetSeparator(const std::string& separator) - { - patternMatcher_.reset(new StringMatcher("--" + separator)); - } - void AddChunk(const void* chunk, size_t size) { @@ -375,10 +570,14 @@ { } - virtual void Handle(const std::map<std::string, std::string>& headers, - const std::string& part) + virtual void Apply(const std::map<std::string, std::string>& headers, + const void* part, + size_t size) { //printf(">> %d\n", part.size()); + + std::string s((const char*) part, size); + printf("[%s]\n", s.c_str()); count_++; } @@ -391,9 +590,9 @@ -TEST(Multipart, Optimization) +TEST(Multipart, DISABLED_Optimization) { - std::string separator = "123456789123456789"; + std::string boundary = "123456789123456789"; std::string corpus; @@ -408,7 +607,7 @@ for (size_t i = 0; i < 10; i++) { - std::string s = "--" + separator + "\r\n\r\n\r\n"; + std::string s = "--" + boundary + "\r\n\r\n\r\n"; if (i != 0) s = "\r\n" + s; @@ -417,7 +616,7 @@ buffer.AddChunk(f); } - buffer.AddChunk("\r\n--" + separator + "--"); + buffer.AddChunk("\r\n--" + boundary + "--"); buffer.Flatten(corpus); Orthanc::SystemToolbox::WriteFile(corpus, "tutu"); @@ -433,7 +632,7 @@ { std::vector<OrthancPlugins::MultipartItem> items; - OrthancPlugins::ParseMultipartBody(items, corpus.c_str(), corpus.size(), separator); + OrthancPlugins::ParseMultipartBody(items, corpus.c_str(), corpus.size(), boundary); printf(">> %d\n", (int) items.size()); } @@ -447,7 +646,7 @@ boost::posix_time::ptime start = boost::posix_time::microsec_clock::local_time(); { - std::string pattern("--" + separator + "\r\n"); + std::string pattern("--" + boundary + "\r\n"); boost::algorithm::boyer_moore<std::string::const_iterator> search(pattern.begin(), pattern.end()); @@ -491,7 +690,7 @@ Orthanc::MultipartStreamParser parser; //parser.SetBlockSize(127); - parser.SetSeparator(separator); + //parser.SetBoundary(boundary); parser.SetHandler(toto); #if 1 @@ -523,7 +722,7 @@ TEST(Multipart, Optimization2) { - std::string separator = "123456789123456789"; + std::string boundary = "123456789123456789"; std::string f; /*f.resize(512*512*2); @@ -540,18 +739,24 @@ Orthanc::MultipartStreamParser parser; //parser.SetBlockSize(127); - parser.SetSeparator(separator); + //parser.SetBoundary(boundary); parser.SetHandler(toto); - for (size_t i = 0; i < 2; i++) + parser.AddChunk("Coucou: a\r\n"); + parser.AddChunk("Hello: b\r\n"); + parser.AddChunk("Content-Type: multipart/mixed; boundary=" + boundary + "\r\n"); + parser.AddChunk("World: c\r\n\r\n"); + + for (size_t i = 0; i < 10; i++) { - parser.AddChunk("--" + separator + "\r\n"); - parser.AddChunk("Content-Type: toto\r\n"); - parser.AddChunk("Content-Length: " + boost::lexical_cast<std::string>(f.size()) + "\r\n\r\n"); + parser.AddChunk("--" + boundary + "\r\n"); + if (i % 2 == 0) + parser.AddChunk("Content-Length: " + boost::lexical_cast<std::string>(f.size()) + "\r\n"); + parser.AddChunk("Content-Type: toto\r\n\r\n"); parser.AddChunk(f); } - parser.AddChunk("--" + separator + "--"); + parser.AddChunk("--" + boundary + "--"); parser.CloseStream(); printf("%d\n", toto.GetCount());