# HG changeset patch # User Sebastien Jodogne # Date 1620208214 -7200 # Node ID 0ad5736c8d62436b137c4d1c16f315fe36a803e5 # Parent 365f51fae4130d1dcc83591df833dd24140be522 use plain C strings in MultipartStreamReader instead of std::string to allow further optimizations diff -r 365f51fae413 -r 0ad5736c8d62 OrthancFramework/Resources/CMake/OrthancFrameworkConfiguration.cmake --- a/OrthancFramework/Resources/CMake/OrthancFrameworkConfiguration.cmake Wed May 05 09:47:39 2021 +0200 +++ b/OrthancFramework/Resources/CMake/OrthancFrameworkConfiguration.cmake Wed May 05 11:50:14 2021 +0200 @@ -155,6 +155,7 @@ ${CMAKE_CURRENT_LIST_DIR}/../../Sources/Enumerations.cpp ${CMAKE_CURRENT_LIST_DIR}/../../Sources/FileStorage/FileInfo.cpp ${CMAKE_CURRENT_LIST_DIR}/../../Sources/FileStorage/MemoryStorageArea.cpp + ${CMAKE_CURRENT_LIST_DIR}/../../Sources/HttpServer/CStringMatcher.cpp ${CMAKE_CURRENT_LIST_DIR}/../../Sources/HttpServer/HttpContentNegociation.cpp ${CMAKE_CURRENT_LIST_DIR}/../../Sources/HttpServer/HttpToolbox.cpp ${CMAKE_CURRENT_LIST_DIR}/../../Sources/HttpServer/MultipartStreamReader.cpp diff -r 365f51fae413 -r 0ad5736c8d62 OrthancFramework/Sources/HttpServer/CStringMatcher.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/OrthancFramework/Sources/HttpServer/CStringMatcher.cpp Wed May 05 11:50:14 2021 +0200 @@ -0,0 +1,146 @@ +/** + * Orthanc - A Lightweight, RESTful DICOM Store + * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics + * Department, University Hospital of Liege, Belgium + * Copyright (C) 2017-2021 Osimis S.A., Belgium + * + * This program is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program. If not, see + * . + **/ + + +#include "../PrecompiledHeaders.h" +#include "CStringMatcher.h" + +#include "../OrthancException.h" + +#include + +namespace Orthanc +{ + class CStringMatcher::Search : public boost::noncopyable + { + private: + typedef boost::algorithm::boyer_moore Algorithm; + + Algorithm algorithm_; + + public: + // WARNING - The lifetime of "pattern_" must be larger than + // "search_", as the latter internally keeps a pointer to "pattern" (*) + explicit Search(const std::string& pattern) : + algorithm_(pattern.c_str(), pattern.c_str() + pattern.size()) + { + } + + const char* Apply(const char* start, + const char* end) const + { +#if BOOST_VERSION >= 106200 + return algorithm_(start, end).first; +#else + return algorithm_(start, end); +#endif + } + }; + + + + CStringMatcher::CStringMatcher(const std::string& pattern) : + pattern_(pattern), + valid_(false) + { + // WARNING - Don't use "pattern" (local variable, will be + // destroyed once exiting the constructor) but "pattern_" + // (variable member, will last as long as the algorithm), + // otherwise lifetime is bad! (*) + search_.reset(new Search(pattern_)); + } + + const std::string& CStringMatcher::GetPattern() const + { + return pattern_; + } + + bool CStringMatcher::IsValid() const + { + return valid_; + } + + + bool CStringMatcher::Apply(const char* start, + const char* end) + { + assert(search_.get() != NULL); + + if (start > end) + { + throw OrthancException(ErrorCode_ParameterOutOfRange); + } + + matchBegin_ = search_->Apply(start, end); + + if (matchBegin_ == end) + { + valid_ = false; + } + else + { + matchEnd_ = matchBegin_ + pattern_.size(); + assert(matchEnd_ <= end); + valid_ = true; + } + + return valid_; + } + + + bool CStringMatcher::Apply(const std::string& corpus) + { + if (corpus.empty()) + { + return false; + } + else + { + return Apply(corpus.c_str(), corpus.c_str() + corpus.size()); + } + } + + + const char* CStringMatcher::GetMatchBegin() const + { + if (valid_) + { + return matchBegin_; + } + else + { + throw OrthancException(ErrorCode_BadSequenceOfCalls); + } + } + + + const char* CStringMatcher::GetMatchEnd() const + { + if (valid_) + { + return matchEnd_; + } + else + { + throw OrthancException(ErrorCode_BadSequenceOfCalls); + } + } +} diff -r 365f51fae413 -r 0ad5736c8d62 OrthancFramework/Sources/HttpServer/CStringMatcher.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/OrthancFramework/Sources/HttpServer/CStringMatcher.h Wed May 05 11:50:14 2021 +0200 @@ -0,0 +1,62 @@ +/** + * Orthanc - A Lightweight, RESTful DICOM Store + * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics + * Department, University Hospital of Liege, Belgium + * Copyright (C) 2017-2021 Osimis S.A., Belgium + * + * This program is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program. If not, see + * . + **/ + + +#pragma once + +#include "../OrthancFramework.h" + +#include +#include +#include + +namespace Orthanc +{ + // Convenience class that wraps a Boost algorithm for string matching + class ORTHANC_PUBLIC CStringMatcher : public boost::noncopyable + { + private: + class Search; + + boost::shared_ptr search_; // PImpl pattern + std::string pattern_; + bool valid_; + const char* matchBegin_; + const char* matchEnd_; + + public: + explicit CStringMatcher(const std::string& pattern); + + const std::string& GetPattern() const; + + bool IsValid() const; + + // "end" is non-inclusive + bool Apply(const char* start, + const char* end); + + bool Apply(const std::string& corpus); + + const char* GetMatchBegin() const; + + const char* GetMatchEnd() const; + }; +} diff -r 365f51fae413 -r 0ad5736c8d62 OrthancFramework/Sources/HttpServer/MultipartStreamReader.cpp --- a/OrthancFramework/Sources/HttpServer/MultipartStreamReader.cpp Wed May 05 09:47:39 2021 +0200 +++ b/OrthancFramework/Sources/HttpServer/MultipartStreamReader.cpp Wed May 05 11:50:14 2021 +0200 @@ -37,10 +37,11 @@ namespace Orthanc { static void ParseHeaders(MultipartStreamReader::HttpHeaders& headers, - StringMatcher::Iterator start, - StringMatcher::Iterator end) + const char* start, + const char* end /* exclusive */) { - std::string tmp(start, end); + assert(start <= end); + std::string tmp(start, end - start); std::vector lines; Toolbox::TokenizeString(lines, tmp, '\n'); @@ -108,8 +109,13 @@ std::string corpus; buffer_.Flatten(corpus); - StringMatcher::Iterator current = corpus.begin(); - StringMatcher::Iterator corpusEnd = corpus.end(); + if (corpus.empty()) + { + return; + } + + const char* current = corpus.c_str(); + const char* corpusEnd = corpus.c_str() + corpus.size(); if (state_ == State_UnusedArea) { @@ -128,15 +134,18 @@ else { // We have not seen the end of the unused area yet - buffer_.AddChunk(current, corpusEnd); + assert(current <= corpusEnd); + buffer_.AddChunk(current, corpusEnd - current); return; } } for (;;) { + assert(current <= corpusEnd); + size_t patternSize = boundaryMatcher_.GetPattern().size(); - size_t remainingSize = std::distance(current, corpusEnd); + size_t remainingSize = corpusEnd - current; if (remainingSize < patternSize + 2) { break; // Not enough data available @@ -155,7 +164,7 @@ "Garbage between two items in a multipart stream"); } - StringMatcher::Iterator start = current + patternSize + 2; + const char* start = current + patternSize + 2; if (!headersMatcher_.Apply(start, corpusEnd)) { @@ -170,7 +179,8 @@ { if (boundaryMatcher_.Apply(headersMatcher_.GetMatchEnd(), corpusEnd)) { - size_t d = std::distance(headersMatcher_.GetMatchEnd(), boundaryMatcher_.GetMatchBegin()); + assert(headersMatcher_.GetMatchEnd() <= boundaryMatcher_.GetMatchBegin()); + size_t d = boundaryMatcher_.GetMatchBegin() - headersMatcher_.GetMatchEnd(); if (d <= 1) { throw OrthancException(ErrorCode_NetworkProtocol); @@ -186,14 +196,14 @@ } } - // Explicit conversion to avoid warning about signed vs. unsigned comparison - std::iterator_traits::difference_type d = contentLength + 2; - if (d > std::distance(headersMatcher_.GetMatchEnd(), corpusEnd)) + // "static_cast<>" to avoid warning about signed vs. unsigned comparison + assert(headersMatcher_.GetMatchEnd() <= corpusEnd); + if (contentLength + 2 > static_cast(corpusEnd - headersMatcher_.GetMatchEnd())) { break; // Not enough data available to have a full part } - const char* p = headersMatcher_.GetPointerEnd() + contentLength; + const char* p = headersMatcher_.GetMatchEnd() + contentLength; if (p[0] != '\r' || p[1] != '\n') { @@ -201,13 +211,14 @@ "No endline at the end of a part"); } - handler_->HandlePart(headers, headersMatcher_.GetPointerEnd(), contentLength); + handler_->HandlePart(headers, headersMatcher_.GetMatchEnd(), contentLength); current = headersMatcher_.GetMatchEnd() + contentLength + 2; } if (current != corpusEnd) { - buffer_.AddChunk(current, corpusEnd); + assert(current < corpusEnd); + buffer_.AddChunk(current, corpusEnd - current); } } diff -r 365f51fae413 -r 0ad5736c8d62 OrthancFramework/Sources/HttpServer/MultipartStreamReader.h --- a/OrthancFramework/Sources/HttpServer/MultipartStreamReader.h Wed May 05 09:47:39 2021 +0200 +++ b/OrthancFramework/Sources/HttpServer/MultipartStreamReader.h Wed May 05 11:50:14 2021 +0200 @@ -22,7 +22,7 @@ #pragma once -#include "StringMatcher.h" +#include "CStringMatcher.h" #include "../ChunkedBuffer.h" #include @@ -56,8 +56,8 @@ State state_; IHandler* handler_; - StringMatcher headersMatcher_; - StringMatcher boundaryMatcher_; + CStringMatcher headersMatcher_; + CStringMatcher boundaryMatcher_; ChunkedBuffer buffer_; size_t blockSize_; diff -r 365f51fae413 -r 0ad5736c8d62 OrthancFramework/Sources/HttpServer/StringMatcher.cpp --- a/OrthancFramework/Sources/HttpServer/StringMatcher.cpp Wed May 05 09:47:39 2021 +0200 +++ b/OrthancFramework/Sources/HttpServer/StringMatcher.cpp Wed May 05 11:50:14 2021 +0200 @@ -31,7 +31,7 @@ namespace Orthanc { - class StringMatcher::Search + class StringMatcher::Search : public boost::noncopyable { private: typedef boost::algorithm::boyer_moore Algorithm; @@ -70,7 +70,7 @@ search_.reset(new Search(pattern_)); } - const std::string &StringMatcher::GetPattern() const + const std::string& StringMatcher::GetPattern() const { return pattern_; } @@ -101,7 +101,7 @@ return valid_; } - bool StringMatcher::Apply(const std::string &corpus) + bool StringMatcher::Apply(const std::string& corpus) { return Apply(corpus.begin(), corpus.end()); } diff -r 365f51fae413 -r 0ad5736c8d62 OrthancFramework/UnitTestsSources/RestApiTests.cpp --- a/OrthancFramework/UnitTestsSources/RestApiTests.cpp Wed May 05 09:47:39 2021 +0200 +++ b/OrthancFramework/UnitTestsSources/RestApiTests.cpp Wed May 05 11:50:14 2021 +0200 @@ -31,6 +31,7 @@ #include "../Sources/Compression/ZlibCompressor.h" #include "../Sources/HttpServer/HttpContentNegociation.h" #include "../Sources/HttpServer/MultipartStreamReader.h" +#include "../Sources/HttpServer/StringMatcher.h" #include "../Sources/Logging.h" #include "../Sources/OrthancException.h" #include "../Sources/RestApi/RestApiHierarchy.h" @@ -722,7 +723,6 @@ { std::string s(10u, '\0'); // String with null values ASSERT_EQ(10u, s.size()); - ASSERT_EQ(10u, s.size()); ASSERT_FALSE(matcher.Apply(s)); s[9] = '-'; @@ -741,6 +741,89 @@ } +TEST(CStringMatcher, Basic) +{ + CStringMatcher matcher("---"); + + ASSERT_THROW(matcher.GetMatchBegin(), OrthancException); + + { + ASSERT_FALSE(matcher.Apply(NULL, 0)); + + const std::string s = ""; + ASSERT_FALSE(matcher.Apply(s)); + } + + { + const char* s = "abc---def"; + ASSERT_TRUE(matcher.Apply(s, s + 9)); + + ASSERT_EQ('a', matcher.GetMatchBegin()[-3]); + ASSERT_EQ('b', matcher.GetMatchBegin()[-2]); + ASSERT_EQ('c', matcher.GetMatchBegin()[-1]); + ASSERT_EQ('-', matcher.GetMatchBegin()[0]); + ASSERT_EQ('-', matcher.GetMatchBegin()[1]); + ASSERT_EQ('-', matcher.GetMatchBegin()[2]); + ASSERT_EQ('d', matcher.GetMatchBegin()[3]); + ASSERT_EQ('e', matcher.GetMatchBegin()[4]); + ASSERT_EQ('f', matcher.GetMatchBegin()[5]); + ASSERT_EQ('\0', matcher.GetMatchBegin()[6]); + + ASSERT_EQ('a', matcher.GetMatchEnd()[-6]); + ASSERT_EQ('b', matcher.GetMatchEnd()[-5]); + ASSERT_EQ('c', matcher.GetMatchEnd()[-4]); + ASSERT_EQ('-', matcher.GetMatchEnd()[-3]); + ASSERT_EQ('-', matcher.GetMatchEnd()[-2]); + ASSERT_EQ('-', matcher.GetMatchEnd()[-1]); + ASSERT_EQ('d', matcher.GetMatchEnd()[0]); + ASSERT_EQ('e', matcher.GetMatchEnd()[1]); + ASSERT_EQ('f', matcher.GetMatchEnd()[2]); + ASSERT_EQ('\0', matcher.GetMatchEnd()[3]); + } + + { + const std::string s = "abc----def"; + ASSERT_TRUE(matcher.Apply(s)); + ASSERT_EQ(3, std::distance(s.c_str(), matcher.GetMatchBegin())); + ASSERT_EQ("---", std::string(matcher.GetMatchBegin(), matcher.GetMatchEnd())); + } + + { + const std::string s = "abc---"; + ASSERT_TRUE(matcher.Apply(s)); + ASSERT_EQ(3, std::distance(s.c_str(), matcher.GetMatchBegin())); + ASSERT_EQ(s.c_str() + s.size(), matcher.GetMatchEnd()); + ASSERT_EQ("---", std::string(matcher.GetMatchBegin(), matcher.GetMatchEnd())); + ASSERT_EQ("", std::string(matcher.GetMatchEnd(), s.c_str() + s.size())); + } + + { + const std::string s = "abc--def"; + ASSERT_FALSE(matcher.Apply(s)); + ASSERT_THROW(matcher.GetMatchBegin(), OrthancException); + ASSERT_THROW(matcher.GetMatchEnd(), OrthancException); + } + + { + std::string s(10u, '\0'); // String with null values + ASSERT_EQ(10u, s.size()); + ASSERT_FALSE(matcher.Apply(s)); + + s[9] = '-'; + ASSERT_FALSE(matcher.Apply(s)); + + s[8] = '-'; + ASSERT_FALSE(matcher.Apply(s)); + + s[7] = '-'; + ASSERT_TRUE(matcher.Apply(s)); + ASSERT_EQ(s.c_str() + 7, matcher.GetMatchBegin()); + ASSERT_EQ(s.c_str() + 10, matcher.GetMatchEnd()); + ASSERT_EQ(s.c_str() + s.size() - 3, matcher.GetMatchBegin()); + ASSERT_EQ(s.c_str() + s.size(), matcher.GetMatchEnd()); + } +} + class MultipartTester : public MultipartStreamReader::IHandler {