changeset 4652:0ad5736c8d62

use plain C strings in MultipartStreamReader instead of std::string to allow further optimizations
author Sebastien Jodogne <s.jodogne@gmail.com>
date Wed, 05 May 2021 11:50:14 +0200
parents 365f51fae413
children 3d5d6e2dcf3f
files OrthancFramework/Resources/CMake/OrthancFrameworkConfiguration.cmake OrthancFramework/Sources/HttpServer/CStringMatcher.cpp OrthancFramework/Sources/HttpServer/CStringMatcher.h OrthancFramework/Sources/HttpServer/MultipartStreamReader.cpp OrthancFramework/Sources/HttpServer/MultipartStreamReader.h OrthancFramework/Sources/HttpServer/StringMatcher.cpp OrthancFramework/UnitTestsSources/RestApiTests.cpp
diffstat 7 files changed, 325 insertions(+), 22 deletions(-) [+]
line wrap: on
line diff
--- a/OrthancFramework/Resources/CMake/OrthancFrameworkConfiguration.cmake	Wed May 05 09:47:39 2021 +0200
+++ b/OrthancFramework/Resources/CMake/OrthancFrameworkConfiguration.cmake	Wed May 05 11:50:14 2021 +0200
@@ -155,6 +155,7 @@
   ${CMAKE_CURRENT_LIST_DIR}/../../Sources/Enumerations.cpp
   ${CMAKE_CURRENT_LIST_DIR}/../../Sources/FileStorage/FileInfo.cpp
   ${CMAKE_CURRENT_LIST_DIR}/../../Sources/FileStorage/MemoryStorageArea.cpp
+  ${CMAKE_CURRENT_LIST_DIR}/../../Sources/HttpServer/CStringMatcher.cpp
   ${CMAKE_CURRENT_LIST_DIR}/../../Sources/HttpServer/HttpContentNegociation.cpp
   ${CMAKE_CURRENT_LIST_DIR}/../../Sources/HttpServer/HttpToolbox.cpp
   ${CMAKE_CURRENT_LIST_DIR}/../../Sources/HttpServer/MultipartStreamReader.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/OrthancFramework/Sources/HttpServer/CStringMatcher.cpp	Wed May 05 11:50:14 2021 +0200
@@ -0,0 +1,146 @@
+/**
+ * Orthanc - A Lightweight, RESTful DICOM Store
+ * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics
+ * Department, University Hospital of Liege, Belgium
+ * Copyright (C) 2017-2021 Osimis S.A., Belgium
+ *
+ * This program is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program. If not, see
+ * <http://www.gnu.org/licenses/>.
+ **/
+
+
+#include "../PrecompiledHeaders.h"
+#include "CStringMatcher.h"
+
+#include "../OrthancException.h"
+
+#include <boost/algorithm/searching/boyer_moore.hpp>
+
+namespace Orthanc
+{
+  class CStringMatcher::Search : public boost::noncopyable
+  {
+  private:
+    typedef boost::algorithm::boyer_moore<const char*>  Algorithm;
+
+    Algorithm algorithm_;
+
+  public:
+    // WARNING - The lifetime of "pattern_" must be larger than
+    // "search_", as the latter internally keeps a pointer to "pattern" (*)
+    explicit Search(const std::string& pattern) :
+      algorithm_(pattern.c_str(), pattern.c_str() + pattern.size())
+    {
+    }
+
+    const char* Apply(const char* start,
+                      const char* end) const
+    {
+#if BOOST_VERSION >= 106200
+      return algorithm_(start, end).first;
+#else
+      return algorithm_(start, end);
+#endif
+    }
+  };
+
+
+
+  CStringMatcher::CStringMatcher(const std::string& pattern) :
+    pattern_(pattern),
+    valid_(false)
+  {
+    // WARNING - Don't use "pattern" (local variable, will be
+    // destroyed once exiting the constructor) but "pattern_"
+    // (variable member, will last as long as the algorithm),
+    // otherwise lifetime is bad! (*)
+    search_.reset(new Search(pattern_));
+  }
+
+  const std::string& CStringMatcher::GetPattern() const
+  {
+    return pattern_;
+  }
+
+  bool CStringMatcher::IsValid() const
+  {
+    return valid_;
+  }
+  
+
+  bool CStringMatcher::Apply(const char* start,
+                             const char* end)
+  {
+    assert(search_.get() != NULL);
+
+    if (start > end)
+    {
+      throw OrthancException(ErrorCode_ParameterOutOfRange);
+    }
+
+    matchBegin_ = search_->Apply(start, end);
+    
+    if (matchBegin_ == end)
+    {
+      valid_ = false;
+    }
+    else
+    {
+      matchEnd_ = matchBegin_ + pattern_.size();
+      assert(matchEnd_ <= end);
+      valid_ = true;
+    }
+
+    return valid_;
+  }
+
+  
+  bool CStringMatcher::Apply(const std::string& corpus)
+  {
+    if (corpus.empty())
+    {
+      return false;
+    }
+    else
+    {
+      return Apply(corpus.c_str(), corpus.c_str() + corpus.size());
+    }
+  }
+
+
+  const char* CStringMatcher::GetMatchBegin() const
+  {
+    if (valid_)
+    {
+      return matchBegin_;
+    }
+    else
+    {
+      throw OrthancException(ErrorCode_BadSequenceOfCalls);
+    }
+  }
+
+
+  const char* CStringMatcher::GetMatchEnd() const
+  {
+    if (valid_)
+    {
+      return matchEnd_;
+    }
+    else
+    {
+      throw OrthancException(ErrorCode_BadSequenceOfCalls);
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/OrthancFramework/Sources/HttpServer/CStringMatcher.h	Wed May 05 11:50:14 2021 +0200
@@ -0,0 +1,62 @@
+/**
+ * Orthanc - A Lightweight, RESTful DICOM Store
+ * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics
+ * Department, University Hospital of Liege, Belgium
+ * Copyright (C) 2017-2021 Osimis S.A., Belgium
+ *
+ * This program is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program. If not, see
+ * <http://www.gnu.org/licenses/>.
+ **/
+
+
+#pragma once
+
+#include "../OrthancFramework.h"
+
+#include <boost/noncopyable.hpp>
+#include <boost/shared_ptr.hpp>
+#include <string>
+
+namespace Orthanc
+{
+  // Convenience class that wraps a Boost algorithm for string matching
+  class ORTHANC_PUBLIC CStringMatcher : public boost::noncopyable
+  {
+  private:
+    class Search;
+      
+    boost::shared_ptr<Search>  search_;  // PImpl pattern
+    std::string                pattern_;
+    bool                       valid_;
+    const char*                matchBegin_;
+    const char*                matchEnd_;
+    
+  public:
+    explicit CStringMatcher(const std::string& pattern);
+
+    const std::string& GetPattern() const;
+
+    bool IsValid() const;
+
+    // "end" is non-inclusive
+    bool Apply(const char* start,
+               const char* end);
+
+    bool Apply(const std::string& corpus);
+
+    const char* GetMatchBegin() const;
+
+    const char* GetMatchEnd() const;
+  };
+}
--- a/OrthancFramework/Sources/HttpServer/MultipartStreamReader.cpp	Wed May 05 09:47:39 2021 +0200
+++ b/OrthancFramework/Sources/HttpServer/MultipartStreamReader.cpp	Wed May 05 11:50:14 2021 +0200
@@ -37,10 +37,11 @@
 namespace Orthanc
 {
   static void ParseHeaders(MultipartStreamReader::HttpHeaders& headers,
-                           StringMatcher::Iterator start,
-                           StringMatcher::Iterator end)
+                           const char* start,
+                           const char* end /* exclusive */)
   {
-    std::string tmp(start, end);
+    assert(start <= end);
+    std::string tmp(start, end - start);
 
     std::vector<std::string> lines;
     Toolbox::TokenizeString(lines, tmp, '\n');
@@ -108,8 +109,13 @@
     std::string corpus;
     buffer_.Flatten(corpus);
 
-    StringMatcher::Iterator current = corpus.begin();
-    StringMatcher::Iterator corpusEnd = corpus.end();
+    if (corpus.empty())
+    {
+      return;
+    }
+
+    const char* current = corpus.c_str();
+    const char* corpusEnd = corpus.c_str() + corpus.size();
 
     if (state_ == State_UnusedArea)
     {
@@ -128,15 +134,18 @@
       else
       {
         // We have not seen the end of the unused area yet
-        buffer_.AddChunk(current, corpusEnd);
+        assert(current <= corpusEnd);
+        buffer_.AddChunk(current, corpusEnd - current);
         return;
       }          
     } 
       
     for (;;)
     {
+      assert(current <= corpusEnd);
+      
       size_t patternSize = boundaryMatcher_.GetPattern().size();
-      size_t remainingSize = std::distance(current, corpusEnd);
+      size_t remainingSize = corpusEnd - current;
       if (remainingSize < patternSize + 2)
       {
         break;  // Not enough data available
@@ -155,7 +164,7 @@
                                "Garbage between two items in a multipart stream");
       }
 
-      StringMatcher::Iterator start = current + patternSize + 2;
+      const char* start = current + patternSize + 2;
         
       if (!headersMatcher_.Apply(start, corpusEnd))
       {
@@ -170,7 +179,8 @@
       {
         if (boundaryMatcher_.Apply(headersMatcher_.GetMatchEnd(), corpusEnd))
         {
-          size_t d = std::distance(headersMatcher_.GetMatchEnd(), boundaryMatcher_.GetMatchBegin());
+          assert(headersMatcher_.GetMatchEnd() <= boundaryMatcher_.GetMatchBegin());
+          size_t d = boundaryMatcher_.GetMatchBegin() - headersMatcher_.GetMatchEnd();
           if (d <= 1)
           {
             throw OrthancException(ErrorCode_NetworkProtocol);
@@ -186,14 +196,14 @@
         }
       }
 
-      // Explicit conversion to avoid warning about signed vs. unsigned comparison
-      std::iterator_traits<StringMatcher::Iterator>::difference_type d = contentLength + 2;
-      if (d > std::distance(headersMatcher_.GetMatchEnd(), corpusEnd))
+      // "static_cast<>" to avoid warning about signed vs. unsigned comparison
+      assert(headersMatcher_.GetMatchEnd() <= corpusEnd);
+      if (contentLength + 2 > static_cast<size_t>(corpusEnd - headersMatcher_.GetMatchEnd()))
       {
         break;  // Not enough data available to have a full part
       }
 
-      const char* p = headersMatcher_.GetPointerEnd() + contentLength;
+      const char* p = headersMatcher_.GetMatchEnd() + contentLength;
       if (p[0] != '\r' ||
           p[1] != '\n')
       {
@@ -201,13 +211,14 @@
                                "No endline at the end of a part");
       }
           
-      handler_->HandlePart(headers, headersMatcher_.GetPointerEnd(), contentLength);
+      handler_->HandlePart(headers, headersMatcher_.GetMatchEnd(), contentLength);
       current = headersMatcher_.GetMatchEnd() + contentLength + 2;
     }
 
     if (current != corpusEnd)
     {
-      buffer_.AddChunk(current, corpusEnd);
+      assert(current < corpusEnd);
+      buffer_.AddChunk(current, corpusEnd - current);
     }
   }
 
--- a/OrthancFramework/Sources/HttpServer/MultipartStreamReader.h	Wed May 05 09:47:39 2021 +0200
+++ b/OrthancFramework/Sources/HttpServer/MultipartStreamReader.h	Wed May 05 11:50:14 2021 +0200
@@ -22,7 +22,7 @@
 
 #pragma once
 
-#include "StringMatcher.h"
+#include "CStringMatcher.h"
 #include "../ChunkedBuffer.h"
 
 #include <map>
@@ -56,8 +56,8 @@
     
     State          state_;
     IHandler*      handler_;
-    StringMatcher  headersMatcher_;
-    StringMatcher  boundaryMatcher_;
+    CStringMatcher headersMatcher_;
+    CStringMatcher boundaryMatcher_;
     ChunkedBuffer  buffer_;
     size_t         blockSize_;
 
--- a/OrthancFramework/Sources/HttpServer/StringMatcher.cpp	Wed May 05 09:47:39 2021 +0200
+++ b/OrthancFramework/Sources/HttpServer/StringMatcher.cpp	Wed May 05 11:50:14 2021 +0200
@@ -31,7 +31,7 @@
 
 namespace Orthanc
 {
-  class StringMatcher::Search
+  class StringMatcher::Search : public boost::noncopyable
   {
   private:
     typedef boost::algorithm::boyer_moore<Iterator>  Algorithm;
@@ -70,7 +70,7 @@
     search_.reset(new Search(pattern_));
   }
 
-  const std::string &StringMatcher::GetPattern() const
+  const std::string& StringMatcher::GetPattern() const
   {
     return pattern_;
   }
@@ -101,7 +101,7 @@
     return valid_;
   }
 
-  bool StringMatcher::Apply(const std::string &corpus)
+  bool StringMatcher::Apply(const std::string& corpus)
   {
     return Apply(corpus.begin(), corpus.end());
   }
--- a/OrthancFramework/UnitTestsSources/RestApiTests.cpp	Wed May 05 09:47:39 2021 +0200
+++ b/OrthancFramework/UnitTestsSources/RestApiTests.cpp	Wed May 05 11:50:14 2021 +0200
@@ -31,6 +31,7 @@
 #include "../Sources/Compression/ZlibCompressor.h"
 #include "../Sources/HttpServer/HttpContentNegociation.h"
 #include "../Sources/HttpServer/MultipartStreamReader.h"
+#include "../Sources/HttpServer/StringMatcher.h"
 #include "../Sources/Logging.h"
 #include "../Sources/OrthancException.h"
 #include "../Sources/RestApi/RestApiHierarchy.h"
@@ -722,7 +723,6 @@
   {
     std::string s(10u, '\0');  // String with null values
     ASSERT_EQ(10u, s.size());
-    ASSERT_EQ(10u, s.size());
     ASSERT_FALSE(matcher.Apply(s));
 
     s[9] = '-';
@@ -741,6 +741,89 @@
 }
 
 
+TEST(CStringMatcher, Basic)
+{
+  CStringMatcher matcher("---");
+
+  ASSERT_THROW(matcher.GetMatchBegin(), OrthancException);
+
+  {
+    ASSERT_FALSE(matcher.Apply(NULL, 0));
+    
+    const std::string s = "";
+    ASSERT_FALSE(matcher.Apply(s));
+  }
+
+  {
+    const char* s = "abc---def";
+    ASSERT_TRUE(matcher.Apply(s, s + 9));
+    
+    ASSERT_EQ('a', matcher.GetMatchBegin()[-3]);
+    ASSERT_EQ('b', matcher.GetMatchBegin()[-2]);
+    ASSERT_EQ('c', matcher.GetMatchBegin()[-1]);
+    ASSERT_EQ('-', matcher.GetMatchBegin()[0]);
+    ASSERT_EQ('-', matcher.GetMatchBegin()[1]);
+    ASSERT_EQ('-', matcher.GetMatchBegin()[2]);
+    ASSERT_EQ('d', matcher.GetMatchBegin()[3]);
+    ASSERT_EQ('e', matcher.GetMatchBegin()[4]);
+    ASSERT_EQ('f', matcher.GetMatchBegin()[5]);
+    ASSERT_EQ('\0', matcher.GetMatchBegin()[6]);
+
+    ASSERT_EQ('a', matcher.GetMatchEnd()[-6]);
+    ASSERT_EQ('b', matcher.GetMatchEnd()[-5]);
+    ASSERT_EQ('c', matcher.GetMatchEnd()[-4]);
+    ASSERT_EQ('-', matcher.GetMatchEnd()[-3]);
+    ASSERT_EQ('-', matcher.GetMatchEnd()[-2]);
+    ASSERT_EQ('-', matcher.GetMatchEnd()[-1]);
+    ASSERT_EQ('d', matcher.GetMatchEnd()[0]);
+    ASSERT_EQ('e', matcher.GetMatchEnd()[1]);
+    ASSERT_EQ('f', matcher.GetMatchEnd()[2]);
+    ASSERT_EQ('\0', matcher.GetMatchEnd()[3]);
+  }
+
+  {
+    const std::string s = "abc----def";
+    ASSERT_TRUE(matcher.Apply(s));
+    ASSERT_EQ(3, std::distance(s.c_str(), matcher.GetMatchBegin()));
+    ASSERT_EQ("---", std::string(matcher.GetMatchBegin(), matcher.GetMatchEnd()));
+  }
+
+  {
+    const std::string s = "abc---";
+    ASSERT_TRUE(matcher.Apply(s));
+    ASSERT_EQ(3, std::distance(s.c_str(), matcher.GetMatchBegin()));
+    ASSERT_EQ(s.c_str() + s.size(), matcher.GetMatchEnd());
+    ASSERT_EQ("---", std::string(matcher.GetMatchBegin(), matcher.GetMatchEnd()));
+    ASSERT_EQ("", std::string(matcher.GetMatchEnd(), s.c_str() + s.size()));
+  }
+
+  {
+    const std::string s = "abc--def";
+    ASSERT_FALSE(matcher.Apply(s));
+    ASSERT_THROW(matcher.GetMatchBegin(), OrthancException);
+    ASSERT_THROW(matcher.GetMatchEnd(), OrthancException);
+  }
+
+  {
+    std::string s(10u, '\0');  // String with null values
+    ASSERT_EQ(10u, s.size());
+    ASSERT_FALSE(matcher.Apply(s));
+
+    s[9] = '-';
+    ASSERT_FALSE(matcher.Apply(s));
+
+    s[8] = '-';
+    ASSERT_FALSE(matcher.Apply(s));
+
+    s[7] = '-';
+    ASSERT_TRUE(matcher.Apply(s));
+    ASSERT_EQ(s.c_str() + 7, matcher.GetMatchBegin());
+    ASSERT_EQ(s.c_str() + 10, matcher.GetMatchEnd());
+    ASSERT_EQ(s.c_str() + s.size() - 3, matcher.GetMatchBegin());
+    ASSERT_EQ(s.c_str() + s.size(), matcher.GetMatchEnd());
+  }
+}
+
 
 class MultipartTester : public MultipartStreamReader::IHandler
 {