changeset 271:932f8d384a15

cont
author Sebastien Jodogne <s.jodogne@gmail.com>
date Sat, 01 Jun 2019 17:00:23 +0200
parents f5abf0b4211a
children 775ad0dd5859
files UnitTestsSources/UnitTestsMain.cpp
diffstat 1 files changed, 337 insertions(+), 44 deletions(-) [+]
line wrap: on
line diff
--- a/UnitTestsSources/UnitTestsMain.cpp	Fri May 31 17:38:22 2019 +0200
+++ b/UnitTestsSources/UnitTestsMain.cpp	Sat Jun 01 17:00:23 2019 +0200
@@ -61,18 +61,222 @@
 
 
 #include <Core/ChunkedBuffer.h>
+#include <Core/OrthancException.h>
+#include <Core/SystemToolbox.h>
 #include <Core/Toolbox.h>
-#include <Core/SystemToolbox.h>
+#include <boost/algorithm/searching/boyer_moore.hpp>
+#include <boost/algorithm/searching/boyer_moore_horspool.hpp>
+#include <boost/algorithm/searching/knuth_morris_pratt.hpp>
 #include <boost/date_time/posix_time/posix_time.hpp>
-#include <boost/algorithm/searching/boyer_moore.hpp>
 #include <boost/version.hpp>
 
+
+namespace Orthanc
+{
+  // Work-in-progress streaming parser that splits a multipart body on the
+  // "--" + separator boundary, as chunks arrive through AddChunk()
+  class MultipartStreamParser : public boost::noncopyable
+  {
+  public:
+    class IHandler : public boost::noncopyable
+    {
+    public:
+      virtual ~IHandler()
+      {
+      }
+
+      virtual void Handle(const std::string& part) = 0;  // Not called yet, cf. ParsePart()
+    };
+
+  private:
+    typedef boost::algorithm::boyer_moore<std::string::const_iterator>  Search;
+    //typedef boost::algorithm::boyer_moore_horspool<std::string::const_iterator>  Search;
+    //typedef boost::algorithm::knuth_morris_pratt<std::string::const_iterator>  Search;
+
+    IHandler*              handler_;    // Not owned, NULL until SetHandler()
+    std::auto_ptr<Search>  search_;     // Built over pattern_, NULL until SetSeparator()
+    std::string            pattern_;    // "--" followed by the separator
+    ChunkedBuffer          buffer_;     // Bytes received and not consumed yet
+    size_t                 blockSize_;  // Never zero, cf. SetBlockSize()
+
+    // Debugging stub: only prints the size of the part that was found
+    void ParsePart(const char* part,
+                   size_t size)
+    {
+      // "%d" is not a valid conversion for "size_t", hence the explicit cast
+      printf("%lu \n", static_cast<unsigned long>(size));
+      // TODO - Parse headers
+      //handler_->Handle(part);
+    }
+
+    // Gives the content between two successive boundaries to ParsePart(),
+    // then stores back the bytes starting at the last boundary (or everything
+    // if none was found). Does nothing until separator and handler are set.
+    void ParseStream()
+    {
+      printf("."); fflush(stdout);
+      if (search_.get() == NULL ||
+          handler_ == NULL)
+      {
+        return;
+      }
+      // NOTE(review): assumes that Flatten() empties "buffer_" - to be confirmed
+      std::string corpus;
+      buffer_.Flatten(corpus);
+      // "corpus.end()" means that no boundary has been seen so far
+      std::string::iterator previous = corpus.end();
+
+#if BOOST_VERSION >= 106200
+      std::string::iterator current = (*search_) (corpus.begin(), corpus.end()).first;
+#else
+      std::string::iterator current = (*search_) (corpus.begin(), corpus.end());
+#endif
+
+      while (current != corpus.end())
+      {
+        if (previous == corpus.end() &&
+            current != corpus.begin())
+        {
+          // TODO - There is heading garbage! => Decide what to do!
+          throw OrthancException(ErrorCode_NetworkProtocol);
+        }
+
+        if (previous != corpus.end())
+        {
+          // The part lies between the previous boundary and the current one
+          std::string::iterator start = previous + pattern_.size();
+          size_t size = std::distance(start, current);
+          if (size > 0)
+          {
+            ParsePart(&start[0], size);
+          }
+        }
+
+        previous = current;
+        current += pattern_.size();
+
+#if BOOST_VERSION >= 106200
+        current = (*search_) (current, corpus.end()).first;
+#else
+        current = (*search_) (current, corpus.end());
+#endif
+      }
+
+      if (previous == corpus.end())
+      {
+        // No part found, recycle the entire corpus for next iteration
+        buffer_.AddChunk(corpus);
+      }
+      else
+      {
+        // Keep the last boundary, as what follows it may still be incomplete
+        std::string remainder(previous, corpus.end());
+        buffer_.AddChunk(remainder);
+      }
+    }
+
+  public:
+    MultipartStreamParser() :
+      handler_(NULL),
+      blockSize_(10 * 1024 * 1024)
+    {
+    }
+
+    void SetBlockSize(size_t size)
+    {
+      if (size == 0)
+      {
+        throw OrthancException(ErrorCode_ParameterOutOfRange);
+      }
+
+      blockSize_ = size;
+    }
+
+    size_t GetBlockSize() const
+    {
+      return blockSize_;
+    }
+
+    void SetHandler(IHandler& handler)
+    {
+      handler_ = &handler;
+    }
+
+    void SetSeparator(const std::string& separator)
+    {
+      pattern_ = "--" + separator;
+      search_.reset(new Search(pattern_.begin(), pattern_.end()));
+    }
+
+    // Buffers the bytes, parsing when a multiple of the block size is crossed
+    void AddChunk(const void* chunk,
+                  size_t size)
+    {
+      if (size != 0)
+      {
+        size_t oldSize = buffer_.GetNumBytes();
+        buffer_.AddChunk(chunk, size);
+        if (oldSize / blockSize_ != buffer_.GetNumBytes() / blockSize_)
+        {
+          ParseStream();
+        }
+      }
+    }
+
+    void AddChunk(const std::string& chunk)
+    {
+      if (!chunk.empty())
+      {
+        AddChunk(chunk.c_str(), chunk.size());
+      }
+    }
+
+    // Parses the pending bytes (if any), then prints what is left in the buffer
+    void CloseStream()
+    {
+      if (buffer_.GetNumBytes() != 0)
+      {
+        ParseStream();
+      }
+      std::string tmp;
+      buffer_.Flatten(tmp);
+      printf("Reminder: [%s]\n", tmp.c_str());
+    }
+  };
+
+
+  // Minimal handler that only counts the parts it is given
+  class Toto : public MultipartStreamParser::IHandler
+  {
+  public:
+    Toto() : count_(0)
+    {
+    }
+
+    unsigned int GetCount() const
+    {
+      return count_;
+    }
+
+    virtual void Handle(const std::string& part)
+    {
+      ++count_;
+    }
+
+  private:
+    unsigned int count_;
+  };
+}
+
+
+
 TEST(Multipart, Optimization)
 {
-  std::string separator = Orthanc::Toolbox::GenerateUuid();
+  std::string separator = "123456789123456789";
 
   std::string corpus;
 
+  if (1)
   {
     std::string f;
     f.resize(512*512*2);
@@ -81,7 +285,7 @@
   
     Orthanc::ChunkedBuffer buffer;
 
-    for (size_t i = 0; i < 100; i++)
+    for (size_t i = 0; i < 10; i++)
     {
       std::string s = "--" + separator + "\r\n\r\n\r\n";
 
@@ -94,55 +298,144 @@
 
     buffer.AddChunk("\r\n--" + separator + "--");
     buffer.Flatten(corpus);
+
+    Orthanc::SystemToolbox::WriteFile(corpus, "tutu");
   }
+  else
+  {
+    Orthanc::SystemToolbox::ReadFile(corpus, "tutu");
+  }
+
+  if (1)
+  {
+    boost::posix_time::ptime start = boost::posix_time::microsec_clock::local_time();
+
+    {
+      std::vector<OrthancPlugins::MultipartItem> items;
+      OrthancPlugins::ParseMultipartBody(items, corpus.c_str(), corpus.size(), separator);
+      printf(">> %d\n", (int) items.size());
+    }
+
+    boost::posix_time::ptime end = boost::posix_time::microsec_clock::local_time();
+
+    printf("Parsing 1: %d ms\n", (int) (end - start).total_milliseconds());
+  }
+
+  if (0)
+  {
+    boost::posix_time::ptime start = boost::posix_time::microsec_clock::local_time();
+
+    {
+      std::string pattern("--" + separator + "\r\n");
+
+      boost::algorithm::boyer_moore<std::string::const_iterator>
+        search(pattern.begin(), pattern.end());
+
+#if BOOST_VERSION >= 106200
+      std::string::iterator it = search(corpus.begin(), corpus.end()).first;
+#else
+      std::string::iterator it = search(corpus.begin(), corpus.end());
+#endif
+
+      unsigned int c = 0;
+      while (it != corpus.end())
+      {
+        std::string t(it, it + pattern.size());
+        //printf("[%s]\n", t.c_str());
+      
+        c++;
+      
+#if BOOST_VERSION >= 106200
+        it = search(std::next(it, pattern.size()), corpus.end()).first;
+#else
+        it = search(std::next(it, pattern.size()), corpus.end());
+#endif
+      }
+
+      printf("count: %d\n", c);
+    }
+
+    boost::posix_time::ptime end = boost::posix_time::microsec_clock::local_time();
+
+    printf("Parsing 2: %d ms\n", (int) (end - start).total_milliseconds());
+  }
+
+  if (1)
+  {
+    boost::posix_time::ptime start = boost::posix_time::microsec_clock::local_time();
+
+    {
+      Orthanc::Toto toto;
+
+      Orthanc::MultipartStreamParser parser;
+
+      //parser.SetBlockSize(127);
+      parser.SetSeparator(separator);
+      parser.SetHandler(toto);
+
+#if 1
+      size_t bs = corpus.size() / 101;
+
+      const char* pos = corpus.c_str();
+      for (size_t i = 0; i < corpus.size() / bs; i++, pos += bs)
+      {
+        parser.AddChunk(pos, bs);
+      }
+
+      parser.AddChunk(pos, corpus.size() % bs);
+#else
+      parser.AddChunk(corpus);
+#endif
+
+      parser.CloseStream();
+
+      printf("%d\n", toto.GetCount());
+    }
+
+    boost::posix_time::ptime end = boost::posix_time::microsec_clock::local_time();
+
+    printf("Parsing 3: %d ms\n", (int) (end - start).total_milliseconds());
+  }
+}
+
+
+
+TEST(Multipart, Optimization2)
+{
+  std::string separator = "123456789123456789";
+
+  std::string f;  // Payload that is appended after each boundary below
+  f.resize(512*512*2);
+  for (size_t i = 0; i < f.size(); i++)
+    f[i] = i % 256;
   
+  // Time the feeding of 10 parts, then of the closing boundary, to the parser
   boost::posix_time::ptime start = boost::posix_time::microsec_clock::local_time();
 
   {
-    std::vector<OrthancPlugins::MultipartItem> items;
-    OrthancPlugins::ParseMultipartBody(items, corpus.c_str(), corpus.size(), separator);
-    printf(">> %d\n", (int) items.size());
+    Orthanc::Toto toto;
+
+    Orthanc::MultipartStreamParser parser;
+
+      //parser.SetBlockSize(127);
+    parser.SetSeparator(separator);
+    parser.SetHandler(toto);
+
+    for (size_t i = 0; i < 10; i++)
+    {
+      parser.AddChunk("--" + separator + "\r\n\r\n");
+      parser.AddChunk(f);
+    }
+
+    parser.AddChunk("--" + separator + "--");  // Closing boundary
+    parser.CloseStream();
+    
+    printf("%d\n", toto.GetCount());
   }
 
   boost::posix_time::ptime end = boost::posix_time::microsec_clock::local_time();
 
-  printf("Parsing 1: %d ms\n", (int) (end - start).total_milliseconds());
-
-  start = boost::posix_time::microsec_clock::local_time();
-
-  {
-    std::string pattern("--" + separator + "\r\n");
-
-    boost::algorithm::boyer_moore<std::string::const_iterator>
-      search(pattern.begin(), pattern.end());
-
-#if BOOST_VERSION >= 106200
-    std::string::iterator it = search(corpus.begin(), corpus.end()).first;
-#else
-    std::string::iterator it = search(corpus.begin(), corpus.end());
-#endif
-
-    unsigned int c = 0;
-    while (it != corpus.end())
-    {
-      std::string t(it, it + pattern.size());
-      printf("[%s]\n", t.c_str());
-      
-      c++;
-      
-#if BOOST_VERSION >= 106200
-      it = search(std::next(it, pattern.size()), corpus.end()).first;
-#else
-      it = search(std::next(it, pattern.size()), corpus.end());
-#endif
-    }
-
-    printf("count: %d\n", c);
-  }
-
-  end = boost::posix_time::microsec_clock::local_time();
-
-  printf("Parsing 2: %d ms\n", (int) (end - start).total_milliseconds());
+  printf("Parsing: %d ms\n", (int) (end - start).total_milliseconds());
 }