# HG changeset patch # User Sebastien Jodogne # Date 1559554497 -7200 # Node ID c0e41608bed3c13af1985d3b0f0abf4d18e21a0e # Parent 8853ca513c1ee647e6c47c5a74076bcbf6615f95 new class: StringMatcher diff -r 8853ca513c1e -r c0e41608bed3 UnitTestsSources/UnitTestsMain.cpp --- a/UnitTestsSources/UnitTestsMain.cpp Mon Jun 03 10:38:05 2019 +0200 +++ b/UnitTestsSources/UnitTestsMain.cpp Mon Jun 03 11:34:57 2019 +0200 @@ -73,6 +73,96 @@ namespace Orthanc { + // Convenience class that wraps a Boost algorithm for string matching + class StringMatcher : public boost::noncopyable + { + public: + typedef std::string::const_iterator Iterator; + + private: + typedef boost::algorithm::boyer_moore Search; + //typedef boost::algorithm::boyer_moore_horspool Search; + + // WARNING - The lifetime of "pattern_" must be larger than + // "search_", as the latter references "pattern_" + std::string pattern_; + Search search_; + bool valid_; + Iterator matchBegin_; + Iterator matchEnd_; + + public: + StringMatcher(const std::string& pattern) : + pattern_(pattern), + search_(pattern_.begin(), pattern_.end()), + valid_(false) + { + } + + size_t GetPatternSize() const + { + return pattern_.size(); + } + + bool IsValid() const + { + return valid_; + } + + bool Apply(Iterator start, + Iterator end) + { +#if BOOST_VERSION >= 106200 + matchBegin_ = search_(start, end).first; +#else + matchBegin_ = search_(start, end); +#endif + + if (matchBegin_ == end) + { + valid_ = false; + } + else + { + matchEnd_ = matchBegin_ + pattern_.size(); + assert(matchEnd_ <= end); + valid_ = true; + } + + return valid_; + } + + bool Apply(const std::string& corpus) + { + return Apply(corpus.begin(), corpus.end()); + } + + Iterator GetMatchBegin() const + { + if (valid_) + { + return matchBegin_; + } + else + { + throw OrthancException(ErrorCode_BadSequenceOfCalls); + } + } + + Iterator GetMatchEnd() const + { + if (valid_) + { + return matchEnd_; + } + else + { + throw OrthancException(ErrorCode_BadSequenceOfCalls); + } + } + }; + + class MultipartStreamParser : public boost::noncopyable { public: @@ -94,9 +184,9 @@ //typedef boost::algorithm::knuth_morris_pratt Search; IHandler* handler_; - std::auto_ptr searchHeadersEnd_; - std::auto_ptr searchPattern_; - std::string pattern_; + StringMatcher searchHeadersEnd_; + std::auto_ptr patternMatcher_; + //std::auto_ptr patternMatcher_; ChunkedBuffer buffer_; size_t blockSize_; @@ -104,26 +194,31 @@ void ParsePart(std::string::const_iterator start, std::string::const_iterator end) { -#if BOOST_VERSION >= 106200 - std::string::const_iterator pos = (*searchHeadersEnd_) (start, end).first; -#else - std::string::const_iterator pos = (*searchHeadersEnd_) (start, end); -#endif + searchHeadersEnd_.Apply(start, end); - std::string s(start, pos); - printf("[%s]\n", s.c_str()); +#if 0 + if (searchHeadersEnd_.GetIterator() != end) + { + std::string s(start, searchHeadersEnd_.GetIterator()); + printf("[%s]\n", s.c_str()); + + //std::map headers; + //std::string part(searchHeadersEnd_.GetIterator(), end); + //std::string part; + //handler_->Handle(headers, part); + } //printf("%d \n", size); // TODO - Parse headers - //handler_->Handle(part); +#endif } void ParseStream() { printf("."); fflush(stdout); - if (searchPattern_.get() == NULL || + if (patternMatcher_.get() == NULL || handler_ == NULL) { return; @@ -132,12 +227,25 @@ std::string corpus; buffer_.Flatten(corpus); + printf("------------------------------\n"); fflush(stdout); + + StringMatcher::Iterator start = corpus.begin(); + + while (patternMatcher_->Apply(start, corpus.end())) + { + std::string s(patternMatcher_->GetMatchBegin(), patternMatcher_->GetMatchEnd()); + printf("ICI [%s]\n", s.c_str()); + + start = patternMatcher_->GetMatchEnd(); + } + +#if 0 std::string::const_iterator previous = corpus.end(); #if BOOST_VERSION >= 106200 - std::string::const_iterator current = (*searchPattern_) (corpus.begin(), corpus.end()).first; + std::string::const_iterator current = (*patternMatcher_) (corpus.begin(), corpus.end()).first; #else - std::string::const_iterator current = (*searchPattern_) (corpus.begin(), corpus.end()); + std::string::const_iterator current = (*patternMatcher_) (corpus.begin(), corpus.end()); #endif while (current != corpus.end()) @@ -164,9 +272,9 @@ current += pattern_.size(); #if BOOST_VERSION >= 106200 - current = (*searchPattern_) (current, reinterpret_cast(corpus).end()).first; + current = (*patternMatcher_) (current, reinterpret_cast(corpus).end()).first; #else - current = (*searchPattern_) (current, reinterpret_cast(corpus).end()); + current = (*patternMatcher_) (current, reinterpret_cast(corpus).end()); #endif } @@ -180,16 +288,16 @@ std::string reminder(previous, reinterpret_cast(corpus).end()); buffer_.AddChunkDestructive(reminder); } +#endif } public: MultipartStreamParser() : handler_(NULL), + searchHeadersEnd_("\r\n\r\n"), blockSize_(10 * 1024 * 1024) { - const std::string s = "\r\n\r\n"; - searchHeadersEnd_.reset(new Search(s.begin(), s.end())); } void SetBlockSize(size_t size) @@ -216,8 +324,7 @@ void SetSeparator(const std::string& separator) { - pattern_ = "--" + separator; - searchPattern_.reset(new Search(pattern_.begin(), pattern_.end())); + patternMatcher_.reset(new StringMatcher("--" + separator)); } void AddChunk(const void* chunk, @@ -419,9 +526,10 @@ std::string separator = "123456789123456789"; std::string f; - f.resize(512*512*2); + /*f.resize(512*512*2); for (size_t i = 0; i < f.size(); i++) - f[i] = i % 256; + f[i] = i % 256;*/ + f = "hello"; boost::posix_time::ptime start = boost::posix_time::microsec_clock::local_time(); @@ -435,7 +543,7 @@ parser.SetSeparator(separator); parser.SetHandler(toto); - for (size_t i = 0; i < 100; i++) + for (size_t i = 0; i < 2; i++) { parser.AddChunk("--" + separator + "\r\n"); parser.AddChunk("Content-Type: toto\r\n"); @@ -455,6 +563,37 @@ } +TEST(StringMatcher, Basic) +{ + Orthanc::StringMatcher matcher("---"); + + ASSERT_THROW(matcher.GetMatchBegin(), Orthanc::OrthancException); + + { + const std::string s = "abc----def"; + ASSERT_TRUE(matcher.Apply(s)); + ASSERT_EQ(3, std::distance(s.begin(), matcher.GetMatchBegin())); + ASSERT_EQ("---", std::string(matcher.GetMatchBegin(), matcher.GetMatchEnd())); + } + + { + const std::string s = "abc---"; + ASSERT_TRUE(matcher.Apply(s)); + ASSERT_EQ(3, std::distance(s.begin(), matcher.GetMatchBegin())); + ASSERT_EQ(s.end(), matcher.GetMatchEnd()); + ASSERT_EQ("---", std::string(matcher.GetMatchBegin(), matcher.GetMatchEnd())); + ASSERT_EQ("", std::string(matcher.GetMatchEnd(), s.end())); + } + + { + const std::string s = "abc--def"; + ASSERT_FALSE(matcher.Apply(s)); + ASSERT_THROW(matcher.GetMatchBegin(), Orthanc::OrthancException); + ASSERT_THROW(matcher.GetMatchEnd(), Orthanc::OrthancException); + } +} + + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv);