changeset 162:4917ca9c0e30

Improved robustness in the STOW-RS server
author Sebastien Jodogne <s.jodogne@gmail.com>
date Wed, 02 Nov 2016 17:35:05 +0100
parents 4f543f8b9b5d
children 0eb221cc085f
files NEWS Plugin/Configuration.cpp Plugin/StowRs.cpp Resources/Samples/Python/SendStow.py
diffstat 4 files changed, 147 insertions(+), 81 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Fri Sep 16 09:20:33 2016 +0200
+++ b/NEWS	Wed Nov 02 17:35:05 2016 +0100
@@ -1,6 +1,7 @@
 Pending changes in the mainline
 ===============================
 
+* Improved robustness in the STOW-RS server (occurrences of "\r\n\r\n" in DICOM are supported)
 * Performance warning if runtime debug assertions are turned on
 
 
--- a/Plugin/Configuration.cpp	Fri Sep 16 09:20:33 2016 +0200
+++ b/Plugin/Configuration.cpp	Wed Nov 02 17:35:05 2016 +0100
@@ -83,6 +83,120 @@
   }
 
 
+  static const boost::regex MULTIPART_HEADERS_ENDING("(.*?\r\n)\r\n(.*)");
+  static const boost::regex MULTIPART_HEADERS_LINE(".*?\r\n");
+
+  static void ParseMultipartHeaders(bool& hasLength /* out */,
+                                    size_t& length /* out */,
+                                    std::string& contentType /* out */,
+                                    OrthancPluginContext* context,
+                                    const char* startHeaders,
+                                    const char* endHeaders)
+  {
+    hasLength = false;
+    contentType = "application/octet-stream";
+
+    // Loop over the HTTP headers of this multipart item
+    boost::cregex_token_iterator it(startHeaders, endHeaders, MULTIPART_HEADERS_LINE, 0);
+    boost::cregex_token_iterator iteratorEnd;
+
+    for (; it != iteratorEnd; ++it)
+    {
+      const std::string line(*it);
+      size_t colon = line.find(':');
+      size_t eol = line.find('\r');
+
+      if (colon != std::string::npos &&
+          eol != std::string::npos &&
+          colon < eol &&
+          eol + 2 == line.length())
+      {
+        std::string key = Orthanc::Toolbox::StripSpaces(line.substr(0, colon));
+        Orthanc::Toolbox::ToLowerCase(key);
+
+        const std::string value = Orthanc::Toolbox::StripSpaces(line.substr(colon + 1, eol - colon - 1));
+
+        if (key == "content-length")
+        {
+          try
+          {
+            int tmp = boost::lexical_cast<int>(value);
+            if (tmp >= 0)
+            {
+              hasLength = true;
+              length = tmp;
+            }
+          }
+          catch (boost::bad_lexical_cast&)
+          {
+            OrthancPluginLogWarning(context, "Unable to parse the Content-Length of a multipart item");
+          }
+        }
+        else if (key == "content-type")
+        {
+          contentType = value;
+        }
+      }
+    }
+  }
+
+
+  static const char* ParseMultipartItem(std::vector<MultipartItem>& result,
+                                        OrthancPluginContext* context,
+                                        const char* start,
+                                        const char* end,
+                                        const boost::regex& nextSeparator)
+  {
+    // Just before "start", it is guaranteed that "--[BOUNDARY]\r\n" is present
+
+    boost::cmatch what;
+    if (!boost::regex_match(start, end, what, MULTIPART_HEADERS_ENDING, boost::match_perl))
+    {
+      // Cannot find the HTTP headers of this multipart item
+      throw OrthancPlugins::PluginException(OrthancPluginErrorCode_NetworkProtocol);
+    }
+
+    // Some aliases for more clarity
+    assert(what[1].first == start);
+    const char* startHeaders = what[1].first;
+    const char* endHeaders = what[1].second;
+    const char* startBody = what[2].first;
+
+    bool hasLength;
+    size_t length;
+    std::string contentType;
+    ParseMultipartHeaders(hasLength, length, contentType, context, startHeaders, endHeaders);
+
+    boost::cmatch separator;
+
+    if (hasLength)
+    {
+      if (!boost::regex_match(startBody + length, end, separator, nextSeparator, boost::match_perl) ||
+          startBody + length != separator[1].first)
+      {
+        // Cannot find the separator after skipping the "Content-Length" bytes
+        throw OrthancPlugins::PluginException(OrthancPluginErrorCode_NetworkProtocol);
+      }
+    }
+    else
+    {
+      if (!boost::regex_match(startBody, end, separator, nextSeparator, boost::match_perl))
+      {
+        // No more occurrence of the boundary separator
+        throw OrthancPlugins::PluginException(OrthancPluginErrorCode_NetworkProtocol);
+      }
+    }
+
+    MultipartItem item;
+    item.data_ = startBody;
+    item.size_ = separator[1].first - startBody;
+    item.contentType_ = contentType;
+    result.push_back(item);
+
+    return separator[1].second;  // Return the end of the separator
+  }
+
+
   void ParseMultipartBody(std::vector<MultipartItem>& result,
                           OrthancPluginContext* context,
                           const char* body,
@@ -94,96 +208,38 @@
 
     result.clear();
 
-    const boost::regex separator("(^|\r\n)--" + boundary + "(--|\r\n)");
-    const boost::regex encapsulation("(.*)\r\n\r\n(.*)");
- 
-    std::vector< std::pair<const char*, const char*> > parts;
-    
-    const char* start = body;
+    // Look for the first boundary separator in the body (note the "?"
+    // to request non-greedy search)
+    const boost::regex firstSeparator1("--" + boundary + "(--|\r\n).*");
+    const boost::regex firstSeparator2(".*?\r\n--" + boundary + "(--|\r\n).*");
+
+    // Look for the next boundary separator in the body (note the "?"
+    // to request non-greedy search)
+    const boost::regex nextSeparator(".*?(\r\n--" + boundary + ").*");
+
     const char* end = body + bodySize;
 
     boost::cmatch what;
-    boost::match_flag_type flags = boost::match_perl | boost::match_single_line;
-    while (boost::regex_search(start, end, what, separator, flags))   
+    if (boost::regex_match(body, end, what, firstSeparator1, boost::match_perl | boost::match_single_line) ||
+        boost::regex_match(body, end, what, firstSeparator2, boost::match_perl | boost::match_single_line))
     {
-      if (start != body)  // Ignore the first separator
-      {
-        parts.push_back(std::make_pair(start, what[0].first));
-      }
-
-      if (*what[2].first == '-')
-      {
-        // This is the last separator (there is a trailing "--")
-        break;
-      }
-
-      start = what[0].second;
-      flags |= boost::match_prev_avail;
-    }
+      const char* current = what[1].first;
 
-    for (size_t i = 0; i < parts.size(); i++)
-    {
-      if (boost::regex_match(parts[i].first, parts[i].second, what, encapsulation, boost::match_perl))
+      while (current != NULL &&
+             current + 2 < end)
       {
-        size_t dicomSize = what[2].second - what[2].first;
-
-        std::string contentType = "application/octet-stream";
-        std::vector<std::string> headers;
-
-        {
-          std::string tmp;
-          tmp.assign(what[1].first, what[1].second);
-          Orthanc::Toolbox::TokenizeString(headers, tmp, '\n');
-        }
-
-        bool valid = true;
-
-        for (size_t j = 0; j < headers.size(); j++)
+        if (current[0] != '\r' ||
+            current[1] != '\n')
         {
-          std::vector<std::string> tokens;
-          Orthanc::Toolbox::TokenizeString(tokens, headers[j], ':');
-
-          if (tokens.size() == 2)
-          {
-            std::string key = Orthanc::Toolbox::StripSpaces(tokens[0]);
-            std::string value = Orthanc::Toolbox::StripSpaces(tokens[1]);
-            Orthanc::Toolbox::ToLowerCase(key);
-
-            if (key == "content-type")
-            {
-              contentType = value;
-            }
-            else if (key == "content-length")
-            {
-              try
-              {
-                size_t s = boost::lexical_cast<size_t>(value);
-                if (s != dicomSize)
-                {
-                  valid = false;
-                }
-              }
-              catch (boost::bad_lexical_cast&)
-              {
-                valid = false;
-              }
-            }
-          }
-        }
-
-        if (valid)
-        {
-          MultipartItem item;
-          item.data_ = what[2].first;
-          item.size_ = dicomSize;
-          item.contentType_ = contentType;
-          result.push_back(item);          
+          // We reached a separator with a trailing "--", which
+          // means that reading the multipart body is done
+          break;
         }
         else
         {
-          OrthancPluginLogWarning(context, "Ignoring a badly-formatted item in a multipart body");
+          current = ParseMultipartItem(result, context, current + 2, end, nextSeparator);
         }
-      }      
+      }
     }
   }
 
--- a/Plugin/StowRs.cpp	Fri Sep 16 09:20:33 2016 +0200
+++ b/Plugin/StowRs.cpp	Wed Nov 02 17:35:05 2016 +0100
@@ -153,6 +153,12 @@
   std::vector<OrthancPlugins::MultipartItem> items;
   OrthancPlugins::ParseMultipartBody(items, context, request->body, request->bodySize, boundary);
 
+  for (size_t i = 0; i < items.size(); i++)
+  {
+    OrthancPlugins::Configuration::LogInfo("Detected multipart item with content type \"" + 
+                                           items[i].contentType_ + "\" of size " + 
+                                           boost::lexical_cast<std::string>(items[i].size_));
+  }  
 
   for (size_t i = 0; i < items.size(); i++)
   {
--- a/Resources/Samples/Python/SendStow.py	Fri Sep 16 09:20:33 2016 +0200
+++ b/Resources/Samples/Python/SendStow.py	Wed Nov 02 17:35:05 2016 +0100
@@ -30,7 +30,8 @@
 import json
 import uuid
 
-if len(sys.argv) < 2:
+#if len(sys.argv) < 2:
+if len(sys.argv) < 1:
     print('Usage: %s <StowUri> <file>...' % sys.argv[0])
     print('')
     print('Example: %s http://localhost:8042/dicom-web/studies hello.dcm world.dcm' % sys.argv[0])
@@ -45,9 +46,11 @@
 for i in range(2, len(sys.argv)):
     try:
         with open(sys.argv[i], 'rb') as f:
+            content = f.read()
             body += bytearray('--%s\r\n' % boundary, 'ascii')
+            body += bytearray('Content-Length: %d\r\n' % len(content), 'ascii')
             body += bytearray('Content-Type: application/dicom\r\n\r\n', 'ascii')
-            body += f.read()
+            body += content
             body += bytearray('\r\n', 'ascii')
     except:
         print('Ignoring directory %s' % sys.argv[i])