changeset 247:c9b3ba0fd140

path management in zip files
author Sebastien Jodogne <s.jodogne@gmail.com>
date Tue, 04 Dec 2012 17:27:23 +0100
parents fe6ba20d00a8
children 2442033911d6
files CMakeLists.txt Core/Compression/HierarchicalZipWriter.cpp Core/Compression/HierarchicalZipWriter.h Core/Compression/ZipWriter.h Core/Toolbox.cpp Core/Toolbox.h UnitTests/Zip.cpp UnitTests/main.cpp
diffstat 8 files changed, 427 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/CMakeLists.txt	Fri Nov 30 17:09:52 2012 +0100
+++ b/CMakeLists.txt	Tue Dec 04 17:27:23 2012 +0100
@@ -97,6 +97,7 @@
   Core/Compression/BufferCompressor.cpp
   Core/Compression/ZlibCompressor.cpp
   Core/Compression/ZipWriter.cpp
+  Core/Compression/HierarchicalZipWriter.cpp
   Core/OrthancException.cpp
   Core/DicomFormat/DicomArray.cpp
   Core/DicomFormat/DicomMap.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Core/Compression/HierarchicalZipWriter.cpp	Tue Dec 04 17:27:23 2012 +0100
@@ -0,0 +1,174 @@
+/**
+ * Orthanc - A Lightweight, RESTful DICOM Store
+ * Copyright (C) 2012 Medical Physics Department, CHU of Liege,
+ * Belgium
+ *
+ * This program is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * In addition, as a special exception, the copyright holders of this
+ * program give permission to link the code of its release with the
+ * OpenSSL project's "OpenSSL" library (or with modified versions of it
+ * that use the same license as the "OpenSSL" library), and distribute
+ * the linked executables. You must obey the GNU General Public License
+ * in all respects for all of the code used other than "OpenSSL". If you
+ * modify file(s) with this exception, you may extend this exception to
+ * your version of the file(s), but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files
+ * in the program, then also delete it here.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ **/
+
+
+#include "HierarchicalZipWriter.h"
+
+#include "../Toolbox.h"
+#include "../OrthancException.h"
+
+#include <boost/lexical_cast.hpp>
+
+namespace Orthanc
+{
+  /**
+   * Sanitizes "source" so that it can be used as a name inside the
+   * archive. Characters outside the 7-bit ASCII range are dropped, as
+   * well as any character that is neither a whitespace nor an
+   * alphanumeric character. A whitespace is written as a single ' ',
+   * but only if the last character written so far is not already a
+   * space. The result is finally stripped from its leading and
+   * trailing spaces.
+   **/
+  std::string HierarchicalZipWriter::Index::KeepAlphanumeric(const std::string& source)
+  {
+    std::string filtered;
+    filtered.reserve(source.size());
+
+    // Tells whether the last character appended to "filtered" is a space
+    bool previousWasSpace = false;
+
+    for (std::string::const_iterator c = source.begin(); c != source.end(); ++c)
+    {
+      // Ignore anything that is not 7-bit ASCII (the test is written
+      // so as to work whether "char" is signed or unsigned)
+      if (*c < 0 ||
+          *c >= 128)
+      {
+        continue;
+      }
+
+      if (isspace(*c))
+      {
+        if (!previousWasSpace)
+        {
+          filtered.push_back(' ');
+          previousWasSpace = true;
+        }
+      }
+      else if (isalnum(*c))
+      {
+        filtered.push_back(*c);
+        previousWasSpace = false;
+      }
+    }
+
+    return Toolbox::StripSpaces(filtered);
+  }
+
+  /**
+   * Returns the path of the directory that is on the top of the
+   * stack: the names of all the stacked directories except the root,
+   * each of them being followed by "/". The result is empty if only
+   * the root node is on the stack.
+   **/
+  std::string HierarchicalZipWriter::Index::GetCurrentDirectoryPath() const
+  {
+    std::string path;
+
+    // Start right after the root node, so that the path is never absolute
+    Stack::const_iterator current = stack_.begin();
+    for (++current; current != stack_.end(); ++current)
+    {
+      path.append((*current)->name_);
+      path.push_back('/');
+    }
+
+    return path;
+  }
+
+  /**
+   * Sanitizes "filename" with KeepAlphanumeric(), then registers it
+   * in the directory that is on the top of the stack. The first time
+   * a sanitized name is seen in this directory, it is returned as is.
+   * On each later use, the usage counter of the name is incremented
+   * and the returned name is suffixed by "-" and this counter
+   * ("name-2", "name-3"...).
+   **/
+  std::string HierarchicalZipWriter::Index::EnsureUniqueFilename(const char* filename)
+  {
+    const std::string sanitized = KeepAlphanumeric(filename);
+
+    Directory::Content& entries = stack_.back()->content_;
+    Directory::Content::iterator found = entries.find(sanitized);
+
+    if (found != entries.end())
+    {
+      // The name is already in use: Bump its counter and use it as a suffix
+      found->second++;
+      return sanitized + "-" + boost::lexical_cast<std::string>(found->second);
+    }
+
+    // First occurrence of this name in the current directory
+    entries[sanitized] = 1;
+    return sanitized;
+  }
+
+  // Creates an index whose stack only contains the (unnamed) root directory
+  HierarchicalZipWriter::Index::Index()
+  {
+    Directory* root = new Directory;
+    stack_.push_back(root);
+  }
+
+  // Frees all the directories that are still on the stack, including the root node
+  HierarchicalZipWriter::Index::~Index()
+  {
+    while (!stack_.empty())
+    {
+      delete stack_.back();
+      stack_.pop_back();
+    }
+  }
+
+  /**
+   * Registers a file in the current directory, and returns its full
+   * path inside the archive: the path of the current directory,
+   * followed by the sanitized, unique version of "name".
+   **/
+  std::string HierarchicalZipWriter::Index::CreateFile(const char* name)
+  {
+    std::string path = GetCurrentDirectoryPath();
+    path += EnsureUniqueFilename(name);
+    return path;
+  }
+
+  /**
+   * Opens a new subdirectory of the current directory. The name of
+   * the subdirectory is the sanitized, unique version of "name", as
+   * registered in the current directory. The subdirectory becomes the
+   * current directory until CloseDirectory() is called.
+   **/
+  void HierarchicalZipWriter::Index::CreateDirectory(const char* name)
+  {
+    // The name must be reserved in the parent directory, i.e. before
+    // the stack is modified
+    const std::string uniqueName = EnsureUniqueFilename(name);
+
+    Directory* child = new Directory;
+    stack_.push_back(child);
+    child->name_ = uniqueName;
+  }
+
+  /**
+   * Closes the current directory and goes back to its parent. Throws
+   * an OrthancException (ErrorCode_BadSequenceOfCalls) if the current
+   * directory is the root node, which cannot be closed.
+   **/
+  void HierarchicalZipWriter::Index::CloseDirectory()
+  {
+    if (stack_.size() == 1)
+    {
+      // Only the root node is left on the stack
+      throw OrthancException(ErrorCode_BadSequenceOfCalls);
+    }
+
+    Directory* closed = stack_.back();
+    stack_.pop_back();
+    delete closed;
+  }
+
+
+  HierarchicalZipWriter::HierarchicalZipWriter(const char* path)  // Sets up the underlying ZIP writer to output to "path"
+  {
+    writer_.SetOutputPath(path);
+    writer_.Open();  // The writer stays open until the destructor closes it
+  }
+
+  HierarchicalZipWriter::~HierarchicalZipWriter()  // Closes the underlying ZIP writer
+  {
+    writer_.Close();  // NOTE(review): if Close() can throw, the exception would escape from this destructor - to be confirmed against ZipWriter
+  }
+
+  /**
+   * Starts a new file in the current directory of the archive. The
+   * index turns "name" into a sanitized, unique path, which is then
+   * handed to the underlying ZIP writer.
+   **/
+  void HierarchicalZipWriter::CreateFile(const char* name)
+  {
+    const std::string pathInArchive = indexer_.CreateFile(name);
+    writer_.CreateFileInZip(pathInArchive.c_str());
+  }
+
+  void HierarchicalZipWriter::CreateDirectory(const char* name)  // Opens a subdirectory named after "name" in the index
+  {
+    indexer_.CreateDirectory(name);  // Only the index is updated: nothing is sent to the ZIP writer here
+  }
+
+  void HierarchicalZipWriter::CloseDirectory()  // Goes back to the parent of the current directory
+  {
+    indexer_.CloseDirectory();  // Throws an OrthancException if the current directory is the root
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Core/Compression/HierarchicalZipWriter.h	Tue Dec 04 17:27:23 2012 +0100
@@ -0,0 +1,121 @@
+/**
+ * Orthanc - A Lightweight, RESTful DICOM Store
+ * Copyright (C) 2012 Medical Physics Department, CHU of Liege,
+ * Belgium
+ *
+ * This program is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * In addition, as a special exception, the copyright holders of this
+ * program give permission to link the code of its release with the
+ * OpenSSL project's "OpenSSL" library (or with modified versions of it
+ * that use the same license as the "OpenSSL" library), and distribute
+ * the linked executables. You must obey the GNU General Public License
+ * in all respects for all of the code used other than "OpenSSL". If you
+ * modify file(s) with this exception, you may extend this exception to
+ * your version of the file(s), but you are not obligated to do so. If
+ * you do not wish to do so, delete this exception statement from your
+ * version. If you delete this exception statement from all source files
+ * in the program, then also delete it here.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ **/
+
+
+#pragma once
+
+#include "ZipWriter.h"
+
+#include <map>
+#include <list>
+#include <boost/lexical_cast.hpp>
+
+namespace Orthanc
+{
+  /**
+   * Wrapper around ZipWriter that organizes the files of the archive
+   * into directories. The names given by the caller are sanitized and
+   * made unique inside their directory by the internal "Index" class,
+   * that computes the path under which each file is stored.
+   **/
+  class HierarchicalZipWriter
+  {
+    FRIEND_TEST(HierarchicalZipWriter, Index);
+    FRIEND_TEST(HierarchicalZipWriter, Filenames);
+
+  private:
+    /**
+     * Bookkeeping of the directories that are currently open (as a
+     * stack whose first item is the root node), and of the names that
+     * were already used in each of them.
+     **/
+    class Index
+    {
+    private:
+      struct Directory
+      {
+        // Maps each sanitized name to the number of times it was used
+        typedef std::map<std::string, unsigned int>  Content;
+
+        std::string name_;
+        Content  content_;
+      };
+
+      typedef std::list<Directory*> Stack;
+  
+      // The stack owns its "Directory" objects (they are deleted by
+      // the destructor and by "CloseDirectory()")
+      Stack stack_;
+
+      // Copying is forbidden (declared, but not implemented): A copy
+      // would share the raw "Directory" pointers with the original
+      // object, and they would be deleted twice
+      Index(const Index&);
+      Index& operator= (const Index&);
+
+      // Path of the current directory, with a trailing "/" after each level
+      std::string GetCurrentDirectoryPath() const;
+
+      // Sanitizes "filename" and makes it unique in the current directory
+      std::string EnsureUniqueFilename(const char* filename);
+
+    public:
+      Index();
+
+      ~Index();
+
+      // Tells whether the current directory is the root node
+      bool IsRoot() const
+      {
+        return stack_.size() == 1;
+      }
+
+      // Registers a file in the current directory, and returns its path
+      std::string CreateFile(const char* name);
+
+      // Opens a subdirectory, that becomes the current directory
+      void CreateDirectory(const char* name);
+
+      // Goes back to the parent directory (throws if at the root)
+      void CloseDirectory();
+
+      // Sanitizes a name (cf. the implementation for the exact rules)
+      static std::string KeepAlphanumeric(const std::string& source);
+    };
+
+    Index indexer_;
+    ZipWriter writer_;
+
+  public:
+    // Opens the archive located at "path" for writing
+    HierarchicalZipWriter(const char* path);
+
+    // Closes the archive
+    ~HierarchicalZipWriter();
+
+    // Forwards the compression level to the underlying ZIP writer
+    void SetCompressionLevel(uint8_t level)
+    {
+      writer_.SetCompressionLevel(level);
+    }
+
+    // Returns the compression level of the underlying ZIP writer
+    uint8_t GetCompressionLevel() const
+    {
+      return writer_.GetCompressionLevel();
+    }
+
+    // Starts a new file in the current directory
+    void CreateFile(const char* name);
+
+    // Opens a subdirectory of the current directory
+    void CreateDirectory(const char* name);
+
+    // Goes back to the parent directory
+    void CloseDirectory();
+
+    // Forwards "length" bytes of "data" to the underlying ZIP writer
+    void Write(const char* data, size_t length)
+    {
+      writer_.Write(data, length);
+    }
+
+    // Forwards the content of "data" to the underlying ZIP writer
+    void Write(const std::string& data)
+    {
+      writer_.Write(data);
+    }
+  };
+}
--- a/Core/Compression/ZipWriter.h	Fri Nov 30 17:09:52 2012 +0100
+++ b/Core/Compression/ZipWriter.h	Tue Dec 04 17:27:23 2012 +0100
@@ -35,6 +35,7 @@
 #include <stdint.h>
 #include <string>
 #include <boost/shared_ptr.hpp>
+#include <gtest/gtest_prod.h>
 
 namespace Orthanc
 {
--- a/Core/Toolbox.cpp	Fri Nov 30 17:09:52 2012 +0100
+++ b/Core/Toolbox.cpp	Tue Dec 04 17:27:23 2012 +0100
@@ -566,5 +566,30 @@
     return boost::posix_time::to_iso_string(now);
   }
 
+  /**
+   * Returns a copy of "source" without its leading and trailing
+   * whitespace characters (as classified by "isspace()"). The
+   * whitespaces that lie inside the string are left untouched. An
+   * empty string is returned if "source" only contains whitespaces.
+   **/
+  std::string Toolbox::StripSpaces(const std::string& source)
+  {
+    size_t first = 0;
 
+    // Calling "isspace()" on a negative value is undefined behavior:
+    // The cast to "unsigned char" makes the bytes above 0x7f safe on
+    // the platforms where "char" is signed
+    while (first < source.length() &&
+           isspace(static_cast<unsigned char>(source[first])))
+    {
+      first++;
+    }
+
+    if (first == source.length())
+    {
+      // String containing only spaces
+      return "";
+    }
+
+    // "last" is the index one past the last character to be kept
+    size_t last = source.length();
+    while (last > first &&
+           isspace(static_cast<unsigned char>(source[last - 1])))
+    {
+      last--;
+    }
+
+    assert(first <= last);
+    return source.substr(first, last - first);
+  }
 }
--- a/Core/Toolbox.h	Fri Nov 30 17:09:52 2012 +0100
+++ b/Core/Toolbox.h	Tue Dec 04 17:27:23 2012 +0100
@@ -87,6 +87,8 @@
 
     std::string ConvertToAscii(const std::string& source);
 
+    std::string StripSpaces(const std::string& source);
+
     std::string GetNowIsoString();
   }
 }
--- a/UnitTests/Zip.cpp	Fri Nov 30 17:09:52 2012 +0100
+++ b/UnitTests/Zip.cpp	Tue Dec 04 17:27:23 2012 +0100
@@ -2,8 +2,12 @@
 
 #include "../Core/OrthancException.h"
 #include "../Core/Compression/ZipWriter.h"
+#include "../Core/Compression/HierarchicalZipWriter.h"
+#include "../Core/Toolbox.h"
 
 
+using namespace Orthanc;
+
 TEST(ZipWriter, Basic)
 {
   Orthanc::ZipWriter w;
@@ -22,3 +26,95 @@
   w.Open();
   ASSERT_THROW(w.Write("hello world"), Orthanc::OrthancException);
 }
+
+
+
+
+
+namespace Orthanc
+{
+  // The namespace is necessary
+  // http://code.google.com/p/googletest/wiki/AdvancedGuide#Private_Class_Members
+
+  TEST(HierarchicalZipWriter, Index)  // Checks the paths generated by the index while files and directories are created
+  {
+    HierarchicalZipWriter::Index i;
+    ASSERT_EQ("hello", i.CreateFile("hello"));  // First use of a name: it is returned as is
+    ASSERT_EQ("hello-2", i.CreateFile("hello"));  // Name already in use: suffixed by its usage counter
+    ASSERT_EQ("coucou", i.CreateFile("coucou"));
+    ASSERT_EQ("hello-3", i.CreateFile("hello"));
+
+    i.CreateDirectory("coucou");  // Files and directories share the same counters: this directory is "coucou-2"
+
+    ASSERT_EQ("coucou-2/world", i.CreateFile("world"));  // Each directory has its own set of counters
+    ASSERT_EQ("coucou-2/world-2", i.CreateFile("world"));
+
+    i.CreateDirectory("world");  // Third use of "world" in "coucou-2": this directory is "world-3"
+  
+    ASSERT_EQ("coucou-2/world-3/hello", i.CreateFile("hello"));
+    ASSERT_EQ("coucou-2/world-3/hello-2", i.CreateFile("hello"));
+
+    i.CloseDirectory();  // Back in "coucou-2"
+
+    ASSERT_EQ("coucou-2/world-4", i.CreateFile("world"));  // The counters of "coucou-2" were kept while "world-3" was open
+
+    i.CloseDirectory();  // Back in the root node
+
+    ASSERT_EQ("coucou-3", i.CreateFile("coucou"));
+
+    ASSERT_THROW(i.CloseDirectory(), OrthancException);  // The root node cannot be closed
+  }
+
+
+  TEST(HierarchicalZipWriter, Filenames)  // Checks the sanitization of the names by KeepAlphanumeric()
+  {
+    ASSERT_EQ("trE hell", HierarchicalZipWriter::Index::KeepAlphanumeric("    ÊtrE hellô  "));  // Non-ASCII characters are dropped, outer spaces are stripped
+    ASSERT_EQ("Hello world", HierarchicalZipWriter::Index::KeepAlphanumeric("    Hel^^lo  \t  <world>  "));  // Punctuation is dropped, whitespaces collapse into one space
+  }
+}
+
+
+TEST(HierarchicalZipWriter, Basic)  // Writes a small archive with nested directories; no assertion, the output is to be checked by hand (see below)
+{
+  static const std::string SPACES = "                             ";  // Padding prepended to the content of each file
+
+  HierarchicalZipWriter w("hello2.zip");  // Relative path: the location of the archive depends on where the tests are run
+
+  w.SetCompressionLevel(0);  // Level used for the first 3 files
+
+  // Inside "/"
+  w.CreateFile("hello");
+  w.Write(SPACES + "hello\n");
+  w.CreateFile("hello");
+  w.Write(SPACES + "hello-2\n");
+  w.CreateDirectory("hello");
+
+  // Inside "/hello-3"
+  w.CreateFile("hello");
+  w.Write(SPACES + "hello\n");
+  w.CreateDirectory("hello");
+
+  w.SetCompressionLevel(9);  // Level used for the last 3 files
+
+  // Inside "/hello-3/hello-2"
+  w.CreateFile("hello");
+  w.Write(SPACES + "hello\n");
+  w.CreateFile("hello");
+  w.Write(SPACES + "hello-2\n");
+  w.CloseDirectory();
+
+  // Inside "/hello-3"
+  w.CreateFile("hello");
+  w.Write(SPACES + "hello-3\n");  // "hello-3" is still open when "w" is destructed at the end of the test
+
+  /**
+
+     TO CHECK THE CONTENT OF THE "hello2.zip" FILE:
+
+     # unzip -v hello2.zip 
+
+     => There must be 6 files. The first 3 files must have a negative
+     compression ratio.
+
+  **/
+}
--- a/UnitTests/main.cpp	Fri Nov 30 17:09:52 2012 +0100
+++ b/UnitTests/main.cpp	Tue Dec 04 17:27:23 2012 +0100
@@ -245,6 +245,13 @@
   printf("[%s]\n", Toolbox::GetDirectoryOfExecutable().c_str());
 }
 
+TEST(Toolbox, StripSpaces)  // Checks the removal of the leading and trailing whitespaces
+{
+  ASSERT_EQ("", Toolbox::StripSpaces("       \t  \r   \n  "));  // Only whitespaces: empty result
+  ASSERT_EQ("coucou", Toolbox::StripSpaces("    coucou   \t  \r   \n  "));
+  ASSERT_EQ("cou   cou", Toolbox::StripSpaces("    cou   cou    \n  "));  // The inner whitespaces are kept
+  ASSERT_EQ("c", Toolbox::StripSpaces("    \n\t c\r    \n  "));  // Single character surrounded by whitespaces
+}
 
 
 #include <glog/logging.h>