changeset 107:3b45473c0a73

replace boost::locale with iconv for debian
author Sebastien Jodogne <s.jodogne@gmail.com>
date Thu, 04 Oct 2012 11:22:20 +0200
parents 332fec038d52
children a6e41de88a53
files Core/DicomFormat/DicomIntegerPixelAccessor.cpp Core/Toolbox.cpp Core/Toolbox.h OrthancServer/FromDcmtkBridge.cpp Resources/CMake/BoostConfiguration.cmake UnitTests/main.cpp
diffstat 6 files changed, 176 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/Core/DicomFormat/DicomIntegerPixelAccessor.cpp	Wed Oct 03 17:53:23 2012 +0200
+++ b/Core/DicomFormat/DicomIntegerPixelAccessor.cpp	Thu Oct 04 11:22:20 2012 +0200
@@ -27,6 +27,7 @@
 #include "../OrthancException.h"
 #include <boost/lexical_cast.hpp>
 #include <limits>
+#include <cassert>
 
 namespace Orthanc
 {
--- a/Core/Toolbox.cpp	Wed Oct 03 17:53:23 2012 +0200
+++ b/Core/Toolbox.cpp	Thu Oct 04 11:22:20 2012 +0200
@@ -26,6 +26,7 @@
 #include <boost/filesystem.hpp>
 #include <boost/filesystem/fstream.hpp>
 #include <algorithm>
+#include <ctype.h>
 
 #if defined(_WIN32)
 #include <windows.h>
@@ -42,9 +43,73 @@
 #include <unistd.h>
 #endif
 
+#if BOOST_HAS_LOCALE == 1
+#include <boost/locale.hpp>
+#else
+#include <errno.h>
+#include <iconv.h>
+#endif
+
 #include "../Resources/md5/md5.h"
 #include "../Resources/base64/base64.h"
 
+
+#if BOOST_HAS_LOCALE == 0
+namespace
+{
+  /**
+   * Wrapper around an iconv conversion descriptor. The descriptor is
+   * opened by the constructor and closed by the destructor.
+   **/
+  class IconvRabi
+  {
+  private:
+    iconv_t context_;
+
+    // The descriptor is closed by the destructor: copying the object
+    // would close it twice, so copies are forbidden (declared, never defined)
+    IconvRabi(const IconvRabi&);
+    IconvRabi& operator= (const IconvRabi&);
+
+  public:
+    /**
+     * Open a converter from the "fromcode" to the "tocode" character set.
+     * Throws Orthanc::OrthancException if iconv_open() fails.
+     **/
+    IconvRabi(const char* tocode, const char* fromcode)
+    {
+      context_ = iconv_open(tocode, fromcode);
+
+      // iconv_open() reports a failure with "(iconv_t) -1", not with NULL
+      if (context_ == (iconv_t) -1)
+      {
+        throw Orthanc::OrthancException("Unknown code page");
+      }
+    }
+    
+    ~IconvRabi()
+    {
+      iconv_close(context_);
+    }
+
+    /**
+     * Convert the whole "source" string and return the converted bytes.
+     * Throws Orthanc::OrthancException if iconv() reports an error
+     * other than a full output buffer.
+     **/
+    std::string Convert(const std::string& source)
+    {
+      if (source.empty())
+      {
+        return "";
+      }
+
+      std::string result;
+
+      // iconv() takes a non-const input pointer, hence the cast
+      char* sourcePos = const_cast<char*>(&source[0]);
+      size_t sourceLeft = source.size();
+
+      // Temporary output buffer, reused for each call to iconv()
+      std::vector<char> storage(source.size() + 10);
+      
+      while (sourceLeft > 0)
+      {
+        char* target = &storage[0];
+        size_t targetLeft = storage.size();
+        size_t status = iconv(context_, &sourcePos, &sourceLeft, &target, &targetLeft);
+
+        // iconv() returns "(size_t) -1" on error: as "size_t" is
+        // unsigned, the status cannot be tested with "< 0". E2BIG only
+        // means that the output buffer is full, in which case the
+        // conversion goes on with an emptied buffer. Any other error
+        // would otherwise leave "sourceLeft" unchanged and loop forever.
+        if (status == static_cast<size_t>(-1) &&
+            errno != E2BIG)
+        {
+          throw Orthanc::OrthancException("Bad character in sequence");
+        }
+
+        // Append the bytes that were produced by this call
+        result.append(&storage[0], storage.size() - targetLeft);
+      }
+
+      return result;
+    }
+  };
+}
+#endif
+
+
 namespace Orthanc
 {
   static bool finish;
@@ -398,4 +463,49 @@
     return p.parent_path().string();
   }
 
+
+  /**
+   * Convert "source" from the "fromEncoding" character set to UTF-8.
+   * Depending on BOOST_HAS_LOCALE, the conversion is done either by
+   * boost::locale or by iconv. If the converter signals an error, the
+   * result of ConvertToAscii(source) is returned instead.
+   **/
+  std::string Toolbox::ConvertToUtf8(const std::string& source,
+                                     const char* fromEncoding)
+  {
+#if BOOST_HAS_LOCALE == 1
+    try
+    {
+      return boost::locale::conv::to_utf<char>(source, fromEncoding);
+    }
+    catch (std::runtime_error&)
+    {
+      // Bad input string or bad encoding
+      return ConvertToAscii(source);
+    }
+#else
+    try
+    {
+      // The converter is created inside the "try" block, so that an
+      // exception thrown by its constructor (unknown code page) also
+      // leads to the ASCII fallback, as in the boost::locale branch
+      IconvRabi converter("UTF-8", fromEncoding);
+      return converter.Convert(source);
+    }
+    catch (OrthancException&)
+    {
+      // Bad input string or bad encoding
+      return ConvertToAscii(source);
+    }
+#endif
+  }
+
+
+  /**
+   * Build a copy of "source" that only contains its 7-bit characters
+   * that are not control characters. All the other bytes are dropped.
+   **/
+  std::string Toolbox::ConvertToAscii(const std::string& source)
+  {
+    std::string filtered;
+    filtered.reserve(source.size());
+
+    for (std::string::const_iterator it = source.begin(); it != source.end(); ++it)
+    {
+      // Work on the unsigned value of the byte, so that the test does
+      // not depend on the signedness of "char"
+      const unsigned char c = static_cast<unsigned char>(*it);
+
+      if (c >= 128 || iscntrl(c))
+      {
+        continue;   // Not a printable 7-bit character: skip it
+      }
+
+      filtered.push_back(*it);
+    }
+
+    return filtered;
+  }
+
+
 }
--- a/Core/Toolbox.h	Wed Oct 03 17:53:23 2012 +0200
+++ b/Core/Toolbox.h	Thu Oct 04 11:22:20 2012 +0200
@@ -66,5 +66,10 @@
     std::string GetPathToExecutable();
 
     std::string GetDirectoryOfExecutable();
+
+    /**
+     * Convert "source" from the "fromEncoding" character set to UTF-8.
+     * The result of ConvertToAscii() is returned instead when the
+     * underlying converter signals an error.
+     **/
+    std::string ConvertToUtf8(const std::string& source,
+                              const char* fromEncoding);
+
+    /**
+     * Return a copy of "source" in which only the 7-bit characters
+     * that are not control characters are kept.
+     **/
+    std::string ConvertToAscii(const std::string& source);
   }
 }
--- a/OrthancServer/FromDcmtkBridge.cpp	Wed Oct 03 17:53:23 2012 +0200
+++ b/OrthancServer/FromDcmtkBridge.cpp	Thu Oct 04 11:22:20 2012 +0200
@@ -24,6 +24,7 @@
 #include "FromDcmtkBridge.h"
 
 #include "ToDcmtkBridge.h"
+#include "../Core/Toolbox.h"
 #include "../Core/OrthancException.h"
 #include "../Core/PngWriter.h"
 #include "../Core/DicomFormat/DicomString.h"
@@ -32,7 +33,6 @@
 
 #include <limits>
 
-#include <boost/locale.hpp>
 #include <boost/lexical_cast.hpp>
 
 #include <dcmtk/dcmdata/dcdicent.h>
@@ -88,17 +88,7 @@
           c != NULL)
       {
         std::string s(c);
-        std::string utf8;
-        try
-        {
-          utf8 = boost::locale::conv::to_utf<char>(s, "ISO-8859-1"); // TODO Parameter?
-        }
-        catch (std::runtime_error&)
-        {
-          // Bad input string or bad encoding
-          utf8 = s;
-        }
-
+        std::string utf8 = Toolbox::ConvertToUtf8(s, "ISO-8859-1"); // TODO Parameter?
         return new DicomString(utf8);
       }
       else
--- a/Resources/CMake/BoostConfiguration.cmake	Wed Oct 03 17:53:23 2012 +0200
+++ b/Resources/CMake/BoostConfiguration.cmake	Thu Oct 04 11:22:20 2012 +0200
@@ -1,23 +1,31 @@
 if (${STATIC_BUILD})
   SET(BOOST_STATIC 1)
 else()
+  include(FindBoost)
+
+  SET(BOOST_STATIC 0)
+  set(Boost_DEBUG 1)
+  #set(Boost_USE_STATIC_LIBS ON)
+
   find_package(Boost
-    COMPONENTS filesystem thread system locale)
+    COMPONENTS filesystem thread system)
+
+  if (NOT Boost_FOUND)
+    message(FATAL_ERROR "Unable to locate Boost on this system")
+  endif()
 
-  if (${Boost_VERSION} LESS 104800)
-    # boost::locale is only available from 1.48.00
-    message("Too old version of Boost (${Boost_LIB_VERSION}): Building the static version")
-    SET(BOOST_STATIC 1)
-  else()
-    SET(BOOST_STATIC 0)
+  #if (${Boost_VERSION} LESS 104800)
+  # boost::locale is only available from 1.48.00
+  #message("Too old version of Boost (${Boost_LIB_VERSION}): Building the static version")
+  #  SET(BOOST_STATIC 1)
+  #endif()
 
-    add_definitions(
-      -DBOOST_FILESYSTEM_VERSION=3
-      )
+  #add_definitions(
+  #  -DBOOST_FILESYSTEM_VERSION=1
+  #  )
 
-    include_directories(${Boost_INCLUDE_DIRS})
-    link_libraries(${Boost_LIBRARIES})
-  endif()
+  include_directories(${Boost_INCLUDE_DIRS})
+  link_libraries(${Boost_LIBRARIES})
 endif()
 
 
@@ -71,6 +79,7 @@
     -DBOOST_REGEX_NO_LIB
     -DBOOST_SYSTEM_NO_LIB
     -DBOOST_LOCALE_NO_LIB
+    -DBOOST_HAS_LOCALE=1
     )
 
   if (${CMAKE_COMPILER_IS_GNUCXX})
@@ -82,4 +91,8 @@
     )
 
   source_group(ThirdParty\\Boost REGULAR_EXPRESSION ${BOOST_SOURCES_DIR}/.*)
+else()
+  add_definitions(
+    -DBOOST_HAS_LOCALE=0
+    )
 endif()
--- a/UnitTests/main.cpp	Wed Oct 03 17:53:23 2012 +0200
+++ b/UnitTests/main.cpp	Thu Oct 04 11:22:20 2012 +0200
@@ -288,6 +288,38 @@
   LOG(INFO) << "I say hello";
 }
 
+TEST(Toolbox, ConvertFromLatin1)
+{
+  // Latin-1 test string (10 bytes, the last one being a null byte)
+  const unsigned char latin1[10] = { 0xe0, 0xe9, 0xea, 0xe7, 0x26, 0xc6, 0x61, 0x62, 0x63, 0x00 };
+  const std::string s(reinterpret_cast<const char*>(latin1), sizeof(latin1));
+
+  // Only "&abc" is kept by the ASCII filter
+  ASSERT_EQ("&abc", Toolbox::ConvertToAscii(s));
+
+  // Reference UTF-8 bytes: they were obtained by writing the first 9
+  // bytes of the test string to a file, opening this file in Emacs,
+  // saving it with UTF-8 encoding, then running "hexdump -C". The
+  // last byte is the null terminator of the test string.
+  const unsigned char expected[15] = { 0xc3, 0xa0, 0xc3, 0xa9, 0xc3, 0xaa, 0xc3, 0xa7,
+                                       0x26, 0xc3, 0x86, 0x61, 0x62, 0x63, 0x00 };
+
+  const std::string utf8 = Toolbox::ConvertToUtf8(s, "ISO-8859-1");
+  ASSERT_EQ(sizeof(expected), utf8.size());
+
+  for (size_t i = 0; i < sizeof(expected); i++)
+  {
+    ASSERT_EQ(expected[i], static_cast<unsigned char>(utf8[i]));
+  }
+}
+
 
 int main(int argc, char **argv)
 {