changeset 1557:ad1e127b4ed5

fix in encodings
author Sebastien Jodogne <s.jodogne@gmail.com>
date Thu, 20 Aug 2015 17:29:42 +0200
parents b8dc2f855a83
children 124de28b32ed
files Core/Toolbox.cpp Core/Toolbox.h OrthancServer/FromDcmtkBridge.cpp OrthancServer/OrthancRestApi/OrthancRestAnonymizeModify.cpp
diffstat 4 files changed, 84 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/Core/Toolbox.cpp	Thu Aug 20 17:05:05 2015 +0200
+++ b/Core/Toolbox.cpp	Thu Aug 20 17:29:42 2015 +0200
@@ -566,77 +566,89 @@
   }
 
 
-  std::string Toolbox::ConvertToUtf8(const std::string& source,
-                                     const Encoding sourceEncoding)
+  static const char* GetBoostLocaleEncoding(const Encoding sourceEncoding)
   {
-    const char* encoding;
-
-
-    // http://bradleyross.users.sourceforge.net/docs/dicom/doc/src-html/org/dcm4che2/data/SpecificCharacterSet.html
     switch (sourceEncoding)
     {
       case Encoding_Utf8:
-        // Already in UTF-8: No conversion is required
-        return source;
+        return "UTF-8";
 
       case Encoding_Ascii:
-        return ConvertToAscii(source);
+        return "ASCII";
 
       case Encoding_Latin1:
-        encoding = "ISO-8859-1";
+        return "ISO-8859-1";
         break;
 
       case Encoding_Latin2:
-        encoding = "ISO-8859-2";
+        return "ISO-8859-2";
         break;
 
       case Encoding_Latin3:
-        encoding = "ISO-8859-3";
+        return "ISO-8859-3";
         break;
 
       case Encoding_Latin4:
-        encoding = "ISO-8859-4";
+        return "ISO-8859-4";
         break;
 
       case Encoding_Latin5:
-        encoding = "ISO-8859-9";
+        return "ISO-8859-9";
         break;
 
       case Encoding_Cyrillic:
-        encoding = "ISO-8859-5";
+        return "ISO-8859-5";
         break;
 
       case Encoding_Windows1251:
-        encoding = "WINDOWS-1251";
+        return "WINDOWS-1251";
         break;
 
       case Encoding_Arabic:
-        encoding = "ISO-8859-6";
+        return "ISO-8859-6";
         break;
 
       case Encoding_Greek:
-        encoding = "ISO-8859-7";
+        return "ISO-8859-7";
         break;
 
       case Encoding_Hebrew:
-        encoding = "ISO-8859-8";
+        return "ISO-8859-8";
         break;
         
       case Encoding_Japanese:
-        encoding = "SHIFT-JIS";
+        return "SHIFT-JIS";
         break;
 
       case Encoding_Chinese:
-        encoding = "GB18030";
+        return "GB18030";
         break;
 
       case Encoding_Thai:
-        encoding = "TIS620.2533-0";
+        return "TIS620.2533-0";
         break;
 
       default:
         throw OrthancException(ErrorCode_NotImplemented);
     }
+  }
+
+
+  std::string Toolbox::ConvertToUtf8(const std::string& source,
+                                     Encoding sourceEncoding)
+  {
+    if (sourceEncoding == Encoding_Utf8)
+    {
+      // Already in UTF-8: No conversion is required
+      return source;
+    }
+
+    if (sourceEncoding == Encoding_Ascii)
+    {
+      return ConvertToAscii(source);
+    }
+
+    const char* encoding = GetBoostLocaleEncoding(sourceEncoding);
 
     try
     {
@@ -650,6 +662,34 @@
   }
 
 
+  std::string Toolbox::ConvertFromUtf8(const std::string& source,  // text to re-encode (treated as UTF-8)
+                                       Encoding targetEncoding)    // encoding of the returned string
+  {  // Counterpart of ConvertToUtf8(): re-encodes "source" into "targetEncoding"
+    if (targetEncoding == Encoding_Utf8)
+    {
+      // Target is UTF-8 as well: return the input unchanged
+      return source;
+    }
+
+    if (targetEncoding == Encoding_Ascii)
+    {
+      return ConvertToAscii(source);  // ASCII is handled locally, without going through Boost.Locale
+    }
+
+    const char* encoding = GetBoostLocaleEncoding(targetEncoding);  // throws ErrorCode_NotImplemented for unmapped encodings
+
+    try
+    {
+      return boost::locale::conv::from_utf<char>(source, encoding);
+    }
+    catch (std::runtime_error&)
+    {
+      // Bad input string or bad encoding: fall back to ConvertToAscii()
+      return ConvertToAscii(source);
+    }
+  }
+
+
   std::string Toolbox::ConvertToAscii(const std::string& source)
   {
     std::string result;
--- a/Core/Toolbox.h	Thu Aug 20 17:05:05 2015 +0200
+++ b/Core/Toolbox.h	Thu Aug 20 17:29:42 2015 +0200
@@ -120,7 +120,10 @@
     std::string GetDirectoryOfExecutable();
 
     std::string ConvertToUtf8(const std::string& source,
-                              const Encoding sourceEncoding);
+                              Encoding sourceEncoding);
+
+    std::string ConvertFromUtf8(const std::string& source,
+                                Encoding targetEncoding);
 
     std::string ConvertToAscii(const std::string& source);
 
--- a/OrthancServer/FromDcmtkBridge.cpp	Thu Aug 20 17:05:05 2015 +0200
+++ b/OrthancServer/FromDcmtkBridge.cpp	Thu Aug 20 17:29:42 2015 +0200
@@ -122,7 +122,7 @@
   Encoding FromDcmtkBridge::DetectEncoding(DcmDataset& dataset)
   {
     // By default, Latin1 encoding is assumed
-    std::string s = Configuration::GetGlobalStringParameter("DefaultEncoding", "");
+    std::string s = Configuration::GetGlobalStringParameter("DefaultEncoding", "Latin1");
     Encoding encoding = s.empty() ? Encoding_Latin1 : StringToEncoding(s.c_str());
 
     OFString tmp;
--- a/OrthancServer/OrthancRestApi/OrthancRestAnonymizeModify.cpp	Thu Aug 20 17:05:05 2015 +0200
+++ b/OrthancServer/OrthancRestApi/OrthancRestAnonymizeModify.cpp	Thu Aug 20 17:29:42 2015 +0200
@@ -37,6 +37,7 @@
 #include "../../Core/Uuid.h"
 #include "../FromDcmtkBridge.h"
 #include "../ServerContext.h"
+#include "../OrthancInitialization.h"
 
 namespace Orthanc
 {
@@ -476,6 +477,19 @@
       return false;
     }
 
+    std::string tmp;
+    if (request["Tags"].isMember("SpecificCharacterSet"))
+    {
+      tmp = request["Tags"]["SpecificCharacterSet"].asString();
+    }
+    else
+    {
+      tmp = Configuration::GetGlobalStringParameter("DefaultEncoding", "Latin1");
+    }
+
+    Encoding encoding = StringToEncoding(tmp.c_str());
+    dicom.SetEncoding(encoding);
+
     ResourceType parentType = ResourceType_Instance;
 
     if (request.isMember("Parent"))
@@ -549,7 +563,8 @@
           }
           else if (tag["Type"] == "String")
           {
-            dicom.Replace(*it, tag["Value"].asString());
+            std::string value = tag["Value"].asString();
+            dicom.Replace(*it, Toolbox::ConvertFromUtf8(value, encoding));
           }
         }
       }
@@ -609,7 +624,7 @@
       }
       else
       {
-        dicom.Replace(tag, value);
+        dicom.Replace(tag, Toolbox::ConvertFromUtf8(value, encoding));
       }
     }