# HG changeset patch # User Sebastien Jodogne # Date 1440084582 -7200 # Node ID ad1e127b4ed525c7ce9c283b2e629d20fbbcab70 # Parent b8dc2f855a83bdc4c388f4faf32a98b925035c7e fix in encodings diff -r b8dc2f855a83 -r ad1e127b4ed5 Core/Toolbox.cpp --- a/Core/Toolbox.cpp Thu Aug 20 17:05:05 2015 +0200 +++ b/Core/Toolbox.cpp Thu Aug 20 17:29:42 2015 +0200 @@ -566,77 +566,89 @@ } - std::string Toolbox::ConvertToUtf8(const std::string& source, - const Encoding sourceEncoding) + static const char* GetBoostLocaleEncoding(const Encoding sourceEncoding) { - const char* encoding; - - - // http://bradleyross.users.sourceforge.net/docs/dicom/doc/src-html/org/dcm4che2/data/SpecificCharacterSet.html switch (sourceEncoding) { case Encoding_Utf8: - // Already in UTF-8: No conversion is required - return source; + return "UTF-8"; case Encoding_Ascii: - return ConvertToAscii(source); + return "ASCII"; case Encoding_Latin1: - encoding = "ISO-8859-1"; + return "ISO-8859-1"; break; case Encoding_Latin2: - encoding = "ISO-8859-2"; + return "ISO-8859-2"; break; case Encoding_Latin3: - encoding = "ISO-8859-3"; + return "ISO-8859-3"; break; case Encoding_Latin4: - encoding = "ISO-8859-4"; + return "ISO-8859-4"; break; case Encoding_Latin5: - encoding = "ISO-8859-9"; + return "ISO-8859-9"; break; case Encoding_Cyrillic: - encoding = "ISO-8859-5"; + return "ISO-8859-5"; break; case Encoding_Windows1251: - encoding = "WINDOWS-1251"; + return "WINDOWS-1251"; break; case Encoding_Arabic: - encoding = "ISO-8859-6"; + return "ISO-8859-6"; break; case Encoding_Greek: - encoding = "ISO-8859-7"; + return "ISO-8859-7"; break; case Encoding_Hebrew: - encoding = "ISO-8859-8"; + return "ISO-8859-8"; break; case Encoding_Japanese: - encoding = "SHIFT-JIS"; + return "SHIFT-JIS"; break; case Encoding_Chinese: - encoding = "GB18030"; + return "GB18030"; break; case Encoding_Thai: - encoding = "TIS620.2533-0"; + return "TIS620.2533-0"; break; default: throw OrthancException(ErrorCode_NotImplemented); } + } + + + std::string Toolbox::ConvertToUtf8(const std::string& source, + Encoding sourceEncoding) + { + if (sourceEncoding == Encoding_Utf8) + { + // Already in UTF-8: No conversion is required + return source; + } + + if (sourceEncoding == Encoding_Ascii) + { + return ConvertToAscii(source); + } + + const char* encoding = GetBoostLocaleEncoding(sourceEncoding); try { @@ -650,6 +662,34 @@ } + std::string Toolbox::ConvertFromUtf8(const std::string& source, + Encoding targetEncoding) + { + if (targetEncoding == Encoding_Utf8) + { + // Already in UTF-8: No conversion is required + return source; + } + + if (targetEncoding == Encoding_Ascii) + { + return ConvertToAscii(source); + } + + const char* encoding = GetBoostLocaleEncoding(targetEncoding); + + try + { + return boost::locale::conv::from_utf(source, encoding); + } + catch (std::runtime_error&) + { + // Bad input string or bad encoding + return ConvertToAscii(source); + } + } + + std::string Toolbox::ConvertToAscii(const std::string& source) { std::string result; diff -r b8dc2f855a83 -r ad1e127b4ed5 Core/Toolbox.h --- a/Core/Toolbox.h Thu Aug 20 17:05:05 2015 +0200 +++ b/Core/Toolbox.h Thu Aug 20 17:29:42 2015 +0200 @@ -120,7 +120,10 @@ std::string GetDirectoryOfExecutable(); std::string ConvertToUtf8(const std::string& source, - const Encoding sourceEncoding); + Encoding sourceEncoding); + + std::string ConvertFromUtf8(const std::string& source, + Encoding targetEncoding); std::string ConvertToAscii(const std::string& source); diff -r b8dc2f855a83 -r ad1e127b4ed5 OrthancServer/FromDcmtkBridge.cpp --- a/OrthancServer/FromDcmtkBridge.cpp Thu Aug 20 17:05:05 2015 +0200 +++ b/OrthancServer/FromDcmtkBridge.cpp Thu Aug 20 17:29:42 2015 +0200 @@ -122,7 +122,7 @@ Encoding FromDcmtkBridge::DetectEncoding(DcmDataset& dataset) { // By default, Latin1 encoding is assumed - std::string s = Configuration::GetGlobalStringParameter("DefaultEncoding", ""); + std::string s = Configuration::GetGlobalStringParameter("DefaultEncoding", "Latin1"); Encoding encoding = s.empty() ? Encoding_Latin1 : StringToEncoding(s.c_str()); OFString tmp; diff -r b8dc2f855a83 -r ad1e127b4ed5 OrthancServer/OrthancRestApi/OrthancRestAnonymizeModify.cpp --- a/OrthancServer/OrthancRestApi/OrthancRestAnonymizeModify.cpp Thu Aug 20 17:05:05 2015 +0200 +++ b/OrthancServer/OrthancRestApi/OrthancRestAnonymizeModify.cpp Thu Aug 20 17:29:42 2015 +0200 @@ -37,6 +37,7 @@ #include "../../Core/Uuid.h" #include "../FromDcmtkBridge.h" #include "../ServerContext.h" +#include "../OrthancInitialization.h" namespace Orthanc { @@ -476,6 +477,19 @@ return false; } + std::string tmp; + if (request["Tags"].isMember("SpecificCharacterSet")) + { + tmp = request["Tags"]["SpecificCharacterSet"].asString(); + } + else + { + tmp = Configuration::GetGlobalStringParameter("DefaultEncoding", "Latin1"); + } + + Encoding encoding = StringToEncoding(tmp.c_str()); + dicom.SetEncoding(encoding); + ResourceType parentType = ResourceType_Instance; if (request.isMember("Parent")) @@ -549,7 +563,8 @@ } else if (tag["Type"] == "String") { - dicom.Replace(*it, tag["Value"].asString()); + std::string value = tag["Value"].asString(); + dicom.Replace(*it, Toolbox::ConvertFromUtf8(value, encoding)); } } } @@ -609,7 +624,7 @@ } else { - dicom.Replace(tag, value); + dicom.Replace(tag, Toolbox::ConvertFromUtf8(value, encoding)); } }