diff OrthancServer/FromDcmtkBridge.cpp @ 957:63973b76a51f

detection of encoding
author Sebastien Jodogne <s.jodogne@gmail.com>
date Wed, 25 Jun 2014 21:21:18 +0200
parents 2fd5a163776d
children 2f76b92addd4
line wrap: on
line diff
--- a/OrthancServer/FromDcmtkBridge.cpp	Wed Jun 25 16:08:47 2014 +0200
+++ b/OrthancServer/FromDcmtkBridge.cpp	Wed Jun 25 21:21:18 2014 +0200
@@ -61,6 +61,7 @@
 #include <dcmtk/dcmdata/dcistrmb.h>
 #include <dcmtk/dcmdata/dcuid.h>
 #include <dcmtk/dcmdata/dcmetinf.h>
+#include <dcmtk/dcmdata/dcdeftag.h>
 
 #include <dcmtk/dcmdata/dcvrae.h>
 #include <dcmtk/dcmdata/dcvras.h>
@@ -118,8 +119,34 @@
 
   Encoding FromDcmtkBridge::DetectEncoding(DcmDataset& dataset)
   {
-    // TODO Implement this!
-    return Encoding_Latin1;
+    // Default to UTF-8; this value is kept when the Specific Character Set lookup fails or yields an empty string (as in dcm2xml.cc)
+    Encoding encoding = Encoding_Utf8;
+
+    OFString tmp;
+    if (dataset.findAndGetOFString(DCM_SpecificCharacterSet, tmp).good())
+    {
+      std::string characterSet = Toolbox::StripSpaces(std::string(tmp.c_str()));
+
+      // TODO Add more encodings (only "ISO_IR 6", "ISO_IR 192" and "ISO_IR 100" are mapped below)
+
+      if (characterSet == "ISO_IR 6" ||
+          characterSet == "ISO_IR 192")
+      {
+        encoding = Encoding_Utf8;
+      }
+      else if (characterSet == "ISO_IR 100")
+      {
+        encoding = Encoding_Latin1;
+      }
+      else if (!characterSet.empty())
+      {
+        LOG(WARNING) << "Value of Specific Character Set (0008,0005) is not supported: " << characterSet;
+        // Unrecognized non-empty value: fall back to ASCII (remove all special characters)
+        encoding = Encoding_Ascii;
+      }
+    }
+
+    return encoding;
   }