changeset 1347:60cc0ee61edb

author Sebastien Jodogne <s.jodogne@gmail.com>
date Tue, 21 Apr 2015 17:43:05 +0200
parents 911a1ad5ebe8
children cff17da28916
files Core/Enumerations.cpp Core/Enumerations.h Core/Toolbox.cpp NEWS OrthancServer/ParsedDicomFile.cpp Resources/Configuration.json Resources/EncodingTests.h Resources/EncodingTests.py UnitTestsSources/FromDcmtkTests.cpp
diffstat 9 files changed, 53 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/Core/Enumerations.cpp	Mon Apr 20 17:26:02 2015 +0200
+++ b/Core/Enumerations.cpp	Tue Apr 21 17:43:05 2015 +0200
@@ -289,6 +289,9 @@
       case Encoding_Cyrillic:
         return "Cyrillic";
 
+      case Encoding_Windows1251:
+        return "Windows1251";
+
       case Encoding_Arabic:
         return "Arabic";
 
@@ -410,6 +413,11 @@
       return Encoding_Cyrillic;
     }
 
+    if (s == "WINDOWS1251")
+    {
+      return Encoding_Windows1251;
+    }
+
     if (s == "ARABIC")
     {
       return Encoding_Arabic;
--- a/Core/Enumerations.h	Mon Apr 20 17:26:02 2015 +0200
+++ b/Core/Enumerations.h	Tue Apr 21 17:43:05 2015 +0200
@@ -243,6 +243,7 @@
     Encoding_Latin4,
     Encoding_Latin5,                        // Turkish
     Encoding_Cyrillic,
+    Encoding_Windows1251,                   // Windows-1251 (commonly used for Cyrillic)
     Encoding_Arabic,
     Encoding_Greek,
     Encoding_Hebrew,
--- a/Core/Toolbox.cpp	Mon Apr 20 17:26:02 2015 +0200
+++ b/Core/Toolbox.cpp	Tue Apr 21 17:43:05 2015 +0200
@@ -586,6 +586,10 @@
         encoding = "ISO-8859-5";
         break;
 
+      case Encoding_Windows1251:
+        encoding = "WINDOWS-1251";
+        break;
+
       case Encoding_Arabic:
         encoding = "ISO-8859-6";
         break;
--- a/NEWS	Mon Apr 20 17:26:02 2015 +0200
+++ b/NEWS	Tue Apr 21 17:43:05 2015 +0200
@@ -7,6 +7,7 @@
 * Prevent freeze on C-FIND if no DICOM tag is to be returned
 * Allow replacing PatientID/StudyInstanceUID/SeriesInstanceUID from Lua scripts
 * Fix issue 30 (QR response missing "Query/Retrieve Level" (008,0052))
+* Fix issue 32 (Cyrillic symbols): Introduction of the "Windows1251" encoding
 
 
 Version 0.8.6 (2015/02/12)
--- a/OrthancServer/ParsedDicomFile.cpp	Mon Apr 20 17:26:02 2015 +0200
+++ b/OrthancServer/ParsedDicomFile.cpp	Tue Apr 21 17:43:05 2015 +0200
@@ -1286,6 +1286,11 @@
         s = "ISO_IR 192";
         break;
 
+      case Encoding_Windows1251:
+        // This Cyrillic codepage is not officially supported by the
+        // DICOM standard. Do not set the SpecificCharacterSet tag.
+        return;
+
       case Encoding_Latin1:
         s = "ISO_IR 100";
         break;
--- a/Resources/Configuration.json	Mon Apr 20 17:26:02 2015 +0200
+++ b/Resources/Configuration.json	Tue Apr 21 17:43:05 2015 +0200
@@ -67,8 +67,8 @@
   // The default encoding that is assumed for DICOM files without
   // "SpecificCharacterSet" DICOM tag. The allowed values are "Ascii",
   // "Utf8", "Latin1", "Latin2", "Latin3", "Latin4", "Latin5",
-  // "Cyrillic", "Arabic", "Greek", "Hebrew", "Thai", "Japanese",
-  // and "Chinese".
+  // "Cyrillic", "Windows1251", "Arabic", "Greek", "Hebrew", "Thai",
+  // "Japanese", and "Chinese".
   "DefaultEncoding" : "Latin1",
 
   // The transfer syntaxes that are accepted by Orthanc C-Store SCP
--- a/Resources/EncodingTests.h	Mon Apr 20 17:26:02 2015 +0200
+++ b/Resources/EncodingTests.h	Tue Apr 21 17:43:05 2015 +0200
@@ -1,4 +1,4 @@
-static const unsigned int testEncodingsCount = 14;
+static const unsigned int testEncodingsCount = 18;
 static const ::Orthanc::Encoding testEncodings[] = {
   ::Orthanc::Encoding_Latin5,
   ::Orthanc::Encoding_Hebrew,
@@ -13,9 +13,13 @@
   ::Orthanc::Encoding_Thai,
   ::Orthanc::Encoding_Japanese,
   ::Orthanc::Encoding_Ascii,
-  ::Orthanc::Encoding_Chinese
+  ::Orthanc::Encoding_Windows1251,
+  ::Orthanc::Encoding_Chinese,
+  ::Orthanc::Encoding_Windows1251,
+  ::Orthanc::Encoding_Windows1251,
+  ::Orthanc::Encoding_Windows1251
 };
-static const char *testEncodingsEncoded[14] = {
+static const char *testEncodingsEncoded[18] = {
   "\x54\x65\x73\x74\xe9\xe4\xf6\xf2\xdd",
   "\x54\x65\x73\x74\xe3",
   "\x54\x65\x73\x74\xc8",
@@ -29,9 +33,13 @@
   "\x54\x65\x73\x74\xfb",
   "\x54\x65\x73\x74\x84\x44\x83\xa6\xc8",
   "\x54\x65\x73\x74",
-  "\x81\x30\x89\x37\x81\x30\x89\x38\xA8\xA4\xA8\xA2\x81\x30\x89\x39\x81\x30\x8A\x30"
+  "\x54\x65\x73\x74\xc4\x9e",
+  "\x81\x30\x89\x37\x81\x30\x89\x38\xA8\xA4\xA8\xA2\x81\x30\x89\x39\x81\x30\x8A\x30",
+  "\xd0\xe5\xed\xf2\xe3\xe5\xed\xee\xe3\xf0\xe0\xf4\xe8\xff",
+  "\xD2\xE0\xE7",
+  "\xcf\xf0\xff\xec\xe0\xff"
 };
-static const char *testEncodingsExpected[14] = {
+static const char *testEncodingsExpected[18] = {
   "\x54\x65\x73\x74\xc3\xa9\xc3\xa4\xc3\xb6\xc3\xb2\xc4\xb0",
   "\x54\x65\x73\x74\xd7\x93",
   "\x54\x65\x73\x74\xce\x98",
@@ -45,5 +53,9 @@
   "\x54\x65\x73\x74\xe0\xb9\x9b",
   "\x54\x65\x73\x74\xd0\x94\xce\x98\xef\xbe\x88",
   "\x54\x65\x73\x74",
-  "\xc3\x9e\xc3\x9f\xc3\xa0\xc3\xa1\xc3\xa2\xc3\xa3"
+  "\x54\x65\x73\x74\xd0\x94\xd1\x9b",
+  "\xc3\x9e\xc3\x9f\xc3\xa0\xc3\xa1\xc3\xa2\xc3\xa3",
+  "\xd0\xa0\xd0\xb5\xd0\xbd\xd1\x82\xd0\xb3\xd0\xb5\xd0\xbd\xd0\xbe\xd0\xb3\xd1\x80\xd0\xb0\xd1\x84\xd0\xb8\xd1\x8f",
+  "\xd0\xa2\xd0\xb0\xd0\xb7",
+  "\xd0\x9f\xd1\x80\xd1\x8f\xd0\xbc\xd0\xb0\xd1\x8f"
 };
--- a/Resources/EncodingTests.py	Mon Apr 20 17:26:02 2015 +0200
+++ b/Resources/EncodingTests.py	Tue Apr 21 17:43:05 2015 +0200
@@ -12,6 +12,7 @@
     'ISO-8859-4' : 'Latin4',
     'ISO-8859-9' : 'Latin5',
     'ISO-8859-5' : 'Cyrillic',
+    'WINDOWS-1251' : 'Windows1251',
     'ISO-8859-6' : 'Arabic',
     'ISO-8859-7' : 'Greek',
     'ISO-8859-8' : 'Hebrew',
@@ -49,6 +50,18 @@
 expected.append(ToArray('Þßàáâã'))
 encoded.append('"\\x81\\x30\\x89\\x37\\x81\\x30\\x89\\x38\\xA8\\xA4\\xA8\\xA2\\x81\\x30\\x89\\x39\\x81\\x30\\x8A\\x30"')
 
+# Issue 32
+# "encoded" was copied from the output of "dcm2xml +Ca cyrillic Issue32.dcm"
+l.append('::Orthanc::Encoding_Windows1251')
+encoded.append('"\\xd0\\xe5\\xed\\xf2\\xe3\\xe5\\xed\\xee\\xe3\\xf0\\xe0\\xf4\\xe8\\xff"')
+expected.append(ToArray('Рентгенография'))
+l.append('::Orthanc::Encoding_Windows1251')
+encoded.append('"\\xD2\\xE0\\xE7"')
+expected.append(ToArray('Таз'))
+l.append('::Orthanc::Encoding_Windows1251')
+encoded.append('"\\xcf\\xf0\\xff\\xec\\xe0\\xff"')
+expected.append(ToArray('Прямая'))
+
 
 if True:
     print 'static const unsigned int testEncodingsCount = %d;' % len(l)
--- a/UnitTestsSources/FromDcmtkTests.cpp	Mon Apr 20 17:26:02 2015 +0200
+++ b/UnitTestsSources/FromDcmtkTests.cpp	Tue Apr 21 17:43:05 2015 +0200
@@ -272,6 +272,7 @@
       f.SaveToMemoryBuffer(dicom);
     }
 
+    if (testEncodings[i] != Encoding_Windows1251)
     {
       ParsedDicomFile g(dicom);