annotate Resources/EncodingTests.py @ 3200:1f4a2c58e7fa

set encoding to UTF-8, IBinaryFormatter
author Sebastien Jodogne <s.jodogne@gmail.com>
date Wed, 06 Feb 2019 13:51:20 +0100
parents 55c900a5b0e4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1088
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
1 #!/usr/bin/python
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
3
1091
a66224eec125 encoding tests
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1090
diff changeset
# Sample text mixing characters from many scripts; it is the input that
# gets encoded with each of the codecs listed in "encodings".
source = u'TestéäöòДΘĝדصķћ๛ネİ'

# Python codec name -> suffix appended to "::Orthanc::Encoding_" to form
# the corresponding C++ enumeration value.
encodings = {
    'UTF-8': 'Utf8',
    'ASCII': 'Ascii',
    'ISO-8859-1': 'Latin1',
    'ISO-8859-2': 'Latin2',
    'ISO-8859-3': 'Latin3',
    'ISO-8859-4': 'Latin4',
    'ISO-8859-9': 'Latin5',
    'ISO-8859-5': 'Cyrillic',
    'WINDOWS-1251': 'Windows1251',
    'ISO-8859-6': 'Arabic',
    'ISO-8859-7': 'Greek',
    'ISO-8859-8': 'Hebrew',
    'TIS-620': 'Thai',
    'SHIFT-JIS': 'Japanese',
    #'GB18030' : 'Chinese',   # Done manually below (*)
}

# Debugging aid: uncomment to list the codec aliases known to Python.
#from encodings.aliases import aliases
#for a, b in aliases.iteritems():
#    print '%s : %s' % (a, b)


# "63" corresponds to "?"

# Parallel accumulators, filled in lockstep (one entry per test case):
#   l        - C++ enumeration value of the encoding
#   encoded  - C string literal holding the encoded bytes
#   expected - C string literal holding the expected UTF-8 bytes
l, encoded, expected = [], [], []
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
33
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
def ToArray(source):
    """Render *source* as a double-quoted C string literal of hex escapes.

    Each byte of the input is written as a backslash-x escape followed by
    two lowercase hexadecimal digits, and the whole sequence is wrapped in
    double quotes so that it can be pasted into C/C++ source code.

    Parameters:
      source -- a byte string, bytearray, or any other value accepted by
                bytearray(). Text (unicode) strings are also accepted and
                are encoded as UTF-8 first.

    Returns:
      The quoted literal as a native string; '""' for empty input.
    """
    # bytearray() refuses text without an explicit encoding, so convert
    # text to UTF-8 bytes up front.  "type(u'')" is the text type on both
    # Python 2 (unicode) and Python 3 (str).
    if isinstance(source, type(u'')):
        source = source.encode('utf-8')

    # Join once instead of growing the string with "+=" inside the loop.
    escapes = ''.join('\\x%02x' % byte for byte in bytearray(source))
    return '"%s"' % escapes
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
39
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
40
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
# Build one test case per codec: encode the sample text, then record both
# the encoded bytes and the UTF-8 form of whatever survived the round trip.
# ".items()" is used instead of the Python-2-only ".iteritems()"; it yields
# the same pairs in the same order.
for encoding, orthancEnumeration in encodings.items():
    l.append('::Orthanc::Encoding_%s' % orthancEnumeration)

    # 'ignore' silently drops the characters the codec cannot represent
    s = source.encode(encoding, 'ignore')
    encoded.append(ToArray(s))

    # Decoding the encoded bytes gives the text a correct decoder must
    # produce; it is stored as UTF-8
    expected.append(ToArray(s.decode(encoding).encode('utf-8')))
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
46
1090
e494ceb8d763 support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1088
diff changeset
47
1091
a66224eec125 encoding tests
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1090
diff changeset
# Test cases entered by hand instead of being produced by Python's codecs.
# Each entry is: (C++ enumeration value,
#                 C string literal of the encoded bytes,
#                 expected text, as written in this UTF-8 source file)
_manualCases = [
    # https://en.wikipedia.org/wiki/GB_18030#Technical_details (*)
    ('::Orthanc::Encoding_Chinese',
     '"\\x81\\x30\\x89\\x37\\x81\\x30\\x89\\x38\\xA8\\xA4\\xA8\\xA2\\x81\\x30\\x89\\x39\\x81\\x30\\x8A\\x30"',
     'Þßàáâã'),

    # Issue 32
    # "encoded" is the copy/paste from "dcm2xml +Ca cyrillic Issue32.dcm"
    ('::Orthanc::Encoding_Windows1251',
     '"\\xd0\\xe5\\xed\\xf2\\xe3\\xe5\\xed\\xee\\xe3\\xf0\\xe0\\xf4\\xe8\\xff"',
     'Рентгенография'),
    ('::Orthanc::Encoding_Windows1251',
     '"\\xD2\\xE0\\xE7"',
     'Таз'),
    ('::Orthanc::Encoding_Windows1251',
     '"\\xcf\\xf0\\xff\\xec\\xe0\\xff"',
     'Прямая'),
]

for _enumeration, _encodedLiteral, _expectedText in _manualCases:
    l.append(_enumeration)
    encoded.append(_encodedLiteral)
    expected.append(ToArray(_expectedText))
60cc0ee61edb fix issue 32
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1091
diff changeset
64
1090
e494ceb8d763 support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1088
diff changeset
65
e494ceb8d763 support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1088
diff changeset
# Output mode switch.  True: print the C++ arrays consumed by the unit
# tests.  False: print only the expected literals, one per line (debugging).
# This replaces a bare "if True:" whose "else" branch could never run.
generateCppArrays = True

# Single-argument "print(...)" behaves identically on Python 2 and Python 3.
if generateCppArrays:
    print('static const unsigned int testEncodingsCount = %d;' % len(l))
    print('static const ::Orthanc::Encoding testEncodings[] = {\n %s\n};' % (',\n '.join(l)))
    print('static const char *testEncodingsEncoded[%d] = {\n %s\n};' % (len(l), ',\n '.join(encoded)))
    print('static const char *testEncodingsExpected[%d] = {\n %s\n};' % (len(l), ',\n '.join(expected)))
else:
    for expectedLiteral in expected:
        print(expectedLiteral)
    # Variant that also shows the encoding next to each literal:
    #for expectedLiteral, enumeration in zip(expected, l):
    #    print('%s: %s' % (expectedLiteral, enumeration))
2326
423d3b692bb9 Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1347
diff changeset
75
423d3b692bb9 Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1347
diff changeset
76
423d3b692bb9 Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1347
diff changeset
77
2329
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 2328
diff changeset
# Mixed-case text with accents, followed by the multi-script sample, used
# to generate the input and the expected output of the upper-casing test.
u = (u'grüßEN SébasTIen %s' % source)

# NOTE(review): the expected result is whatever this interpreter's
# "upper()" produces.  Python 3 is understood to apply full Unicode case
# mapping (e.g. u'ß' becomes u'SS') whereas Python 2 leaves such
# characters untouched -- confirm which one the C++ test expects before
# regenerating with a different Python version.
# Single-argument "print(...)" behaves identically on Python 2 and Python 3.
print('static const char *toUpperSource = %s;' % ToArray(u.encode('utf-8')))
print('static const char *toUpperResult = %s;' % ToArray(u.upper().encode('utf-8')))