annotate Resources/EncodingTests.py @ 3103:81b58b549845

Back to using 'var' instead of 'let', since 'let' is not supported by many old browsers. All variable declarations have been moved to the top of the function to better show that their scope is the function.
author Alain Mazy <alain@mazy.be>
date Thu, 10 Jan 2019 10:51:36 +0100
parents 55c900a5b0e4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1088
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
1 #!/usr/bin/python
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
2 # -*- coding: utf-8 -*-
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
3
1091
a66224eec125 encoding tests
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1090
diff changeset
4 source = u'TestéäöòДΘĝדصķћ๛ネİ'
1088
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
5
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
6 encodings = {
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
7 'UTF-8' : 'Utf8',
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
8 'ASCII' : 'Ascii',
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
9 'ISO-8859-1' : 'Latin1',
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
10 'ISO-8859-2' : 'Latin2',
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
11 'ISO-8859-3' : 'Latin3',
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
12 'ISO-8859-4' : 'Latin4',
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
13 'ISO-8859-9' : 'Latin5',
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
14 'ISO-8859-5' : 'Cyrillic',
1347
60cc0ee61edb fix issue 32
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1091
diff changeset
15 'WINDOWS-1251' : 'Windows1251',
1088
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
16 'ISO-8859-6' : 'Arabic',
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
17 'ISO-8859-7' : 'Greek',
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
18 'ISO-8859-8' : 'Hebrew',
1090
e494ceb8d763 support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1088
diff changeset
19 'TIS-620' : 'Thai',
e494ceb8d763 support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1088
diff changeset
20 'SHIFT-JIS' : 'Japanese',
1091
a66224eec125 encoding tests
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1090
diff changeset
21 #'GB18030' : 'Chinese', # Done manually below (*)
1088
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
22 }
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
23
1090
e494ceb8d763 support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1088
diff changeset
24 #from encodings.aliases import aliases
e494ceb8d763 support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1088
diff changeset
25 #for a, b in aliases.iteritems():
e494ceb8d763 support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1088
diff changeset
26 # print '%s : %s' % (a, b)
e494ceb8d763 support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1088
diff changeset
27
e494ceb8d763 support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1088
diff changeset
28
1088
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
29 # "63" corresponds to "?"
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
30 l = []
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
31 encoded = []
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
32 expected = []
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
33
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
def ToArray(source):
    """Render *source* as a double-quoted string of ``\\xNN`` hex escapes.

    Every byte of the input becomes one ``\\xNN`` escape (two lowercase
    hex digits), and the whole sequence is wrapped in double quotes so it
    can be pasted as a C string literal.

    source -- a byte string, a bytearray, or anything else accepted by
              ``bytearray()``.  Text (unicode) input is encoded as UTF-8
              first, because ``bytearray()`` refuses text when no
              encoding is given.

    Returns the quoted literal as a string; empty input yields ``""``.
    """
    # type(u'') is "unicode" on Python 2 and "str" on Python 3, so this
    # only triggers for real text and leaves byte strings untouched.
    if isinstance(source, type(u'')):
        source = source.encode('utf-8')

    # Iterating a bytearray yields integers on every Python version.
    return '"%s"' % ''.join('\\x%02x' % byte for byte in bytearray(source))
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
39
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
40
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
# Build one test vector per entry of "encodings": the Orthanc enumeration
# name, the reference string encoded in that character set, and the UTF-8
# text expected after decoding it again.
# items() is used instead of the Python-2-only iteritems(); it visits the
# same pairs in the same order and also exists on Python 3.
for encoding, orthancEnumeration in encodings.items():
    l.append('::Orthanc::Encoding_%s' % orthancEnumeration)
    # 'ignore' drops the characters of "source" that the target encoding
    # cannot represent, so "s" only holds the representable subset.
    s = source.encode(encoding, 'ignore')
    encoded.append(ToArray(s))
    # Round-trip through the codec to get the UTF-8 form of that subset.
    expected.append(ToArray(s.decode(encoding).encode('utf-8')))
6fd4434c1bcf unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff changeset
46
1090
e494ceb8d763 support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1088
diff changeset
47
1091
a66224eec125 encoding tests
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1090
diff changeset
48 # https://en.wikipedia.org/wiki/GB_18030#Technical_details (*)
1090
e494ceb8d763 support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1088
diff changeset
# Hand-written GB18030 vector: "encoded" holds the raw GB18030 bytes and
# "expected" the same text in UTF-8.  The text literals below are explicit
# unicode literals encoded to UTF-8, so the bytes handed to ToArray() do not
# depend on how the interpreter treats plain string literals.
l.append('::Orthanc::Encoding_Chinese')
expected.append(ToArray(u'Þßàáâã'.encode('utf-8')))
encoded.append('"\\x81\\x30\\x89\\x37\\x81\\x30\\x89\\x38\\xA8\\xA4\\xA8\\xA2\\x81\\x30\\x89\\x39\\x81\\x30\\x8A\\x30"')

# Issue 32
# "encoded" is the copy/paste from "dcm2xml +Ca cyrillic Issue32.dcm"
l.append('::Orthanc::Encoding_Windows1251')
encoded.append('"\\xd0\\xe5\\xed\\xf2\\xe3\\xe5\\xed\\xee\\xe3\\xf0\\xe0\\xf4\\xe8\\xff"')
expected.append(ToArray(u'Рентгенография'.encode('utf-8')))
l.append('::Orthanc::Encoding_Windows1251')
encoded.append('"\\xD2\\xE0\\xE7"')
expected.append(ToArray(u'Таз'.encode('utf-8')))
l.append('::Orthanc::Encoding_Windows1251')
encoded.append('"\\xcf\\xf0\\xff\\xec\\xe0\\xff"')
expected.append(ToArray(u'Прямая'.encode('utf-8')))
60cc0ee61edb fix issue 32
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1091
diff changeset
64
1090
e494ceb8d763 support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1088
diff changeset
65
e494ceb8d763 support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1088
diff changeset
# Emit the collected vectors.  The "if True" switch selects between the C
# declarations (default) and a plain dump of the expected strings that is
# handy when debugging.  print(...) with a single parenthesised argument
# produces the same output on Python 2 and Python 3.
if True:
    print('static const unsigned int testEncodingsCount = %d;' % len(l))
    print('static const ::Orthanc::Encoding testEncodings[] = {\n %s\n};' % (',\n '.join(l)))
    print('static const char *testEncodingsEncoded[%d] = {\n %s\n};' % (len(l), ',\n '.join(encoded)))
    print('static const char *testEncodingsExpected[%d] = {\n %s\n};' % (len(l), ',\n '.join(expected)))
else:
    for i, item in enumerate(expected):
        print(item)
        #print('%s: %s' % (item, l[i]))
2326
423d3b692bb9 Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1347
diff changeset
75
423d3b692bb9 Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1347
diff changeset
76
423d3b692bb9 Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 1347
diff changeset
77
2329
Sebastien Jodogne <s.jodogne@gmail.com>
parents: 2328
diff changeset
# Reference input and output for the upper-casing test: a mixed-case
# accented phrase followed by the multi-script "source" string.
# NOTE(review): the result of unicode upper() depends on the interpreter
# (for instance how the German sharp s is mapped) -- confirm the generated
# constant against the behavior expected on the C++ side.
u = (u'grüßEN SébasTIen %s' % source)
print('static const char *toUpperSource = %s;' % ToArray(u.encode('utf-8')))
print('static const char *toUpperResult = %s;' % ToArray(u.upper().encode('utf-8')))