Mercurial > hg > orthanc
annotate Resources/EncodingTests.py @ 3103:81b58b549845
back to using 'var' instead of 'let' since 'let' is not supported by many old browsers. All variable declarations have been moved to the top of the function to better show that their scope is the function
author | Alain Mazy <alain@mazy.be> |
---|---|
date | Thu, 10 Jan 2019 10:51:36 +0100 |
parents | 55c900a5b0e4 |
children |
rev | line source |
---|---|
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
1 #!/usr/bin/python |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
3 |
1091 | 4 source = u'TestéäöòДΘĝדصķћ๛ネİ' |
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
5 |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
6 encodings = { |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
7 'UTF-8' : 'Utf8', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
8 'ASCII' : 'Ascii', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
9 'ISO-8859-1' : 'Latin1', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
10 'ISO-8859-2' : 'Latin2', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
11 'ISO-8859-3' : 'Latin3', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
12 'ISO-8859-4' : 'Latin4', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
13 'ISO-8859-9' : 'Latin5', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
14 'ISO-8859-5' : 'Cyrillic', |
1347 | 15 'WINDOWS-1251' : 'Windows1251', |
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
16 'ISO-8859-6' : 'Arabic', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
17 'ISO-8859-7' : 'Greek', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
18 'ISO-8859-8' : 'Hebrew', |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
19 'TIS-620' : 'Thai', |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
20 'SHIFT-JIS' : 'Japanese', |
1091 | 21 #'GB18030' : 'Chinese', # Done manually below (*) |
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
22 } |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
23 |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
24 #from encodings.aliases import aliases |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
25 #for a, b in aliases.iteritems(): |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
26 # print '%s : %s' % (a, b) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
27 |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
28 |
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
29 # "63" corresponds to "?" |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
30 l = [] |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
31 encoded = [] |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
32 expected = [] |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
33 |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
def ToArray(source):
    """Render a byte string as a double-quoted C string literal.

    Each byte of `source` is emitted as a two-digit, lowercase hexadecimal
    escape sequence ("\\xNN"), and the whole sequence is wrapped in double
    quotes so that it can be pasted into C/C++ source code.
    """
    # bytearray() yields the integer value of each byte, which is what
    # the "%02x" conversion needs
    escapes = ['\\x%02x' % octet for octet in bytearray(source)]
    return '"%s"' % ''.join(escapes)
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
39 |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
40 |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
# For every (Python codec name, Orthanc enumeration suffix) pair, record
# three parallel entries: the C++ enumeration value, the sample text
# encoded with that codec, and the UTF-8 string expected after decoding.
for encoding, orthancEnumeration in encodings.iteritems():
    # 'ignore' drops the characters that the codec cannot represent
    s = source.encode(encoding, 'ignore')

    # Round-trip through the codec to obtain the UTF-8 reference value
    roundTrip = s.decode(encoding).encode('utf-8')

    l.append('::Orthanc::Encoding_%s' % orthancEnumeration)
    encoded.append(ToArray(s))
    expected.append(ToArray(roundTrip))
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
46 |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
47 |
1091 | 48 # https://en.wikipedia.org/wiki/GB_18030#Technical_details (*) |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
49 l.append('::Orthanc::Encoding_Chinese') |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
50 expected.append(ToArray('Þßàáâã')) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
51 encoded.append('"\\x81\\x30\\x89\\x37\\x81\\x30\\x89\\x38\\xA8\\xA4\\xA8\\xA2\\x81\\x30\\x89\\x39\\x81\\x30\\x8A\\x30"') |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
52 |
1347 | 53 # Issue 32 |
54 # "encoded" is the copy/paste from "dcm2xml +Ca cyrillic Issue32.dcm" | |
55 l.append('::Orthanc::Encoding_Windows1251') | |
56 encoded.append('"\\xd0\\xe5\\xed\\xf2\\xe3\\xe5\\xed\\xee\\xe3\\xf0\\xe0\\xf4\\xe8\\xff"') | |
57 expected.append(ToArray('Рентгенография')) | |
58 l.append('::Orthanc::Encoding_Windows1251') | |
59 encoded.append('"\\xD2\\xE0\\xE7"') | |
60 expected.append(ToArray('Таз')) | |
61 l.append('::Orthanc::Encoding_Windows1251') | |
62 encoded.append('"\\xcf\\xf0\\xff\\xec\\xe0\\xff"') | |
63 expected.append(ToArray('Прямая')) | |
64 | |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
65 |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
66 if True: |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
67 print 'static const unsigned int testEncodingsCount = %d;' % len(l) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
68 print 'static const ::Orthanc::Encoding testEncodings[] = {\n %s\n};' % (',\n '.join(l)) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
69 print 'static const char *testEncodingsEncoded[%d] = {\n %s\n};' % (len(l), ',\n '.join(encoded)) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
70 print 'static const char *testEncodingsExpected[%d] = {\n %s\n};' % (len(l), ',\n '.join(expected)) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
71 else: |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
72 for i in range(len(expected)): |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
73 print expected[i] |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
74 #print '%s: %s' % (expected[i], l[i]) |
2326
423d3b692bb9
Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1347
diff
changeset
|
75 |
423d3b692bb9
Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1347
diff
changeset
|
76 |
423d3b692bb9
Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1347
diff
changeset
|
77 |
2329 | 78 u = (u'grüßEN SébasTIen %s' % source) |
2326
423d3b692bb9
Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1347
diff
changeset
|
79 print 'static const char *toUpperSource = %s;' % ToArray(u.encode('utf-8')) |
423d3b692bb9
Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1347
diff
changeset
|
80 print 'static const char *toUpperResult = %s;' % ToArray(u.upper().encode('utf-8')) |