Mercurial > hg > orthanc
annotate Resources/EncodingTests.py @ 3856:dd0fcbf6a791
back to mainline
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Tue, 21 Apr 2020 14:45:30 +0200 |
parents | 55c900a5b0e4 |
children |
rev | line source |
---|---|
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
1 #!/usr/bin/python |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
3 |
1091 | 4 source = u'TestéäöòДΘĝדصķћ๛ネİ' |
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
5 |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
6 encodings = { |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
7 'UTF-8' : 'Utf8', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
8 'ASCII' : 'Ascii', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
9 'ISO-8859-1' : 'Latin1', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
10 'ISO-8859-2' : 'Latin2', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
11 'ISO-8859-3' : 'Latin3', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
12 'ISO-8859-4' : 'Latin4', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
13 'ISO-8859-9' : 'Latin5', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
14 'ISO-8859-5' : 'Cyrillic', |
1347 | 15 'WINDOWS-1251' : 'Windows1251', |
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
16 'ISO-8859-6' : 'Arabic', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
17 'ISO-8859-7' : 'Greek', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
18 'ISO-8859-8' : 'Hebrew', |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
19 'TIS-620' : 'Thai', |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
20 'SHIFT-JIS' : 'Japanese', |
1091 | 21 #'GB18030' : 'Chinese', # Done manually below (*) |
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
22 } |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
23 |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
24 #from encodings.aliases import aliases |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
25 #for a, b in aliases.iteritems(): |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
26 # print '%s : %s' % (a, b) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
27 |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
28 |
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
29 # "63" corresponds to "?" |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
30 l = [] |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
31 encoded = [] |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
32 expected = [] |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
33 |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
def ToArray(source):
    """Format the bytes of *source* as a double-quoted string of hex escapes.

    Each byte becomes a backslash, the letter "x" and two lowercase
    hexadecimal digits; the concatenation is wrapped in double quotes so
    that it can be emitted as a C string literal by the print statements
    at the end of this script.

    *source* must be something ``bytearray()`` accepts without an explicit
    encoding (an encoded byte string, not a unicode string).  An empty
    input yields ``'""'``.
    """
    # Build the escapes in one pass and join once, instead of growing a
    # string with "+=" on every byte.
    escaped = ''.join('\\x%02x' % byte for byte in bytearray(source))
    return '"%s"' % escaped
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
39 |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
40 |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
41 for encoding, orthancEnumeration in encodings.iteritems(): |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
42 l.append('::Orthanc::Encoding_%s' % orthancEnumeration) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
43 s = source.encode(encoding, 'ignore') |
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
44 encoded.append(ToArray(s)) |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
45 expected.append(ToArray(s.decode(encoding).encode('utf-8'))) |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
46 |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
47 |
1091 | 48 # https://en.wikipedia.org/wiki/GB_18030#Technical_details (*) |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
49 l.append('::Orthanc::Encoding_Chinese') |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
50 expected.append(ToArray('Þßàáâã')) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
51 encoded.append('"\\x81\\x30\\x89\\x37\\x81\\x30\\x89\\x38\\xA8\\xA4\\xA8\\xA2\\x81\\x30\\x89\\x39\\x81\\x30\\x8A\\x30"') |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
52 |
1347 | 53 # Issue 32 |
54 # "encoded" is the copy/paste from "dcm2xml +Ca cyrillic Issue32.dcm" | |
55 l.append('::Orthanc::Encoding_Windows1251') | |
56 encoded.append('"\\xd0\\xe5\\xed\\xf2\\xe3\\xe5\\xed\\xee\\xe3\\xf0\\xe0\\xf4\\xe8\\xff"') | |
57 expected.append(ToArray('Рентгенография')) | |
58 l.append('::Orthanc::Encoding_Windows1251') | |
59 encoded.append('"\\xD2\\xE0\\xE7"') | |
60 expected.append(ToArray('Таз')) | |
61 l.append('::Orthanc::Encoding_Windows1251') | |
62 encoded.append('"\\xcf\\xf0\\xff\\xec\\xe0\\xff"') | |
63 expected.append(ToArray('Прямая')) | |
64 | |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
65 |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
66 if True: |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
67 print 'static const unsigned int testEncodingsCount = %d;' % len(l) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
68 print 'static const ::Orthanc::Encoding testEncodings[] = {\n %s\n};' % (',\n '.join(l)) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
69 print 'static const char *testEncodingsEncoded[%d] = {\n %s\n};' % (len(l), ',\n '.join(encoded)) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
70 print 'static const char *testEncodingsExpected[%d] = {\n %s\n};' % (len(l), ',\n '.join(expected)) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
71 else: |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
72 for i in range(len(expected)): |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
73 print expected[i] |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
74 #print '%s: %s' % (expected[i], l[i]) |
2326
423d3b692bb9
Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1347
diff
changeset
|
75 |
423d3b692bb9
Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1347
diff
changeset
|
76 |
423d3b692bb9
Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1347
diff
changeset
|
77 |
2329 | 78 u = (u'grüßEN SébasTIen %s' % source) |
2326
423d3b692bb9
Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1347
diff
changeset
|
79 print 'static const char *toUpperSource = %s;' % ToArray(u.encode('utf-8')) |
423d3b692bb9
Upgrade to Boost 1.64.0, and Toolbox::ToUpperCaseWithAccents
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1347
diff
changeset
|
80 print 'static const char *toUpperResult = %s;' % ToArray(u.upper().encode('utf-8')) |