Mercurial > hg > orthanc
annotate Resources/EncodingTests.py @ 1110:becde5351e47
preparing to update mongoose
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Mon, 01 Sep 2014 11:11:00 +0200 |
parents | a66224eec125 |
children | 60cc0ee61edb |
rev | line source |
---|---|
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
1 #!/usr/bin/python |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
3 |
1091 | 4 source = u'TestéäöòДΘĝדصķћ๛ネİ' |
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
5 |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
6 encodings = { |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
7 'UTF-8' : 'Utf8', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
8 'ASCII' : 'Ascii', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
9 'ISO-8859-1' : 'Latin1', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
10 'ISO-8859-2' : 'Latin2', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
11 'ISO-8859-3' : 'Latin3', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
12 'ISO-8859-4' : 'Latin4', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
13 'ISO-8859-9' : 'Latin5', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
14 'ISO-8859-5' : 'Cyrillic', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
15 'ISO-8859-6' : 'Arabic', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
16 'ISO-8859-7' : 'Greek', |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
17 'ISO-8859-8' : 'Hebrew', |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
18 'TIS-620' : 'Thai', |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
19 'SHIFT-JIS' : 'Japanese', |
1091 | 20 #'GB18030' : 'Chinese', # Done manually below (*) |
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
21 } |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
22 |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
23 #from encodings.aliases import aliases |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
24 #for a, b in aliases.iteritems(): |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
25 # print '%s : %s' % (a, b) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
26 |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
27 |
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
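28 # "63" (0x3f) corresponds to "?"; it would only be substituted by the 'replace' error handler, whereas the 'ignore' handler used below drops unencodable characters |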
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
29 l = [] |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
30 encoded = [] |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
31 expected = [] |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
32 |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
def ToArray(source):
    """Format ``source`` as a double-quoted C string literal of hex escapes.

    Every byte of ``source`` is written as a backslash, the letter ``x`` and
    two lowercase hexadecimal digits, and the whole sequence is wrapped in
    double quotes so it can be pasted into generated C/C++ code.

    ``source`` is normally a byte string.  A text (unicode) string is
    encoded as UTF-8 first, because ``bytearray()`` refuses text that comes
    without an explicit encoding.
    """
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3, so this
    # check only catches text and leaves byte strings untouched.
    if isinstance(source, type(u'')):
        source = source.encode('utf-8')

    # Iterating a bytearray yields integers on both Python 2 and Python 3.
    return '"%s"' % ''.join('\\x%02x' % byte for byte in bytearray(source))
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
38 |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
39 |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
40 for encoding, orthancEnumeration in encodings.iteritems(): |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
41 l.append('::Orthanc::Encoding_%s' % orthancEnumeration) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
42 s = source.encode(encoding, 'ignore') |
1088
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
43 encoded.append(ToArray(s)) |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
44 expected.append(ToArray(s.decode(encoding).encode('utf-8'))) |
6fd4434c1bcf
unit tests for encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
diff
changeset
|
45 |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
46 |
1091 | 47 # https://en.wikipedia.org/wiki/GB_18030#Technical_details (*) |
1090
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
48 l.append('::Orthanc::Encoding_Chinese') |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
49 expected.append(ToArray('Þßàáâã')) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
50 encoded.append('"\\x81\\x30\\x89\\x37\\x81\\x30\\x89\\x38\\xA8\\xA4\\xA8\\xA2\\x81\\x30\\x89\\x39\\x81\\x30\\x8A\\x30"') |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
51 |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
52 |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
53 if True: |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
54 print 'static const unsigned int testEncodingsCount = %d;' % len(l) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
55 print 'static const ::Orthanc::Encoding testEncodings[] = {\n %s\n};' % (',\n '.join(l)) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
56 print 'static const char *testEncodingsEncoded[%d] = {\n %s\n};' % (len(l), ',\n '.join(encoded)) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
57 print 'static const char *testEncodingsExpected[%d] = {\n %s\n};' % (len(l), ',\n '.join(expected)) |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
58 else: |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
59 for i in range(len(expected)): |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
60 print expected[i] |
e494ceb8d763
support more encodings
Sebastien Jodogne <s.jodogne@gmail.com>
parents:
1088
diff
changeset
|
61 #print '%s: %s' % (expected[i], l[i]) |