Mercurial > hg > orthanc
comparison Resources/EncodingTests.py @ 1090:e494ceb8d763
support more encodings
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Tue, 05 Aug 2014 12:04:23 +0200 |
parents | 6fd4434c1bcf |
children | a66224eec125 |
comparison
equal
deleted
inserted
replaced
1089:5ea0b56e850d | 1090:e494ceb8d763 |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 # -*- coding: utf-8 -*- | 2 # -*- coding: utf-8 -*- |
3 | 3 |
4 source = u'éäöòДΘĝדصķћ' | 4 source = u'éäöòДΘĝדصķћ๛ネİ' |
5 | 5 |
6 encodings = { | 6 encodings = { |
7 'UTF-8' : 'Utf8', | 7 'UTF-8' : 'Utf8', |
8 'ASCII' : 'Ascii', | 8 'ASCII' : 'Ascii', |
9 'ISO-8859-1' : 'Latin1', | 9 'ISO-8859-1' : 'Latin1', |
13 'ISO-8859-9' : 'Latin5', | 13 'ISO-8859-9' : 'Latin5', |
14 'ISO-8859-5' : 'Cyrillic', | 14 'ISO-8859-5' : 'Cyrillic', |
15 'ISO-8859-6' : 'Arabic', | 15 'ISO-8859-6' : 'Arabic', |
16 'ISO-8859-7' : 'Greek', | 16 'ISO-8859-7' : 'Greek', |
17 'ISO-8859-8' : 'Hebrew', | 17 'ISO-8859-8' : 'Hebrew', |
| | 18 'TIS-620' : 'Thai', |
| | 19 'SHIFT-JIS' : 'Japanese', |
| | 20 #'GB18030' : 'Chinese', |
18 } | 21 } |
| | 22 |
| | 23 #from encodings.aliases import aliases |
| | 24 #for a, b in aliases.iteritems(): |
| | 25 # print '%s : %s' % (a, b) |
| | 26 |
19 | 27 |
20 # "63" corresponds to "?" | 28 # "63" corresponds to "?" |
21 l = [] | 29 l = [] |
22 encoded = [] | 30 encoded = [] |
23 expected = [] | 31 expected = [] |
28 result += '\\x%02x' % byte | 36 result += '\\x%02x' % byte |
29 return '"%s"' % result | 37 return '"%s"' % result |
30 | 38 |
31 | 39 |
32 for encoding, orthancEnumeration in encodings.iteritems(): | 40 for encoding, orthancEnumeration in encodings.iteritems(): |
33 l.append('Orthanc::Encoding_%s' % orthancEnumeration) | 41 l.append('::Orthanc::Encoding_%s' % orthancEnumeration) |
34 s = source.encode(encoding, 'replace') | 42 s = source.encode(encoding, 'ignore') |
35 encoded.append(ToArray(s)) | 43 encoded.append(ToArray(s)) |
36 expected.append(ToArray(s.decode(encoding).encode('utf-8'))) | 44 expected.append(ToArray(s.decode(encoding).encode('utf-8'))) |
37 | 45 |
38 print 'static const unsigned int testEncodingsCount = %d;' % len(encodings) | 46 |
39 print 'static const Orthanc::Encoding testEncodings[] = {\n %s\n};' % (',\n '.join(l)) | 47 # https://en.wikipedia.org/wiki/GB_18030#Technical_details |
40 print 'static const char *testEncodingsEncoded[%d] = {\n %s\n};' % (len(encodings), ',\n '.join(encoded)) | 48 l.append('::Orthanc::Encoding_Chinese') |
41 print 'static const char *testEncodingsExpected[%d] = {\n %s\n};' % (len(encodings), ',\n '.join(expected)) | 49 expected.append(ToArray('Þßàáâã')) |
| | 50 encoded.append('"\\x81\\x30\\x89\\x37\\x81\\x30\\x89\\x38\\xA8\\xA4\\xA8\\xA2\\x81\\x30\\x89\\x39\\x81\\x30\\x8A\\x30"') |
| | 51 |
| | 52 |
| | 53 if True: |
| | 54 print 'static const unsigned int testEncodingsCount = %d;' % len(l) |
| | 55 print 'static const ::Orthanc::Encoding testEncodings[] = {\n %s\n};' % (',\n '.join(l)) |
| | 56 print 'static const char *testEncodingsEncoded[%d] = {\n %s\n};' % (len(l), ',\n '.join(encoded)) |
| | 57 print 'static const char *testEncodingsExpected[%d] = {\n %s\n};' % (len(l), ',\n '.join(expected)) |
| | 58 else: |
| | 59 for i in range(len(expected)): |
| | 60 print expected[i] |
| | 61 #print '%s: %s' % (expected[i], l[i]) |