comparison Resources/EncodingTests.py @ 1090:e494ceb8d763

support more encodings
author Sebastien Jodogne <s.jodogne@gmail.com>
date Tue, 05 Aug 2014 12:04:23 +0200
parents 6fd4434c1bcf
children a66224eec125
comparison
equal deleted inserted replaced
1089:5ea0b56e850d 1090:e494ceb8d763
1 #!/usr/bin/python 1 #!/usr/bin/python
2 # -*- coding: utf-8 -*- 2 # -*- coding: utf-8 -*-
3 3
4 source = u'éäöòДΘĝדصķћ' 4 source = u'éäöòДΘĝדصķћ๛ネİ'
5 5
6 encodings = { 6 encodings = {
7 'UTF-8' : 'Utf8', 7 'UTF-8' : 'Utf8',
8 'ASCII' : 'Ascii', 8 'ASCII' : 'Ascii',
9 'ISO-8859-1' : 'Latin1', 9 'ISO-8859-1' : 'Latin1',
13 'ISO-8859-9' : 'Latin5', 13 'ISO-8859-9' : 'Latin5',
14 'ISO-8859-5' : 'Cyrillic', 14 'ISO-8859-5' : 'Cyrillic',
15 'ISO-8859-6' : 'Arabic', 15 'ISO-8859-6' : 'Arabic',
16 'ISO-8859-7' : 'Greek', 16 'ISO-8859-7' : 'Greek',
17 'ISO-8859-8' : 'Hebrew', 17 'ISO-8859-8' : 'Hebrew',
18 'TIS-620' : 'Thai',
19 'SHIFT-JIS' : 'Japanese',
20 #'GB18030' : 'Chinese',
18 } 21 }
22
23 #from encodings.aliases import aliases
24 #for a, b in aliases.iteritems():
25 # print '%s : %s' % (a, b)
26
19 27
20 # "63" corresponds to "?" 28 # "63" corresponds to "?"
21 l = [] 29 l = []
22 encoded = [] 30 encoded = []
23 expected = [] 31 expected = []
28 result += '\\x%02x' % byte 36 result += '\\x%02x' % byte
29 return '"%s"' % result 37 return '"%s"' % result
30 38
31 39
32 for encoding, orthancEnumeration in encodings.iteritems(): 40 for encoding, orthancEnumeration in encodings.iteritems():
33 l.append('Orthanc::Encoding_%s' % orthancEnumeration) 41 l.append('::Orthanc::Encoding_%s' % orthancEnumeration)
34 s = source.encode(encoding, 'replace') 42 s = source.encode(encoding, 'ignore')
35 encoded.append(ToArray(s)) 43 encoded.append(ToArray(s))
36 expected.append(ToArray(s.decode(encoding).encode('utf-8'))) 44 expected.append(ToArray(s.decode(encoding).encode('utf-8')))
37 45
38 print 'static const unsigned int testEncodingsCount = %d;' % len(encodings) 46
39 print 'static const Orthanc::Encoding testEncodings[] = {\n %s\n};' % (',\n '.join(l)) 47 # https://en.wikipedia.org/wiki/GB_18030#Technical_details
40 print 'static const char *testEncodingsEncoded[%d] = {\n %s\n};' % (len(encodings), ',\n '.join(encoded)) 48 l.append('::Orthanc::Encoding_Chinese')
41 print 'static const char *testEncodingsExpected[%d] = {\n %s\n};' % (len(encodings), ',\n '.join(expected)) 49 expected.append(ToArray('Þßàáâã'))
50 encoded.append('"\\x81\\x30\\x89\\x37\\x81\\x30\\x89\\x38\\xA8\\xA4\\xA8\\xA2\\x81\\x30\\x89\\x39\\x81\\x30\\x8A\\x30"')
51
52
53 if True:
54 print 'static const unsigned int testEncodingsCount = %d;' % len(l)
55 print 'static const ::Orthanc::Encoding testEncodings[] = {\n %s\n};' % (',\n '.join(l))
56 print 'static const char *testEncodingsEncoded[%d] = {\n %s\n};' % (len(l), ',\n '.join(encoded))
57 print 'static const char *testEncodingsExpected[%d] = {\n %s\n};' % (len(l), ',\n '.join(expected))
58 else:
59 for i in range(len(expected)):
60 print expected[i]
61 #print '%s: %s' % (expected[i], l[i])