Mercurial > hg > orthanc
view Resources/EncodingTests.py @ 3103:81b58b549845
Back to using 'var' instead of 'let', since 'let' is not supported by many old browsers. All variable declarations have been moved to the top of the function to better show that their scope is the function.
author | Alain Mazy <alain@mazy.be> |
---|---|
date | Thu, 10 Jan 2019 10:51:36 +0100 |
parents | 55c900a5b0e4 |
children |
line wrap: on
line source
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""Print C/C++ test vectors for character-encoding conversions.

A reference Unicode string is encoded with each of the Python codecs
listed in ``encodings``; both the raw encoded bytes and the UTF-8 bytes
expected after decoding are written to stdout as C string literals,
together with the matching ``::Orthanc::Encoding_*`` enumeration values.
A few hand-written vectors (GB18030, Windows-1251) and an upper-casing
test pair are appended.

The script runs unchanged under Python 2 and Python 3: every ``print``
call takes a single parenthesized argument, and every non-ASCII literal
is an explicit ``u''`` string that is encoded to UTF-8 before use.
"""

# Reference text mixing Latin, Cyrillic, Greek, Hebrew, Arabic, Thai,
# Japanese and Turkish characters.
source = u'TestéäöòДΘĝדصķћ๛ネİ'

# Python codec name -> suffix of the "::Orthanc::Encoding_" enumeration.
# (The codec names known to Python are listed in "encodings.aliases".)
encodings = {
    'UTF-8': 'Utf8',
    'ASCII': 'Ascii',
    'ISO-8859-1': 'Latin1',
    'ISO-8859-2': 'Latin2',
    'ISO-8859-3': 'Latin3',
    'ISO-8859-4': 'Latin4',
    'ISO-8859-9': 'Latin5',
    'ISO-8859-5': 'Cyrillic',
    'WINDOWS-1251': 'Windows1251',
    'ISO-8859-6': 'Arabic',
    'ISO-8859-7': 'Greek',
    'ISO-8859-8': 'Hebrew',
    'TIS-620': 'Thai',
    'SHIFT-JIS': 'Japanese',
    # 'GB18030': 'Chinese',  # Done manually below (*)
}

# Set to True to print only the expected UTF-8 literals (debugging aid)
# instead of the full set of C array definitions.
DUMP_EXPECTED_ONLY = False

# "63" corresponds to "?"

# Three parallel lists: entry i of each describes the same test vector.
enumerations = []
encoded = []
expected = []


def ToArray(source):
    """Return the bytes of *source* as a double-quoted C string literal.

    Every byte is written as a two-digit lowercase hexadecimal escape,
    so the result contains ASCII characters only.

    *source* must be a byte string (``str`` on Python 2, ``bytes`` or
    ``bytearray`` on Python 3); ``bytearray()`` raises ``TypeError`` for
    text without an encoding.
    """
    return '"%s"' % ''.join('\\x%02x' % byte for byte in bytearray(source))


def ToUpperSimple(text):
    """Upper-case the Unicode string *text*, one character at a time.

    A character is replaced only when its upper-case form is a single
    character.  Python 3's ``str.upper()`` expands e.g. the German sharp
    s into "SS", whereas Python 2's ``unicode.upper()`` leaves it
    unchanged; this helper reproduces the Python 2 result under both
    interpreters so that the generated vector does not depend on the
    interpreter used to run the script.
    """
    result = []
    for character in text:
        upper = character.upper()
        result.append(upper if len(upper) == 1 else character)
    return u''.join(result)


for encoding, orthancEnumeration in encodings.items():
    enumerations.append('::Orthanc::Encoding_%s' % orthancEnumeration)
    # Characters that the codec cannot represent are silently dropped, so
    # the expected text is obtained by decoding the encoded bytes again.
    s = source.encode(encoding, 'ignore')
    encoded.append(ToArray(s))
    expected.append(ToArray(s.decode(encoding).encode('utf-8')))


# https://en.wikipedia.org/wiki/GB_18030#Technical_details (*)
enumerations.append('::Orthanc::Encoding_Chinese')
expected.append(ToArray(u'Þßàáâã'.encode('utf-8')))
encoded.append('"\\x81\\x30\\x89\\x37\\x81\\x30\\x89\\x38\\xA8\\xA4\\xA8\\xA2\\x81\\x30\\x89\\x39\\x81\\x30\\x8A\\x30"')

# Issue 32
# "encoded" is the copy/paste from "dcm2xml +Ca cyrillic Issue32.dcm"
enumerations.append('::Orthanc::Encoding_Windows1251')
encoded.append('"\\xd0\\xe5\\xed\\xf2\\xe3\\xe5\\xed\\xee\\xe3\\xf0\\xe0\\xf4\\xe8\\xff"')
expected.append(ToArray(u'Рентгенография'.encode('utf-8')))

enumerations.append('::Orthanc::Encoding_Windows1251')
encoded.append('"\\xD2\\xE0\\xE7"')
expected.append(ToArray(u'Таз'.encode('utf-8')))

enumerations.append('::Orthanc::Encoding_Windows1251')
encoded.append('"\\xcf\\xf0\\xff\\xec\\xe0\\xff"')
expected.append(ToArray(u'Прямая'.encode('utf-8')))


# NOTE: each print() below receives exactly one parenthesized argument,
# which is valid (and prints the same text) in Python 2 and Python 3.
if DUMP_EXPECTED_ONLY:
    for literal in expected:
        print(literal)
else:
    print('static const unsigned int testEncodingsCount = %d;'
          % len(enumerations))
    print('static const ::Orthanc::Encoding testEncodings[] = {\n %s\n};'
          % (',\n '.join(enumerations)))
    print('static const char *testEncodingsEncoded[%d] = {\n %s\n};'
          % (len(enumerations), ',\n '.join(encoded)))
    print('static const char *testEncodingsExpected[%d] = {\n %s\n};'
          % (len(enumerations), ',\n '.join(expected)))


# Source/result pair for the upper-casing test.
u = (u'grüßEN SébasTIen %s' % source)

print('static const char *toUpperSource = %s;' % ToArray(u.encode('utf-8')))
print('static const char *toUpperResult = %s;'
      % ToArray(ToUpperSimple(u).encode('utf-8')))