Mercurial > hg > orthanc
diff OrthancFramework/Resources/CodeGeneration/EncodingTests.py @ 4044:d25f4c0fa160 framework
splitting code into OrthancFramework and OrthancServer
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Wed, 10 Jun 2020 20:30:34 +0200 |
parents | Resources/EncodingTests.py@55c900a5b0e4 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/OrthancFramework/Resources/CodeGeneration/EncodingTests.py Wed Jun 10 20:30:34 2020 +0200 @@ -0,0 +1,80 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +source = u'TestéäöòДΘĝדصķћ๛ネİ' + +encodings = { + 'UTF-8' : 'Utf8', + 'ASCII' : 'Ascii', + 'ISO-8859-1' : 'Latin1', + 'ISO-8859-2' : 'Latin2', + 'ISO-8859-3' : 'Latin3', + 'ISO-8859-4' : 'Latin4', + 'ISO-8859-9' : 'Latin5', + 'ISO-8859-5' : 'Cyrillic', + 'WINDOWS-1251' : 'Windows1251', + 'ISO-8859-6' : 'Arabic', + 'ISO-8859-7' : 'Greek', + 'ISO-8859-8' : 'Hebrew', + 'TIS-620' : 'Thai', + 'SHIFT-JIS' : 'Japanese', + #'GB18030' : 'Chinese', # Done manually below (*) +} + +#from encodings.aliases import aliases +#for a, b in aliases.iteritems(): +# print '%s : %s' % (a, b) + + +# "63" corresponds to "?" +l = [] +encoded = [] +expected = [] + +def ToArray(source): + result = '' + for byte in bytearray(source): + result += '\\x%02x' % byte + return '"%s"' % result + + +for encoding, orthancEnumeration in encodings.iteritems(): + l.append('::Orthanc::Encoding_%s' % orthancEnumeration) + s = source.encode(encoding, 'ignore') + encoded.append(ToArray(s)) + expected.append(ToArray(s.decode(encoding).encode('utf-8'))) + + +# https://en.wikipedia.org/wiki/GB_18030#Technical_details (*) +l.append('::Orthanc::Encoding_Chinese') +expected.append(ToArray('Þßàáâã')) +encoded.append('"\\x81\\x30\\x89\\x37\\x81\\x30\\x89\\x38\\xA8\\xA4\\xA8\\xA2\\x81\\x30\\x89\\x39\\x81\\x30\\x8A\\x30"') + +# Issue 32 +# "encoded" is the copy/paste from "dcm2xml +Ca cyrillic Issue32.dcm" +l.append('::Orthanc::Encoding_Windows1251') +encoded.append('"\\xd0\\xe5\\xed\\xf2\\xe3\\xe5\\xed\\xee\\xe3\\xf0\\xe0\\xf4\\xe8\\xff"') +expected.append(ToArray('Рентгенография')) +l.append('::Orthanc::Encoding_Windows1251') +encoded.append('"\\xD2\\xE0\\xE7"') +expected.append(ToArray('Таз')) +l.append('::Orthanc::Encoding_Windows1251') +encoded.append('"\\xcf\\xf0\\xff\\xec\\xe0\\xff"') +expected.append(ToArray('Прямая')) + + +if True: + print 'static const unsigned int testEncodingsCount = %d;' % len(l) + print 'static const ::Orthanc::Encoding testEncodings[] = {\n %s\n};' % (',\n '.join(l)) + print 'static const char *testEncodingsEncoded[%d] = {\n %s\n};' % (len(l), ',\n '.join(encoded)) + print 'static const char *testEncodingsExpected[%d] = {\n %s\n};' % (len(l), ',\n '.join(expected)) +else: + for i in range(len(expected)): + print expected[i] + #print '%s: %s' % (expected[i], l[i]) + + + +u = (u'grüßEN SébasTIen %s' % source) +print 'static const char *toUpperSource = %s;' % ToArray(u.encode('utf-8')) +print 'static const char *toUpperResult = %s;' % ToArray(u.upper().encode('utf-8'))