comparison Tests/Tests.py @ 217:a491d5ec370a

test encodings
author Sebastien Jodogne <s.jodogne@gmail.com>
date Thu, 14 Feb 2019 11:32:44 +0100
parents d8fc62207ba3
children 70fba41776be
comparison
equal deleted inserted replaced
216:6c6411d34951 217:a491d5ec370a
4442 4442
4443 Compare('DummyCT.dcm', 'DummyCT.json') 4443 Compare('DummyCT.dcm', 'DummyCT.json')
4444 Compare('MarekLatin2.dcm', 'MarekLatin2.json') 4444 Compare('MarekLatin2.dcm', 'MarekLatin2.json')
4445 Compare('HierarchicalAnonymization/StructuredReports/IM0', 4445 Compare('HierarchicalAnonymization/StructuredReports/IM0',
4446 'HierarchicalAnonymization/StructuredReports/IM0.json') 4446 'HierarchicalAnonymization/StructuredReports/IM0.json')
4447
4448
def test_issue_95_encodings(self):
    # Regression test for
    # https://bitbucket.org/sjodogne/orthanc/issues/95/server-cant-support-chinese
    #
    # Each sample file is uploaded, then the "PatientName" reported by
    # the simplified tags of the new instance is checked against the
    # expected Unicode string.
    #
    # Reference image: "../Database/Encodings/DavidClunie/charsettests.screenshot.png"
    # Very useful tool: "file2" from package "file-kanji"

    def GetPatientName(dicom):
        # Upload the file, then read back the patient name of the
        # resulting instance
        instance = UploadInstance(_REMOTE, dicom)['ID']
        tags = DoGet(_REMOTE, '/instances/%s/tags?simplify' % instance)
        return tags['PatientName']

    # Pairs of (expected patient name, DICOM file), checked in this
    # order. The shell command above each pair shows how the expected
    # string was extracted from the file.
    expectations = [
        # gdcmraw -t 10,10 -i SCSFREN -o /tmp/tag && uconv -f ISO-IR-100 -t UTF-8 /tmp/tag && echo
        (u'Buc^Jérôme', 'Encodings/DavidClunie/SCSFREN'),

        # gdcmraw -t 10,10 -i SCSI2 -o /tmp/tag && uconv -f KOREAN -t UTF-8 /tmp/tag && echo
        # (since Orthanc 1.5.5)
        (u'Hong^Gildong=洪^吉洞=홍^길동', 'Encodings/DavidClunie/SCSI2'),

        # gdcmraw -t 10,10 -i SCSX2 -o /tmp/tag && uconv -f GB18030 -t UTF-8 /tmp/tag && echo
        (u'Wang^XiaoDong=王^小东=', 'Encodings/DavidClunie/SCSX2'),

        # gdcmraw -t 10,10 -i SCSX1 -o /tmp/tag && cat /tmp/tag && echo
        (u'Wang^XiaoDong=王^小東=', 'Encodings/DavidClunie/SCSX1'),

        # gdcmraw -t 10,10 -i SCSH31 -o /tmp/tag && uconv -f JIS -t UTF-8 /tmp/tag && echo
        (u'Yamada^Tarou=山田^太郎=やまだ^たろう', 'Encodings/DavidClunie/SCSH31'),

        # gdcmraw -t 10,10 -i SCSGERM -o /tmp/tag && uconv -f ISO-IR-100 -t UTF-8 /tmp/tag && echo
        (u'Äneas^Rüdiger', 'Encodings/DavidClunie/SCSGERM'),

        # gdcmraw -t 10,10 -i SCSGREEK -o /tmp/tag && uconv -f ISO-IR-126 -t UTF-8 /tmp/tag && echo
        (u'Διονυσιος', 'Encodings/DavidClunie/SCSGREEK'),

        # gdcmraw -t 10,10 -i SCSRUSS -o /tmp/tag && uconv -f ISO-IR-144 -t UTF-8 /tmp/tag && echo
        (u'Люкceмбypг', 'Encodings/DavidClunie/SCSRUSS'),

        # gdcmraw -t 10,10 -i SCSHBRW -o /tmp/tag && uconv -f ISO-IR-138 -t UTF-8 /tmp/tag && echo
        # NB: Hebrew is written right-to-left; copying/pasting from the
        # Linux console into Emacs automatically reverses the string
        (u'שרון^דבורה', 'Encodings/DavidClunie/SCSHBRW'),

        # gdcmraw -t 10,10 -i SCSARAB -o /tmp/tag && uconv -f ISO-IR-127 -t UTF-8 /tmp/tag && echo
        # NB: Right-to-left as for Hebrew (SCSHBRW). The Ubuntu console
        # can't display such characters by default, but copy/paste
        # works with Emacs
        (u'قباني^لنزار', 'Encodings/DavidClunie/SCSARAB'),
    ]

    for name, dicom in expectations:
        self.assertEqual(name, GetPatientName(dicom))

    # SCSH32: This SpecificCharacterSet is composed of 2 codepages:
    # "ISO 2022 IR 13" (i.e. "SHIFT_JIS") until the first equal sign,
    # then "ISO 2022 IR 87" (i.e. "JIS") for the remainder. Orthanc
    # only takes the first codepage into consideration (this is a
    # limitation), so only the prefix of the name is checked.
    # gdcmraw -t 10,10 -i SCSH32 -o /tmp/tag && cut -d '=' -f 1 /tmp/tag | uconv -f SHIFT_JIS -t UTF-8
    multiCodepageName = GetPatientName('Encodings/DavidClunie/SCSH32')
    self.assertTrue(multiCodepageName.startswith(u'ヤマダ^タロウ='))