Mercurial > hg > orthanc
changeset 4220:92a21efa5c96
reorganization of DicomStreamReader
author | Sebastien Jodogne <s.jodogne@gmail.com> |
---|---|
date | Wed, 30 Sep 2020 15:33:47 +0200 |
parents | b8ed2852a35d |
children | e4c0218b6b23 |
files | OrthancFramework/Resources/CMake/OrthancFrameworkConfiguration.cmake OrthancFramework/Sources/DicomFormat/DicomStreamReader.cpp OrthancFramework/Sources/DicomFormat/DicomStreamReader.h OrthancFramework/Sources/DicomFormat/StreamBlockReader.cpp OrthancFramework/Sources/DicomFormat/StreamBlockReader.h OrthancFramework/UnitTestsSources/DicomMapTests.cpp |
diffstat | 6 files changed, 892 insertions(+), 718 deletions(-) [+] |
line wrap: on
line diff
--- a/OrthancFramework/Resources/CMake/OrthancFrameworkConfiguration.cmake Wed Sep 30 12:18:43 2020 +0200 +++ b/OrthancFramework/Resources/CMake/OrthancFrameworkConfiguration.cmake Wed Sep 30 15:33:47 2020 +0200 @@ -184,6 +184,8 @@ ${CMAKE_CURRENT_LIST_DIR}/../../Sources/DicomFormat/DicomIntegerPixelAccessor.cpp ${CMAKE_CURRENT_LIST_DIR}/../../Sources/DicomFormat/DicomMap.cpp ${CMAKE_CURRENT_LIST_DIR}/../../Sources/DicomFormat/DicomValue.cpp + ${CMAKE_CURRENT_LIST_DIR}/../../Sources/DicomFormat/StreamBlockReader.cpp + ${CMAKE_CURRENT_LIST_DIR}/../../Sources/DicomFormat/DicomStreamReader.cpp ) endif()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/OrthancFramework/Sources/DicomFormat/DicomStreamReader.cpp Wed Sep 30 15:33:47 2020 +0200 @@ -0,0 +1,599 @@ +/** + * Orthanc - A Lightweight, RESTful DICOM Store + * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics + * Department, University Hospital of Liege, Belgium + * Copyright (C) 2017-2020 Osimis S.A., Belgium + * + * This program is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program. If not, see + * <http://www.gnu.org/licenses/>. + **/ + + +#include "../PrecompiledHeaders.h" +#include "DicomStreamReader.h" + +#include "../OrthancException.h" + +namespace Orthanc +{ + static uint16_t ReadUnsignedInteger16(const char* dicom, + bool littleEndian) + { + const uint8_t* p = reinterpret_cast<const uint8_t*>(dicom); + + if (littleEndian) + { + return (static_cast<uint16_t>(p[0]) | + (static_cast<uint16_t>(p[1]) << 8)); + } + else + { + return (static_cast<uint16_t>(p[1]) | + (static_cast<uint16_t>(p[0]) << 8)); + } + } + + + static uint32_t ReadUnsignedInteger32(const char* dicom, + bool littleEndian) + { + const uint8_t* p = reinterpret_cast<const uint8_t*>(dicom); + + if (littleEndian) + { + return (static_cast<uint32_t>(p[0]) | + (static_cast<uint32_t>(p[1]) << 8) | + (static_cast<uint32_t>(p[2]) << 16) | + (static_cast<uint32_t>(p[3]) << 24)); + } + else + { + return (static_cast<uint32_t>(p[3]) | + (static_cast<uint32_t>(p[2]) << 8) | + (static_cast<uint32_t>(p[1]) << 16) | + (static_cast<uint32_t>(p[0]) << 24)); + } + } + + + static DicomTag ReadTag(const char* dicom, + bool littleEndian) + { + return DicomTag(ReadUnsignedInteger16(dicom, littleEndian), + ReadUnsignedInteger16(dicom + 2, littleEndian)); + } + + + static bool IsShortExplicitTag(ValueRepresentation vr) + { + /** + * Are we in the case of Table 7.1-2? "Data Element with + * Explicit VR of AE, AS, AT, CS, DA, DS, DT, FL, FD, IS, LO, + * LT, PN, SH, SL, SS, ST, TM, UI, UL and US" + * http://dicom.nema.org/medical/dicom/current/output/chtml/part05/chapter_7.html#sect_7.1.2 + **/ + return (vr == ValueRepresentation_ApplicationEntity /* AE */ || + vr == ValueRepresentation_AgeString /* AS */ || + vr == ValueRepresentation_AttributeTag /* AT */ || + vr == ValueRepresentation_CodeString /* CS */ || + vr == ValueRepresentation_Date /* DA */ || + vr == ValueRepresentation_DecimalString /* DS */ || + vr == ValueRepresentation_DateTime /* DT */ || + vr == ValueRepresentation_FloatingPointSingle /* FL */ || + vr == ValueRepresentation_FloatingPointDouble /* FD */ || + vr == ValueRepresentation_IntegerString /* IS */ || + vr == ValueRepresentation_LongString /* LO */ || + vr == ValueRepresentation_LongText /* LT */ || + vr == ValueRepresentation_PersonName /* PN */ || + vr == ValueRepresentation_ShortString /* SH */ || + vr == ValueRepresentation_SignedLong /* SL */ || + vr == ValueRepresentation_SignedShort /* SS */ || + vr == ValueRepresentation_ShortText /* ST */ || + vr == ValueRepresentation_Time /* TM */ || + vr == ValueRepresentation_UniqueIdentifier /* UI */ || + vr == ValueRepresentation_UnsignedLong /* UL */ || + vr == ValueRepresentation_UnsignedShort /* US */); + } + + + static void PrintBlock(const std::string& block) + { + for (size_t i = 0; i < block.size(); i++) + { + printf("%02x ", static_cast<uint8_t>(block[i])); + if (i % 16 == 15) + printf("\n"); + } + printf("\n"); + } + + + + bool DicomStreamReader::IsLittleEndian() const + { + return (transferSyntax_ != DicomTransferSyntax_BigEndianExplicit); + } + + + void DicomStreamReader::HandlePreamble(IVisitor& visitor, + const std::string& block) + { + //printf("PREAMBLE:\n"); + //PrintBlock(block); + + assert(block.size() == 144u); + assert(reader_.GetProcessedBytes() == 144u); + + /** + * The "DICOM file meta information" is always encoded using + * "Explicit VR Little Endian Transfer Syntax" + * http://dicom.nema.org/medical/dicom/current/output/chtml/part10/chapter_7.html + **/ + if (block[128] != 'D' || + block[129] != 'I' || + block[130] != 'C' || + block[131] != 'M' || + ReadTag(block.c_str() + 132, true) != DicomTag(0x0002, 0x0000) || + block[136] != 'U' || + block[137] != 'L' || + ReadUnsignedInteger16(block.c_str() + 138, true) != 4) + { + throw OrthancException(ErrorCode_BadFileFormat); + } + + uint32_t length = ReadUnsignedInteger32(block.c_str() + 140, true); + + reader_.Schedule(length); + state_ = State_MetaHeader; + } + + + void DicomStreamReader::HandleMetaHeader(IVisitor& visitor, + const std::string& block) + { + //printf("META-HEADER:\n"); + //PrintBlock(block); + + size_t pos = 0; + const char* p = block.c_str(); + + bool hasTransferSyntax = false; + + while (pos + 8 <= block.size()) + { + DicomTag tag = ReadTag(p + pos, true); + + ValueRepresentation vr = StringToValueRepresentation(std::string(p + pos + 4, 2), true); + + if (IsShortExplicitTag(vr)) + { + uint16_t length = ReadUnsignedInteger16(p + pos + 6, true); + + std::string value; + value.assign(p + pos + 8, length); + + if (tag.GetGroup() == 0x0002) + { + visitor.VisitMetaHeaderTag(tag, vr, value); + } + + if (tag == DICOM_TAG_TRANSFER_SYNTAX_UID) + { + // Remove possible padding byte + if (!value.empty() && + value[value.size() - 1] == '\0') + { + value.resize(value.size() - 1); + } + + if (LookupTransferSyntax(transferSyntax_, value)) + { + hasTransferSyntax = true; + } + else + { + throw OrthancException(ErrorCode_NotImplemented, "Unsupported transfer syntax: " + value); + } + } + + pos += length + 8; + } + else if (pos + 12 <= block.size()) + { + uint16_t reserved = ReadUnsignedInteger16(p + pos + 6, true); + if (reserved != 0) + { + break; + } + + uint32_t length = ReadUnsignedInteger32(p + pos + 8, true); + + std::string value; + value.assign(p + pos + 12, length); + + if (tag.GetGroup() == 0x0002) + { + visitor.VisitMetaHeaderTag(tag, vr, value); + } + + pos += length + 12; + } + } + + if (pos != block.size()) + { + throw OrthancException(ErrorCode_BadFileFormat); + } + + if (!hasTransferSyntax) + { + throw OrthancException(ErrorCode_BadFileFormat, "DICOM file meta-header without transfer syntax UID"); + } + + visitor.VisitTransferSyntax(transferSyntax_); + + reader_.Schedule(8); + state_ = State_DatasetTag; + } + + + void DicomStreamReader::HandleDatasetTag(const std::string& block, + const DicomTag& untilTag) + { + static const DicomTag DICOM_TAG_SEQUENCE_ITEM(0xfffe, 0xe000); + static const DicomTag DICOM_TAG_SEQUENCE_DELIMITATION_ITEM(0xfffe, 0xe00d); + static const DicomTag DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE(0xfffe, 0xe0dd); + + assert(block.size() == 8u); + + const bool littleEndian = IsLittleEndian(); + DicomTag tag = ReadTag(block.c_str(), littleEndian); + + if (sequenceDepth_ == 0 && + tag >= untilTag) + { + state_ = State_Done; + return; + } + + if (tag == DICOM_TAG_SEQUENCE_ITEM || + tag == DICOM_TAG_SEQUENCE_DELIMITATION_ITEM || + tag == DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE) + { + //printf("SEQUENCE TAG:\n"); + //PrintBlock(block); + + // The special sequence items are encoded like "Implicit VR" + uint32_t length = ReadUnsignedInteger32(block.c_str() + 4, littleEndian); + + if (tag == DICOM_TAG_SEQUENCE_ITEM) + { + for (unsigned int i = 0; i <= sequenceDepth_; i++) + printf(" "); + if (length == 0xffffffffu) + { + // Undefined length: Need to loop over the tags of the nested dataset + printf("...next dataset in sequence...\n"); + reader_.Schedule(8); + state_ = State_DatasetTag; + } + else + { + // Explicit length: Can skip the full sequence at once + printf("...next dataset in sequence... %u bytes\n", length); + reader_.Schedule(length); + state_ = State_DatasetValue; + } + } + else if (tag == DICOM_TAG_SEQUENCE_DELIMITATION_ITEM || + tag == DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE) + { + if (length != 0 || + sequenceDepth_ == 0) + { + throw OrthancException(ErrorCode_BadFileFormat); + } + + if (tag == DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE) + { + for (unsigned int i = 0; i < sequenceDepth_; i++) + printf(" "); + printf("...leaving sequence...\n"); + + sequenceDepth_ --; + } + else + { + if (sequenceDepth_ == 0) + { + throw OrthancException(ErrorCode_BadFileFormat); + } + } + + reader_.Schedule(8); + state_ = State_DatasetTag; + } + else + { + throw OrthancException(ErrorCode_InternalError); + } + } + else + { + //printf("DATASET TAG:\n"); + //PrintBlock(block); + + previousTag_ = tag; + + ValueRepresentation vr = ValueRepresentation_Unknown; + + if (transferSyntax_ == DicomTransferSyntax_LittleEndianImplicit) + { + if (sequenceDepth_ == 0) + { + danglingTag_ = tag; + danglingVR_ = vr; + } + + uint32_t length = ReadUnsignedInteger32(block.c_str() + 4, true /* little endian */); + HandleDatasetExplicitLength(length); + } + else + { + // This in an explicit transfer syntax + + vr = StringToValueRepresentation( + std::string(block.c_str() + 4, 2), false /* ignore unknown VR */); + + if (vr != ValueRepresentation_Sequence && + sequenceDepth_ > 0) + { + for (unsigned int i = 0; i <= sequenceDepth_; i++) + printf(" "); + printf("%s\n", tag.Format().c_str()); + } + + if (vr == ValueRepresentation_Sequence) + { + for (unsigned int i = 0; i <= sequenceDepth_; i++) + printf(" "); + printf("...entering sequence... %s\n", tag.Format().c_str()); + sequenceDepth_ ++; + reader_.Schedule(4); + state_ = State_SequenceExplicitLength; + } + else if (IsShortExplicitTag(vr)) + { + uint16_t length = ReadUnsignedInteger16(block.c_str() + 6, littleEndian); + + reader_.Schedule(length); + state_ = State_DatasetValue; + } + else + { + uint16_t reserved = ReadUnsignedInteger16(block.c_str() + 6, littleEndian); + if (reserved != 0) + { + throw OrthancException(ErrorCode_BadFileFormat); + } + + reader_.Schedule(4); + state_ = State_DatasetExplicitLength; + } + + if (sequenceDepth_ == 0) + { + danglingTag_ = tag; + danglingVR_ = vr; + } + } + } + } + + + void DicomStreamReader::HandleDatasetExplicitLength(uint32_t length) + { + if (length == 0xffffffffu) + { + /** + * This is the case of pixel data with compressed transfer + * syntaxes. Schedule the reading of the first tag of the + * nested dataset. + * http://dicom.nema.org/medical/dicom/current/output/chtml/part05/sect_7.5.html + **/ + + for (unsigned int i = 0; i <= sequenceDepth_; i++) + printf(" "); + printf("...entering sequence... %s\n", previousTag_.Format().c_str()); + + state_ = State_DatasetTag; + reader_.Schedule(8); + sequenceDepth_ ++; + } + else + { + reader_.Schedule(length); + state_ = State_DatasetValue; + } + } + + + void DicomStreamReader::HandleDatasetExplicitLength(const std::string& block) + { + //printf("DATASET TAG LENGTH:\n"); + //PrintBlock(block); + + assert(block.size() == 4); + + uint32_t length = ReadUnsignedInteger32(block.c_str(), IsLittleEndian()); + HandleDatasetExplicitLength(length); + } + + + void DicomStreamReader::HandleSequenceExplicitLength(const std::string& block) + { + //printf("DATASET TAG LENGTH:\n"); + //PrintBlock(block); + + assert(block.size() == 4); + + uint32_t length = ReadUnsignedInteger32(block.c_str(), IsLittleEndian()); + if (length == 0xffffffffu) + { + state_ = State_DatasetTag; + reader_.Schedule(8); + } + else + { + for (unsigned int i = 0; i <= sequenceDepth_; i++) + printf(" "); + printf("...skipping sequence thanks to explicit length... %d\n", length); + + reader_.Schedule(length); + state_ = State_SequenceExplicitValue; + } + } + + + void DicomStreamReader::HandleSequenceExplicitValue() + { + if (sequenceDepth_ == 0) + { + throw OrthancException(ErrorCode_InternalError); + } + + sequenceDepth_ --; + + state_ = State_DatasetTag; + reader_.Schedule(8); + } + + + void DicomStreamReader::HandleDatasetValue(IVisitor& visitor, + const std::string& block) + { + if (sequenceDepth_ == 0) + { + bool c; + + if (!block.empty() && + (block[block.size() - 1] == ' ' || + block[block.size() - 1] == '\0') && + (danglingVR_ == ValueRepresentation_ApplicationEntity || + danglingVR_ == ValueRepresentation_AgeString || + danglingVR_ == ValueRepresentation_CodeString || + danglingVR_ == ValueRepresentation_DecimalString || + danglingVR_ == ValueRepresentation_IntegerString || + danglingVR_ == ValueRepresentation_LongString || + danglingVR_ == ValueRepresentation_LongText || + danglingVR_ == ValueRepresentation_PersonName || + danglingVR_ == ValueRepresentation_ShortString || + danglingVR_ == ValueRepresentation_ShortText || + danglingVR_ == ValueRepresentation_UniqueIdentifier || + danglingVR_ == ValueRepresentation_UnlimitedText)) + { + std::string s(block.begin(), block.end() - 1); + c = visitor.VisitDatasetTag(danglingTag_, danglingVR_, s, IsLittleEndian()); + } + else + { + c = visitor.VisitDatasetTag(danglingTag_, danglingVR_, block, IsLittleEndian()); + } + + if (!c) + { + state_ = State_Done; + return; + } + } + + reader_.Schedule(8); + state_ = State_DatasetTag; + } + + + DicomStreamReader::DicomStreamReader(std::istream& stream) : + reader_(stream), + state_(State_Preamble), + transferSyntax_(DicomTransferSyntax_LittleEndianImplicit), // Dummy + previousTag_(0x0000, 0x0000), // Dummy + danglingTag_(0x0000, 0x0000), // Dummy + danglingVR_(ValueRepresentation_Unknown), // Dummy + sequenceDepth_(0) + { + reader_.Schedule(128 /* empty header */ + + 4 /* "DICM" magic value */ + + 4 /* (0x0002, 0x0000) tag */ + + 2 /* value representation of (0x0002, 0x0000) == "UL" */ + + 2 /* length of "UL" value == 4 */ + + 4 /* actual length of the meta-header */); + } + + + void DicomStreamReader::Consume(IVisitor& visitor, + const DicomTag& untilTag) + { + while (state_ != State_Done) + { + std::string block; + if (reader_.Read(block)) + { + switch (state_) + { + case State_Preamble: + HandlePreamble(visitor, block); + break; + + case State_MetaHeader: + HandleMetaHeader(visitor, block); + break; + + case State_DatasetTag: + HandleDatasetTag(block, untilTag); + break; + + case State_DatasetExplicitLength: + HandleDatasetExplicitLength(block); + break; + + case State_SequenceExplicitLength: + HandleSequenceExplicitLength(block); + break; + + case State_SequenceExplicitValue: + HandleSequenceExplicitValue(); + break; + + case State_DatasetValue: + HandleDatasetValue(visitor, block); + break; + + default: + throw OrthancException(ErrorCode_InternalError); + } + } + else + { + return; // No more data in the stream + } + } + } + + + void DicomStreamReader::Consume(IVisitor& visitor) + { + DicomTag untilTag(0xffff, 0xffff); + Consume(visitor, untilTag); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/OrthancFramework/Sources/DicomFormat/DicomStreamReader.h Wed Sep 30 15:33:47 2020 +0200 @@ -0,0 +1,129 @@ +/** + * Orthanc - A Lightweight, RESTful DICOM Store + * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics + * Department, University Hospital of Liege, Belgium + * Copyright (C) 2017-2020 Osimis S.A., Belgium + * + * This program is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program. If not, see + * <http://www.gnu.org/licenses/>. + **/ + + +#pragma once + +#include "DicomTag.h" +#include "StreamBlockReader.h" + +namespace Orthanc +{ + /** + * This class parses a stream containing a DICOM instance. It does + * *not* support the visit of sequences (it only works at the first + * level of the hierarchy), and as a consequence, it doesn't give + * access to the pixel data of compressed transfer syntaxes. + **/ + class DicomStreamReader : public boost::noncopyable + { + public: + class IVisitor : public boost::noncopyable + { + public: + virtual ~IVisitor() + { + } + + // The data from this function will always be Little Endian (as + // specified by the DICOM standard) + virtual void VisitMetaHeaderTag(const DicomTag& tag, + const ValueRepresentation& vr, + const std::string& value) = 0; + + virtual void VisitTransferSyntax(DicomTransferSyntax transferSyntax) = 0; + + // Return "false" to stop processing + virtual bool VisitDatasetTag(const DicomTag& tag, + const ValueRepresentation& vr, + const std::string& value, + bool isLittleEndian) = 0; + }; + + private: + enum State + { + State_Preamble, + State_MetaHeader, + State_DatasetTag, + State_SequenceExplicitLength, + State_SequenceExplicitValue, + State_DatasetExplicitLength, + State_DatasetValue, + State_Done + }; + + StreamBlockReader reader_; + State state_; + DicomTransferSyntax transferSyntax_; + DicomTag previousTag_; + DicomTag danglingTag_; // Root-level tag + ValueRepresentation danglingVR_; + unsigned int sequenceDepth_; + + bool IsLittleEndian() const; + + void HandlePreamble(IVisitor& visitor, + const std::string& block); + + void HandleMetaHeader(IVisitor& visitor, + const std::string& block); + + void HandleDatasetTag(const std::string& block, + const DicomTag& untilTag); + + void HandleDatasetExplicitLength(uint32_t length); + + void HandleDatasetExplicitLength(const std::string& block); + + void HandleSequenceExplicitLength(const std::string& block); + + void HandleSequenceExplicitValue(); + + void HandleDatasetValue(IVisitor& visitor, + const std::string& block); + + public: + DicomStreamReader(std::istream& stream); + + /** + * Consume all the available bytes from the input stream, until + * end-of-stream is reached or the current tag is ">= untilTag". + * This method can be invoked several times, as more bytes are + * available from the input stream. To check if the DICOM stream + * is fully parsed until the goal tag, call "IsDone()". + **/ + void Consume(IVisitor& visitor, + const DicomTag& untilTag); + + void Consume(IVisitor& visitor); + + bool IsDone() const + { + return (state_ == State_Done); + } + + uint64_t GetProcessedBytes() const + { + return reader_.GetProcessedBytes(); + } + }; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/OrthancFramework/Sources/DicomFormat/StreamBlockReader.cpp Wed Sep 30 15:33:47 2020 +0200 @@ -0,0 +1,88 @@ +/** + * Orthanc - A Lightweight, RESTful DICOM Store + * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics + * Department, University Hospital of Liege, Belgium + * Copyright (C) 2017-2020 Osimis S.A., Belgium + * + * This program is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program. If not, see + * <http://www.gnu.org/licenses/>. + **/ + + +#include "../PrecompiledHeaders.h" +#include "StreamBlockReader.h" + +#include "../OrthancException.h" + + +namespace Orthanc +{ + StreamBlockReader::StreamBlockReader(std::istream& stream) : + stream_(stream), + blockPos_(0), + processedBytes_(0) + { + } + + + void StreamBlockReader::Schedule(size_t blockSize) + { + if (!block_.empty()) + { + throw OrthancException(ErrorCode_BadSequenceOfCalls); + } + else + { + block_.resize(blockSize); + blockPos_ = 0; + } + } + + + bool StreamBlockReader::Read(std::string& block) + { + if (block_.empty()) + { + if (blockPos_ != 0) + { + throw OrthancException(ErrorCode_BadSequenceOfCalls); + } + + block.clear(); + return true; + } + else + { + while (blockPos_ < block_.size()) + { + size_t remainingBytes = block_.size() - blockPos_; + std::streamsize r = stream_.readsome(&block_[blockPos_], remainingBytes); + if (r == 0) + { + return false; + } + else + { + blockPos_ += r; + } + } + + processedBytes_ += block_.size(); + + block.swap(block_); + block_.clear(); + return true; + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/OrthancFramework/Sources/DicomFormat/StreamBlockReader.h Wed Sep 30 15:33:47 2020 +0200 @@ -0,0 +1,66 @@ +/** + * Orthanc - A Lightweight, RESTful DICOM Store + * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics + * Department, University Hospital of Liege, Belgium + * Copyright (C) 2017-2020 Osimis S.A., Belgium + * + * This program is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program. If not, see + * <http://www.gnu.org/licenses/>. + **/ + + +#pragma once + +#include <boost/noncopyable.hpp> +#include <istream> +#include <string> + + +namespace Orthanc +{ + /** + * This class is used to extract blocks of given size from a + * stream. Bytes from the stream are buffered until the requested + * size is available, and the full block can be returned. + **/ + class StreamBlockReader : public boost::noncopyable + { + private: + std::istream& stream_; + std::string block_; + size_t blockPos_; + uint64_t processedBytes_; + + public: + StreamBlockReader(std::istream& stream); + + /** + * Schedule the size of the next block to be extracted from the + * stream. + **/ + void Schedule(size_t blockSize); + + /** + * Extract the block whose size was configured by the previous + * call to "Schedule()". Returns "false" iff not enough bytes are + * available from the stream yet: In this case, try again later. + **/ + bool Read(std::string& block); + + uint64_t GetProcessedBytes() const + { + return processedBytes_; + } + }; +}
--- a/OrthancFramework/UnitTestsSources/DicomMapTests.cpp Wed Sep 30 12:18:43 2020 +0200 +++ b/OrthancFramework/UnitTestsSources/DicomMapTests.cpp Wed Sep 30 15:33:47 2020 +0200 @@ -34,6 +34,7 @@ #include "../Sources/Compatibility.h" #include "../Sources/OrthancException.h" #include "../Sources/DicomFormat/DicomMap.h" +#include "../Sources/DicomFormat/DicomStreamReader.h" #include "../Sources/DicomParsing/FromDcmtkBridge.h" #include "../Sources/DicomParsing/ToDcmtkBridge.h" #include "../Sources/DicomParsing/ParsedDicomFile.h" @@ -798,721 +799,6 @@ namespace { - class StreamBlockReader : public boost::noncopyable - { - private: - std::istream& stream_; - std::string block_; - size_t blockPos_; - uint64_t processedBytes_; - - public: - StreamBlockReader(std::istream& stream) : - stream_(stream), - blockPos_(0), - processedBytes_(0) - { - } - - void Schedule(size_t blockSize) - { - if (!block_.empty()) - { - throw OrthancException(ErrorCode_BadSequenceOfCalls); - } - else - { - block_.resize(blockSize); - blockPos_ = 0; - } - } - - bool Read(std::string& block) - { - if (block_.empty()) - { - if (blockPos_ != 0) - { - throw OrthancException(ErrorCode_BadSequenceOfCalls); - } - - block.clear(); - return true; - } - else - { - while (blockPos_ < block_.size()) - { -#if 0 - char c; - stream_.get(c); - - if (stream_.good()) - { - block_[blockPos_] = c; - blockPos_++; - } - else - { - return false; - } -#else - size_t n = block_.size() - blockPos_; - std::streamsize r = stream_.readsome(&block_[blockPos_], n); - if (r == 0) - { - return false; - } - else - { - blockPos_ += r; - } -#endif - } - - processedBytes_ += block_.size(); - - block.swap(block_); - block_.clear(); - return true; - } - } - - uint64_t GetProcessedBytes() const - { - return processedBytes_; - } - }; - - - - - /** - * This class parses a stream containing a DICOM instance. It does - * *not* support the visit of sequences (it only works at the first - * level of the hierarchy), and it stops the processing once pixel - * data is reached in compressed transfer syntaxes. - **/ - class DicomStreamReader : public boost::noncopyable - { - public: - class IVisitor : public boost::noncopyable - { - public: - virtual ~IVisitor() - { - } - - // The data from this function will always be Little Endian (as - // specified by the DICOM standard) - virtual void VisitMetaHeaderTag(const DicomTag& tag, - const ValueRepresentation& vr, - const std::string& value) = 0; - - // Return "false" to stop processing - virtual bool VisitDatasetTag(const DicomTag& tag, - const ValueRepresentation& vr, - DicomTransferSyntax transferSyntax, - const std::string& value, - bool isLittleEndian) = 0; - }; - - private: - enum State - { - State_Preamble, - State_MetaHeader, - State_DatasetTag, - State_SequenceExplicitLength, - State_SequenceExplicitValue, - State_DatasetExplicitLength, - State_DatasetValue, - State_Done - }; - - StreamBlockReader reader_; - State state_; - DicomTransferSyntax transferSyntax_; - DicomTag previousTag_; - DicomTag danglingTag_; // Root-level tag - ValueRepresentation danglingVR_; - unsigned int sequenceDepth_; - - static uint16_t ReadUnsignedInteger16(const char* dicom, - bool littleEndian) - { - const uint8_t* p = reinterpret_cast<const uint8_t*>(dicom); - - if (littleEndian) - { - return (static_cast<uint16_t>(p[0]) | - (static_cast<uint16_t>(p[1]) << 8)); - } - else - { - return (static_cast<uint16_t>(p[1]) | - (static_cast<uint16_t>(p[0]) << 8)); - } - } - - - static uint32_t ReadUnsignedInteger32(const char* dicom, - bool littleEndian) - { - const uint8_t* p = reinterpret_cast<const uint8_t*>(dicom); - - if (littleEndian) - { - return (static_cast<uint32_t>(p[0]) | - (static_cast<uint32_t>(p[1]) << 8) | - (static_cast<uint32_t>(p[2]) << 16) | - (static_cast<uint32_t>(p[3]) << 24)); - } - else - { - return (static_cast<uint32_t>(p[3]) | - (static_cast<uint32_t>(p[2]) << 8) | - (static_cast<uint32_t>(p[1]) << 16) | - (static_cast<uint32_t>(p[0]) << 24)); - } - } - - - static DicomTag ReadTag(const char* dicom, - bool littleEndian) - { - return DicomTag(ReadUnsignedInteger16(dicom, littleEndian), - ReadUnsignedInteger16(dicom + 2, littleEndian)); - } - - - static bool IsShortExplicitTag(ValueRepresentation vr) - { - /** - * Are we in the case of Table 7.1-2? "Data Element with - * Explicit VR of AE, AS, AT, CS, DA, DS, DT, FL, FD, IS, LO, - * LT, PN, SH, SL, SS, ST, TM, UI, UL and US" - * http://dicom.nema.org/medical/dicom/current/output/chtml/part05/chapter_7.html#sect_7.1.2 - **/ - return (vr == ValueRepresentation_ApplicationEntity /* AE */ || - vr == ValueRepresentation_AgeString /* AS */ || - vr == ValueRepresentation_AttributeTag /* AT */ || - vr == ValueRepresentation_CodeString /* CS */ || - vr == ValueRepresentation_Date /* DA */ || - vr == ValueRepresentation_DecimalString /* DS */ || - vr == ValueRepresentation_DateTime /* DT */ || - vr == ValueRepresentation_FloatingPointSingle /* FL */ || - vr == ValueRepresentation_FloatingPointDouble /* FD */ || - vr == ValueRepresentation_IntegerString /* IS */ || - vr == ValueRepresentation_LongString /* LO */ || - vr == ValueRepresentation_LongText /* LT */ || - vr == ValueRepresentation_PersonName /* PN */ || - vr == ValueRepresentation_ShortString /* SH */ || - vr == ValueRepresentation_SignedLong /* SL */ || - vr == ValueRepresentation_SignedShort /* SS */ || - vr == ValueRepresentation_ShortText /* ST */ || - vr == ValueRepresentation_Time /* TM */ || - vr == ValueRepresentation_UniqueIdentifier /* UI */ || - vr == ValueRepresentation_UnsignedLong /* UL */ || - vr == ValueRepresentation_UnsignedShort /* US */); - } - - - bool IsLittleEndian() const - { - return (transferSyntax_ != DicomTransferSyntax_BigEndianExplicit); - } - - - void PrintBlock(const std::string& block) - { - for (size_t i = 0; i < block.size(); i++) - { - printf("%02x ", static_cast<uint8_t>(block[i])); - if (i % 16 == 15) - printf("\n"); - } - printf("\n"); - } - - void HandlePreamble(IVisitor& visitor, - const std::string& block) - { - //printf("PREAMBLE:\n"); - //PrintBlock(block); - - assert(block.size() == 144u); - assert(reader_.GetProcessedBytes() == 144u); - - /** - * The "DICOM file meta information" is always encoded using - * "Explicit VR Little Endian Transfer Syntax" - * http://dicom.nema.org/medical/dicom/current/output/chtml/part10/chapter_7.html - **/ - if (block[128] != 'D' || - block[129] != 'I' || - block[130] != 'C' || - block[131] != 'M' || - ReadTag(block.c_str() + 132, true) != DicomTag(0x0002, 0x0000) || - block[136] != 'U' || - block[137] != 'L' || - ReadUnsignedInteger16(block.c_str() + 138, true) != 4) - { - throw OrthancException(ErrorCode_BadFileFormat); - } - - uint32_t length = ReadUnsignedInteger32(block.c_str() + 140, true); - - reader_.Schedule(length); - state_ = State_MetaHeader; - } - - - void HandleMetaHeader(IVisitor& visitor, - const std::string& block) - { - //printf("META-HEADER:\n"); - //PrintBlock(block); - - size_t pos = 0; - const char* p = block.c_str(); - - bool hasTransferSyntax = false; - - while (pos + 8 <= block.size()) - { - DicomTag tag = ReadTag(p + pos, true); - - ValueRepresentation vr = StringToValueRepresentation(std::string(p + pos + 4, 2), true); - - if (IsShortExplicitTag(vr)) - { - uint16_t length = ReadUnsignedInteger16(p + pos + 6, true); - - std::string value; - value.assign(p + pos + 8, length); - - if (tag.GetGroup() == 0x0002) - { - visitor.VisitMetaHeaderTag(tag, vr, value); - } - - if (tag == DICOM_TAG_TRANSFER_SYNTAX_UID) - { - // Remove possible padding byte - if (!value.empty() && - value[value.size() - 1] == '\0') - { - value.resize(value.size() - 1); - } - - if (LookupTransferSyntax(transferSyntax_, value)) - { - hasTransferSyntax = true; - } - else - { - throw OrthancException(ErrorCode_NotImplemented, "Unsupported transfer syntax: " + value); - } - } - - pos += length + 8; - } - else if (pos + 12 <= block.size()) - { - uint16_t reserved = ReadUnsignedInteger16(p + pos + 6, true); - if (reserved != 0) - { - break; - } - - uint32_t length = ReadUnsignedInteger32(p + pos + 8, true); - - std::string value; - value.assign(p + pos + 12, length); - - if (tag.GetGroup() == 0x0002) - { - visitor.VisitMetaHeaderTag(tag, vr, value); - } - - pos += length + 12; - } - } - - if (pos != block.size()) - { - throw OrthancException(ErrorCode_BadFileFormat); - } - - if (!hasTransferSyntax) - { - throw OrthancException(ErrorCode_BadFileFormat, "DICOM file meta-header without transfer syntax UID"); - } - - reader_.Schedule(8); - state_ = State_DatasetTag; - } - - - void HandleDatasetTag(const std::string& block, - const DicomTag& untilTag) - { - static const DicomTag DICOM_TAG_SEQUENCE_ITEM(0xfffe, 0xe000); - static const DicomTag DICOM_TAG_SEQUENCE_DELIMITATION_ITEM(0xfffe, 0xe00d); - static const DicomTag DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE(0xfffe, 0xe0dd); - - assert(block.size() == 8u); - - const bool littleEndian = IsLittleEndian(); - DicomTag tag = ReadTag(block.c_str(), littleEndian); - - if (sequenceDepth_ == 0 && - tag >= untilTag) - { - state_ = State_Done; - return; - } - - if (tag == DICOM_TAG_SEQUENCE_ITEM || - tag == DICOM_TAG_SEQUENCE_DELIMITATION_ITEM || - tag == DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE) - { - //printf("SEQUENCE TAG:\n"); - //PrintBlock(block); - - // The special sequence items are encoded like "Implicit VR" - uint32_t length = ReadUnsignedInteger32(block.c_str() + 4, littleEndian); - - if (tag == DICOM_TAG_SEQUENCE_ITEM) - { - for (unsigned int i = 0; i <= sequenceDepth_; i++) - printf(" "); - if (length == 0xffffffffu) - { - // Undefined length: Need to loop over the tags of the nested dataset - printf("...next dataset in sequence...\n"); - reader_.Schedule(8); - state_ = State_DatasetTag; - } - else - { - // Explicit length: Can skip the full sequence at once - printf("...next dataset in sequence... %u bytes\n", length); - reader_.Schedule(length); - state_ = State_DatasetValue; - } - } - else if (tag == DICOM_TAG_SEQUENCE_DELIMITATION_ITEM || - tag == DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE) - { - if (length != 0 || - sequenceDepth_ == 0) - { - throw OrthancException(ErrorCode_BadFileFormat); - } - - if (tag == DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE) - { - for (unsigned int i = 0; i < sequenceDepth_; i++) - printf(" "); - printf("...leaving sequence...\n"); - - sequenceDepth_ --; - } - else - { - if (sequenceDepth_ == 0) - { - throw OrthancException(ErrorCode_BadFileFormat); - } - } - - reader_.Schedule(8); - state_ = State_DatasetTag; - } - else - { - throw OrthancException(ErrorCode_InternalError); - } - } - else - { - //printf("DATASET TAG:\n"); - //PrintBlock(block); - - previousTag_ = tag; - - ValueRepresentation vr = ValueRepresentation_Unknown; - - if (transferSyntax_ == DicomTransferSyntax_LittleEndianImplicit) - { - if (sequenceDepth_ == 0) - { - danglingTag_ = tag; - danglingVR_ = vr; - } - - uint32_t length = ReadUnsignedInteger32(block.c_str() + 4, true /* little endian */); - HandleDatasetExplicitLength(length); - } - else - { - // This in an explicit transfer syntax - - vr = StringToValueRepresentation( - std::string(block.c_str() + 4, 2), false /* ignore unknown VR */); - - if (vr != ValueRepresentation_Sequence && - sequenceDepth_ > 0) - { - for (unsigned int i = 0; i <= sequenceDepth_; i++) - printf(" "); - printf("%s\n", tag.Format().c_str()); - } - - if (vr == ValueRepresentation_Sequence) - { - for (unsigned int i = 0; i <= sequenceDepth_; i++) - printf(" "); - printf("...entering sequence... %s\n", tag.Format().c_str()); - sequenceDepth_ ++; - reader_.Schedule(4); - state_ = State_SequenceExplicitLength; - } - else if (IsShortExplicitTag(vr)) - { - uint16_t length = ReadUnsignedInteger16(block.c_str() + 6, littleEndian); - - reader_.Schedule(length); - state_ = State_DatasetValue; - } - else - { - uint16_t reserved = ReadUnsignedInteger16(block.c_str() + 6, littleEndian); - if (reserved != 0) - { - throw OrthancException(ErrorCode_BadFileFormat); - } - - reader_.Schedule(4); - state_ = State_DatasetExplicitLength; - } - - if (sequenceDepth_ == 0) - { - danglingTag_ = tag; - danglingVR_ = vr; - } - } - } - } - - - void HandleDatasetExplicitLength(uint32_t length) - { - if (length == 0xffffffffu) - { - /** - * This is the case of pixel data with compressed transfer - * syntaxes. Schedule the reading of the first tag of the - * nested dataset. - * http://dicom.nema.org/medical/dicom/current/output/chtml/part05/sect_7.5.html - **/ - - for (unsigned int i = 0; i <= sequenceDepth_; i++) - printf(" "); - printf("...entering sequence... %s\n", previousTag_.Format().c_str()); - - state_ = State_DatasetTag; - reader_.Schedule(8); - sequenceDepth_ ++; - } - else - { - reader_.Schedule(length); - state_ = State_DatasetValue; - } - } - - void HandleDatasetExplicitLength(const std::string& block) - { - //printf("DATASET TAG LENGTH:\n"); - //PrintBlock(block); - - assert(block.size() == 4); - - uint32_t length = ReadUnsignedInteger32(block.c_str(), IsLittleEndian()); - HandleDatasetExplicitLength(length); - } - - void HandleSequenceExplicitLength(const std::string& block) - { - //printf("DATASET TAG LENGTH:\n"); - //PrintBlock(block); - - assert(block.size() == 4); - - uint32_t length = ReadUnsignedInteger32(block.c_str(), IsLittleEndian()); - if (length == 0xffffffffu) - { - state_ = State_DatasetTag; - reader_.Schedule(8); - } - else - { - for (unsigned int i = 0; i <= sequenceDepth_; i++) - printf(" "); - printf("...skipping sequence thanks to explicit length... %d\n", length); - - reader_.Schedule(length); - state_ = State_SequenceExplicitValue; - } - } - - void HandleSequenceExplicitValue() - { - if (sequenceDepth_ == 0) - { - throw OrthancException(ErrorCode_InternalError); - } - - sequenceDepth_ --; - - state_ = State_DatasetTag; - reader_.Schedule(8); - } - - - void HandleDatasetValue(IVisitor& visitor, - const std::string& block) - { - if (sequenceDepth_ == 0) - { - bool c; - - if (!block.empty() && - (block[block.size() - 1] == ' ' || - block[block.size() - 1] == '\0') && - (danglingVR_ == ValueRepresentation_ApplicationEntity || - danglingVR_ == ValueRepresentation_AgeString || - danglingVR_ == ValueRepresentation_CodeString || - danglingVR_ == ValueRepresentation_DecimalString || - danglingVR_ == ValueRepresentation_IntegerString || - danglingVR_ == ValueRepresentation_LongString || - danglingVR_ == ValueRepresentation_LongText || - danglingVR_ == ValueRepresentation_PersonName || - danglingVR_ == ValueRepresentation_ShortString || - danglingVR_ == ValueRepresentation_ShortText || - danglingVR_ == ValueRepresentation_UniqueIdentifier || - danglingVR_ == ValueRepresentation_UnlimitedText)) - { - std::string s(block.begin(), block.end() - 1); - c = visitor.VisitDatasetTag(danglingTag_, danglingVR_, transferSyntax_, s, IsLittleEndian()); - } - else - { - c = visitor.VisitDatasetTag(danglingTag_, danglingVR_, transferSyntax_, block, IsLittleEndian()); - } - - if (!c) - { - state_ = State_Done; - return; - } - } - - reader_.Schedule(8); - state_ = State_DatasetTag; - } - - - public: - DicomStreamReader(std::istream& stream) : - reader_(stream), - state_(State_Preamble), - transferSyntax_(DicomTransferSyntax_LittleEndianImplicit), // Dummy - previousTag_(0x0000, 0x0000), // Dummy - danglingTag_(0x0000, 0x0000), // Dummy - danglingVR_(ValueRepresentation_Unknown), // Dummy - sequenceDepth_(0) - { - reader_.Schedule(128 /* empty header */ + - 4 /* "DICM" magic value */ + - 4 /* (0x0002, 0x0000) tag */ + - 2 /* value representation of (0x0002, 0x0000) == "UL" */ + - 2 /* length of "UL" value == 4 */ + - 4 /* actual length of the meta-header */); - } - - void Consume(IVisitor& visitor, - const DicomTag& untilTag) - { - while (state_ != State_Done) - { - std::string block; - if (reader_.Read(block)) - { - switch (state_) - { - case State_Preamble: - HandlePreamble(visitor, block); - break; - - case State_MetaHeader: - HandleMetaHeader(visitor, block); - break; - - case State_DatasetTag: - HandleDatasetTag(block, untilTag); - break; - - case State_DatasetExplicitLength: - HandleDatasetExplicitLength(block); - break; - - case State_SequenceExplicitLength: - HandleSequenceExplicitLength(block); - break; - - case State_SequenceExplicitValue: - HandleSequenceExplicitValue(); - break; - - case State_DatasetValue: - HandleDatasetValue(visitor, block); - break; - - default: - throw OrthancException(ErrorCode_InternalError); - } - } - else - { - return; // No more data in the stream - } - } - } - - void Consume(IVisitor& visitor) - { - DicomTag untilTag(0xffff, 0xffff); - Consume(visitor, untilTag); - } - - bool IsDone() const - { - return (state_ == State_Done); - } - - uint64_t GetProcessedBytes() const - { - return reader_.GetProcessedBytes(); - } - }; - - - class V : public DicomStreamReader::IVisitor { private: @@ -1531,9 +817,13 @@ std::cout << "Header: " << tag.Format() << " [" << Toolbox::ConvertToAscii(value).c_str() << "] (" << value.size() << ")" << std::endl; } + virtual void VisitTransferSyntax(DicomTransferSyntax transferSyntax) ORTHANC_OVERRIDE + { + printf("TRANSFER SYNTAX: %s\n", GetTransferSyntaxUid(transferSyntax)); + } + virtual bool VisitDatasetTag(const DicomTag& tag, const ValueRepresentation& vr, - DicomTransferSyntax transferSyntax, const std::string& value, bool isLittleEndian) ORTHANC_OVERRIDE { @@ -1648,8 +938,8 @@ try { - //r.Consume(visitor, DICOM_TAG_PIXEL_DATA); - r.Consume(visitor); + r.Consume(visitor, DICOM_TAG_PIXEL_DATA); + //r.Consume(visitor); success++; } catch (OrthancException& e)