view OrthancFramework/Sources/DicomFormat/DicomStreamReader.cpp @ 5911:bfae0fc2ea1b get-scu-test

Started to work on handling errors as warnings when trying to store instances whose SOPClassUID has not been accepted during the negotiation. Work to be finalized later
author Alain Mazy <am@orthanc.team>
date Mon, 09 Dec 2024 10:07:19 +0100
parents f7adfb22e20e
children ff0b54e8bc71
line wrap: on
line source

/**
 * Orthanc - A Lightweight, RESTful DICOM Store
 * Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics
 * Department, University Hospital of Liege, Belgium
 * Copyright (C) 2017-2023 Osimis S.A., Belgium
 * Copyright (C) 2024-2024 Orthanc Team SRL, Belgium
 * Copyright (C) 2021-2024 Sebastien Jodogne, ICTEAM UCLouvain, Belgium
 *
 * This program is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/>.
 **/


#include "../PrecompiledHeaders.h"
#include "DicomStreamReader.h"

#include "../OrthancException.h"

#include <cassert>
#include <sstream>
#include <boost/iostreams/device/array.hpp>
#include <boost/iostreams/stream.hpp>


#include <iostream>

namespace Orthanc
{
  static bool IsNormalizationNeeded(const std::string& source,
                                    ValueRepresentation vr)
  {
    return (!source.empty() &&
            (source[source.size() - 1] == ' ' ||
             source[source.size() - 1] == '\0') &&
            // Normalization only applies to string-based VR
            (vr == ValueRepresentation_ApplicationEntity ||
             vr == ValueRepresentation_AgeString ||
             vr == ValueRepresentation_CodeString ||
             vr == ValueRepresentation_DecimalString ||
             vr == ValueRepresentation_IntegerString ||
             vr == ValueRepresentation_LongString ||
             vr == ValueRepresentation_LongText ||
             vr == ValueRepresentation_PersonName ||
             vr == ValueRepresentation_ShortString ||
             vr == ValueRepresentation_ShortText ||
             vr == ValueRepresentation_UniqueIdentifier ||
             vr == ValueRepresentation_UnlimitedText));
  }

  
  static void NormalizeValue(std::string& inplace,
                             ValueRepresentation vr)
  {
    if (IsNormalizationNeeded(inplace, vr))
    {
      assert(!inplace.empty());
      inplace.resize(inplace.size() - 1);
    }
  }

    
  static uint16_t ReadUnsignedInteger16(const char* dicom,
                                        bool littleEndian)
  {
    const uint8_t* p = reinterpret_cast<const uint8_t*>(dicom);

    if (littleEndian)
    {
      return (static_cast<uint16_t>(p[0]) |
              (static_cast<uint16_t>(p[1]) << 8));
    }
    else
    {
      return (static_cast<uint16_t>(p[1]) |
              (static_cast<uint16_t>(p[0]) << 8));
    }
  }


  static uint32_t ReadUnsignedInteger32(const char* dicom,
                                        bool littleEndian)
  {
    const uint8_t* p = reinterpret_cast<const uint8_t*>(dicom);

    if (littleEndian)
    {
      return (static_cast<uint32_t>(p[0]) |
              (static_cast<uint32_t>(p[1]) << 8) |
              (static_cast<uint32_t>(p[2]) << 16) |
              (static_cast<uint32_t>(p[3]) << 24));
    }
    else
    {
      return (static_cast<uint32_t>(p[3]) |
              (static_cast<uint32_t>(p[2]) << 8) |
              (static_cast<uint32_t>(p[1]) << 16) |
              (static_cast<uint32_t>(p[0]) << 24));
    }
  }


  static DicomTag ReadTag(const char* dicom,
                          bool littleEndian)
  {
    return DicomTag(ReadUnsignedInteger16(dicom, littleEndian),
                    ReadUnsignedInteger16(dicom + 2, littleEndian));
  }


  static bool IsShortExplicitTag(ValueRepresentation vr)
  {
    /**
     * Are we in the case of Table 7.1-2? "Data Element with
     * Explicit VR of AE, AS, AT, CS, DA, DS, DT, FL, FD, IS, LO,
     * LT, PN, SH, SL, SS, ST, TM, UI, UL and US"
     * http://dicom.nema.org/medical/dicom/current/output/chtml/part05/chapter_7.html#sect_7.1.2
     **/
    return (vr == ValueRepresentation_ApplicationEntity   /* AE */ ||
            vr == ValueRepresentation_AgeString           /* AS */ ||
            vr == ValueRepresentation_AttributeTag        /* AT */ ||
            vr == ValueRepresentation_CodeString          /* CS */ ||
            vr == ValueRepresentation_Date                /* DA */ ||
            vr == ValueRepresentation_DecimalString       /* DS */ ||
            vr == ValueRepresentation_DateTime            /* DT */ ||
            vr == ValueRepresentation_FloatingPointSingle /* FL */ ||
            vr == ValueRepresentation_FloatingPointDouble /* FD */ ||
            vr == ValueRepresentation_IntegerString       /* IS */ ||
            vr == ValueRepresentation_LongString          /* LO */ ||
            vr == ValueRepresentation_LongText            /* LT */ ||
            vr == ValueRepresentation_PersonName          /* PN */ ||
            vr == ValueRepresentation_ShortString         /* SH */ ||
            vr == ValueRepresentation_SignedLong          /* SL */ ||
            vr == ValueRepresentation_SignedShort         /* SS */ ||
            vr == ValueRepresentation_ShortText           /* ST */ ||
            vr == ValueRepresentation_Time                /* TM */ ||
            vr == ValueRepresentation_UniqueIdentifier    /* UI */ ||
            vr == ValueRepresentation_UnsignedLong        /* UL */ ||
            vr == ValueRepresentation_UnsignedShort       /* US */);
  }


  bool DicomStreamReader::IsLittleEndian() const
  {
    return (transferSyntax_ != DicomTransferSyntax_BigEndianExplicit);
  }


  void DicomStreamReader::HandlePreamble(IVisitor& visitor,
                                         const std::string& block)
  {
    assert(block.size() == 144u);
    assert(reader_.GetProcessedBytes() == 144u);

    /**
     * The "DICOM file meta information" is always encoded using
     * "Explicit VR Little Endian Transfer Syntax"
     * http://dicom.nema.org/medical/dicom/current/output/chtml/part10/chapter_7.html
     **/
    if (block[128] != 'D' ||
        block[129] != 'I' ||
        block[130] != 'C' ||
        block[131] != 'M' ||
        ReadTag(block.c_str() + 132, true) != DicomTag(0x0002, 0x0000) ||
        block[136] != 'U' ||
        block[137] != 'L' ||
        ReadUnsignedInteger16(block.c_str() + 138, true) != 4)
    {
      throw OrthancException(ErrorCode_BadFileFormat);
    }

    uint32_t length = ReadUnsignedInteger32(block.c_str() + 140, true);

    reader_.Schedule(length);
    state_ = State_MetaHeader;
  }


  void DicomStreamReader::HandleMetaHeader(IVisitor& visitor,
                                           const std::string& block)
  {
    size_t pos = 0;
    const char* p = block.c_str();

    bool hasTransferSyntax = false;

    while (pos + 8 <= block.size())
    {
      DicomTag tag = ReadTag(p + pos, true);
        
      ValueRepresentation vr = StringToValueRepresentation(std::string(p + pos + 4, 2), true);

      if (IsShortExplicitTag(vr))
      {
        uint16_t length = ReadUnsignedInteger16(p + pos + 6, true);

        std::string value;
        value.assign(p + pos + 8, length);
        NormalizeValue(value, vr);

        if (tag.GetGroup() == 0x0002)
        {
          visitor.VisitMetaHeaderTag(tag, vr, value);
        }                  

        if (tag == DICOM_TAG_TRANSFER_SYNTAX_UID)
        {
          if (LookupTransferSyntax(transferSyntax_, value))
          {
            hasTransferSyntax = true;
          }
          else
          {
            throw OrthancException(ErrorCode_NotImplemented, "Unsupported transfer syntax: " + value);
          }
        }
          
        pos += length + 8;
      }
      else if (pos + 12 <= block.size())
      {
        uint16_t reserved = ReadUnsignedInteger16(p + pos + 6, true);
        if (reserved != 0)
        {
          break;
        }
          
        uint32_t length = ReadUnsignedInteger32(p + pos + 8, true);

        if (tag.GetGroup() == 0x0002)
        {
          std::string value;
          value.assign(p + pos + 12, length);
          NormalizeValue(value, vr);
          visitor.VisitMetaHeaderTag(tag, vr, value);
        }                  
          
        pos += length + 12;
      }
    }

    if (pos != block.size())
    {
      throw OrthancException(ErrorCode_BadFileFormat);
    }

    if (!hasTransferSyntax)
    {
      throw OrthancException(ErrorCode_BadFileFormat, "DICOM file meta-header without transfer syntax UID");
    }

    visitor.VisitTransferSyntax(transferSyntax_);

    reader_.Schedule(8);
    state_ = State_DatasetTag;
  }
    

  void DicomStreamReader::HandleDatasetTag(const std::string& block,
                                           const DicomTag& untilTag)
  {
    static const DicomTag DICOM_TAG_SEQUENCE_ITEM(0xfffe, 0xe000);
    static const DicomTag DICOM_TAG_SEQUENCE_DELIMITATION_ITEM(0xfffe, 0xe00d);
    static const DicomTag DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE(0xfffe, 0xe0dd);

    assert(block.size() == 8u);

    const bool littleEndian = IsLittleEndian();
    DicomTag tag = ReadTag(block.c_str(), littleEndian);

    if (sequenceDepth_ == 0 &&
        tag >= untilTag)
    {
      state_ = State_Done;
      return;
    }
      
    if (tag == DICOM_TAG_SEQUENCE_ITEM ||
        tag == DICOM_TAG_SEQUENCE_DELIMITATION_ITEM ||
        tag == DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE)
    {
      // The special sequence items are encoded like "Implicit VR"
      uint32_t length = ReadUnsignedInteger32(block.c_str() + 4, littleEndian);

      if (tag == DICOM_TAG_SEQUENCE_ITEM)
      {
        if (length == 0xffffffffu)
        {
          // Undefined length: Need to loop over the tags of the nested dataset
          reader_.Schedule(8);
          state_ = State_DatasetTag;
        }
        else
        {
          // Explicit length: Can skip the full sequence at once
          reader_.Schedule(length);
          state_ = State_DatasetValue;
        }
      }
      else if (tag == DICOM_TAG_SEQUENCE_DELIMITATION_ITEM ||
               tag == DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE)
      {
        if (length != 0 ||
            sequenceDepth_ == 0)
        {
          throw OrthancException(ErrorCode_BadFileFormat);
        }

        if (tag == DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE)
        {
          sequenceDepth_ --;
        }

        reader_.Schedule(8);
        state_ = State_DatasetTag;          
      }
      else
      {
        throw OrthancException(ErrorCode_InternalError);
      }
    }
    else
    {
      assert(reader_.GetProcessedBytes() >= block.size());
      const uint64_t tagOffset = reader_.GetProcessedBytes() - block.size();
        
      ValueRepresentation vr = ValueRepresentation_Unknown;
        
      if (transferSyntax_ == DicomTransferSyntax_LittleEndianImplicit)
      {
        if (sequenceDepth_ == 0)
        {
          danglingTag_ = tag;
          danglingVR_ = vr;
          danglingOffset_ = tagOffset;
        }

        uint32_t length = ReadUnsignedInteger32(block.c_str() + 4, true /* little endian */);
        HandleDatasetExplicitLength(length);
      }
      else
      {
        // This in an explicit transfer syntax

        vr = StringToValueRepresentation(
          std::string(block.c_str() + 4, 2), false /* ignore unknown VR */);

        if (vr == ValueRepresentation_Sequence)
        {
          sequenceDepth_ ++;
          reader_.Schedule(4);
          state_ = State_SequenceExplicitLength;
        }
        else if (IsShortExplicitTag(vr))
        {
          uint16_t length = ReadUnsignedInteger16(block.c_str() + 6, littleEndian);

          reader_.Schedule(length);
          state_ = State_DatasetValue;
        }
        else
        {
          uint16_t reserved = ReadUnsignedInteger16(block.c_str() + 6, littleEndian);
          if (reserved != 0)
          {
            throw OrthancException(ErrorCode_BadFileFormat);
          }

          reader_.Schedule(4);
          state_ = State_DatasetExplicitLength;
        }

        if (sequenceDepth_ == 0)
        {
          danglingTag_ = tag;
          danglingVR_ = vr;
          danglingOffset_ = tagOffset;
        }
      }
    }
  }


  void DicomStreamReader::HandleDatasetExplicitLength(uint32_t length)
  {
    if (length == 0xffffffffu)
    {
      /**
       * This is the case of pixel data with compressed transfer
       * syntaxes. Schedule the reading of the first tag of the
       * nested dataset.
       * http://dicom.nema.org/medical/dicom/current/output/chtml/part05/sect_7.5.html
       **/
      state_ = State_DatasetTag;
      reader_.Schedule(8);
      sequenceDepth_ ++;
    }
    else
    {
      reader_.Schedule(length);
      state_ = State_DatasetValue;
    }
  }    

    
  void DicomStreamReader::HandleDatasetExplicitLength(IVisitor& visitor,
                                                      const std::string& block)
  {
    assert(block.size() == 4);

    uint32_t length = ReadUnsignedInteger32(block.c_str(), IsLittleEndian());
    HandleDatasetExplicitLength(length);

    std::string empty;
    if (!visitor.VisitDatasetTag(danglingTag_, danglingVR_, empty, IsLittleEndian(), danglingOffset_))
    {
      state_ = State_Done;
    }
  }
    

  void DicomStreamReader::HandleSequenceExplicitLength(const std::string& block)
  {
    assert(block.size() == 4);

    uint32_t length = ReadUnsignedInteger32(block.c_str(), IsLittleEndian());
    if (length == 0xffffffffu)
    {
      state_ = State_DatasetTag;
      reader_.Schedule(8);
    }
    else
    {
      reader_.Schedule(length);
      state_ = State_SequenceExplicitValue;
    }
  }

    
  void DicomStreamReader::HandleSequenceExplicitValue()
  {
    if (sequenceDepth_ == 0)
    {
      throw OrthancException(ErrorCode_InternalError);
    }

    sequenceDepth_ --;

    state_ = State_DatasetTag;
    reader_.Schedule(8);
  }


  void DicomStreamReader::HandleDatasetValue(IVisitor& visitor,
                                             const std::string& block)
  {
    if (sequenceDepth_ == 0)
    {
      bool c;

      if (IsNormalizationNeeded(block, danglingVR_))
      {
        std::string s(block.begin(), block.end() - 1);
        c = visitor.VisitDatasetTag(danglingTag_, danglingVR_, s, IsLittleEndian(), danglingOffset_);
      }
      else
      {
        c = visitor.VisitDatasetTag(danglingTag_, danglingVR_, block, IsLittleEndian(), danglingOffset_);
      }
      
      if (!c)
      {
        state_ = State_Done;
        return;
      }
    }

    reader_.Schedule(8);
    state_ = State_DatasetTag;
  }
    
    
  DicomStreamReader::DicomStreamReader(std::istream& stream) :
    reader_(stream),
    state_(State_Preamble),
    transferSyntax_(DicomTransferSyntax_LittleEndianImplicit),  // Dummy
    danglingTag_(0x0000, 0x0000),  // Dummy
    danglingVR_(ValueRepresentation_Unknown),  // Dummy
    danglingOffset_(0),  // Dummy
    sequenceDepth_(0)
  {
    reader_.Schedule(128 /* empty header */ +
                     4 /* "DICM" magic value */ +
                     4 /* (0x0002, 0x0000) tag */ +
                     2 /* value representation of (0x0002, 0x0000) == "UL" */ +
                     2 /* length of "UL" value == 4 */ +
                     4 /* actual length of the meta-header */);
  }

  
  void DicomStreamReader::Consume(IVisitor& visitor,
                                  const DicomTag& untilTag)
  {
    while (state_ != State_Done)
    {
      std::string block;
      if (reader_.Read(block))
      {
        switch (state_)
        {
          case State_Preamble:
            HandlePreamble(visitor, block);
            break;

          case State_MetaHeader:
            HandleMetaHeader(visitor, block);
            break;

          case State_DatasetTag:
            HandleDatasetTag(block, untilTag);
            break;

          case State_DatasetExplicitLength:
            HandleDatasetExplicitLength(visitor, block);
            break;

          case State_SequenceExplicitLength:
            HandleSequenceExplicitLength(block);
            break;

          case State_SequenceExplicitValue:
            HandleSequenceExplicitValue();
            break;

          case State_DatasetValue:
            HandleDatasetValue(visitor, block);
            break;

          default:
            throw OrthancException(ErrorCode_InternalError);
        }
      }
      else
      {
        return;  // No more data in the stream
      }
    }
  }


  void DicomStreamReader::Consume(IVisitor& visitor)
  {
    DicomTag untilTag(0xffff, 0xffff);
    Consume(visitor, untilTag);
  }


  bool DicomStreamReader::IsDone() const
  {
    return (state_ == State_Done);
  }

  
  uint64_t DicomStreamReader::GetProcessedBytes() const
  {
    return reader_.GetProcessedBytes();
  }


  class DicomStreamReader::PixelDataVisitor : public DicomStreamReader::IVisitor
  {
  private:
    bool                 hasPixelData_;
    uint64_t             pixelDataOffset_;
    ValueRepresentation  pixelDataVR_;
    DicomTransferSyntax  transferSyntax_;
    
  public:
    PixelDataVisitor() :
      hasPixelData_(false),
      pixelDataOffset_(0),
      pixelDataVR_(ValueRepresentation_Unknown),
      transferSyntax_(DicomTransferSyntax_LittleEndianImplicit) // Default DICOM transfer syntax
    {
    }
    
    virtual void VisitMetaHeaderTag(const DicomTag& tag,
                                    const ValueRepresentation& vr,
                                    const std::string& value) ORTHANC_OVERRIDE
    {
    }

    virtual void VisitTransferSyntax(DicomTransferSyntax transferSyntax) ORTHANC_OVERRIDE
    {
      transferSyntax_ = transferSyntax;
    }
    
    virtual bool VisitDatasetTag(const DicomTag& tag,
                                 const ValueRepresentation& vr,
                                 const std::string& value,
                                 bool isLittleEndian,
                                 uint64_t fileOffset) ORTHANC_OVERRIDE
    {
      if (tag == DICOM_TAG_PIXEL_DATA)
      {
        hasPixelData_ = true;
        pixelDataOffset_ = fileOffset;

        if (transferSyntax_ == DicomTransferSyntax_LittleEndianImplicit)
        {
          // Implicit Little Endian has always "OW" VR for pixel data
          // https://dicom.nema.org/medical/dicom/current/output/chtml/part05/chapter_A.html
          pixelDataVR_ = ValueRepresentation_OtherWord;
        }
        else if (transferSyntax_ == DicomTransferSyntax_LittleEndianExplicit ||
                 transferSyntax_ == DicomTransferSyntax_BigEndianExplicit)
        {
          pixelDataVR_ = vr;
        }
        else
        {
          // Compressed transfer syntaxes must always be OB
          pixelDataVR_ = ValueRepresentation_OtherByte;
        }
      }

      // Stop processing once pixel data has been passed
      return (tag < DICOM_TAG_PIXEL_DATA);
    }

    bool HasPixelData() const
    {
      return hasPixelData_;
    }

    uint64_t GetPixelDataOffset() const
    {
      return pixelDataOffset_;
    }

    ValueRepresentation GetPixelDataVR() const
    {
      return pixelDataVR_;
    }

    static bool LookupPixelDataOffset(uint64_t& offset,
                                      ValueRepresentation& vr,
                                      std::istream& stream)
    {
      PixelDataVisitor visitor;
      bool isLittleEndian;

      {
        DicomStreamReader reader(stream);

        try
        {
          reader.Consume(visitor);
          isLittleEndian = reader.IsLittleEndian();
        }
        catch (OrthancException& e)
        {
          // Invalid DICOM file
          return false;
        }
      }

      if (visitor.HasPixelData())
      {
        // Sanity check if we face an unsupported DICOM file: Make
        // sure that we can read DICOM_TAG_PIXEL_DATA at the reported
        // position in the stream
        stream.seekg(visitor.GetPixelDataOffset(), stream.beg);
        
        std::string s;
        s.resize(4);
        stream.read(&s[0], s.size());

        if (!isLittleEndian)
        {
          // Byte swapping if reading a file whose transfer syntax is
          // 1.2.840.10008.1.2.2 (big endian explicit)
          std::swap(s[0], s[1]);
          std::swap(s[2], s[3]);          
        }
        
        if (stream.gcount() == static_cast<std::streamsize>(s.size()) &&
            s[0] == char(0xe0) &&
            s[1] == char(0x7f) &&
            s[2] == char(0x10) &&
            s[3] == char(0x00))
        {
          offset = visitor.GetPixelDataOffset();
          vr = visitor.GetPixelDataVR();
          return true;
        }
        else
        {
          return false;
        }
      }
      else
      {
        return false;
      }
    }
  };

  
  bool DicomStreamReader::LookupPixelDataOffset(uint64_t& offset,
                                                ValueRepresentation& vr,
                                                const std::string& dicom)
  {
    std::stringstream stream(dicom);
    return PixelDataVisitor::LookupPixelDataOffset(offset, vr, stream);
  }
  

  bool DicomStreamReader::LookupPixelDataOffset(uint64_t& offset,
                                                ValueRepresentation& vr,
                                                const void* buffer,
                                                size_t size)
  {
    boost::iostreams::array_source source(reinterpret_cast<const char*>(buffer), size);
    boost::iostreams::stream<boost::iostreams::array_source> stream(source);
    return PixelDataVisitor::LookupPixelDataOffset(offset, vr, stream);
  }
}