/*
 *
 *  Copyright (C) 2011-2017, OFFIS e.V.
 *  All rights reserved.  See COPYRIGHT file for details.
 *
 *  This software and supporting documentation were developed by
 *
 *    OFFIS e.V.
 *    R&D Division Health
 *    Escherweg 2
 *    D-26121 Oldenburg, Germany
 *
 *
 *  Module:  dcmdata
 *
 *  Author:  Joerg Riesmeier
 *
 *  Purpose: Class for supporting the Specific Character Set attribute
 *
 */


#include "dcmtk/config/osconfig.h"    /* make sure OS specific configuration is included first */

#include "dcmtk/dcmdata/dcspchrs.h"
#include "dcmtk/dcmdata/dcitem.h"
#include "dcmtk/dcmdata/dcbytstr.h"
#include "dcmtk/dcmdata/dcdeftag.h"
#include "dcmtk/ofstd/ofstream.h"
#include "dcmtk/ofstd/ofstd.h"


#define MAX_OUTPUT_STRING_LENGTH 60


/*------------------*
 *  implementation  *
 *------------------*/

DcmSpecificCharacterSet::DcmSpecificCharacterSet()
  : SourceCharacterSet(),
    DestinationCharacterSet(),
    DestinationEncoding(),
    DefaultEncodingConverter(),
    EncodingConverters()
{
}


DcmSpecificCharacterSet::~DcmSpecificCharacterSet()
{
    clear();
}


void DcmSpecificCharacterSet::clear()
{
    DefaultEncodingConverter.clear();
    EncodingConverters.clear();
    SourceCharacterSet.clear();
    DestinationCharacterSet.clear();
    DestinationEncoding.clear();
}


DcmSpecificCharacterSet::operator OFBool() const
{
    return OFstatic_cast(OFBool, DefaultEncodingConverter);
}


OFBool DcmSpecificCharacterSet::operator!() const
{
    return !DefaultEncodingConverter;
}


const OFString &DcmSpecificCharacterSet::getSourceCharacterSet() const
{
    return SourceCharacterSet;
}


const OFString &DcmSpecificCharacterSet::getDestinationCharacterSet() const
{
    return DestinationCharacterSet;
}


const OFString &DcmSpecificCharacterSet::getDestinationEncoding() const
{
    return DestinationEncoding;
}


unsigned DcmSpecificCharacterSet::getConversionFlags() const
{
    return DefaultEncodingConverter.getConversionFlags();
}


OFCondition DcmSpecificCharacterSet::setConversionFlags(const unsigned flags)
{
    if (!EncodingConverters.empty())
    {
        /* pass conversion flags to all "encoding converters" */
        for (T_EncodingConvertersMap::iterator it = EncodingConverters.begin();
            it != EncodingConverters.end(); ++it)
        {
            OFCondition status = it->second.setConversionFlags(flags);
            if (status.bad())
                return status;
        }
        return EC_Normal;
    } else return DefaultEncodingConverter.setConversionFlags(flags);
}


OFCondition DcmSpecificCharacterSet::selectCharacterSet(const OFString &fromCharset,
                                                        const OFString &toCharset)
{
    // first, make sure that all converters are cleared
    clear();
    // determine the destination encoding (and check whether it is supported at all)
    OFCondition status = determineDestinationEncoding(toCharset);
    if (status.good())
    {
        // normalize the given string (original VR is "CS" with VM "1-n")
        SourceCharacterSet = fromCharset;
        normalizeString(SourceCharacterSet, MULTIPART, DELETE_LEADING, DELETE_TRAILING);
        // check whether it is multi-valued
        const unsigned long sourceVM = DcmElement::determineVM(SourceCharacterSet.c_str(), SourceCharacterSet.length());
        if (sourceVM == 0)
        {
            // no character set specified, use ASCII
            status = DefaultEncodingConverter.selectEncoding("ASCII", DestinationEncoding);
            // output some useful debug information
            if (status.good())
            {
                DCMDATA_DEBUG("DcmSpecificCharacterSet: Selected character set '' (ASCII) "
                    << "for the conversion to " << DestinationEncoding);
            }
        }
        else if (sourceVM == 1)
        {
            // a single character set specified (no code extensions)
            status = selectCharacterSetWithoutCodeExtensions();
        } else {
            // multiple character sets specified (code extensions used)
            status = selectCharacterSetWithCodeExtensions(sourceVM);
        }
    }
    return status;
}


OFCondition DcmSpecificCharacterSet::selectCharacterSet(DcmItem &dataset,
                                                        const OFString &toCharset)
{
    OFString fromCharset;
    // check whether Specific Character Set (0008,0005) is present in the given item/dataset
    dataset.findAndGetOFStringArray(DCM_SpecificCharacterSet, fromCharset, OFFalse /*searchIntoSub*/);
    // if missing or empty, the default character set (ASCII) will be used
    return selectCharacterSet(fromCharset, toCharset);
}


OFCondition DcmSpecificCharacterSet::determineDestinationEncoding(const OFString &toCharset)
{
    OFCondition status = EC_Normal;
    // normalize the given string (original VR is "CS" with VM "1-n", but we only support VM "1")
    DestinationCharacterSet = toCharset;
    normalizeString(DestinationCharacterSet, !MULTIPART, DELETE_LEADING, DELETE_TRAILING);
    // there should only be a single character set specified (no code extensions)
    if (DestinationCharacterSet.empty())                // ASCII (no value)
        DestinationEncoding = "ASCII";
    else if (DestinationCharacterSet == "ISO_IR 6")     // ASCII
    {
        DCMDATA_WARN("DcmSpecificCharacterSet: 'ISO_IR 6' is not a defined term in DICOM, "
            << "will be treated as an empty value (ASCII)");
        DestinationCharacterSet.clear();
        DestinationEncoding = "ASCII";
    }
    else if (DestinationCharacterSet == "ISO_IR 100")   // Latin alphabet No. 1
        DestinationEncoding = "ISO-8859-1";
    else if (DestinationCharacterSet == "ISO_IR 101")   // Latin alphabet No. 2
        DestinationEncoding = "ISO-8859-2";
    else if (DestinationCharacterSet == "ISO_IR 109")   // Latin alphabet No. 3
        DestinationEncoding = "ISO-8859-3";
    else if (DestinationCharacterSet == "ISO_IR 110")   // Latin alphabet No. 4
        DestinationEncoding = "ISO-8859-4";
    else if (DestinationCharacterSet == "ISO_IR 144")   // Cyrillic
        DestinationEncoding = "ISO-8859-5";
    else if (DestinationCharacterSet == "ISO_IR 127")   // Arabic
        DestinationEncoding = "ISO-8859-6";
    else if (DestinationCharacterSet == "ISO_IR 126")   // Greek
        DestinationEncoding = "ISO-8859-7";
    else if (DestinationCharacterSet == "ISO_IR 138")   // Hebrew
        DestinationEncoding = "ISO-8859-8";
    else if (DestinationCharacterSet == "ISO_IR 148")   // Latin alphabet No. 5
        DestinationEncoding = "ISO-8859-9";
    else if (DestinationCharacterSet == "ISO_IR 13")    // Japanese
#if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_ICONV
        DestinationEncoding = "JIS_X0201";              // - the name "ISO-IR-13" is not supported by libiconv
#else
        DestinationEncoding = "Shift_JIS";              // - ICU and stdlibc iconv only know "Shift_JIS" (is this mapping correct?)
#endif
    else if (DestinationCharacterSet == "ISO_IR 166")   // Thai
#if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_ICU
        DestinationEncoding = "TIS-620";                // - the name "ISO-IR-166" is not supported by ICU
#else
        DestinationEncoding = "ISO-IR-166";
#endif
    else if (DestinationCharacterSet == "ISO_IR 192")   // Unicode in UTF-8 (multi-byte)
        DestinationEncoding = "UTF-8";
    else if (DestinationCharacterSet == "GB18030")      // Chinese (multi-byte)
        DestinationEncoding = "GB18030";
    else if (DestinationCharacterSet == "GBK")          // Chinese (multi-byte, subset of "GB 18030")
        DestinationEncoding = "GBK";
    else {
        DestinationEncoding.clear();
        // create an appropriate error code
        OFOStringStream stream;
        stream << "Cannot select destination character set: SpecificCharacterSet (0008,0005) value '"
            << DestinationCharacterSet << "' not supported" << OFStringStream_ends;
        OFSTRINGSTREAM_GETOFSTRING(stream, message)
        status = makeOFCondition(OFM_dcmdata, EC_CODE_CannotSelectCharacterSet, OF_error, message.c_str());
    }
    return status;
}


OFCondition DcmSpecificCharacterSet::selectCharacterSetWithoutCodeExtensions()
{
    OFCondition status = EC_Normal;
    // a single character set specified (no code extensions)
    OFString fromEncoding;
    if (SourceCharacterSet == "ISO_IR 6")           // ASCII
    {
        DCMDATA_WARN("DcmSpecificCharacterSet: 'ISO_IR 6' is not a defined term in DICOM, "
            << "will be treated as an empty value (ASCII)");
        SourceCharacterSet.clear();
        fromEncoding = "ASCII";
    }
    else if (SourceCharacterSet == "ISO_IR 100")    // Latin alphabet No. 1
        fromEncoding = "ISO-8859-1";
    else if (SourceCharacterSet == "ISO_IR 101")    // Latin alphabet No. 2
        fromEncoding = "ISO-8859-2";
    else if (SourceCharacterSet == "ISO_IR 109")    // Latin alphabet No. 3
        fromEncoding = "ISO-8859-3";
    else if (SourceCharacterSet == "ISO_IR 110")    // Latin alphabet No. 4
        fromEncoding = "ISO-8859-4";
    else if (SourceCharacterSet == "ISO_IR 144")    // Cyrillic
        fromEncoding = "ISO-8859-5";
    else if (SourceCharacterSet == "ISO_IR 127")    // Arabic
        fromEncoding = "ISO-8859-6";
    else if (SourceCharacterSet == "ISO_IR 126")    // Greek
        fromEncoding = "ISO-8859-7";
    else if (SourceCharacterSet == "ISO_IR 138")    // Hebrew
        fromEncoding = "ISO-8859-8";
    else if (SourceCharacterSet == "ISO_IR 148")    // Latin alphabet No. 5
        fromEncoding = "ISO-8859-9";
    else if (SourceCharacterSet == "ISO_IR 13")     // Japanese
#if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_ICONV
        fromEncoding = "JIS_X0201";                 // - the name "ISO-IR-13" is not supported by libiconv
#else
        fromEncoding = "Shift_JIS";                 // - ICU and stdlibc iconv only know "Shift_JIS" (is this mapping correct?)
#endif
    else if (SourceCharacterSet == "ISO_IR 166")    // Thai
#if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_ICU
        fromEncoding = "TIS-620";                   // - the name "ISO-IR-166" is not supported by ICU
#else
        fromEncoding = "ISO-IR-166";
#endif
    else if (SourceCharacterSet == "ISO_IR 192")    // Unicode in UTF-8 (multi-byte)
        fromEncoding = "UTF-8";
    else if (SourceCharacterSet == "GB18030")       // Chinese (multi-byte)
        fromEncoding = "GB18030";
    else if (SourceCharacterSet == "GBK")           // Chinese (multi-byte, subset of "GB 18030")
        fromEncoding = "GBK";
    else {
        // create an appropriate error code
        OFOStringStream stream;
        stream << "Cannot select source character set: SpecificCharacterSet (0008,0005) value '"
            << SourceCharacterSet << "' not supported" << OFStringStream_ends;
        OFSTRINGSTREAM_GETOFSTRING(stream, message)
        status = makeOFCondition(OFM_dcmdata, EC_CODE_CannotSelectCharacterSet, OF_error, message.c_str());
    }
    // check whether an appropriate character encoding has been found
    if (!fromEncoding.empty())
    {
        status = DefaultEncodingConverter.selectEncoding(fromEncoding, DestinationEncoding);
        // output some useful debug information
        if (status.good())
        {
            DCMDATA_DEBUG("DcmSpecificCharacterSet: Selected character set '" << SourceCharacterSet
                << "' (" << fromEncoding << ") for the conversion to " << DestinationEncoding);
        }
    }
    return status;
}


OFCondition DcmSpecificCharacterSet::selectCharacterSetWithCodeExtensions(const unsigned long sourceVM)
{
    // first, check whether multiple character sets are specified (i.e. code extensions used)
    if (sourceVM <= 1)
        return EC_IllegalCall;
    // then proceed with the real work
    OFCondition status = EC_Normal;
    size_t pos = 0;
    OFBool needsASCII = OFFalse;
    OFBool notFirstValue = OFFalse;
    OFString definedTerm;
    unsigned long i = 0;
    while ((i < sourceVM) && status.good())
    {
        // extract single value from string (separated by a backslash)
        pos = DcmElement::getValueFromString(SourceCharacterSet.c_str(), pos, SourceCharacterSet.length(), definedTerm);
        if (definedTerm.empty() && (i == 0))            // assuming ASCII (according to DICOM PS 3.5)
            definedTerm = "ISO 2022 IR 6";
        // determine character encoding from DICOM defined term
        OFString encodingName;
        if (definedTerm == "ISO 2022 IR 6")             // ASCII
            encodingName = "ASCII";
        else if (definedTerm == "ISO 2022 IR 100")      // Latin alphabet No. 1
        {
            encodingName = "ISO-8859-1";
            needsASCII = OFTrue;
        }
        else if (definedTerm == "ISO 2022 IR 101")      // Latin alphabet No. 2
        {
            encodingName = "ISO-8859-2";
            needsASCII = OFTrue;
        }
        else if (definedTerm == "ISO 2022 IR 109")      // Latin alphabet No. 3
        {
            encodingName = "ISO-8859-3";
            needsASCII = OFTrue;
        }
        else if (definedTerm == "ISO 2022 IR 110")      // Latin alphabet No. 4
        {
            encodingName = "ISO-8859-4";
            needsASCII = OFTrue;
        }
        else if (definedTerm == "ISO 2022 IR 144")      // Cyrillic
        {
            encodingName = "ISO-8859-5";
            needsASCII = OFTrue;
        }
        else if (definedTerm == "ISO 2022 IR 127")      // Arabic
        {
            encodingName = "ISO-8859-6";
            needsASCII = OFTrue;
        }
        else if (definedTerm == "ISO 2022 IR 126")      // Greek
        {
            encodingName = "ISO-8859-7";
            needsASCII = OFTrue;
        }
        else if (definedTerm == "ISO 2022 IR 138")      // Hebrew
        {
            encodingName = "ISO-8859-8";
            needsASCII = OFTrue;
        }
        else if (definedTerm == "ISO 2022 IR 148")      // Latin alphabet No. 5
        {
            encodingName = "ISO-8859-9";
            needsASCII = OFTrue;
        }
        else if (definedTerm == "ISO 2022 IR 13")       // Japanese
        {
#if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_ICONV
            encodingName = "JIS_X0201";                 // - the name "ISO-IR-13" is not supported by libiconv
#else
            encodingName = "Shift_JIS";                 // - ICU and stdlibc iconv only know "Shift_JIS" (is this mapping correct?)
#endif
        }
        else if (definedTerm == "ISO 2022 IR 166")      // Thai
        {
#if DCMTK_ENABLE_CHARSET_CONVERSION == DCMTK_CHARSET_CONVERSION_ICU
            encodingName = "TIS-620";                   // - "ISO-IR-166" is not supported by ICU
#else
            encodingName = "ISO-IR-166";
#endif
            needsASCII = OFTrue;
        }
        else if (definedTerm == "ISO 2022 IR 87")       // Japanese (multi-byte)
        {
            encodingName = "ISO-IR-87";                 // - this might generate an error since "ISO-IR-87" is not supported by ICU and stdlibc iconv
            notFirstValue = OFTrue;
        }
        else if (definedTerm == "ISO 2022 IR 159")      // Japanese (multi-byte)
        {
            encodingName = "ISO-IR-159";                // - this might generate an error since "ISO-IR-159" is not supported by ICU and stdlibc iconv
            notFirstValue = OFTrue;
        }
        else if (definedTerm == "ISO 2022 IR 149")      // Korean (multi-byte)
        {
            encodingName = "EUC-KR";                    // - is this mapping really correct?
            notFirstValue = OFTrue;                     //   "ISO-IR-149" does not work with the sample from DICOM PS 3.5
        }
        else if (definedTerm == "ISO 2022 IR 58")       // Simplified Chinese (multi-byte)
        {
            encodingName = "GB2312";                    // - should work, but not tested yet!
            notFirstValue = OFTrue;
        }
        else {
            // create an appropriate error code
            OFOStringStream stream;
            stream << "Cannot select source character set: SpecificCharacterSet (0008,0005) value " << (i + 1)
                << " of " << sourceVM << " '" << definedTerm << "' not supported" << OFStringStream_ends;
            OFSTRINGSTREAM_GETOFSTRING(stream, message)
            status = makeOFCondition(OFM_dcmdata, EC_CODE_CannotSelectCharacterSet, OF_error, message.c_str());
        }
        // check whether character set is allowed as the default (first value)
        if ((i == 0) && notFirstValue)
        {
            OFOStringStream stream;
            stream << "Cannot select source character set: '" << definedTerm << "' is not a allowed "
                << "as the first value in SpecificCharacterSet (0008,0005)" << OFStringStream_ends;
            OFSTRINGSTREAM_GETOFSTRING(stream, message)
            status = makeOFCondition(OFM_dcmdata, EC_CODE_CannotSelectCharacterSet, OF_error, message.c_str());
        }
        // add descriptor to the map using the defined term as a key
        if (status.good() && !encodingName.empty())
        {
            OFPair<T_EncodingConvertersMap::iterator,OFBool> conv = EncodingConverters.insert(
                OFMake_pair(definedTerm, OFCharacterEncoding()));
            // but first check whether this encoding has already been added before
            if (conv.second)
            {
                status = conv.first->second.selectEncoding(encodingName, DestinationEncoding);
                if (status.good())
                {
                    // output some useful debug information
                    DCMDATA_DEBUG("DcmSpecificCharacterSet: Added character set '" << definedTerm
                        << "' (" << encodingName << ") for the conversion to " << DestinationEncoding);
                    // also remember the default descriptor, which refers to the first character set
                    if (i == 0)
                    {
                        DefaultEncodingConverter = conv.first->second;
                        DCMDATA_TRACE("DcmSpecificCharacterSet: Also selected this character set "
                            << "(i.e. '" << definedTerm << "') as the default one");
                    }
                } else {
                    DCMDATA_ERROR("DcmSpecificCharacterSet: '" << definedTerm <<
                        "' is not supported by the utilized character set conversion library '"
                        << OFCharacterEncoding::getLibraryVersionString() << '\'');
                    EncodingConverters.erase(conv.first);
                }
            } else {
                DCMDATA_WARN("DcmSpecificCharacterSet: '" << definedTerm << "' is defined more than once "
                    << "in SpecificCharacterSet (0008,0005), ignoring the duplicate definition");
            }
        }
        ++i;
    }
    // add ASCII to the map if needed but not already there
    if (status.good() && needsASCII)
    {
        OFPair<T_EncodingConvertersMap::iterator,OFBool> conv = EncodingConverters.insert(
            OFMake_pair(OFString("ISO 2022 IR 6"), OFCharacterEncoding()));
        if (conv.second)
        {
            status = conv.first->second.selectEncoding("ASCII", DestinationEncoding);
            if (status.good())
            {
                // output some useful debug information
                DCMDATA_DEBUG("DcmSpecificCharacterSet: Added character set 'ISO 2022 IR 6' (ASCII) "
                    << "for the conversion to " << DestinationEncoding
                    << " (because it is needed for one or more of the previously added character sets)");
            } else {
                DCMDATA_ERROR("DcmSpecificCharacterSet: 'ISO 2022 IR 6' is not supported by"
                    << " the utilized character set conversion library '"
                    << OFCharacterEncoding::getLibraryVersionString() << '\'');
                EncodingConverters.erase(conv.first);
            }
        }
    }
    return status;
}


OFCondition DcmSpecificCharacterSet::convertString(const OFString &fromString,
                                                   OFString &toString,
                                                   const OFString &delimiters)
{
    // call the real method converting the given string
    return convertString(fromString.c_str(), fromString.length(), toString, delimiters);
}


OFCondition DcmSpecificCharacterSet::convertString(const char *fromString,
                                                   const size_t fromLength,
                                                   OFString &toString,
                                                   const OFString &delimiters)
{
    OFCondition status = EC_Normal;
    // check whether there are any code extensions at all
    if (EncodingConverters.empty() || !checkForEscapeCharacter(fromString, fromLength))
    {
        DCMDATA_DEBUG("DcmSpecificCharacterSet: Converting '"
            << convertToLengthLimitedOctalString(fromString, fromLength) << "'");
        // no code extensions according to ISO 2022 used - this is the simple case
        status = DefaultEncodingConverter.convertString(fromString, fromLength, toString, OFTrue /*clearMode*/);
    } else {
        if (delimiters.empty())
        {
            DCMDATA_DEBUG("DcmSpecificCharacterSet: Converting '"
                << convertToLengthLimitedOctalString(fromString, fromLength)
                << "' (with code extensions)");
        } else {
            DCMDATA_DEBUG("DcmSpecificCharacterSet: Converting '"
                << convertToLengthLimitedOctalString(fromString, fromLength)
                << "' (with code extensions and delimiters '" << delimiters << "')");
        }
        // code extensions according to ISO 2022 used, so we need to check for
        // particular escape sequences in order to switch between character sets
        toString.clear();
        size_t pos = 0;
        // some (extended) character sets use more than 1 byte per character
        // (however, the default character set always uses a single byte)
        unsigned char bytesPerChar = 1;
        // check whether '=' is a delimiter, as it is used in PN values
        OFBool isFirstGroup = (delimiters.find('=') != OFString_npos);
        // by default, we expect that delimiters can be checked by their corresponding ASCII codes
        // (this implies that the default character set is not "ISO 2022 IR 87" or "ISO 2022 IR 159")
        OFBool checkDelimiters = OFTrue;
        const char *firstChar = fromString;
        const char *currentChar = fromString;
        // initially, use the default descriptor
        OFCharacterEncoding converter = DefaultEncodingConverter;
        DCMDATA_TRACE("  Starting with the default character set");
        // iterate over all characters of the string (as long as there is no error)
        while ((pos < fromLength) && status.good())
        {
            const char c0 = *currentChar++;
            // check for characters ESC, HT, LF, FF, CR or any other specified delimiter
            const OFBool isEscape = (c0 == '\033');
            const OFBool isDelimiter = checkDelimiters &&
                ((c0 == '\011') || (c0 == '\012') || (c0 == '\014') || (c0 == '\015') || (delimiters.find(c0) != OFString_npos));
            if (isEscape || isDelimiter)
            {
                // convert the sub-string (before the delimiter) with the current character set
                const size_t convertLength = currentChar - firstChar - 1;
                if (convertLength > 0)
                {
                    // output some debug information
                    DCMDATA_TRACE("    Converting sub-string '"
                        << convertToLengthLimitedOctalString(firstChar, convertLength) << "'");
                    status = converter.convertString(firstChar, convertLength, toString, OFFalse /*clearMode*/);
                    if (status.bad())
                        DCMDATA_TRACE("    -> ERROR: " << status.text());
                }
                // check whether this was the first component group of a PN value
                if (isDelimiter && (c0 == '='))
                    isFirstGroup = OFFalse;
            }
            // the ESC character is used to explicitly switch between character sets
            if (isEscape)
            {
                // report a warning as this is a violation of DICOM PS 3.5 Section 6.2.1
                if (isFirstGroup)
                {
                    DCMDATA_WARN("DcmSpecificCharacterSet: Escape sequences shall not be used "
                        << "in the first component group of a Person Name (PN), using them anyway");
                }
                // we need at least two more characters to determine the new character set
                size_t escLength = 2;
                if (pos + escLength < fromLength)
                {
                    OFString key;
                    const char c1 = *currentChar++;
                    const char c2 = *currentChar++;
                    char c3 = '\0';
                    if ((c1 == 0x28) && (c2 == 0x42))       // ASCII
                        key = "ISO 2022 IR 6";
                    else if ((c1 == 0x2d) && (c2 == 0x41))  // Latin alphabet No. 1
                        key = "ISO 2022 IR 100";
                    else if ((c1 == 0x2d) && (c2 == 0x42))  // Latin alphabet No. 2
                        key = "ISO 2022 IR 101";
                    else if ((c1 == 0x2d) && (c2 == 0x43))  // Latin alphabet No. 3
                        key = "ISO 2022 IR 109";
                    else if ((c1 == 0x2d) && (c2 == 0x44))  // Latin alphabet No. 4
                        key = "ISO 2022 IR 110";
                    else if ((c1 == 0x2d) && (c2 == 0x4c))  // Cyrillic
                        key = "ISO 2022 IR 144";
                    else if ((c1 == 0x2d) && (c2 == 0x47))  // Arabic
                        key = "ISO 2022 IR 127";
                    else if ((c1 == 0x2d) && (c2 == 0x46))  // Greek
                        key = "ISO 2022 IR 126";
                    else if ((c1 == 0x2d) && (c2 == 0x48))  // Hebrew
                        key = "ISO 2022 IR 138";
                    else if ((c1 == 0x2d) && (c2 == 0x4d))  // Latin alphabet No. 5
                        key = "ISO 2022 IR 148";
                    else if ((c1 == 0x29) && (c2 == 0x49))  // Japanese
                        key = "ISO 2022 IR 13";
                    else if ((c1 == 0x28) && (c2 == 0x4a))  // Japanese - is this really correct?
                        key = "ISO 2022 IR 13";
                    else if ((c1 == 0x2d) && (c2 == 0x54))  // Thai
                        key = "ISO 2022 IR 166";
                    else if ((c1 == 0x24) && (c2 == 0x42))  // Japanese (multi-byte)
                        key = "ISO 2022 IR 87";
                    else if ((c1 == 0x24) && (c2 == 0x28))  // Japanese (multi-byte)
                    {
                        escLength = 3;
                        // do we still have another character in the string?
                        if (pos + escLength < fromLength)
                        {
                            c3 = *currentChar++;
                            if (c3 == 0x44)
                                key = "ISO 2022 IR 159";
                        }
                    }
                    else if ((c1 == 0x24) && (c2 == 0x29)) // might be Korean or Chinese
                    {
                        escLength = 3;
                        // do we still have another character in the string?
                        if (pos + escLength < fromLength)
                        {
                            c3 = *currentChar++;
                            if (c3 == 0x43)                // Korean (single- and multi-byte)
                                key = "ISO 2022 IR 149";
                            else if (c3 == 0x41)           // Simplified Chinese (multi-byte)
                                key = "ISO 2022 IR 58";
                        }
                    }
                    // check whether a valid escape sequence has been found
                    if (key.empty())
                    {
                        OFOStringStream stream;
                        stream << "Cannot convert character set: Illegal escape sequence 'ESC "
                            << STD_NAMESPACE dec << STD_NAMESPACE setfill('0')
                            << STD_NAMESPACE setw(2) << OFstatic_cast(int, c1 >> 4) << "/"
                            << STD_NAMESPACE setw(2) << OFstatic_cast(int, c1 & 0x0f) << " "
                            << STD_NAMESPACE setw(2) << OFstatic_cast(int, c2 >> 4) << "/"
                            << STD_NAMESPACE setw(2) << OFstatic_cast(int, c2 & 0x0f);
                        if (escLength == 3)
                        {
                            stream << " " << STD_NAMESPACE setw(2) << OFstatic_cast(int, c3 >> 4) << "/"
                                << STD_NAMESPACE setw(2) << OFstatic_cast(int, c3 & 0x0f);
                        }
                        stream  << "' found" << OFStringStream_ends;
                        OFSTRINGSTREAM_GETOFSTRING(stream, message)
                        status = makeOFCondition(OFM_dcmdata, EC_CODE_CannotConvertCharacterSet, OF_error, message.c_str());
                    }
                    if (status.good())
                    {
                        DCMDATA_TRACE("  Switching to character set '" << key << "'");
                        T_EncodingConvertersMap::const_iterator it = EncodingConverters.find(key);
                        // check whether the descriptor was found in the map, i.e. properly declared in (0008,0005)
                        if (it != EncodingConverters.end())
                        {
                            converter = it->second;
                            // special case: these Japanese character sets replace the ASCII part (G0 code area),
                            // so according to DICOM PS 3.5 Section 6.2.1.2 an explicit switch to the default is required
                            checkDelimiters = (key != "ISO 2022 IR 87") && (key != "ISO 2022 IR 159");
                            // determine number of bytes per character (used by the selected character set)
                            if ((key == "ISO 2022 IR 87") || (key == "ISO 2022 IR 159") || (key == "ISO 2022 IR 58"))
                            {
                                DCMDATA_TRACE("    Now using 2 bytes per character");
                                bytesPerChar = 2;
                            }
                            else if (key == "ISO 2022 IR 149")
                            {
                                DCMDATA_TRACE("    Now using 1 or 2 bytes per character");
                                bytesPerChar = 0;      // special handling for single- and multi-byte
                            } else {
                                DCMDATA_TRACE("    Now using 1 byte per character");
                                bytesPerChar = 1;
                            }
                        } else {
                            OFOStringStream stream;
                            stream << "Cannot convert character set: Escape sequence refers to character set '" << key << "' that "
                                "was not declared in SpecificCharacterSet (0008,0005)" << OFStringStream_ends;
                            OFSTRINGSTREAM_GETOFSTRING(stream, message)
                            status = makeOFCondition(OFM_dcmdata, EC_CODE_CannotConvertCharacterSet, OF_error, message.c_str());
                        }
                    }
                    pos += escLength;
                }
                // check whether the escape sequence was complete
                if (status.good() && (pos >= fromLength))
                {
                    OFOStringStream stream;
                    stream << "Cannot convert character set: Incomplete escape sequence (" << (escLength + 1)
                        << " bytes expected) at the end of the string to be converted" << OFStringStream_ends;
                    OFSTRINGSTREAM_GETOFSTRING(stream, message)
                    status = makeOFCondition(OFM_dcmdata, EC_CODE_CannotConvertCharacterSet, OF_error, message.c_str());
                }
                // do not copy the escape sequence to the output
                firstChar = currentChar;
            }
            // the HT, LF, FF, CR character or other delimiters (depending on the VR) also cause a switch
            else if (isDelimiter)
            {
                // output some debug information
                DCMDATA_TRACE("    Appending delimiter '"
                    << convertToLengthLimitedOctalString(currentChar - 1 /* identical to c0 */, 1)
                    << "' to the output");
                // don't forget to append the delimiter
                toString += c0;
                // use the default descriptor again (see DICOM PS 3.5)
                if (converter != DefaultEncodingConverter)
                {
                    DCMDATA_TRACE("  Switching back to the default character set (because a delimiter was found)");
                    converter = DefaultEncodingConverter;
                    checkDelimiters = OFTrue;
                }
                // start new sub-string after delimiter
                firstChar = currentChar;
            }
            // skip remaining bytes of current character (if any)
            else if (bytesPerChar != 1)
            {
                const size_t skipBytes = (bytesPerChar > 0) ? (bytesPerChar - 1) : ((c0 & 0x80) ? 1 : 0);
                if (pos + skipBytes < fromLength)
                    currentChar += skipBytes;
                pos += skipBytes;
            }
            ++pos;
        }
        if (status.good())
        {
            // convert any remaining characters from the input string
            const size_t convertLength = currentChar - firstChar;
            if (convertLength > 0)
            {
                // output some debug information
                DCMDATA_TRACE("    Converting remaining sub-string '"
                    << convertToLengthLimitedOctalString(firstChar, convertLength) << "'");
                status = converter.convertString(firstChar, convertLength, toString, OFFalse /*clearMode*/);
                if (status.bad())
                    DCMDATA_TRACE("    -> ERROR: " << status.text());
            }
        }
    }
    if (status.good())
    {
        // finally, output some debug information
        if (DestinationEncoding == "UTF-8")
        {
            // output code points only in case of UTF-8 output
            DCMDATA_TRACE("Converted result in " << DestinationEncoding << " is '"
                << convertToLengthLimitedOctalString(toString.c_str(), toString.length()) << "' ("
                << countCharactersInUTF8String(toString) << " code points)");
        } else {
            DCMDATA_TRACE("Converted result in " << DestinationEncoding << " is '"
                << convertToLengthLimitedOctalString(toString.c_str(), toString.length()) << "'");
        }
    }
    return status;
}


OFBool DcmSpecificCharacterSet::isConversionAvailable()
{
    // just call the appropriate function from the underlying class
    return OFCharacterEncoding::isLibraryAvailable();
}


size_t DcmSpecificCharacterSet::countCharactersInUTF8String(const OFString &utf8String)
{
    // just call the appropriate function from the underlying class
    return OFCharacterEncoding::countCharactersInUTF8String(utf8String);
}


OFBool DcmSpecificCharacterSet::checkForEscapeCharacter(const char *strValue,
                                                        const size_t strLength) const
{
    OFBool result = OFFalse;
    // iterate over the string of characters
    for (size_t pos = 0; pos < strLength; ++pos)
    {
        // and search for the first ESC character
        if (*strValue++ == '\033')
        {
            // then return with "true"
            result = OFTrue;
            break;
        }
    }
    return result;
}


OFString DcmSpecificCharacterSet::convertToLengthLimitedOctalString(const char *strValue,
                                                                    const size_t strLength) const
{
    OFString octalString;
    // convert given string to octal representation, allow one character more than the maximum ...
    OFStandard::convertToOctalString(OFString(strValue, strLength), octalString, MAX_OUTPUT_STRING_LENGTH + 1);
    // ... in order to determine whether trailing dots should be added, i.e. the string was cropped
    if (octalString.length() > MAX_OUTPUT_STRING_LENGTH)
    {
        octalString.erase(MAX_OUTPUT_STRING_LENGTH);
        octalString.append("...");
    }
    // return string by-value (in order to avoid another parameter)
    return octalString;
}