#pragma once

#include <boost/array.hpp>
#include <boost/range.hpp>
#include <boost/range/as_literal.hpp>
#include <boost/range/iterator_range.hpp>
#include <algorithm>
#include <string>
#include <exception>
#include <cstring>
#include <cstdio>
#include <cctype>
#include <iconv.h>
#include <errno.h>
#include "utf8.h"

namespace mulca_mime {

/*
 * Replaces numeric character references in s -- decimal "&#NNN;" and
 * hexadecimal "&#xHHH;" / "&#XHHH;" -- with the bytes wchar_to_utf8_char()
 * produces for the referenced code point. Every other byte of s is copied
 * through unchanged.
 *
 * Substitutions applied before encoding:
 *   - a high-surrogate reference immediately followed by a low-surrogate
 *     reference is combined into a single supplementary-plane code point;
 *   - an unpaired surrogate, U+FDD0..U+FDEF, any code point whose low 16 bits
 *     are FFFE or FFFF, and any value above U+10FFFF become U+FFFD.
 *
 * Text that only looks like a reference (no digits, or no terminating ';')
 * is copied verbatim, including the leading '&'.
 *
 * If wchar_to_utf8_char() reports failure, std::runtime_error is thrown unless
 * ignore_errors is set; with ignore_errors the reference text is copied
 * verbatim instead.
 */
inline std::string decode_numbered_entities(const std::string& s, bool ignore_errors = false)
{
    const unsigned long replacement_char = 0xFFFD;
    const unsigned long max_code_point = 0x10FFFF;

    std::string r;
    // Pending high surrogate (0 when none) and the offset in r at which its
    // U+FFFD placeholder starts, so that a following low surrogate can
    // replace exactly that placeholder.
    unsigned long high_surrogate = 0;
    std::string::size_type high_surrogate_pos = 0;

    for (std::string::size_type i = 0; i < s.size(); ++i) {
        if (s[i] == '&' && (i + 1) < s.size() && s[i + 1] == '#') {
            std::string::size_type p = i + 2;
            unsigned long v = 0;
            bool has_digits = false;

            // Digits are accumulated in an unsigned type and stop growing once
            // the value is past U+10FFFF: an arbitrarily long digit run can
            // then neither overflow nor wrap around to a valid code point.
            if (p < s.size() && (s[p] == 'X' || s[p] == 'x')) {
                for (++p; p < s.size(); ++p) {
                    // <cctype> functions require an unsigned char value.
                    const unsigned char c = static_cast<unsigned char>(s[p]);
                    if (!isxdigit(c)) {
                        break;
                    }
                    const unsigned long digit = isdigit(c)
                        ? static_cast<unsigned long>(c - '0')
                        : static_cast<unsigned long>(tolower(c) - 'a' + 10);
                    if (v <= max_code_point) {
                        v = v * 16 + digit;
                    }
                    has_digits = true;
                }
            } else {
                for (; p < s.size(); ++p) {
                    const unsigned char c = static_cast<unsigned char>(s[p]);
                    if (!isdigit(c)) {
                        break;
                    }
                    if (v <= max_code_point) {
                        v = v * 10 + static_cast<unsigned long>(c - '0');
                    }
                    has_digits = true;
                }
            }

            if (has_digits && p < s.size() && s[p] == ';') {
                bool completes_pair = false;
                unsigned long new_high_surrogate = 0;

                if (v >= 0xDC00 && v <= 0xDFFF) {          /* low surrogate */
                    if (high_surrogate != 0) {
                        completes_pair = true;
                        v = ((high_surrogate - 0xD800) << 10) + (v - 0xDC00) + 0x10000;
                    } else {
                        v = replacement_char;
                    }
                } else if (v >= 0xD800 && v <= 0xDBFF) {   /* high surrogate */
                    new_high_surrogate = v;
                    v = replacement_char;   // placeholder until a low surrogate arrives
                } else if ((v >= 0xFDD0 && v <= 0xFDEF) ||
                           ((v & 0xFFFE) == 0xFFFE) ||
                           v > max_code_point) {
                    v = replacement_char;
                }

                // Encode into a scratch string so that r is only modified when
                // the encoder reports success.
                std::string encoded;
                if (wchar_to_utf8_char(static_cast<wchar_t>(v),
                                       std::back_inserter(encoded), std::nothrow)) {
                    if (completes_pair) {
                        r.erase(high_surrogate_pos);   // drop the placeholder
                    }
                    high_surrogate = new_high_surrogate;
                    high_surrogate_pos = r.size();
                    r += encoded;
                    i = p;   // the loop increment steps over ';'
                    continue;
                }
                if (!ignore_errors) {
                    throw std::runtime_error("bad wchar: wchar_to_utf8_char failed");
                }
            }
        }

        // Literal byte (including the '&' of a malformed or unencodable
        // reference). A pending high surrogate is no longer adjacent to
        // whatever follows, so it must not be paired any more.
        high_surrogate = 0;
        r += s[i];
    }
    return r;
}


/*
 * If a unicode codepoint w is valid in XML 1.1, then copy it to dst sequence.
 * Otherwise, if w can be escaped in XML 1.1, then copy its escape sequence to dst.
 * Do nothing otherwise.
 *
 * Code points outside the XML 1.1 Char production
 *   [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
 * (i.e. 0, surrogates, U+FFFE, U+FFFF and anything above U+10FFFF) cannot be
 * represented even as a character reference, so nothing is written for them.
 *
 * Note: dst is a sequence of unicode codepoints (rather than utf8-encoded sequence).
 * dst is taken by value, so the caller's iterator object is not advanced.
 */
template <class Iterator>
void xml11_escape_wchar(wchar_t w, Iterator dst)
{
    const bool is_xml11_char = (w >= 0x1 && w <= 0xD7FF)
            || (w >= 0xE000 && w <= 0xFFFD)
            || (w >= 0x10000 && w <= 0x10FFFF);
    if (!is_xml11_char) {
        return;
    }

    // Control characters are written as decimal character references.
    // The ranges are deliberately left as they were: they are wider than
    // C0/C1 proper (space and U+00A0..U+00A9 are included), which is
    // redundant but still well-formed.
    if (w <= 32 || (w >= 127 && w <= 169)) {
        char entity[20];
        // The cast makes the argument type match "%d" regardless of how the
        // platform defines wchar_t.
        const int rv = snprintf(entity, sizeof(entity), "&#%d;", static_cast<int>(w));
        if (rv > 0) {
            copy_utf8_range(boost::iterator_range<const char*>(entity, entity + rv),
                            dst);
        }
        return;
    }

    // Markup-significant characters get their predefined entities; everything
    // else is copied as is.
    switch (w) {
        case '<':
            copy_utf8_range(boost::as_literal("&lt;"), dst);
            break;
        case '>':
            copy_utf8_range(boost::as_literal("&gt;"), dst);
            break;
        case '&':
            copy_utf8_range(boost::as_literal("&amp;"), dst);
            break;
        case '"':
            copy_utf8_range(boost::as_literal("&quot;"), dst);
            break;
        case '\'':
            copy_utf8_range(boost::as_literal("&apos;"), dst);
            break;
        default:
            *dst++ = w;
            break;
    }
}

/*
 * Writes the XML 1.0 representation of a single unicode codepoint w to dst:
 *   - '<', '>', '&', '"' and '\'' are written as their predefined entities;
 *   - tab, line feed and carriage return are written as character references;
 *   - any other codepoint that is a valid XML 1.0 character is copied as is;
 *   - everything else is dropped (nothing is written).
 *
 * Note: dst is a sequence of unicode codepoints (rather than utf8-encoded sequence).
 * dst is taken by value, so the caller's iterator object is not advanced.
 */
template <class Iterator>
void xml10_escape_wchar(wchar_t w, Iterator dst)
{
    if (w == '<') {
        copy_utf8_range(boost::as_literal("&lt;"), dst);
    } else if (w == '>') {
        copy_utf8_range(boost::as_literal("&gt;"), dst);
    } else if (w == '&') {
        copy_utf8_range(boost::as_literal("&amp;"), dst);
    } else if (w == '"') {
        copy_utf8_range(boost::as_literal("&quot;"), dst);
    } else if (w == '\'') {
        copy_utf8_range(boost::as_literal("&apos;"), dst);
    } else if (w == 0x9) {
        copy_utf8_range(boost::as_literal("&#9;"), dst);
    } else if (w == 0xA) {
        copy_utf8_range(boost::as_literal("&#xa;"), dst);
    } else if (w == 0xD) {
        copy_utf8_range(boost::as_literal("&#xd;"), dst);
    } else {
        // Remaining part of the XML 1.0 Char production (tab, LF and CR were
        // handled above).
        const bool below_surrogates = (w >= 0x20 && w <= 0xD7FF);
        const bool above_surrogates = (w >= 0xE000 && w <= 0xFFFD);
        const bool supplementary = (w >= 0x10000 && w <= 0x10FFFF);
        if (below_surrogates || above_surrogates || supplementary) {
            *dst++ = w;
        }
    }
}

/*
 * Copies utf8-encoded [beg, end) sequence to codepoint sequence dst in such a way
 *   that codepoint in the resulting sequence is either a valid XML1.0 point or a
 *   correspoinding XML1.0 escaped entity.
 * Assumes [beg, end) is a valid range.
 */
template <class Iterator1, class Iterator2>
void xml10_escape_utf8(Iterator1 beg, Iterator1 end, Iterator2 dst)
{
    utf8_ro_iterator<Iterator1, utf8_sanitizing_validator<Iterator1> > b(beg);
    utf8_ro_iterator<Iterator1, utf8_sanitizing_validator<Iterator1> > e(end);
    while (b != e) {
        xml10_escape_wchar(*b++, dst);
    }
}

/*
 * Returns a modified copy of utf8-encoded string src in such a way that each
 *   utf8-encoded code point in the resulting string is either a valid XML 1.0
 *   point or a corresponding XML 1.0 escaped entity.
 * The result is produced by xml10_escape_utf8() writing through
 *   make_utf8_wo_iterator(std::back_inserter(result)).
 * src is taken by const reference: it is only read, so no copy of the input
 *   is needed.
 */
inline std::string xml10_escape_utf8_string(const std::string& src)
{
    std::string dst;
    xml10_escape_utf8(src.begin(), src.end(),
                      make_utf8_wo_iterator(std::back_inserter(dst)));
    return dst;
}

/*
 * Charset converter built on iconv, always pivoting through utf-8:
 *   - from == to (case-insensitive): the input is copied unchanged;
 *   - from is "utf-8": one step, utf-8 -> to;
 *   - to is "utf-8":   one step, from -> utf-8;
 *   - otherwise:       two steps, from -> utf-8 -> to.
 *
 * Construction never throws; check ok() / operator!() afterwards.
 *
 * Every doit() call is an independent conversion: the iconv conversion state
 * is reset before it starts, and for the utf-8 -> to step the sequence that
 * returns the target encoding to its initial shift state is appended.
 *
 * NOTE(review): the class closes its iconv descriptors in the destructor but
 * relies on the compiler-generated copy operations; copying an instance would
 * make both copies close the same descriptors. Do not copy instances.
 */
class convert {
public:
    /* What from_utf does with a utf-8 character the target charset cannot represent. */
    enum error_t {
        html,    /* write it as a decimal character reference "&#N;" */
        space,   /* write a single ' ' */
        except,  /* stop and report failure */
        skip     /* drop it */
    };

    convert(const char* from, const char* to)
        : status_(true),
          ill_seq_(false),
          fds_(invalid_cd()),
          tds_(invalid_cd()),
          buf_() {
        if (!from || !to || !*from || !*to) {
            status_ = false;
            return;
        }
        if (strcasecmp(from, to) == 0) {
            return;   // identity: no descriptors needed
        }
        if (strcasecmp(from, "utf-8") != 0) {
            fds_ = iconv_open("utf-8", decoder_name(from));
            if (fds_ == invalid_cd()) {
                status_ = false;
                return;
            }
        }
        if (strcasecmp(to, "utf-8") != 0) {
            tds_ = iconv_open(to, "utf-8");
            if (tds_ == invalid_cd()) {
                // Do not keep a half-initialised pair around.
                if (fds_ != invalid_cd()) {
                    iconv_close(fds_);
                    fds_ = invalid_cd();
                }
                status_ = false;
                return;
            }
        }
    }

    ~convert() {
        if (fds_ != invalid_cd()) {
            iconv_close(fds_);
        }
        if (tds_ != invalid_cd()) {
            iconv_close(tds_);
        }
    }

    /* True if the last doit() met input that could not be converted as is. */
    bool illseq() const {
        return ill_seq_;
    }

    /* True if the converter was constructed successfully. */
    bool ok() const {
        return status_;
    }

    bool operator!() const {
        return !ok();
    }

    /*
     * Converts src and stores the result in dst; src and dst may be the same
     * object. et selects the handling of characters the target charset cannot
     * represent (see error_t).
     *
     * Returns false if the converter is not ok() or a conversion step failed.
     * After a failed step dst holds the part converted so far, followed
     * (except for et == except) by the unconverted remainder of that step's
     * input.
     */
    bool doit(const std::string& src, std::string& dst, error_t et=html) {
        ill_seq_ = false;
        if (!ok()) {
            return false;
        }
        if (fds_ != invalid_cd()) {
            if (!to_utf(src, dst)) {
                return false;
            }
        } else {
            dst = src;
        }
        if (tds_ != invalid_cd()) {
            // from_utf restarts ill_seq_; keep what the first step reported.
            const bool ill_seq_in_source = ill_seq_;
            const bool converted = from_utf(dst, dst, et);
            ill_seq_ = ill_seq_ || ill_seq_in_source;
            return converted;
        }
        return true;
    }

private:
    static iconv_t invalid_cd() {
        return reinterpret_cast<iconv_t>(-1);
    }

    /*
     * Name handed to iconv for decoding charset `from`: gb2312 is decoded as
     * gbk and ks_c_5601-1987 as euckr; any other name is used unchanged.
     */
    static const char* decoder_name(const char* from) {
        if (strcasecmp(from, "gb2312") == 0) {
            return "gbk";
        }
        if (strcasecmp(from, "ks_c_5601-1987") == 0) {
            return "euckr";
        }
        return from;
    }

    /* Appends the filled part of buf_ to out and makes the whole buffer available again. */
    void drain(std::string& out, char*& dst_ptr, size_t& dst_size) {
        out.append(buf_.data(), buf_size - dst_size);
        dst_ptr = buf_.data();
        dst_size = buf_size;
    }

    /*
     * from -> utf-8. Safe when src and dst are the same object: the output is
     * built in a separate string and swapped in at the end.
     */
    bool to_utf(const std::string& src, std::string& dst) {
        std::string out;
        const bool converted = decode(src, out);
        dst.swap(out);
        return converted;
    }

    /*
     * utf-8 -> to. Safe when src and dst are the same object (doit() relies on
     * that): the output is built in a separate string and swapped in at the end.
     */
    bool from_utf(const std::string& src, std::string& dst, error_t error_type) {
        std::string out;
        const bool converted = encode(src, out, error_type);
        dst.swap(out);
        return converted;
    }

    /*
     * Converts src to utf-8 with fds_, appending to out. Input bytes iconv
     * rejects with EILSEQ are skipped one at a time and recorded in ill_seq_.
     * On any other error the converted part and the raw remainder of src are
     * appended and false is returned.
     */
    bool decode(const std::string& src, std::string& out) {
        const size_t failed = static_cast<size_t>(-1);

        ill_seq_ = false;
        if (src.empty()) {
            return true;
        }
        char* src_ptr = const_cast<char*>(src.data());   // iconv does not modify the input
        size_t src_size = src.size();
        char* dst_ptr = buf_.data();
        size_t dst_size = buf_size;

        // Forget any shift state left behind by a previous (possibly aborted) call.
        iconv(fds_, NULL, NULL, NULL, NULL);

        for (;;) {
            const size_t rv = iconv(fds_, &src_ptr, &src_size, &dst_ptr, &dst_size);
            if (rv != failed) {
                // Any other value, including a positive count of non-reversible
                // conversions, means all of the input was consumed.
                drain(out, dst_ptr, dst_size);
                return true;
            }
            const int err = errno;
            if (err == E2BIG) {
                drain(out, dst_ptr, dst_size);
            } else if (err == EILSEQ) {
                ill_seq_ = true;
                --src_size;
                ++src_ptr;
            } else {
                drain(out, dst_ptr, dst_size);
                out.append(src_ptr, src_size);
                return false;
            }
        }
    }

    /*
     * Converts utf-8 src to the target charset with tds_, appending to out.
     * When iconv reports EILSEQ, the offending utf-8 character is handled
     * according to error_type if is_valid_utf8_char() accepts it; otherwise the
     * raw remainder of src is appended and false is returned.
     */
    bool encode(const std::string& src, std::string& out, error_t error_type) {
        const size_t failed = static_cast<size_t>(-1);

        ill_seq_ = false;
        if (src.empty()) {
            return true;
        }
        char* src_ptr = const_cast<char*>(src.data());   // iconv does not modify the input
        size_t src_size = src.size();
        char* dst_ptr = buf_.data();
        size_t dst_size = buf_size;

        // Forget any shift state left behind by a previous (possibly aborted) call.
        iconv(tds_, NULL, NULL, NULL, NULL);

        for (;;) {
            const size_t rv = iconv(tds_, &src_ptr, &src_size, &dst_ptr, &dst_size);
            const int err = errno;
            // Whatever the outcome, everything written so far belongs to the result.
            drain(out, dst_ptr, dst_size);
            if (rv != failed) {
                break;      // all input consumed
            }
            if (err == E2BIG) {
                continue;   // buffer was full; it is empty again now
            }
            if (err != EILSEQ) {
                out.append(src_ptr, src_size);
                return false;
            }

            ill_seq_ = true;
            const char* utf8_b = src_ptr;
            const char* utf8_e = next_utf8_char(src_ptr, src_ptr + src_size);
            const std::size_t len = utf8_e - utf8_b;
            if (!is_valid_utf8_char(utf8_b, utf8_e)) {
                out.append(src_ptr, src_size);
                return false;
            }
            switch (error_type) {
                case html: {
                    const wchar_t w = utf8_char_to_wchar(utf8_b);
                    char entity[20];
                    // The cast makes the argument type match "%d" regardless
                    // of how the platform defines wchar_t.
                    const int n = snprintf(entity, sizeof(entity), "&#%d;",
                                           static_cast<int>(w));
                    if (n > 0) {
                        out.append(entity, n);
                    }
                    break;
                }
                case space:
                    out += ' ';
                    break;
                case skip:
                    break;
                default:    // except
                    return false;
            }
            src_ptr += len;
            src_size -= len;
        }

        // Stateful target encodings need a trailing sequence that returns them
        // to the initial shift state; for stateless ones this writes nothing.
        if (iconv(tds_, NULL, NULL, &dst_ptr, &dst_size) != failed) {
            out.append(buf_.data(), buf_size - dst_size);
        }
        return true;
    }

private:
    enum {
        buf_size = 65536   /* size of the intermediate output buffer, in bytes */
    };

    bool status_;      /* false if construction failed */
    bool ill_seq_;     /* set when the last conversion met unconvertible input */
    iconv_t fds_;      /* from -> utf-8 descriptor, or (iconv_t)-1 if unused */
    iconv_t tds_;      /* utf-8 -> to descriptor, or (iconv_t)-1 if unused */
    boost::array<char, buf_size> buf_;

};

} // namespace mulca_mime
