#pragma once

/*
 * Forward bugreports and suggestions to ctor@
 */

#include <stdexcept>
#include <boost/iterator/iterator_categories.hpp>
#include <iterator>

namespace mulca_mime {

template <class Iterator>
struct utf8_noop_validator;

template <class Iterator>
struct utf8_sanitizing_validator;

/*
 * This class adapts a forward Iterator over a utf-8 encoded sequence to an iterator
 *  over a sequence of unicode codepoints performing the decoding from utf8 upon
 *  every read.
 * Validator provides a means to deal with invalid portions of the input sequence.
 */
template<class Iterator, class Validator = utf8_noop_validator<Iterator> >
class utf8_ro_iterator;

/*
 * This class adapts an output Iterator over a utf-8 encoded sequence to an output
 *   iterator over a sequence of unicode codepoints, performing the encoding to
 *   utf8 upon every write.
 */
template<class Iterator>
class utf8_wo_iterator;

template <class Iterator>
inline utf8_ro_iterator<Iterator>
make_utf8_ro_iterator(Iterator beg);

template <class Iterator>
utf8_ro_iterator<Iterator, utf8_sanitizing_validator<Iterator> >
make_utf8_ro_iterator(Iterator beg, Iterator end);

template <class Iterator>
utf8_wo_iterator<Iterator>
make_utf8_wo_iterator(Iterator beg);

/*
 * Tells whether ch is a utf-8 continuation byte.
 * RFC3629:
 *   UTF8-tail   = %x80-BF
 */
inline bool is_valid_utf8_tail(unsigned char ch) {
    return 0x80 <= ch && ch <= 0xBF;
}
/*
 * Returns true if a prefix of [beg, end) is a valid utf-8 encoding sequence of a
 *   valid unicode codepoint, as defined by the RFC3629 grammar:
 *
 *   UTF8-1      = %x00-7F
 *   UTF8-2      = %xC2-DF UTF8-tail
 *   UTF8-3      = %xE0 %xA0-BF UTF8-tail /
 *                 %xE1-EC 2( UTF8-tail ) /
 *                 %xED %x80-9F UTF8-tail /
 *                 %xEE-EF 2( UTF8-tail )
 *   UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) /
 *                 %xF1-F3 3( UTF8-tail ) /
 *                 %xF4 %x80-8F 2( UTF8-tail )
 *   UTF8-tail   = %x80-BF
 *
 * Returns false for an empty range, for a truncated sequence, for overlong
 *   encodings, for encoded surrogates and for lead bytes %xF5-FF.
 * Assumes [beg, end) is a valid range of forward iterators. At most four
 *   elements are inspected; the range length is never computed.
 */
template<class Iterator>
bool is_valid_utf8_char(Iterator beg, Iterator end)
{
    if (beg == end) {
        return false;
    }

    Iterator cur = beg;

    // Steps to the next byte and checks that it exists and lies in [lo, hi].
    // Hitting end means the sequence is truncated.
    auto next_in = [&cur, &end](unsigned char lo, unsigned char hi) -> bool {
        if (++cur == end) {
            return false;
        }
        const unsigned char b = static_cast<unsigned char>(*cur);
        return lo <= b && b <= hi;
    };
    // UTF8-tail   = %x80-BF
    auto tail = [&next_in]() -> bool {
        return next_in(0x80, 0xBF);
    };

    const unsigned char lead = static_cast<unsigned char>(*beg);

    if (lead <= 0x7F) { // UTF8-1
        return true;
    }
    if (lead < 0xC2) { // stray continuation byte, or overlong %xC0 / %xC1
        return false;
    }
    if (lead <= 0xDF) { // UTF8-2
        return tail();
    }
    if (lead == 0xE0) { // second byte restricted to exclude overlong forms
        return next_in(0xA0, 0xBF) && tail();
    }
    if (lead == 0xED) { // second byte restricted to exclude surrogates
        return next_in(0x80, 0x9F) && tail();
    }
    if (lead <= 0xEF) { // %xE1-EC, %xEE-EF
        return tail() && tail();
    }
    if (lead == 0xF0) { // second byte restricted to exclude overlong forms
        return next_in(0x90, 0xBF) && tail() && tail();
    }
    if (lead <= 0xF3) { // %xF1-F3
        return tail() && tail() && tail();
    }
    if (lead == 0xF4) { // second byte restricted to stay within U+10FFFF
        return next_in(0x80, 0x8F) && tail() && tail();
    }

    return false; // %xF5-FF never start a valid sequence
}

/*
 * Returns an iterator to the first element of (beg, end) that can qualify as a first
 *   element of a utf-8 encoded sequence, or end if there is no such element.
 * Continuation bytes (%x80-BF), the overlong lead bytes %xC0 / %xC1 and bytes
 *   %xF8-FF are skipped.
 * An empty range (beg == end) yields end; beg is never advanced past end.
 * Assumes [beg, end) is a valid range of forward iterators.
 */
template <class Iterator>
Iterator next_utf8_char(Iterator beg, Iterator end)
{
    if (beg == end) {
        // Nothing to skip; pre-incrementing here would step past the range.
        return end;
    }
    while (++beg != end) {
        const unsigned char ch = static_cast<unsigned char>(*beg);
        if ((ch & 0x80) == 0) { // Single-byte encoding
            break;
        } else if ((ch & 0xC0) == 0x80) { // Second, third, or fourth byte of a multi-byte sequence
            continue;
        } else if (ch == 0xC0 || ch == 0xC1) { // Overlong encoding (invalid)
            continue;
        } else if ((ch & 0xE0) == 0xC0) { // Start of 2-byte sequence
            break;
        } else if ((ch & 0xF0) == 0xE0) { // Start of 3-byte sequence
            break;
        } else if ((ch & 0xF8) == 0xF0) { // Start of 4-byte sequence
            break;
        }
        // %xF8-FF match no branch above and are skipped as well.
    }
    return beg;
}

/*
 * Returns an iterator to the element following the utf-8 encoding sequence that
 *   starts at beg. The sequence length (1 to 4 elements) is taken from the lead
 *   byte alone; continuation bytes are not inspected.
 * Throws std::runtime_error if *beg cannot start a sequence (a continuation byte
 *   %x80-BF or a byte %xF8-FF). In that case the iterator is not advanced.
 * Assumes beg is a forward iterator to the beginning of a complete utf-8 encoded
 *   sequence. A complete sequence means that it does not contain a prefix of the
 *   utf-8 encoding sequence of a unicode codepoint if it does not contain all of
 *   this sequence.
 */
template<class Iterator>
Iterator next_utf8_char(Iterator beg)
{
    const unsigned char lead = static_cast<unsigned char>(*beg);

    int length;
    if ((lead & 0x80) == 0) { // U+0000-U+007F (ansi)
        length = 1;
    } else if ((lead & 0xE0) == 0xC0) { // U+0080-U+07FF
        length = 2;
    } else if ((lead & 0xF0) == 0xE0) { // U+0800-U+FFFF
        length = 3;
    } else if ((lead & 0xF8) == 0xF0) { // U+10000-U+10FFFF
        length = 4;
    } else {
        // Reject before moving, so a bad lead byte near the end of the
        // buffer never walks the iterator out of range.
        throw std::runtime_error("bad utf: next_utf8_char failed");
    }

    // Plain increments keep the iterator requirements to operator* and operator++.
    for (int i = 0; i < length; ++i) {
        ++beg;
    }
    return beg;
}

/*
 * Decodes the utf-8 encoding sequence that starts at beg and returns the resulting
 *   unicode codepoint value.
 * Throws std::runtime_error if *beg is not a lead byte of a 1- to 4-byte sequence
 *   or if the decoded value exceeds U+10FFFF. Continuation bytes are not
 *   validated; only their low six bits are used.
 * Assumes beg is a forward iterator to the beginning of a complete utf-8 encoded
 *   sequence. A complete sequence means that it does not contain a prefix of the
 *   utf-8 encoding sequence of a unicode codepoint if it does not contain all of
 *   this sequence.
 */
template<class Iterator>
wchar_t utf8_char_to_wchar(Iterator beg)
{
    const unsigned char lead = static_cast<unsigned char>(*beg);

    if (lead < 0x80) { // U+0000-U+007F (ansi)
        return *beg;
    }

    // Number of continuation bytes and the payload bits held by the lead byte.
    int tails = 0;
    wchar_t code = 0;
    if ((lead & 0xE0) == 0xC0) { // U+0080-U+07FF
        tails = 1;
        code = static_cast<wchar_t>(lead & 0x1F);
    } else if ((lead & 0xF0) == 0xE0) { // U+0800-U+FFFF
        tails = 2;
        code = static_cast<wchar_t>(lead & 0x0F);
    } else if ((lead & 0xF8) == 0xF0) { // U+10000-U+10FFFF
        tails = 3;
        code = static_cast<wchar_t>(lead & 0x07);
    } else {
        throw std::runtime_error("bad utf: utf8_char_to_wchar failed");
    }

    // Each continuation byte contributes its low six bits.
    for (int i = 0; i < tails; ++i) {
        ++beg;
        const unsigned char next = static_cast<unsigned char>(*beg);
        code = static_cast<wchar_t>((code << 6) | (next & 0x3F));
    }

    // Only 4-byte sequences can overshoot the unicode range.
    if (code > 0x10FFFF) {
        throw std::runtime_error("bad utf: utf8_char_to_wchar failed");
    }
    return code;
}

/*
 * Writes the utf-8 encoding sequence of codepoint w through dst and returns the
 *   iterator past the last element written (1 to 4 elements).
 * Throws std::runtime_error if w is negative or greater than U+10FFFF; nothing is
 *   written in that case.
 */
template<class Iterator>
Iterator wchar_to_utf8_char(wchar_t w, Iterator dst)
{
    if (w < 0 || w > 0x10FFFF) {
        throw std::runtime_error("bad wchar: wchar_to_utf8_char failed");
    }

    if (w <= 0x007F) { // U+0000-U+007F: the codepoint is its own encoding
        *dst++ = static_cast<unsigned char>(w);
        return dst;
    }

    // Lead byte marker and number of continuation bytes for the multi-byte forms.
    unsigned char marker;
    int tails;
    if (w <= 0x07FF) { // U+0080-U+07FF
        marker = 0xC0;
        tails = 1;
    } else if (w <= 0xFFFF) { // U+0800-U+FFFF
        marker = 0xE0;
        tails = 2;
    } else { // U+10000-U+10FFFF
        marker = 0xF0;
        tails = 3;
    }

    // The lead byte carries the topmost payload bits, which always fit
    // below the marker for the ranges selected above.
    *dst++ = static_cast<unsigned char>(marker | static_cast<unsigned char>(w >> (6 * tails)));

    // Continuation bytes carry six bits each, most significant group first.
    for (int shift = 6 * (tails - 1); shift >= 0; shift -= 6) {
        *dst++ = static_cast<unsigned char>(0x80 | static_cast<unsigned char>((w >> shift) & 0x3F));
    }
    return dst;
}

/*
 * Non-throwing variant: writes the utf-8 encoding sequence of codepoint w through
 *   dst and reports success. Returns false instead of propagating the
 *   std::runtime_error raised by the throwing overload.
 * dst is taken by value, so the advanced iterator is not handed back.
 */
template<class Iterator>
bool wchar_to_utf8_char(wchar_t w, Iterator dst, std::nothrow_t)
{
    bool encoded = true;
    try {
        wchar_to_utf8_char(w, dst);
    } catch (const std::runtime_error&) {
        encoded = false;
    }
    return encoded;
}

/*
 * Copies the utf-8 encoding sequence of a single unicode codepoint that equals a
 *   prefix of src to dst, and returns src advanced past that sequence.
 * The sequence length (1 to 4 elements) is taken from the lead byte alone;
 *   continuation bytes are copied without inspection.
 * Throws std::runtime_error if *src cannot start a sequence (a continuation byte
 *   %x80-BF or a byte %xF8-FF); nothing is copied in that case.
 */
template<class Iterator1, class Iterator2>
Iterator1 copy_utf8_char(Iterator1 src, Iterator2 dst)
{
    const unsigned char lead = static_cast<unsigned char>(*src);

    int length;
    if ((lead & 0x80) == 0x00) { // U+0000-U+007F (ansi)
        length = 1;
    } else if ((lead & 0xE0) == 0xC0) { // U+0080-U+07FF
        length = 2;
    } else if ((lead & 0xF0) == 0xE0) { // U+0800-U+FFFF
        length = 3;
    } else if ((lead & 0xF8) == 0xF0) { // U+10000-U+10FFFF
        length = 4;
    } else {
        // %xF8-FF are rejected here too, matching the other decoders in this file.
        throw std::runtime_error("bad utf: copy_utf8_char failed");
    }

    for (int i = 0; i < length; ++i) {
        *dst++ = *src++;
    }
    return src;
}

/*
 * Transforms a utf-8 encoded sequence src to a sequence of codepoints and copies
 *   the result to dst.
 * The single-argument make_utf8_ro_iterator is used for both ends of the range,
 *   so decoding goes through the default (non-sanitizing) validator.
 */
template<class Range, class Iterator>
void copy_utf8_range(const Range& src, Iterator dst)
{
    auto first = make_utf8_ro_iterator(src.begin());
    auto last = make_utf8_ro_iterator(src.end());
    std::copy(first, last, dst);
}


// Implementation details of utf8 iterators.

/*
 * Assignment proxy returned by utf8_wo_iterator::operator*.
 * It refers to the underlying output iterator; every assignment writes one utf-8
 *   encoding sequence through that iterator, which is passed by reference to the
 *   encoding helpers.
 * Only utf8_wo_iterator can construct it.
 */
template<class Iterator>
class utf8_wo_val_proxy {
    template<class OtherIt>
    friend class utf8_wo_iterator;

    // Underlying output iterator, owned by the utf8_wo_iterator that made this proxy.
    Iterator& sink_;

    utf8_wo_val_proxy(Iterator& sink)
        : sink_(sink) {
    }

public:
    typedef utf8_wo_val_proxy proxy;

    // Encodes codepoint w to utf-8 and writes it to the underlying iterator.
    proxy& operator=(wchar_t w) {
        wchar_to_utf8_char<Iterator&>(w, sink_);
        return *this;
    }

    // Copies one already encoded utf-8 sequence starting at it.
    template<class OtherIt>
    proxy& operator=(const OtherIt it) {
        copy_utf8_char<OtherIt, Iterator&>(it, sink_);
        return *this;
    }
};

/*
 * Validator that performs no validation: it trusts the input to be a complete,
 *   well-formed utf-8 sequence and forwards to the unchecked helpers.
 */
template <class Iterator>
struct utf8_noop_validator {
    utf8_noop_validator()
    {}

    // The range arguments are accepted for interface compatibility and ignored.
    utf8_noop_validator(Iterator&, Iterator)
    {}

    // Decodes the codepoint whose encoding starts at it.
    wchar_t dereference(Iterator& it) {
        const wchar_t codepoint = utf8_char_to_wchar(it);
        return codepoint;
    }

    // Moves it past the encoding sequence it currently points to.
    void increment(Iterator& it) {
        Iterator following = next_utf8_char(it);
        it = following;
    }
};

/*
 * Validator that drops every portion of the input that is_valid_utf8_char rejects.
 * After construction and after every increment the iterator rests either on the
 *   first element of a valid encoding sequence or on the end of the range, so
 *   comparing it with the end iterator reliably detects exhaustion, including
 *   when the input ends with an invalid or truncated sequence.
 */
template <class Iterator>
struct utf8_sanitizing_validator {
    // End of the underlying range; bounds every look-ahead.
    Iterator e_;

    // Moves it forward to the first valid sequence in [it, e), or to e.
    utf8_sanitizing_validator(Iterator& it, Iterator e)
        : e_(e) {
        skip_invalid(it);
    }

    utf8_sanitizing_validator()
        : e_() {
    }

    // Advances it to the next valid sequence, or to the end of the range.
    // Incrementing an iterator that is already at the end leaves it there.
    void increment(Iterator& it) {
        if (it == e_) {
            return;
        }
        it = next_utf8_char(it, e_);
        skip_invalid(it);
    }

    // Decodes the codepoint at it, first skipping invalid data if it does not
    // rest on a valid sequence. Returns 0 when the end of the range is reached.
    wchar_t dereference(Iterator& it) {
        skip_invalid(it);
        if (it == e_) {
            return 0;
        }
        return utf8_char_to_wchar(it);
    }

private:
    // Moves it forward until it starts a valid sequence or reaches e_.
    void skip_invalid(Iterator& it) const {
        while (it != e_ && !is_valid_utf8_char(it, e_)) {
            it = next_utf8_char(it, e_);
        }
    }
};

/*
 * Read-only iterator over the unicode codepoints encoded in a utf-8 sequence.
 * Wraps an underlying Iterator and delegates decoding and stepping to Validator.
 * Equality compares the underlying iterators only.
 */
template<class Iterator, class Validator>
class utf8_ro_iterator {
public:
    typedef std::forward_iterator_tag iterator_category;
    typedef wchar_t value_type;
    typedef typename std::iterator_traits<Iterator>::difference_type difference_type;
    typedef void pointer;
    typedef void reference;
    typedef utf8_ro_iterator self;

    // Value-initializes the underlying iterator so that default-constructed
    // instances hold a well-defined value (e.g. a null pointer).
    utf8_ro_iterator()
        : m_base(),
          m_v() {
    }

    // Wraps it; e is the end of the underlying range and is handed to the
    // validator together with a reference to the stored iterator.
    explicit utf8_ro_iterator(Iterator it, Iterator e=Iterator())
        : m_base(it),
          m_v(m_base, e) {
    }

    utf8_ro_iterator(const self& it) : m_base(it.m_base), m_v(it.m_v) {}

    // Converts from an iterator over a compatible underlying iterator type.
    // Only the position is taken over; the validator is default-constructed.
    template<class OtherIterator>
    utf8_ro_iterator(const utf8_ro_iterator<OtherIterator>& it)
        : m_base(it.base()),
          m_v() {
    }

    utf8_ro_iterator& operator=(const self&) = default;

    // Returns the codepoint at the current position. Non-const because the
    // validator receives the underlying iterator by non-const reference.
    wchar_t operator*() {
        return m_v.dereference(m_base);
    }

    bool operator==(const self& it) const {
        return m_base == it.m_base;
    }

    bool operator!=(const self& it) const {
        return m_base != it.m_base;
    }

    self& operator++() {
        m_v.increment(m_base);
        return *this;
    }

    self operator++(int) {
        self tmp = *this;
        m_v.increment(m_base);
        return tmp;
    }

    // Returns a copy of the underlying iterator at its current position.
    Iterator base() const {
        return m_base;
    }

private:
    Iterator m_base;
    Validator m_v;
};

/*
 * Write-only iterator that encodes assigned codepoints to utf-8 and writes the
 *   result through an underlying output Iterator.
 * Dereferencing yields a utf8_wo_val_proxy bound to the underlying iterator; the
 *   assignment to that proxy is what advances the underlying iterator, so both
 *   increment operators are no-ops.
 */
template<class Iterator>
class utf8_wo_iterator {
public:
    typedef std::output_iterator_tag iterator_category;
    typedef void value_type;
    typedef void difference_type;
    typedef void pointer;
    typedef void reference;
    typedef utf8_wo_iterator self;

    explicit utf8_wo_iterator(Iterator it) : m_base(it) {}

    utf8_wo_iterator(const self& it) : m_base(it.m_base) {}

    utf8_wo_iterator& operator=(const self&) = default;

    // Returns a proxy that writes through the stored underlying iterator.
    utf8_wo_val_proxy<Iterator> operator*() {
        return utf8_wo_val_proxy<Iterator>(m_base);
    }

    // Comparisons do not modify the iterator, so they are const and usable on
    // const instances, in line with utf8_ro_iterator.
    bool operator==(const self& it) const {
        return m_base == it.m_base;
    }

    bool operator!=(const self& it) const {
        return m_base != it.m_base;
    }

    // No-op: the position moves when a value is assigned through operator*.
    self& operator++() {
        return *this;
    }

    // No-op, see the prefix form.
    self& operator++(int) {
        return *this;
    }

    // Returns a copy of the underlying iterator at its current position.
    Iterator base() const {
        return m_base;
    }

private:
    Iterator m_base;
};

/*
 * Wraps it in a utf8_ro_iterator that uses the default validator.
 */
template <class Iterator>
inline utf8_ro_iterator<Iterator>
make_utf8_ro_iterator(Iterator it)
{
    typedef utf8_ro_iterator<Iterator> result_type;
    return result_type(it);
}

/*
 * Wraps it in a utf8_ro_iterator that uses utf8_sanitizing_validator, bounded
 *   by end.
 */
template <class Iterator>
inline utf8_ro_iterator<Iterator, utf8_sanitizing_validator<Iterator> >
make_utf8_ro_iterator(Iterator it, Iterator end)
{
    typedef utf8_ro_iterator<Iterator, utf8_sanitizing_validator<Iterator> > result_type;
    return result_type(it, end);
}

/*
 * Wraps the output iterator it in a utf8_wo_iterator.
 */
template <class Iterator>
inline utf8_wo_iterator<Iterator>
make_utf8_wo_iterator(Iterator it)
{
    typedef utf8_wo_iterator<Iterator> result_type;
    return result_type(it);
}

} // namespace mulca_mime
