#ifndef FL_UTF8_H
#define FL_UTF8_H

#include <stdexcept>
#include <boost/iterator/iterator_categories.hpp>
#include <iterator>

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"

/*
    Moves to the start of the next utf8-encoded character.

    Assumes [it, end) is a valid range (it may be empty).
    The byte at `it` is stepped over, then every byte that cannot start a
    character is skipped: continuation bytes (10xxxxxx), the overlong lead
    bytes 0xC0 / 0xC1 and the bytes 0xF8..0xFF.
    Returns the position of the first ASCII byte or 2-/3-/4-byte lead byte
    found, or `end` if there is none. The sequence that starts at the
    returned position is not validated here.
*/
template <class iterator>
iterator next_utf8_char(iterator it, iterator end) noexcept
{
    // Nothing to skip in an empty range; incrementing `end` would be undefined.
    if (it == end)
        return it;

    while (++it != end)
    {
        const unsigned char ch = static_cast<unsigned char>(*it);

        if ((ch & 0x80) == 0) // Single-byte encoding
            break;
        if (ch == 0xC0 || ch == 0xC1) // Overlong encoding (invalid): keep scanning
            continue;
        if ((ch & 0xE0) == 0xC0 || // Start of 2-byte sequence
            (ch & 0xF0) == 0xE0 || // Start of 3-byte sequence
            (ch & 0xF8) == 0xF0)   // Start of 4-byte sequence
            break;
        // Anything else is a continuation byte or 0xF8..0xFF: keep scanning.
    }
    return it;
}

/*
    Tells whether a well-formed utf8-encoded character starts at `it`.

    Assumes [it, end) is a valid range.
    Returns false when:
      - the range is empty;
      - the first byte is not an ASCII byte or a 2-/3-/4-byte lead byte;
      - the sequence is cut short by `end`;
      - a trailing byte is not a continuation byte (10xxxxxx);
      - the encoding is overlong (the code point fits in a shorter sequence);
      - the code point is a UTF-16 surrogate (U+D800-U+DFFF);
      - the code point is above U+10FFFF.
*/
template<class iterator>
bool is_valid_utf8_char(iterator it, iterator end) noexcept
{
    if (it == end) {
        return false;
    }

    const unsigned char lead = static_cast<unsigned char>(*it);
    if ((lead & 0x80) == 0) { // U+0000-U+007F (ASCII)
        return true;
    }

    int trailing = 0;            // number of continuation bytes expected
    unsigned long cp = 0;        // code point assembled so far
    unsigned long smallest = 0;  // smallest code point that needs this length
    if ((lead & 0xE0) == 0xC0) {        // U+0080-U+07FF
        trailing = 1;
        cp = lead & 0x1F;
        smallest = 0x80;
    } else if ((lead & 0xF0) == 0xE0) { // U+0800-U+FFFF
        trailing = 2;
        cp = lead & 0x0F;
        smallest = 0x800;
    } else if ((lead & 0xF8) == 0xF0) { // U+10000-U+10FFFF
        trailing = 3;
        cp = lead & 0x07;
        smallest = 0x10000;
    } else {                            // continuation byte or 0xF8..0xFF
        return false;
    }

    for (; trailing > 0; --trailing) {
        if (++it == end) {
            return false; // truncated sequence
        }
        const unsigned char b = static_cast<unsigned char>(*it);
        if ((b & 0xC0) != 0x80) {
            return false; // not a continuation byte
        }
        cp = (cp << 6) | (b & 0x3F);
    }

    if (cp < smallest) {
        return false; // overlong encoding (this also covers lead bytes 0xC0 / 0xC1)
    }
    if (cp >= 0xD800 && cp <= 0xDFFF) {
        return false; // UTF-16 surrogates are not valid in utf8
    }
    return cp <= 0x10FFFF; // ensuring a unicode codepoint does not exceed 10FFFF
}

/*
    The following functions assume the utf8 char sequence is complete
    (i.e. if a sequence contains the beginning of the utf8 encoding of a unicode code point, it must also contain the rest of that encoding).
*/
/*
    Returns the position just past the utf8-encoded character starting at `it`.

    Only the lead byte is inspected; the continuation bytes are assumed to be
    present. The lead byte is classified before the iterator is moved, so a
    bad lead byte never makes the iterator step beyond it.
    Throws std::runtime_error if the byte at `it` is not an ASCII byte or a
    2-/3-/4-byte lead byte.
*/
template<class iterator>
iterator next_utf8_char(iterator it)
{
    const unsigned char lead = static_cast<unsigned char>(*it);

    int length;
    if ((lead & 0x80) == 0)         // U+0000-U+007F (ansi)
        length = 1;
    else if ((lead & 0xE0) == 0xC0) // U+0080-U+07FF
        length = 2;
    else if ((lead & 0xF0) == 0xE0) // U+0800-U+FFFF
        length = 3;
    else if ((lead & 0xF8) == 0xF0) // U+10000-U+10FFFF
        length = 4;
    else
        throw std::runtime_error("bad utf: next_utf8_char failed");

    for (; length > 0; --length)
        ++it;
    return it;
}

/*
    Copies the utf8-encoded character starting at `src` to `dst`, byte by byte.

    The length is taken from the lead byte only; the continuation bytes are
    assumed to be present and are not checked.
    `iterator2` may be a reference type, in which case the caller's output
    iterator is advanced past the bytes written.
    Returns `src` advanced past the character copied.
    Throws std::runtime_error, before writing anything, if the byte at `src`
    is not an ASCII byte or a 2-/3-/4-byte lead byte (0x80..0xBF, 0xF8..0xFF).
*/
template<class iterator1, class iterator2>
iterator1 copy_utf8_char(iterator1 src, iterator2 dst)
{
    const unsigned char lead = static_cast<unsigned char>(*src);

    int length;
    if ((lead & 0x80) == 0)         // U+0000-U+007F (ansi)
        length = 1;
    else if ((lead & 0xE0) == 0xC0) // U+0080-U+07FF
        length = 2;
    else if ((lead & 0xF0) == 0xE0) // U+0800-U+FFFF
        length = 3;
    else if ((lead & 0xF8) == 0xF0) // U+10000-U+10FFFF
        length = 4;
    else
        throw std::runtime_error("bad utf: copy_utf8_char failed");

    for (; length > 0; --length)
        *dst++ = *src++;
    return src;
}

/*
    Decodes the utf8-encoded character starting at `it` into a wchar_t.

    The length is taken from the lead byte only; the continuation bytes are
    assumed to be present and only their low six bits are used.
    Throws std::runtime_error if the byte at `it` is not an ASCII byte or a
    2-/3-/4-byte lead byte (0x80..0xBF, 0xF8..0xFF), or if the decoded value
    is above U+10FFFF.
*/
template<class iterator>
wchar_t utf8_char_to_wchar(iterator it)
{
    const unsigned char lead = static_cast<unsigned char>(*it);
    if ((lead & 0x80) == 0) // U+0000-U+007F (ansi)
        return static_cast<wchar_t>(lead);

    int trailing;     // number of continuation bytes to read
    unsigned long cp; // payload bits of the lead byte, then the whole code point
    if ((lead & 0xE0) == 0xC0) {        // U+0080-U+07FF
        trailing = 1;
        cp = lead & 0x1F;
    } else if ((lead & 0xF0) == 0xE0) { // U+0800-U+FFFF
        trailing = 2;
        cp = lead & 0x0F;
    } else if ((lead & 0xF8) == 0xF0) { // U+10000-U+10FFFF
        trailing = 3;
        cp = lead & 0x07;
    } else {
        throw std::runtime_error("bad utf: utf8_char_to_wchar failed");
    }

    for (; trailing > 0; --trailing) {
        ++it;
        cp = (cp << 6) | (static_cast<unsigned char>(*it) & 0x3F);
    }

    if (cp > 0x10FFFF) // lead bytes 0xF4..0xF7 may yield out-of-bound values
        throw std::runtime_error("bad utf: utf8_char_to_wchar failed");
    return static_cast<wchar_t>(cp);
}

/*
    Writes the utf8 encoding of the code point `w` through `it`.

    One to four bytes are written, each as an unsigned char, with `*it++ = byte`.
    `iterator` may be a reference type, in which case the caller's iterator
    is the one being advanced.
    Returns `it` after the last byte written.
    Throws std::runtime_error if `w` is negative or above U+10FFFF.
*/
template<class iterator>
iterator wchar_to_utf8_char(wchar_t w, iterator it)
{
    if (!(w >= 0 && w <= 0x10FFFF))
        throw std::runtime_error("bad wchar: wchar_to_utf8_char failed");

    typedef unsigned char byte;

    if (w <= 0x007F) { // U+0000-U+007F: the value itself
        *it++ = static_cast<byte>(w);
        return it;
    }

    if (w <= 0x07FF) { // U+0080-U+07FF: 110xxxxx 10xxxxxx
        *it++ = static_cast<byte>(0xC0 | (w >> 6));
        *it++ = static_cast<byte>(0x80 | (w & 0x3F));
        return it;
    }

    if (w <= 0xFFFF) { // U+0800-U+FFFF: 1110xxxx 10xxxxxx 10xxxxxx
        *it++ = static_cast<byte>(0xE0 | (w >> 12));
        *it++ = static_cast<byte>(0x80 | ((w >> 6) & 0x3F));
        *it++ = static_cast<byte>(0x80 | (w & 0x3F));
        return it;
    }

    // U+10000-U+10FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    *it++ = static_cast<byte>(0xF0 | (w >> 18));
    *it++ = static_cast<byte>(0x80 | ((w >> 12) & 0x3F));
    *it++ = static_cast<byte>(0x80 | ((w >> 6) & 0x3F));
    *it++ = static_cast<byte>(0x80 | (w & 0x3F));
    return it;
}

// ------------
// implementation details for utf8 iterators

// Proxy object returned by utf8_wo_iterator::operator*; assigning to it
// writes one utf8-encoded character through the wrapped iterator.
template<class iterator>
class utf8_wo_val_proxy;

// Write-only (output) iterator adaptor over a byte iterator.
template<class iterator>
class utf8_wo_iterator;

// Read-only (forward) iterator adaptor over a byte iterator.
// The default argument for `validator` is supplied at the class definition.
template<class iterator, class validator>
class utf8_ro_iterator;

/*
    Assignment proxy handed out by utf8_wo_iterator::operator*.

    It holds a reference to the wrapped byte iterator, so every assignment
    writes one utf8-encoded character and leaves that iterator positioned
    after the bytes written. Only utf8_wo_iterator can create a proxy.
*/
template<class iterator>
class utf8_wo_val_proxy
{
    template<class OtherIt>
    friend class utf8_wo_iterator;

    // The byte iterator owned by the utf8_wo_iterator that created this proxy.
    iterator& m_it;

    utf8_wo_val_proxy(iterator& it) : m_it(it) {}

public:
    typedef utf8_wo_val_proxy proxy;

    // Encodes the code point `w` and writes it through the wrapped iterator.
    proxy& operator=(wchar_t w)
    {
        wchar_to_utf8_char<iterator&>(w, m_it);
        return *this;
    }

    // Copies the utf8-encoded character that `it` points at, byte by byte,
    // through the wrapped iterator.
    template<class OtherIt>
    proxy& operator=(const OtherIt it)
    {
        copy_utf8_char<OtherIt, iterator&>(it, m_it);
        return *this;
    }
};

/*
    Validator policy for utf8_ro_iterator that performs no validation.

    It keeps no state and forwards to the one-argument helpers, which look
    only at the lead byte of the character.
*/
template <class iterator>
struct utf8_noop_validator
{
    utf8_noop_validator()
    {
    }

    // Both arguments are ignored; the signature matches what utf8_ro_iterator passes.
    utf8_noop_validator(iterator&, iterator)
    {
    }

    // Decodes the character at `it` without moving `it`.
    wchar_t dereference(iterator& it)
    {
        return utf8_char_to_wchar(it);
    }

    // Moves `it` past the character it currently points at.
    void increment(iterator& it)
    {
        iterator following = next_utf8_char(it);
        it = following;
    }
};

/*
    Validator policy for utf8_ro_iterator that skips malformed input.

    It remembers the end of the byte range and, after construction and after
    every increment, leaves the iterator either on a character accepted by
    is_valid_utf8_char or on the end of the range. A range that ends in
    garbage therefore reaches the end position instead of stopping on it.
*/
template <class iterator>
struct utf8_sanitizing_validator
{
    iterator e_; // end of the underlying byte range

    // Remembers `e` and moves `it` forward to the first valid character (or to `e`).
    utf8_sanitizing_validator(iterator& it, iterator e)
        : e_(e)
    {
        skip_invalid(it);
    }

    // Value-initialises the end position so that copying a default-constructed
    // validator never reads an indeterminate value.
    utf8_sanitizing_validator()
        : e_()
    {}

    // Moves `it` to the next valid character, or to the end of the range.
    // Does nothing when `it` is already at the end.
    void increment(iterator& it)
    {
        if (it == e_)
            return;
        it = next_utf8_char(it, e_);
        skip_invalid(it);
    }

    // Returns the character at `it`, first skipping malformed bytes;
    // returns 0 when the end of the range is reached.
    wchar_t dereference(iterator& it)
    {
        skip_invalid(it);
        if (it == e_)
            return 0;
        return utf8_char_to_wchar(it);
    }

private:
    // Advances `it` until it is on a valid character or on the end of the range.
    void skip_invalid(iterator& it) const
    {
        while (it != e_ && !is_valid_utf8_char(it, e_))
            it = next_utf8_char(it, e_);
    }
};

/*
    Read-only forward iterator that presents a sequence of bytes as a
    sequence of wchar_t code points.

    `iterator` is the underlying byte iterator. `validator` is a policy with
    `increment(iterator&)` and `dereference(iterator&)` members that decides
    how a character is decoded and how to step to the next one.
    Two iterators compare equal when their underlying positions are equal.
*/
template<class iterator, class validator = utf8_noop_validator<iterator> >
class utf8_ro_iterator
{
public:
    typedef std::forward_iterator_tag iterator_category;
    typedef wchar_t value_type;
    typedef typename std::iterator_traits<iterator>::difference_type difference_type;
    typedef void pointer;
    typedef void reference;
    typedef utf8_ro_iterator self;

    // Value-initialises both members, so default-constructed iterators hold a
    // well-defined position and can be copied and compared.
    utf8_ro_iterator() : m_base(), m_v() {}

    // Starts at `it`; `e` is handed to the validator together with a
    // reference to the stored position, which the validator may move.
    // m_base is declared before m_v, so it is initialised before m_v sees it.
    explicit utf8_ro_iterator(iterator it, iterator e=iterator())
        : m_base(it),
          m_v(m_base, e)
    {
    }

    utf8_ro_iterator(const self& it) = default;

    // Converts from an iterator over a different underlying type; only the
    // position is taken over, the validator is value-initialised.
    template<class OtherIterator>
    utf8_ro_iterator(const utf8_ro_iterator<OtherIterator>& it) : m_base(it.base()), m_v() {}

    utf8_ro_iterator& operator=(const self&) = default;

    // Not const: the validator receives the stored position by reference.
    wchar_t operator*() { return m_v.dereference(m_base); }

    bool operator==(const self& it) const
    {
        return m_base == it.m_base;
    }

    bool operator!=(const self& it) const
    {
        return m_base != it.m_base;
    }

    self& operator++()
    {
        m_v.increment(m_base);
        return *this;
    }

    self operator++(int)
    {
        self tmp = *this;
        m_v.increment(m_base);
        return tmp;
    }

    // Current position in the underlying byte sequence.
    iterator base() const
    { return m_base; }

private:
    iterator m_base;
    validator m_v;
};

/*
    Write-only (output) iterator that encodes assigned values as utf8 and
    writes the bytes through the underlying iterator.

    Dereferencing yields a utf8_wo_val_proxy bound to the stored position;
    assigning to that proxy writes one character and advances the stored
    position. The increment operators therefore do nothing.
*/
template<class iterator>
class utf8_wo_iterator
{
public:
    typedef std::output_iterator_tag iterator_category;
    typedef void value_type;
    typedef void difference_type;
    typedef void pointer;
    typedef void reference;
    typedef utf8_wo_iterator self;

    explicit utf8_wo_iterator(iterator it) : m_base(it) {}

    utf8_wo_iterator(const self& it) = default;

    utf8_wo_iterator& operator=(const self&) = default;

    // The proxy refers to m_base, so it must not outlive this iterator.
    utf8_wo_val_proxy<iterator> operator*() { return utf8_wo_val_proxy<iterator>(m_base); }

    // Comparisons are const so that const iterators can be compared, as with
    // utf8_ro_iterator.
    bool operator==(const self& it) const
    {
        return m_base == it.m_base;
    }

    bool operator!=(const self& it) const
    {
        return m_base != it.m_base;
    }

    // No-op: the position moves when a value is assigned through operator*.
    self& operator++()
    {
        return *this;
    }

    // No-op, same as the prefix form.
    self& operator++(int)
    {
        return *this;
    }

    // Current position in the underlying byte sequence.
    iterator base() const
    { return m_base; }

private:
    iterator m_base;
};


// Wraps `it` in a utf8_ro_iterator that uses the default validator;
// the iterator type is deduced from the argument.
template <class iterator>
inline utf8_ro_iterator<iterator>
make_utf8_ro_iterator(iterator it)
{
    typedef utf8_ro_iterator<iterator> result_type;
    return result_type(it);
}

// Wraps `it` in a utf8_ro_iterator that uses utf8_sanitizing_validator,
// with `end` as the end of the underlying byte range.
template <class iterator>
inline utf8_ro_iterator<iterator, utf8_sanitizing_validator<iterator> >
make_utf8_ro_iterator(iterator it, iterator end)
{
    typedef utf8_sanitizing_validator<iterator> validator_type;
    typedef utf8_ro_iterator<iterator, validator_type> result_type;
    return result_type(it, end);
}


// Wraps `it` in a utf8_wo_iterator; the iterator type is deduced from the argument.
template <class iterator>
inline utf8_wo_iterator<iterator>
make_utf8_wo_iterator(iterator it)
{
    typedef utf8_wo_iterator<iterator> result_type;
    return result_type(it);
}

#pragma GCC diagnostic pop

#endif // FL_UTF8_H
