// -*- C++ -*-
//

#include <mimeparser/rfc2047.h>
#include <butil/butil.h>

namespace mulca_mime {

//
// Note, we put here first charset in std::string.
// It must be crazy to use not one charsets in headers.
// Note! it's incorrect implementation since it doesn't remove SPACE inside comments '(' ')'
//

std::string decode_rfc2047(const std::string& src, std::string &charset)
{
    std::string dest;
    std::string sp_tail;
    bool collect_sp = false;
    charset.clear();
    for (size_t i=0; i<src.length(); ++i) {
        if (src[i]=='=' && (i+1) < src.length() && src[i+1]=='?') {
            // Found start of enc header
            size_t charset_end=src.find('?',i+2);
            if (charset_end==src.npos) {
                // It's not rfc2047 header.
                if (collect_sp) {
                    collect_sp = false;
                    dest += sp_tail;
                    sp_tail = "";
                }
                dest+=src.substr(i);
                return dest;
            }
            if ((charset_end + 2) >= src.size() || src[charset_end+2]!='?' ||
                    (src[charset_end+1]!='q' && src[charset_end+1]!='Q' &&
                    src[charset_end+1]!='b' && src[charset_end+1]!='B')) {
                // It's not rfc2047 header.
                if (collect_sp) {
                    collect_sp = false;
                    dest += sp_tail;
                    sp_tail = "";
                }
                dest+=src.substr(i);
                return dest;
            }
            size_t word_end=src.find("?=",charset_end+3); // XXX Should we check for inner '=?'?
            if (word_end==src.npos) {
                // It's not rfc2047 header.
                if (collect_sp) {
                    collect_sp = false;
                    dest += sp_tail;
                    sp_tail = "";
                }
                dest+=src.substr(i);
                return dest;
            }
            charset= src.substr(i+2,charset_end-(i+2));
            std::string res_word;
            if (src[charset_end+1]=='q' || src[charset_end+1]=='Q') {
                // Change '_' to ' ' as rfc 2047 says.
                std::string wrd=src.substr(charset_end+3,word_end-(charset_end+3));
                size_t pos=wrd.find('_');
                while (pos!=wrd.npos) {
                    wrd[pos]=' ';
                    pos=wrd.find('_',pos);
                }
                res_word=::decode_qp(wrd);
            } else {
                res_word=::decode_base64(src.substr(charset_end+3,word_end-(charset_end+3)));
            }
            dest += res_word;
            collect_sp = true;
            sp_tail = "";
            i = word_end+1;
        } else {
            if (src[i] == '\n') {
                ;
            } else if (src[i] == '\r' && (i+1) < src.length() && src[i+1]=='\n') {
                i++;
            } else if ((src[i] == ' ') || (src[i] == '\t')) {
                if (collect_sp) {
                    sp_tail += src[i];
                } else {
                    dest += src[i];
                }
            } else {
                if (collect_sp) {
                    dest += sp_tail;
                    collect_sp = false;
                }
                dest += src[i];
            }
        }
    }
    return dest;
}


// Transfer encoding of an encoded-word, as selected by its one-letter
// encoding field (see readEncodedWord).
enum EncodingType {
    UNKNOWN,  // illegal type: the encoding letter was not recognised.
    QUOTED_PRINTABLE,  // encoding letter 'q' or 'Q'
    BASE64  // encoding letter 'b' or 'B'
};

// Returns true for space, horizontal tab and line feed.
// Carriage return ('\r') is not counted as whitespace here.
inline bool is_whitespace(const char c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
        return true;
    default:
        return false;
    }
}


// Parses the encoded-word occupying str[position, end) and stores its charset
// and decoded text in encodedWord.
//
// str         - text being scanned.
// position    - in: index of the leading '='; out: always set to end, also
//               when parsing fails.
// end         - index one past the closing "?=" of the candidate word.
// encodedWord - cleared on entry; filled via set() on success only.
//
// Returns true when the span is "=?charset?X?text?=" with a non-empty
// charset, X one of b/B/q/Q, a non-empty text and the closing "?=" ending
// exactly at end; returns false otherwise.
inline bool
readEncodedWord(const std::string& str,
                std::string::size_type& position, // it's changing
                const std::string::size_type end,
                EncodedWord& encodedWord)
{
    typedef std::string::size_type size_type;

    encodedWord.clear();
    const size_type begin = position;
    position = end;  // the caller's cursor moves to the end of the candidate in any case

    if (end > str.length()) {
        return false;    // illegal call.
    }
    if (!(begin+3 < end) || '=' != str[begin] || '?' != str[begin+1]) {
        return false;    // does not open with "=?"
    }

    // Charset: a non-empty run up to the next '?', inside the span.
    const size_type charset_begin = begin + 2;
    const size_type charset_end = str.find('?', charset_begin);
    if (charset_end == std::string::npos || charset_end == charset_begin || charset_end >= end) {
        return false;
    }
    const std::string charset = str.substr(charset_begin, charset_end-charset_begin);

    // Encoding: exactly one letter enclosed in '?' on both sides.
    if (!(charset_end+4 < end) || '?' != str[charset_end+2]) {
        return false;
    }
    EncodingType encoding = UNKNOWN;
    switch (str[charset_end+1]) {
    case 'b':
    case 'B':
        encoding = BASE64;
        break;
    case 'q':
    case 'Q':
        encoding = QUOTED_PRINTABLE;
        break;
    default:
        return false;    // unknown encoding letter
    }

    // Encoded text: non-empty, with its "?=" terminator ending at end.
    const size_type word_begin = charset_end + 3;
    const size_type word_end = str.find("?=", word_begin);
    if (word_end == std::string::npos || word_end == word_begin || word_end+2 != end) {
        return false;
    }

    std::string text = str.substr(word_begin, word_end-word_begin);
    if (BASE64 == encoding) {
        encodedWord.set(charset, ::decode_base64(text));
        return true;
    }

    // Quoted-printable flavour: '_' stands for a space.
    for (size_type k = 0; k < text.length(); ++k) {
        if ('_' == text[k]) {
            text[k] = ' ';
        }
    }
    encodedWord.set(charset, ::decode_qp(text));
    return true;
}  // readEncodedWord();

// If we can, we split encodedWord with EncodedWord.back(), otherwise - push_back().
inline void addEncodedWord(std::vector<EncodedWord>& encodedWords,const EncodedWord& encodedWord)
{
    if (encodedWord.charset.empty() && encodedWord.word.empty()) {
        return;
    }
    if (!encodedWords.empty() &&
            (encodedWord.charset==encodedWords.back().charset ||
             encodedWord.charset.empty())) {
        encodedWords.back().word+=encodedWord.word;
        return;
    }
    if (!encodedWords.empty() && encodedWords.back().charset.empty()) {
        encodedWords.back().word+=encodedWord.word;
        encodedWords.back().charset=encodedWord.charset;
        return;
    }
    encodedWords.push_back(encodedWord);
} // addEncodedWord()

// Quick shape check for an encoded-word beginning at str[start].
// The text must open with "=?", contain three further '?' characters and have
// '=' directly after the third one. Returns the index one past that '=' on a
// match and std::string::npos otherwise. The fields between the '?' marks are
// not inspected here.
inline std::string::size_type
checkForEncodedWord(const std::string& str,
                    const std::string::size_type start)
{
    const std::string::size_type len = str.length();
    if (!(start+2 < len) || '=' != str[start] || '?' != str[start+1]) {
        return std::string::npos;
    }

    // Step over the three '?' separators that follow the opening "=?".
    std::string::size_type cursor = start + 2;
    for (unsigned int found = 0; found != 3; ++found) {
        const std::string::size_type mark = str.find('?', cursor);
        if (std::string::npos == mark) {
            return std::string::npos;
        }
        cursor = mark + 1;
    }

    // The third '?' has to be immediately followed by '='.
    if (cursor < len && '=' == str[cursor]) {
        return cursor + 1;
    }
    return std::string::npos;
}  //checkForEncodedWord();



/// decoding string with 'encoded-word's with possibility of different charsets.
/// return value can't be empty
std::vector<EncodedWord>
decode_rfc2047(const std::string& src)
{
    std::vector<EncodedWord> result;
    std::string::size_type pos=0;
    std::string::size_type rborder=0;
    std::string plain_text;
    std::string whitespace_tail;
    EncodedWord encodedWord;
    bool last_parsed_was_encoded_word=false;
    while (pos<src.length()) {
        rborder=checkForEncodedWord(src,pos);
        if (std::string::npos!=rborder && readEncodedWord(src,pos,rborder,encodedWord)) {
            if (last_parsed_was_encoded_word) {
                if (!plain_text.empty()) {
                    addEncodedWord(result,EncodedWord("",plain_text));
                }
            } else {
                if (!whitespace_tail.empty()) {
                    addEncodedWord(result,EncodedWord("",whitespace_tail));
                } else {
                    if (!plain_text.empty()) {
                        addEncodedWord(result,EncodedWord("",plain_text));
                    }
                }
            }
            whitespace_tail.clear();
            plain_text.clear();
            addEncodedWord(result,encodedWord);
            last_parsed_was_encoded_word=true;
        } else {
            if (pos>=src.length()) {
                break;
            }
            if ('\r'==src[pos] || '\n'==src[pos]) {
                // do nothing, skip this symbols, as is original function
            } else if (is_whitespace(src[pos])) {
                if (last_parsed_was_encoded_word) {
                    whitespace_tail+=src[pos];
                } else {
                    plain_text+=src[pos];
                }
            } else {
                if (last_parsed_was_encoded_word) {
                    last_parsed_was_encoded_word=false;
                    plain_text=whitespace_tail;
                    whitespace_tail.clear();
                }
                plain_text+=src[pos];
            }
            ++pos;
        }
    }
    if (!plain_text.empty()) {
        addEncodedWord(result,EncodedWord("",plain_text));
    }
    if (result.empty()) { // we must not return empty std::vector.
        result.push_back(EncodedWord());
    }
    return result;
}  // decode_rfc2047()

};
