#include "legacy_first_line.h"

#include <mail/butil/include/butil/StrUtils/utf8.h>

#include <boost/bind.hpp>
#include <boost/range.hpp>
#include <boost/iterator/counting_iterator.hpp>

#include <array>
#include <locale>
#include <sstream>

namespace NNotSoLiteSrv::NFirstline::NLib {


// Size in bytes of the fixed-size line buffers used by CFirstLine::FL_Find;
// FL_Find also clamps the caller-supplied output buffer length to this value.
#define LINEMAXSIZE 400

// Value assigned to pcre_extra::match_limit_recursion for every pcre_exec()
// call made in this file.
const int recurse_limit = 50000;

namespace {

// Locale consulted by isspace_or_nonbreakspace() for whitespace classification.
// NOTE(review): this is default-constructed, i.e. a copy of the global locale
// at static-initialisation time, not an explicitly named ru_RU.UTF-8 locale as
// the identifier suggests — confirm that this is intended.
std::locale ruRU_utf8_loc;

// Counts the characters of the NUL-terminated string `it` by advancing with
// next_utf8_char() until the terminator is reached.
// Returns 0 if advancing throws a std::exception.
int utf8_strlen(const char* it) {
    try {
        int chars = 0;
        while (*it) {
            ++chars;
            it = next_utf8_char(it);
        }
        return chars;
    } catch (const std::exception&) {
        return 0;
    }
}

// Counts the characters in the byte range [b, e) as the distance between the
// two read-only UTF-8 iterators built from the range ends.
// Returns 0 if the iteration throws a std::exception.
int utf8_strlen(const char* b, const char* e) {
    try {
        auto first = make_utf8_ro_iterator(b);
        auto last = make_utf8_ro_iterator(e);
        return boost::distance(boost::make_iterator_range(first, last));
    } catch (const std::exception&) {
        return 0;
    }
}


// True when code point `i` is whitespace according to ruRU_utf8_loc, or is
// U+00A0 (NO-BREAK SPACE).
inline bool isspace_or_nonbreakspace(int i) {
    if (std::isspace<wchar_t>(i, ruRU_utf8_loc)) {
        return true;
    }
    return i == 0x00A0;
}

// Returns a byte offset from `b` that is used by callers as the length of the
// prefix of [b, e) to copy, given a byte budget `n`. The offset is located with
// a binary search over UTF-8 character positions close to offset n.
// NOTE(review): the apparent intent is "cut near n bytes without splitting a
// multi-byte character" — confirm against the semantics of next_utf8_char()
// and make_utf8_ro_iterator() in butil, which are not visible here.
inline ptrdiff_t byte_pref(const char* b, const char* e, ptrdiff_t n) {
    size_t sz = e - b;  // bytes available in [b, e)
    // Lower search bound: result of next_utf8_char() applied at offset
    // max(min(n, sz) - 4, 0) with b + min(n, sz) as its end argument. For an
    // empty range both bounds collapse to b. Note that n and sz are narrowed
    // to int by the explicit std::min<int>/std::max<int> instantiations.
    const char* e1 = sz ? next_utf8_char(b + std::max<int>(std::min<int>(n, sz) - 4, 0), b + std::min<int>(n, sz)) : b;
    // Upper search bound: result of next_utf8_char() applied at offset
    // max(min(n, sz) - 1, 0) with e as its end argument.
    const char* e2 = sz ? next_utf8_char(b + std::max<int>(std::min<int>(n, sz) - 1, 0), e) : b;
    // Counting iterators make each UTF-8 iterator position itself the element
    // being searched, so std::lower_bound can run over positions.
    auto left = boost::make_counting_iterator(make_utf8_ro_iterator(e1));
    auto right = boost::make_counting_iterator(make_utf8_ro_iterator(e2));
    // Predicate: true while next_utf8_char() of position x lies at a byte
    // offset (from b) strictly below y + 1.
    auto less = [&b](auto x, auto y) {
        return std::distance(b, next_utf8_char(x.base())) < (y + 1);
    };
    // First position in [left, right) for which the predicate is false when
    // compared with n + 1, or `right` if there is none.
    auto iter = std::lower_bound(left, right, n + 1, less);
    // Byte offset of that position relative to b.
    return std::distance(b, iter->base());
}

} // namespace anonymous

// Starts with no rules loaded in any of the four rule sets, then lets
// _fl_regexp_ini() build the PCRE tables and clear the rule slots.
CFirstLine::CFirstLine() {
    m_count1 = m_count2 = m_count3 = m_count4 = 0;
    m_tables = nullptr;

    _fl_regexp_ini();
}

// Releases everything _fl_regexp_destroy() is responsible for (the compiled
// rules and the PCRE tables).
CFirstLine::~CFirstLine() {
    this->_fl_regexp_destroy();
}

// Reads rule definitions from `ss`, one per line, and registers them through
// _fl_add_rule(). Returns the number of rule lines handed to _fl_add_rule(),
// or 0 without reading anything if any rule set already contains rules.
//
// Line handling, as implemented below:
//   * lines not starting with '/' are ignored;
//   * "//+" sets the switch value passed with subsequent rules to 1,
//     "//=" sets it to 2 (the line itself is still tried as a rule);
//   * any other line starting with "//" advances to the next rule set;
//     reading stops once the set number reaches 5;
//   * a line matching ^\/(.+)\/ is cut just before the closing slash of the
//     match and the text after the opening slash becomes the rule.
int CFirstLine::FL_LoadRules(std::stringstream& ss) {
    const bool already_loaded = m_count1 || m_count2 || m_count3 || m_count4;
    if (already_loaded) {
        return 0;
    }

    int loaded = 0;
    int current_set = 1;
    int switch_kind = 0;

    for (std::string row; std::getline(ss, row); ) {
        if (row[0] != '/') {
            continue;
        }

        if (row[1] == '/') {
            const char marker = row[2];
            if (marker == '+') {
                switch_kind = 1;
            } else if (marker == '=') {
                switch_kind = 2;
            } else {
                ++current_set;
                if (current_set == 5) {
                    break;
                }
                continue;
            }
        }

        int match_begin = 0;
        int match_end = 0;
        if (!_fl_regexp_match("^\\/(.+)\\/", row.data(), &match_begin, &match_end)) {
            continue;
        }

        // Terminate the rule text in place, dropping the closing slash.
        row[match_end - 1] = 0;
        _fl_add_rule(row.data() + match_begin + 1, current_set, switch_kind);
        ++loaded;
    }
    return loaded;
}

// Scans `text` line by line and copies the line chosen as the message's
// "first line" into `first_line` (always NUL-terminated when buf_len >= 1).
//
// Parameters:
//   text        NUL-terminated message text; a null pointer yields an empty
//               result.
//   first_line  output buffer of at least `buf_len` bytes.
//   buf_len     size of `first_line`; values above LINEMAXSIZE are clamped.
//
// Returns the length of the result in UTF-8 characters, or 0 when nothing
// was selected, when rule sets 1 or 2 are empty, or when a std::exception was
// thrown while scanning.
//
// Per line, in order:
//   1. a match in rule set 1 skips the line;
//   2. a match of a set-4 rule whose switch value is 1 skips the line and
//      then looks ahead for a line matching one of the switch-2 rules that
//      follow it, resuming the scan after that line when found;
//   3. a match in rule set 3 stops the scan; the result is the first line
//      that survived steps 1-3 earlier, if any;
//   4. a match in rule set 2 either yields the remainder of the line (when
//      more than 24 characters follow the match) or makes the next surviving
//      line the result;
//   5. otherwise the line itself is the result.
int CFirstLine::FL_Find(const char * text, char * first_line, int buf_len) const {
    // Without a usable output buffer there is nowhere to store even the
    // terminator (the unguarded code wrote first_line[buf_len - 1]).
    if (!first_line || buf_len <= 0) {
        return 0;
    }
    first_line[0] = 0;

    // A one-byte buffer can only hold the terminator, and would otherwise
    // pass a negative byte budget down to _fl_get_line().
    if (!text || buf_len < 2 || !m_count1 || !m_count2) {
        return 0;
    }

    if (buf_len > LINEMAXSIZE) {
        buf_len = LINEMAXSIZE;
    }
    // Maximum number of bytes of a line that fit next to the terminator.
    const int len = buf_len - 1;

    int text_pos = 0;
    char line[LINEMAXSIZE];

    char line_fl2[LINEMAXSIZE];  // first line that passed rule sets 1, 4 and 3
    int  fl2 = 0;

    int sw = 0;
    int sw_fl = 0;           // set-2 rule matched earlier: take the next surviving line
    int sw_fl_is_ready = 0;  // first_line has already been filled in

    try {
        const int text_len = strlen(text);
        while (_fl_get_line(text, &text_pos, text_len, line, len)) {
            // Rule set 1: skip the line.
            sw = 0;
            for (int i = 0; i < m_count1; i++) {
                int rest = 0;
                int rest_size = 0;
                if (_fl_match(line, i, 1, rest, rest_size)) {
                    sw = 1;
                    break;
                }
            }
            if (sw) {
                continue;
            }

            // Rule set 4: a switch-1 rule opens a region; look ahead for a
            // line matching one of the switch-2 rules that follow it.
            sw = 0;
            for (int i = 0; i < m_count4; i++) {
                int rest = 0;
                int rest_size = 0;
                if ((m_sw_rules[i] == 1) && _fl_match(line, i, 4, rest, rest_size)) {
                    int text_pos4 = text_pos;
                    char line4[LINEMAXSIZE];
                    while (_fl_get_line(text, &text_pos4, text_len, line4, len)) {
                        int i4;
                        // Find the first switch-2 rule after rule i ...
                        for (i4 = i + 1; i4 < m_count4; i4++) {
                            if (m_sw_rules[i4] == 2) {
                                break;
                            }
                        }
                        // ... and try the contiguous run of switch-2 rules.
                        for ( ; i4 < m_count4; i4++) {
                            if (m_sw_rules[i4] != 2) {
                                break;
                            }
                            if  (_fl_match(line4, i4, 4, rest, rest_size)) {
                                sw = 1;
                                text_pos = text_pos4;
                                break;
                            }
                        }
                        if (sw) {
                            break;
                        }
                    }
                    // The opening line is skipped even if no closing line
                    // was found.
                    sw = 1;
                }
            }
            if (sw) {
                continue;
            }

            // Rule set 3: stop scanning and fall back to the first surviving
            // line seen so far, if there was one.
            sw = 0;
            for (int i = 0; i < m_count3; i++) {
                int rest = 0;
                int rest_size = 0;
                if (_fl_match(line, i, 3, rest, rest_size)) {
                    sw = 1;
                    break;
                }
            }
            if (sw) {
                first_line[0] = 0;
                if (fl2) {
                    strncpy(first_line, line_fl2, buf_len - 1);
                }

                sw_fl_is_ready = 1;
                break;
            }

            if (!fl2) {
                strncpy(line_fl2, line, LINEMAXSIZE);
                fl2 = 1;
            }

            sw = 0;
            if (sw_fl) {
                strncpy(first_line, line, buf_len);
                sw_fl_is_ready = 1;
                break;
            }

            // Rule set 2: the match is a prefix to drop.
            for (int i = 0; i < m_count2; i++) {
                int rest = 0;
                int rest_size = 0;
                if (_fl_match(line, i, 2, rest, rest_size)) {
                    if (rest_size > 24) {
                        char * pos = strpbrk(line + rest, "!.");
                        // Only measure up to the punctuation when there is
                        // one; a null end pointer is not a valid range.
                        const int k = pos ? utf8_strlen(line + rest, pos) : 0;
                        if (pos && (rest_size - k > 20) && (k < 20) ) {
                            pos++;
                        } else {
                            pos = line + rest;
                        }
                        const int sp = _fl_spaces(pos);

                        strncpy(first_line, pos + sp, buf_len);
                        sw_fl_is_ready = 1;
                        break;

                    }
                    // Too little text after the match: use the next
                    // surviving line instead.
                    sw = 1;
                    sw_fl = 1;
                    break;
                }
            }

            if (sw == 0) {
                if (!sw_fl_is_ready) {
                    strncpy(first_line, line, buf_len);
                    first_line[buf_len - 1] = 0;
                }
                break;
            }

        }

    } catch (const std::exception& e) {
        first_line[0] = 0;
        return 0;
    }

    // strncpy does not terminate when the source fills the buffer.
    first_line[buf_len - 1] = 0;

    return utf8_strlen(first_line);
}

// Builds the PCRE character tables and clears every slot of the four rule
// arrays. Always returns 1.
int CFirstLine::_fl_regexp_ini() {
    m_tables = pcre_maketables();

    for (int slot = 0; slot < MAX_RULE_NUMBER; ++slot) {
        m_rules1[slot] = nullptr;
        m_rules2[slot] = nullptr;
        m_rules3[slot] = nullptr;
        m_rules4[slot] = nullptr;
    }
    return 1;
}

void CFirstLine::_fl_regexp_destroy() {
    for (int i = 0; i < MAX_RULE_NUMBER; i++) {
        if (m_rules1[i]) {
            free(m_rules1[i]);
            m_rules1[i] = 0;
        }
        if (m_rules2[i]) {
            free(m_rules2[i]);
            m_rules2[i] = 0;
        }
        if (m_rules3[i]) {
            free(m_rules3[i]);
            m_rules3[i] = 0;
        }
        if (m_rules4[i]) {
            free(m_rules4[i]);
            m_rules4[i] = 0;
        }
    }

    if (m_tables){
        free((void *)m_tables);
        m_tables = 0;
    }
}
int CFirstLine::_fl_regexp_match(const char * regexp, const char * str, int *pos1, int *pos2) {
    pcre *re = 0;
    const char *error;
    int erroroffset;
    int ovector[OVECSIZE];

    pcre_extra pe;
    pcre_extra* ppe = 0;

    pe.flags = PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    pe.match_limit_recursion = recurse_limit;
    ppe = &pe;

    re = pcre_compile(regexp, 0, &error, &erroroffset, 0);

    if (re ==0)
        return 0;

    int rc = pcre_exec(re, ppe, str, strlen(str), 0, 0, ovector, OVECSIZE);

    if (rc <= 0) {
        free(re);
        return 0;
    }

    for (int i = 0; i < rc; i++) {
        *pos1 = ovector[2 * i];
        *pos2 = ovector[2 * i + 1];
        break;
    }
    free(re);
    return 1;
}
int CFirstLine::_fl_add_rule(const char * str, int set, int sw) {
    const char *error;
    int erroroffset;
    int options = PCRE_CASELESS | PCRE_UTF8;

    pcre *re = pcre_compile(str, options, &error, &erroroffset, m_tables);
    if (re) {
        if (set == 1) {
            m_rules1[m_count1] = re;
            m_count1++;
        } else if (set ==2 ) {
            m_rules2[m_count2] = re;
            m_count2++;
        } else if (set ==3 ) {
            m_rules3[m_count3] = re;
            m_count3++;
        } else {
            m_rules4[m_count4] = re;
            m_sw_rules[m_count4] = sw;
            m_count4++;
        }
    }
    return 1;
}

int CFirstLine::_fl_match(char * str, int rule_n, int rule_set, int& rest, int& rest_size) const {
    rest = 0;
    rest_size = 0;

    pcre_extra pe;
    pcre_extra* ppe = 0;

    pe.flags = PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    pe.match_limit_recursion = recurse_limit;
    ppe = &pe;

    std::array<int, OVECSIZE> offsets;
    int count =
        pcre_exec(
            (rule_set == 1) ? m_rules1[rule_n] : ((rule_set == 2) ? m_rules2[rule_n] :((rule_set == 3)? m_rules3[rule_n] : m_rules4[rule_n])),
            ppe, str, strlen(str), 0, 0, offsets.data(), offsets.size());

    if (count >= 0) {
        rest = offsets[1];
        rest_size = utf8_strlen(str + offsets[1]);
        return 1;
    } else {
        return 0;
    }
}

// Extracts the next line of `text`, starting at byte offset *text_pos, into
// `line` as a NUL-terminated string.
//
// Leading whitespace (as defined by _fl_spaces) is skipped first. The line
// runs up to the next '\n', excluding a '\r' directly before it, or to the end
// of the text when there is no '\n'. The number of bytes copied is decided by
// byte_pref() from the smaller of the line length and len - 1.
// *text_pos is moved past the '\n', or to text_len when there was none.
// Returns 0 without touching anything when *text_pos is already at or past
// text_len, 1 otherwise.
int CFirstLine::_fl_get_line(const char * text, int *text_pos, int text_len, char *line, int len) const {
    if (*text_pos >= text_len) {
        return 0;
    }

    *text_pos += _fl_spaces(text + *text_pos);

    const char* const start = text + *text_pos;
    const char* const text_end = text + text_len;
    const char* const newline = strchr(start, '\n');

    // Length in bytes of the line content before any truncation.
    long int wanted;
    if (newline) {
        const bool has_cr = (newline != start) && (*(newline - 1) == '\r');
        const char* const content_end = has_cr ? newline - 1 : newline;
        wanted = content_end - start;
    } else {
        wanted = text_len - *text_pos;
    }

    const int new_len = byte_pref(start, text_end, std::min<long int>(wanted, len - 1));
    if (new_len) {
        memcpy(line, start, new_len);
    }
    line[new_len] = 0;

    if (newline) {
        *text_pos = next_utf8_char(newline) - text;
    } else {
        *text_pos = text_len;
    }
    return 1;
}

// Returns the number of bytes at the start of the NUL-terminated `text` that
// are taken up by characters for which isspace_or_nonbreakspace() is true.
int CFirstLine::_fl_spaces(const char * text) const {
    const char* cursor = text;
    while (*cursor && isspace_or_nonbreakspace(utf8_char_to_wchar(cursor))) {
        cursor = next_utf8_char(cursor);
    }
    return cursor - text;
}

// Convenience wrapper: builds a fresh CFirstLine, loads `rules` into it, runs
// FL_Find() on msg.text() with a 384-byte buffer and stores the result in
// `fline`. Returns whatever FL_Find() returned.
int LegacyFindFirstLine(const MsgText& msg, std::string& fline, const std::string& rules) {
    const auto finder = std::make_unique<CFirstLine>();

    std::stringstream rules_stream(rules);
    finder->FL_LoadRules(rules_stream);

    char buffer[384];
    const auto char_count = finder->FL_Find(msg.text(), buffer, sizeof(buffer));
    fline.assign(buffer);
    return char_count;
}

}  // namespace NNotSoLiteSrv::NFirstline::NLib
