#include <iostream>
#include <sstream>
#include <codecvt>
#include <locale>
#include <algorithm>
#include <cwctype>
#include <boost/algorithm/string/trim.hpp>
#include <boost/format.hpp>
#include "make_search_helper.hpp"

namespace furita {
namespace processor {
namespace msq {

    /// Returns true when the character `c` occurs anywhere in `pattern`.
    bool in(const std::wstring& pattern, const wchar_t &c) {
        return pattern.find(c) != std::wstring::npos;
    }

    /// Tells whether `c` has to be prefixed with a backslash by preparePattern().
    /// True for control characters (as classified by std::iswcntrl) and for any of:
    /// backslash, double quote, single quote, * ! ^ ( ) { } [ ] : ? ~ - + and space.
    bool needEscape(const wchar_t &c) {
        if (std::iswcntrl(c)) {
            return true;
        }
        return in(LR"(\"'*!^(){}[]:?~-+ )", c);
    }

    /// Tells whether `c` is one of the characters ('*' or '?') that preparePattern()
    /// prefixes with an additional backslash when double escaping is requested.
    bool needDoubleEscape(const wchar_t &c) {
        const std::wstring doubly_escaped(L"*?");
        return in(doubly_escaped, c);
    }

    // All the logic from MAILDEV-1208 and MAILDEV-1230 has been removed;
    // the behaviour is now brought up to date with MAILDLV-2211.
    //
    // Escapes a UTF-8 `pattern`: each character for which needEscape() holds gets a
    // leading backslash, and when `doubleEscape` is true each character for which
    // needDoubleEscape() holds gets one more backslash. The result is returned as UTF-8.
    // The UTF-8 <-> wide conversion goes through std::wstring_convert, which reports
    // conversion failures by throwing std::range_error.
    std::string preparePattern(const std::string& pattern, bool doubleEscape = false) {
        using Utf8Codec = std::wstring_convert<std::codecvt_utf8<wchar_t>>;

        const std::wstring decoded = Utf8Codec().from_bytes(pattern);

        std::wstring escaped;
        for (const wchar_t ch : decoded) {
            const bool single = needEscape(ch);
            const bool extra = doubleEscape && needDoubleEscape(ch);
            if (single) {
                escaped.push_back(L'\\');
            }
            if (extra) {
                escaped.push_back(L'\\');
            }
            escaped.push_back(ch);
        }

        return Utf8Codec().to_bytes(escaped);
    }

    /// Returns true when `field` names one of the address fields:
    /// "from", "to", "cc", "bcc" or "tocc" (exact, case-sensitive match).
    inline bool is_address_field(const std::string& field) {
        static const char* const kAddressFields[] = {"from", "to", "cc", "bcc", "tocc"};
        for (const char* name : kAddressFields) {
            if (field == name) {
                return true;
            }
        }
        return false;
    }

    /// Builds the query term "uid:<uid>" for the given user id.
    /// @throws std::runtime_error when `uid` is empty.
    std::string user_str(const std::string &uid) {
        if (uid.empty()) {
            throw std::runtime_error("'uid' should be specified");
        }
        return "uid:" + uid;
    }

    /// Writes to `s` the search-query fragment for a single rule condition.
    ///
    /// The condition pattern is escaped with preparePattern() first; for address fields
    /// it is trimmed beforehand, and double escaping is switched on for CONTAINS.
    /// Which fragment is produced depends on c.field_type, c.field and c.oper.
    /// c.neg is only consulted in the generic header branch at the end; any other
    /// negation is left to the caller.
    ///
    /// @param c    condition to render
    /// @param uid  user id, used only for the "all" flag (rendered via user_str())
    /// @param s    stream that receives the fragment
    /// @throws std::runtime_error when the operation is not supported for the field
    ///         (user_str() also throws it for an empty `uid` on the "all" flag).
    void make_search_query_helper(const rules::condition &c, const std::string& uid, std::ostream &s)
    {
        const std::string pattern = preparePattern(
            is_address_field(c.field) ? boost::trim_copy(c.pattern) : c.pattern,
            c.oper == rules::condition::oper_type::CONTAINS);

        const bool matches = (c.oper == rules::condition::oper_type::MATCHES);
        const bool contains = (c.oper == rules::condition::oper_type::CONTAINS);
        const bool is_flag = (c.field_type == "flag");

        // Shared failure path for operations a field does not support.
        const auto fail_unsupported = [&c]() {
            throw std::runtime_error("Unsupported rule condition: field=" + c.field);
        };

        // see details here: https://st.yandex-team.ru/MPROTO-3777#1506340936000
        if (c.field_type == "type" || (is_flag && c.field == "ya_systype")) {
            if (!matches) {
                throw std::runtime_error("Incorrect operation for " + c.field_type + "=" + c.field);
            }
            s << "message_type:" << pattern;
            return;
        }

        if (is_flag && c.field == "ya_syslabel") {
            s << "domain_label:" << pattern;
            return;
        }

        if (is_flag && (c.field == "spam" || c.field == "clearspam"
                        || c.field == "subscribes" || c.field == "nospam")) {
            // NB: "nospam" is expressed as the negation of "spam" (c.neg == true)
            s << "folder_type: spam";
            return;
        }

        if (is_flag && c.field == "list") {
            s << "headers:\"X-Yandex-Spam: 2\"";
            return;
        }

        if (is_flag && c.field == "all") {
            s << user_str(uid);
            return;
        }

        if (c.field == "from" || c.field == "to" || c.field == "cc") {
            if (matches) {
                s << "(hdr_" << c.field << "_email:\"" << pattern << "\"";
                s << " OR hdr_" << c.field << "_display_name:\"" << pattern << "\")";
            } else if (contains) {
                s << "hdr_" << c.field << "_keyword:*" << pattern << "*";
            } else {
                fail_unsupported();
            }
            return;
        }

        if (c.field == "tocc") {
            // "tocc" searches both the To and the Cc headers.
            if (matches) {
                s << "(hdr_to_email:\"" << pattern
                  << "\" OR hdr_to_display_name:\"" << pattern
                  << "\" OR hdr_cc_email:\"" << pattern
                  << "\" OR hdr_cc_display_name:\"" << pattern
                  << "\")";
            } else if (contains) {
                s << "(hdr_to_keyword:*" << pattern
                  << "* OR hdr_cc_keyword:*" << pattern << "*)";
            } else {
                fail_unsupported();
            }
            return;
        }

        if (c.field == "subject") {
            if (matches) {
                s << "hdr_subject:" << pattern;
            } else if (contains) {
                s << "hdr_subject_keyword:*" << pattern << "*";
            } else {
                fail_unsupported();
            }
            return;
        }

        if (c.field == "body") {
            // The body query does not depend on the operation.
            s << "(body_text:\"" << pattern << "\"";
            s << " OR pure_body:\"" << pattern << "\")";
            return;
        }

        if (c.field == "filename") {
            if (matches) {
                s << "attachname:" << pattern;
            } else if (contains) {
                s << "(attachname:*" << pattern
                  << "* OR attachname_keyword:*" << pattern << "*)";
            } else {
                fail_unsupported();
            }
            return;
        }

        if (c.field == "att") {
            s << "has_attachments:1";
            return;
        }

        // Every other field is rendered as a query against the raw "headers" field,
        // with c.field used as the header name.
        if (matches) {
            s << "headers:\"" << c.field << ": " << pattern << "\"";
        } else if (contains) {
            s << "headers:" << c.field << "\\:\\ *" << pattern << "*";
        } else if (c.oper == rules::condition::oper_type::EXISTS) {
            s << "headers:" << c.field << "\\:*";
        } else {
            fail_unsupported();
        }

        // For a negated header condition an extra "AND NOT hid:0" term is appended.
        if (c.neg) {
            s << " AND NOT hid:0";
        }
    }

/// Builds the complete search query for `rule` on behalf of user `uid`.
///
/// Conditions are first sorted into an AND group and an OR group (some are dropped,
/// see the loop below). The OR group is rendered first, wrapped in parentheses when it
/// has more than one member, and the AND group is then chained with " AND " /
/// " AND NOT ". A negated term that has nothing positive to its left is anchored with
/// the "uid:<uid>" term.
///
/// NOTE: the conditions owned by `rule` are modified in place (`neg` is forced for the
/// spam-like flags, and a "virus" flag is rewritten into a negation-inverted "all" flag).
///
/// @throws std::runtime_error when no usable condition remains, plus whatever
///         make_search_query_helper() / user_str() throw.
std::string make_search_query(const rules::rule_ptr &rule, const std::string &uid)
{
    std::vector<rules::condition_ptr> and_conditions;
    std::vector<rules::condition_ptr> or_conditions;

    // The "spam" / "not spam" flags are honoured only once each and only with AND
    // (see the classification below).
    bool spam_seen = false;
    bool nospam_seen = false;

    for (const rules::condition_ptr &cond : *rule->conditions) {
        // Removed in MAILDEV-1017; follow-up task for the post-mortem: MAILDEV-1031
        //
        //   if (c->field == "virus" ||
        //       c->field == "clearspam" ||
        //       c->field == "spam" ||
        //       c->field == "subscribes" ||
        //       c->field == "list")
        //   {
        //       throw std::runtime_error("Unsupported rule condition: field=" + c->field);
        //   }

        const bool is_flag = (cond->field_type == "flag");

        if (is_flag && (cond->field == "spam"
                        || cond->field == "clearspam"
                        || cond->field == "subscribes"
                        || cond->field == "list"
                        || cond->field == "all")) {
            if (!spam_seen) {
                // NOT SPAM can only be requested via "nospam" (negation is ignored)
                cond->neg = false;
                and_conditions.push_back(cond);
                spam_seen = true;
            }
            continue;
        }

        if (is_flag && cond->field == "nospam") {
            if (!nospam_seen) {
                // SPAM can only be requested via "spam"|"clearspam"|"subscribes"
                // (see the spam handling above)
                cond->neg = true;
                and_conditions.push_back(cond);
                nospam_seen = true;
            }
            continue;
        }

        if (is_flag && cond->field == "virus") {
            // A "virus" flag becomes the "all" flag with its negation inverted.
            cond->neg = !cond->neg;
            cond->field = "all";
            and_conditions.push_back(cond);
            continue;
        }

        // ignore conditions with empty patterns for almost all cases except flags, att & EXISTS oper
        if (cond->pattern.empty()
                && cond->field != "att"
                && cond->oper != rules::condition::oper_type::EXISTS) {
            continue;
        }

        // ignore whitespace-only patterns for address (from/to/cc/bcc/tocc) conditions
        if (is_address_field(cond->field) && boost::trim_copy(cond->pattern).empty()) {
            continue;
        }

        if (cond->link == rules::condition::link_type::AND) {
            and_conditions.push_back(cond);
        } else {
            or_conditions.push_back(cond);
        }
    }

    if (and_conditions.empty() && or_conditions.empty()) {
        throw std::runtime_error("Unsupported rule conditions: uid=" + uid + ", id=" + std::to_string(rule->id));
    }

    std::stringstream s;

    // OR group: "(a OR b OR ...)"; a negated member becomes "(uid:X AND NOT a)".
    const bool wrap_or_group = or_conditions.size() > 1;
    if (wrap_or_group) {
        s << "(";
    }
    bool first_or = true;
    for (const rules::condition_ptr &cond : or_conditions) {
        if (!first_or) {
            s << " OR ";
        }
        first_or = false;

        if (cond->neg) {
            s << "(" << user_str(uid) << " AND NOT ";
        }
        make_search_query_helper(*cond, uid, s);
        if (cond->neg) {
            s << ")";
        }
    }
    if (wrap_or_group) {
        s << ")";
    }

    // AND group: chained after the OR group. Only the very first term of the whole
    // query has no " AND" connector; if that term is negated it is anchored on the uid.
    bool starts_query = or_conditions.empty();
    for (const rules::condition_ptr &cond : and_conditions) {
        if (starts_query) {
            if (cond->neg) {
                s << user_str(uid) << " AND NOT ";
            }
        } else if (cond->neg) {
            s << " AND NOT ";
        } else {
            s << " AND ";
        }
        starts_query = false;

        make_search_query_helper(*cond, uid, s);
    }

    if (s.str().empty()) {
        // empty query MUST NOT match, so make it look like "uid:1 AND NOT uid:1"
        s << user_str(uid) << " AND NOT " << user_str(uid);
    }

    return s.str();
}


}   // namespace msq
}   // namespace processor
}   // namespace furita
