/*
 * tdictshinglerenv.cpp
 *
 *  Created on: 20 янв. 2016 г.
 *      Author: luckybug
 */

#include "tdictshinglerenv.h"
#include <util/stream/str.h>
#include <mail/so/spamstop/tools/so-common/shtime.h>

namespace DictionaryShingler {
    bool Parser::CreateRequestData(const request_vector_t& shingle_list, RequestType rqst_type, CheckINListStruct& check_res, ui32 priznak) {
        check_res.Clear();

        if (shingle_list.empty()) {
            check_res.empty = true;
            return false;
        }

        if (!IS_PERSB_DISABLE_CHECK_REQUEST(priznak)) {
            check_res.traccert_checkrequest_tick = CShingleTime::GetMs();
            checkRequest(shingle_list, rqst_type, priznak, check_res);
            check_res.traccert_checkrequest_tick = CShingleTime::GetMs() - check_res.traccert_checkrequest_tick;
        }

        if (!check_res.CheckOK()) {
            return false;
        }

        TStringStream request_str;
        {
            check_res.traccert_createrequest_tick = CShingleTime::GetMs();

            request_str << "42=";
            for (request_vector_t::const_iterator shingleIt = shingle_list.begin(); shingleIt != shingle_list.end(); ++shingleIt) {
                request_str << *shingleIt << ',';
            }
            request_str << '&';

            check_res.traccert_createrequest_tick = CShingleTime::GetMs() - check_res.traccert_createrequest_tick;
        }

        if (request_str.empty())
            return true;

        check_res.traccert_packrequest_tick = CShingleTime::GetMs();
        TString tstr_err = "";
        if (!CompressAndBase64(request_str.Str().c_str(), request_str.Str().length(), check_res.compress_request_data, tstr_err)) {
            check_res.compress_error = true;
            check_res.compress_request_data = "";

            if (!IS_PERSB_DISABLE_WRITE_CHECK_REQUEST_EXTERROR(priznak)) {
                check_res.error_res += check_res.rasdelitel + "COMPRESS_ERROR: " + tstr_err;
            }
            return false;
        }
        check_res.traccert_packrequest_tick = CShingleTime::GetMs() - check_res.traccert_packrequest_tick;

        return true;
    }

    bool Parser::ParseResponceData(const TString& compress_responce, request_hash_t& shingle_hash, ui32& err, CheckINListStruct& check_res) {
        bool res = false;
        bool warning_size = false;
        TString uncompress_responce = "";
        TString err_str = "";

        err = 0;
        check_res.traccert_unpackresponce_tick = CShingleTime::GetMs();
        if (Unbase64AndUncompress(compress_responce.c_str(), compress_responce.length(), warning_size, uncompress_responce, err_str)) {
            check_res.traccert_unpackresponce_tick = CShingleTime::GetMs() - check_res.traccert_unpackresponce_tick;

            check_res.traccert_parseresponce_tick = CShingleTime::GetMs();
            res = ParseShinglesData(uncompress_responce, shingle_hash);
            check_res.traccert_parseresponce_tick = CShingleTime::GetMs() - check_res.traccert_parseresponce_tick;

        } else {
            check_res.traccert_unpackresponce_tick = CShingleTime::GetMs() - check_res.traccert_unpackresponce_tick;
            if (warning_size)
                err = 1;
            else
                err = 2;
        }

        return res;
    }

    void Parser::checkRequest(const request_vector_t& shingle_list, RequestType /*rqst_type*/, ui32 priznak, CheckINListStruct& check_res) {
        for (request_vector_t::const_iterator shingleIt = shingle_list.begin(); shingleIt != shingle_list.end(); ++shingleIt) {
            ui64 shingle = shingleIt->GetShingle();
            //        const TString & word     = shingleIt->sWord();
            SpamType spamtype = shingleIt->GetSpamType();

            if (!shingleIt->IsValid()) {
                check_res.not_valid_count = IncMax32(check_res.not_valid_count, 1);
                if (!IS_PERSB_DISABLE_WRITE_CHECK_REQUEST_EXTERROR(priznak)) {
                    check_res.error_res += check_res.rasdelitel + "not_valid: " + ShingleToStroka(shingle);
                }

                continue;
            }
            if (!ReqSpamCompatible<RT_GET>(spamtype) && !ReqSpamCompatible<RT_VIRUS>(spamtype) && !ReqSpamCompatible<RT_FISHING>(spamtype) && !ReqSpamCompatible<RT_PUT>(spamtype) && !ReqSpamCompatible<RT_ABUSE>(spamtype) && !ReqSpamCompatible<RT_FISHING_YAN>(spamtype) && !ReqSpamCompatible<RT_HACKED>(spamtype)) {
                check_res.bad_spamtype_count = IncMax32(check_res.bad_spamtype_count, 1);
                if (!IS_PERSB_DISABLE_WRITE_CHECK_REQUEST_EXTERROR(priznak)) {
                    check_res.error_res += (check_res.error_res.empty() ? "" : check_res.rasdelitel) + "bad_spamtype: " + ShingleToStroka(shingle) + ", spamtype=" + SpamTypeToStroka(spamtype);
                }
            }
        }
        {
            std::map<ui64, size_t> counted_shingle_list;

            for (request_vector_t::const_iterator it = shingle_list.begin(); it != shingle_list.end(); ++it) {
                counted_shingle_list[it->GetShingle()]++;
            }

            for (std::map<ui64, size_t>::const_iterator it = counted_shingle_list.begin(); it != counted_shingle_list.end(); ++it) {
                ui64 shingle = it->first;
                size_t count = it->second;

                if (count <= 1)
                    continue;

                check_res.collision_count++;
                if (!IS_PERSB_DISABLE_WRITE_CHECK_REQUEST_EXTERROR(priznak)) {
                    check_res.error_res += (check_res.error_res.empty() ? "" : check_res.rasdelitel) + "collision: " + ShingleToStroka(shingle) + ", count=" + IntToStroka(count);
                }
            }

            if (check_res.collision_count)
                check_res.error_res += " total words " + IntToStroka(shingle_list.size());
        }
    }

    bool Parser::ParseShinglesData(const TString& responce, request_hash_t& shingle_hash) {
        shingle_hash.clear();

        bool res = true;
        if (responce.empty())
            return res;

        const char* pstart = responce.c_str(); //strstr(responce.c_str(), "\r\n\r\n"); //�������� ���������

        const char* end = nullptr;
        const char* begin = strchr(pstart, '<');
        if (begin != nullptr)
            end = strchr(begin, '>');

        while ((begin != nullptr) && (end != nullptr)) {
            //res = res + "<" + ShingleToStroka(m_shingle) + "-" + IntToStroka2(m_tyend) + ": v='01' empty='0' src='" + shingle_source + "' ";
            //res = res + "h='" + IntToStroka(hv1) + "-" + IntToStroka(hv2) + "-" + IntToStroka(hv3) + "-" + IntToStroka(hv4) + "-" + IntToStroka(hv5) + "-" + IntToStroka(hv6) + "-" + IntToStroka(hv7) + "-" + IntToStroka(hv8) + "-" + IntToStroka(hv9) + "-" + IntToStroka(hv10) + "-" + IntToStroka(hv11) + "-" + IntToStroka(hv12) + "-" + IntToStroka(hv13) + "-" + IntToStroka(hv14) + "-" + IntToStroka(hv15) + "-" + IntToStroka(hv16) + "' ";
            //res = res + ">";

            res &= ParseShingle(begin + 1, shingle_hash);

            pstart = end + 1;
            if (pstart == nullptr)
                break;

            end = nullptr;
            begin = strchr(pstart, '<');
            if (begin != nullptr)
                end = strchr(begin, '>');
        }
        return res;
    }

    // Tells whether the single-quoted value that follows `tag` in `src` is absent
    // or empty.
    //
    // src - NUL-terminated text to search.
    // tag - string literal including the opening quote, e.g. "v='".
    //
    // Returns true when the tag is not found, when no closing quote follows it, or
    // when the closing quote immediately follows the tag; false otherwise.
    template <size_t N>
    bool substrByTagEmpty(const char* src, const char (&tag)[N]) {
        const char* const tagPos = strstr(src, tag);
        if (tagPos == nullptr) {
            return true;
        }

        // N counts the terminating NUL of the literal, hence N - 1 characters to skip.
        const char* const valueBegin = tagPos + (N - 1);
        const char* const valueEnd = strchr(valueBegin, '\'');

        return valueEnd == nullptr || valueEnd <= valueBegin;
    }

    //parse str in <...>
    bool Parser::ParseShingle(const TString& responce, request_hash_t& shingle_hash) {
        const char* src = responce.c_str();

        Request req;

        ui64& shingle = req.SetShingle();
        MailType& mailType = req.SetMailType();

        //sscanf(header, "%016lx-%i: v='%s' empty='%s' src='%s';", &shingle, &type, sVersion, sEmpty, shingleSource);

        {
            int assigments = sscanf(src, "%" PRIx64 "-%i", &shingle, (int*)&mailType);
            if (assigments != 2)
                return false;
        }
        {
            static const char vTag[] = "v='";
            if (substrByTagEmpty(src, vTag))
                return false;
        }
        {
            static const char eTag[] = "empty='";
            if (substrByTagEmpty(src, eTag))
                return false;
        }
        {
            static const char sTag[] = "src='";
            substrByTagEmpty(src, sTag);
        }

        Stat& stat = req.SetCounters();

        ParseStat(src, stat);

        shingle_hash.insert(request_hash_t::value_type(req.GetHash(), req));

        return true;
    }

    template <>
    bool Parser::ParseCounter<Stat::TODAY>(const char* src, Counters& c) {
        const char* ptr = strstr(src, TODAY_TAG "='");
        if (!ptr)
            return false;

        int assigments = sscanf(ptr, TODAY_TAG "='%u-%u-%u-%u-%u-%u-%u-%u-%u-%u-%u-%u'",
                                &c.setFirstTime(),
                                &c.setLastTime(),
                                &c.set<ST_HAM>(),
                                &c.set<ST_SPAM>(),
                                &c.set<ST_COMPLHAM>(),
                                &c.set<ST_COMPLSPAM>(),
                                &c.set<ST_VIRUS>(),
                                &c.set<ST_FISHING>(),
                                &c.set<ST_FISHING_YAN>(),
                                &c.set<ST_HACKED>(),
                                &c.set<ST_EXPERT_COMPLHAM>(),
                                &c.set<ST_EXPERT_COMPLSPAM>());
        if (assigments != Counters::fieldsCount)
            return false;
        return true;
    }
    template <>
    bool Parser::ParseCounter<Stat::YESTERDAY>(const char* src, Counters& c) {
        const char* ptr = strstr(src, YESTERDAY_TAG "='");
        if (!ptr)
            return false;

        int assigments = sscanf(ptr, YESTERDAY_TAG "='%u-%u-%u-%u-%u-%u-%u-%u-%u-%u-%u-%u'",
                                &c.setFirstTime(),
                                &c.setLastTime(),
                                &c.set<ST_HAM>(),
                                &c.set<ST_SPAM>(),
                                &c.set<ST_COMPLHAM>(),
                                &c.set<ST_COMPLSPAM>(),
                                &c.set<ST_VIRUS>(),
                                &c.set<ST_FISHING>(),
                                &c.set<ST_FISHING_YAN>(),
                                &c.set<ST_HACKED>(),
                                &c.set<ST_EXPERT_COMPLHAM>(),
                                &c.set<ST_EXPERT_COMPLSPAM>());
        if (assigments != Counters::fieldsCount)
            return false;
        return true;
    }
    template <>
    bool Parser::ParseCounter<Stat::HISTORY>(const char* src, Counters& c) {
        const char* ptr = strstr(src, HISTORY_TAG "='");
        if (!ptr)
            return false;

        int assigments = sscanf(ptr, HISTORY_TAG "='%u-%u-%u-%u-%u-%u-%u-%u-%u-%u-%u-%u'",
                                &c.setFirstTime(),
                                &c.setLastTime(),
                                &c.set<ST_HAM>(),
                                &c.set<ST_SPAM>(),
                                &c.set<ST_COMPLHAM>(),
                                &c.set<ST_COMPLSPAM>(),
                                &c.set<ST_VIRUS>(),
                                &c.set<ST_FISHING>(),
                                &c.set<ST_FISHING_YAN>(),
                                &c.set<ST_HACKED>(),
                                &c.set<ST_EXPERT_COMPLHAM>(),
                                &c.set<ST_EXPERT_COMPLSPAM>());
        if (assigments != Counters::fieldsCount)
            return false;
        return true;
    }

    // Fills `stat` from one element's text: today's, yesterday's and history
    // counters, then the raw language flags. Parsing stops at the first part that
    // fails; returns true only when all four parts were read.
    bool Parser::ParseStat(const char* src, Stat& stat) {
        return ParseCounter<Stat::TODAY>(src, stat.set<Stat::TODAY>())
            && ParseCounter<Stat::YESTERDAY>(src, stat.set<Stat::YESTERDAY>())
            && ParseCounter<Stat::HISTORY>(src, stat.set<Stat::HISTORY>())
            && ParseFlags(src, stat.setLanguageRaw());
    }

    // Reads the unsigned decimal value of the FLAGS_TAG='...' attribute into `flag`.
    // Returns false when the attribute is missing or holds no number.
    bool Parser::ParseFlags(const char* src, ui64& flag) {
        const char* const tagPos = strstr(src, FLAGS_TAG "='");
        if (tagPos == nullptr) {
            return false;
        }

        const int converted = sscanf(tagPos, FLAGS_TAG "='%" PRIu64 "'", &flag);
        return converted == 1;
    }
}

// Writes `request` to `json` as an object with one key, "shingles", whose value is
// the list of the requests' hashes in container order. Returns the value of
// json.EndObject().
NJsonWriter::TBuf& PrintJson(const DictionaryShingler::request_vector_t& request, NJsonWriter::TBuf &json) {
    typedef DictionaryShingler::request_vector_t::const_iterator TRequestIt;

    json.BeginObject();
    json.WriteKey("shingles");

    json.BeginList();
    for (TRequestIt it = request.begin(); it != request.end(); ++it) {
        json.WriteULongLong(it->GetHash());
    }
    json.EndList();

    return json.EndObject();
}
