#include <library/cpp/http/fetch/exthttpcodes.h>
#include <library/cpp/http/misc/httpcodes.h>
#include <util/string/vector.h>
#include <util/string/split.h>

#include <wmconsole/version3/protos/urltree.pb.h>

#include "sitetree.h"

namespace NWebmaster {

// Node id under which the root of a site tree is looked up in a shard's id -> node map
// (see TSiteTreeShard's constructor and TSiteTreeShard::IsThereRoot).
const int TSiteTreeBuilder::ROOT_NODE_ID = 1;

// Builds one shard of the site tree.
//
// Steps:
//   1. load the flat record list (and optional user patterns) into the node maps;
//   2. if a root node exists, let it fill TreePaths via ExpandPath;
//   3. index every TreePaths entry by its Path in a compact trie whose value is the
//      entry's position in TreePaths, so prefix lookups can be mapped back to nodes.
TSiteTreeShard::TSiteTreeShard(const TVector<TTreeRecord> &siteStructure, TUserPatternMatcher::Ptr userPatternMatcher, ProtoSearchSource sourceId, ProtoSearchShard shardId)
    : SourceId(sourceId)
    , ShardId(shardId)
{
    LoadSiteTree(siteStructure, userPatternMatcher);

    const auto rootIt = IdNodeMap.find(TSiteTreeBuilder::ROOT_NODE_ID);
    const bool hasRoot = rootIt != IdNodeMap.end() && rootIt->second.Get() != nullptr;
    if (hasRoot) {
        rootIt->second->ExpandPath(TreePaths);
    }

    TCompactTrie<char>::TBuilder builder;
    const size_t pathCount = TreePaths.size();
    for (size_t pathIdx = 0; pathIdx != pathCount; ++pathIdx) {
        builder.Add(TreePaths[pathIdx].Path, pathIdx);
    }

    // The trie is initialised over TrieStream's buffer, which is why the stream is a member.
    builder.Save(TrieStream);
    const auto &serialized = TrieStream.Buffer();
    Trie.Init(serialized.Data(), serialized.Size());
}

// Populates IdNodeMap from the flat record list and registers the user patterns.
//
// Every record yields (or reuses) a node under its NodeId and a node under its ParentId,
// and the former is appended to the latter's Children. A node's own fields are copied from
// a record only while its Name is still empty, so a node first created as somebody's
// parent placeholder is filled in once its own record shows up.
//
// When a matcher is supplied, one stand-alone node per pattern is appended to UserPatterns
// (in the order of userPatternMatcher->Patterns) and the underlying Matcher is remembered.
void TSiteTreeShard::LoadSiteTree(const TVector<TTreeRecord> &siteStructure, TUserPatternMatcher::Ptr userPatternMatcher) {
    for (const TTreeRecord &rec : siteStructure) {
        // Child slot is looked up first, then the parent slot; both are created on demand.
        // NOTE(review): this keeps a reference into IdNodeMap across a second operator[] —
        // assumes the map type keeps references stable on insertion.
        TWrapperNode::Ptr &child = IdNodeMap[rec.NodeId];
        TWrapperNode::Ptr &owner = IdNodeMap[rec.ParentId];

        if (child.Get() == nullptr) {
            child.Reset(new TWrapperNode);
        }
        if (owner.Get() == nullptr) {
            owner.Reset(new TWrapperNode);
        }

        const bool notFilledYet = child->Name.empty();
        if (notFilledYet) {
            child->Id = rec.NodeId;
            child->ParentId = rec.ParentId;
            child->Data = rec.Data;
            child->Name = rec.Name;
        }

        owner->Children.push_back(child);
    }

    if (userPatternMatcher.Get() == nullptr) {
        return;
    }

    for (const TUserPattern &userPattern : userPatternMatcher->Patterns) {
        TWrapperNode::Ptr patternNode(new TWrapperNode);
        patternNode->Data = userPattern.Data;
        patternNode->Name = userPattern.Pattern;
        UserPatterns.push_back(patternNode);
    }

    UserPatternMatcher = userPatternMatcher->Matcher;
}

// Returns true when IdNodeMap holds a non-null node under TSiteTreeBuilder::ROOT_NODE_ID.
bool TSiteTreeShard::IsThereRoot() const {
    const auto rootIt = IdNodeMap.find(TSiteTreeBuilder::ROOT_NODE_ID);
    return rootIt != IdNodeMap.end() && rootIt->second.Get() != nullptr;
}

// Decides whether the first prefixLength characters of path end on a segment boundary.
//
// The prefix is accepted when it is the whole path, when it is exactly one character long
// (treated as the root), or when the character right after it is a separator.
// Throws yexception if prefixLength exceeds the length of path.
bool IsValidPrefix(const TString &path, size_t prefixLength) {
    using namespace NWmcCopyPaste::NParseSegmentsInner;

    const size_t pathLength = path.size();
    if (prefixLength > pathLength) {
        ythrow yexception() << "prefix bigger than path";
    }

    const bool coversWholePath = (prefixLength == pathLength);
    const bool isRootPrefix = (prefixLength == 1); //root

    // Short-circuiting guarantees path[prefixLength] is read only when it is inside the path.
    return coversWholePath || isRootPrefix || IsSeparator(path[prefixLength]);
}

void TSiteTreeShard::AppendCrawlerEvent(const TString &path, int httpCode, size_t countNew, size_t countChanged, size_t countCrawled) {
    for (TPrefixIterator it = MakePrefixIterator(Trie, path.data(), path.size()); it; ++it) {
        size_t filterNo;
        it.GetValue(filterNo);
        int matchedNodeId = TreePaths[filterNo].NodeId;
        TWrapperNode::Ptr &node = IdNodeMap[matchedNodeId];

        if (node.Get() == nullptr) {
            continue;
        }

        if (IsValidPrefix(path, it.GetPrefixLen())) {
            node->Data.CrawledHttpCodes[httpCode] += countCrawled;
            TCrawlerEventCounter &counter = node->Data.CrawledEvents;
            counter.New += countNew;
            counter.Changed += countChanged;
        }
    }

    if (UserPatternMatcher.Get() != nullptr && UserPatternMatcher->Matches(path)) {
        for (const size_t* match = UserPatternMatcher->Accepted.first; match < UserPatternMatcher->Accepted.second; ++match) {
            TWrapperNode::Ptr userFilter = UserPatterns[*match];

            if (userFilter.Get() == nullptr) {
                continue;
            }

            userFilter->Data.CrawledHttpCodes[httpCode] += countCrawled;
            TCrawlerEventCounter &counter = userFilter->Data.CrawledEvents;
            counter.New += countNew;
            counter.Changed += countChanged;
        }
    }
}

// Creates the two shards of the tree, both built from the same records and user patterns:
// (PRODUCTION, RU) and (ACCEPTANCE, RU_ACCEPTANCE). Each shard is stored in Shards under
// its (search source, shard id) pair.
TSiteTree::TSiteTree(const TVector<TTreeRecord> &siteStructure, TUserPatternMatcher::Ptr userPatternMatcher) {
    const auto addShard = [this, &siteStructure, &userPatternMatcher](auto sourceId, auto shardId) {
        this->Shards[std::make_pair(sourceId, shardId)].Reset(new TSiteTreeShard(siteStructure, userPatternMatcher, sourceId, shardId));
    };

    addShard(proto::urltree::PRODUCTION, proto::urltree::RU);
    addShard(proto::urltree::ACCEPTANCE, proto::urltree::RU_ACCEPTANCE);
}

// Appends one turbo_source_info entry to node for every (source id, total) pair in turboSource.
void TSiteTree::SerializeTurboSources(ProtoNode *node, TTreeData::TTurboSources &turboSource) const {
    for (const auto &[sourceId, total] : turboSource) {
        ProtoTurboSource *entry = node->add_turbo_source_info();
        entry->set_source_id(sourceId);
        entry->set_total(total);
    }
}
// Appends one httpcodes entry to node for every (code, total) pair in httpCodes.
void TSiteTree::SerializeHttpCodes(ProtoNode *node, TTreeData::THttpCodes &httpCodes) const {
    for (const auto &[code, total] : httpCodes) {
        ProtoHttpCode *entry = node->add_httpcodes();
        entry->set_code(code);
        entry->set_total(total);
    }
}

// Appends one url_statuses entry to node for every (url status, total) pair in urlStatuses.
void TSiteTree::SerializeUrlStatuses(ProtoNode *node, TTreeData::TUrlStatuses &urlStatuses) const {
    for (const auto &[status, total] : urlStatuses) {
        proto::urltree::UrlStatusInfo *entry = node->add_url_statuses();
        entry->set_url_status(status);
        entry->set_total(total);
    }
}

// Writes every shard of the tree into HostProto.
//
// For each shard:
//   * if the shard has a root, every non-null node with a non-empty Name becomes an entry
//     of `nodes`, carrying its own node id and parent id;
//   * every non-null user pattern node becomes an entry of `user_nodes` with node id and
//     parent id set to 0.
// Both kinds of entries are tagged with the shard's (search source, shard id) key and carry
// the same set of counters; num_of_doubles is always written as 0.
void TSiteTree::Serialize(ProtoUrlTreeHost& HostProto) const {
    for (const auto &shardObj : Shards) {
        const auto &shardKey = shardObj.first; // first = search source, second = shard id
        const TSiteTreeShard &shard = *shardObj.second;

        // Fills everything that tree nodes and user pattern nodes have in common.
        const auto fillShared = [this, &shardKey](proto::urltree::NodeInfo *out, auto *wrapped) {
            out->set_search_source_id(shardKey.first);
            out->set_shard_id(shardKey.second);
            out->set_name(wrapped->Name);
            out->set_num_of_docs(wrapped->Data.NumOfDocs);
            out->set_num_of_doubles(0);

            this->SerializeTurboSources(out, wrapped->Data.TurboSource);
            this->SerializeHttpCodes(out, wrapped->Data.HttpCodes);
            this->SerializeUrlStatuses(out, wrapped->Data.ExcludedUrlStatuses);

            out->set_num_of_docs_on_search(wrapped->Data.DocsOnSearch);
            out->set_num_of_new_search_docs(wrapped->Data.DocsSearchDiffNew);
            out->set_num_of_gone_search_docs(wrapped->Data.DocsSearchDiffGone);
        };

        if (shard.IsThereRoot()) {
            for (const auto &nodeObj : shard.IdNodeMap) { // first = node id, second = TWrapperNode::Ptr
                auto *treeNode = nodeObj.second.Get();
                // Unnamed nodes are placeholders that never received their own record.
                if (treeNode == nullptr || treeNode->Name.empty()) {
                    continue;
                }

                proto::urltree::NodeInfo *protoNode = HostProto.add_nodes();
                protoNode->set_node_id(treeNode->Id);
                protoNode->set_parent_id(treeNode->ParentId);
                fillShared(protoNode, treeNode);
            }
        }

        for (const TWrapperNode::Ptr &userNode : shard.UserPatterns) {
            auto *patternNode = userNode.Get();
            if (patternNode == nullptr) {
                continue;
            }

            proto::urltree::NodeInfo *protoNode = HostProto.add_user_nodes();
            protoNode->set_node_id(0);
            protoNode->set_parent_id(0);
            fillShared(protoNode, patternNode);
        }
    }
}

// Creates one TUserPattern per filter string (Patterns keeps the order of `filters`)
// and a single TMatcher built over the whole filter list.
TUserPatternMatcher::TUserPatternMatcher(const TVector<TString> &filters) {
    for (const TString &filterText : filters) {
        Patterns.emplace_back(filterText);
    }

    Matcher.Reset(new TMatcher(filters));
}

void TSiteTreeBuilder::UrlSegmentParser(const TString& path, TSiteTreeBuilder::TTreeBuilder::TParseResult& segments, bool /*needUrlNormalize*/) {
    using namespace NWmcCopyPaste::NParseSegmentsInner;

    TString::const_iterator i = path.begin();
    TString::const_iterator j = path.begin();
    TString::const_iterator end = path.end();

    bool prevSeparator = false;

    segments.clear();

    if (IsSeparator(*i)) {
        ++i;
        TString& segment = segments.emplace_back();
        segment.assign(j, i);
        prevSeparator = true;
    }

    while(i != end) {
        if (IsSeparator(*i)) {
            if (!prevSeparator) {
                TString& segment = segments.emplace_back();
                segment.assign(j, i);
            }

            j = i;
            prevSeparator = true;
        } else {
            prevSeparator = false;
        }

        i++;
    }

    if (!prevSeparator) {
        TString& segment = segments.emplace_back();
        segment.assign(j, i);
    }
}

// Sets up the underlying tree builder and starts in the IDLE state.
//
// DefaultSliceSize, DefaultCompressFactor and DefaultMinNumOfUrlsInGroup are forwarded to
// TreeBuilder unchanged. `mode` only selects the path segment parser: UrlSegmentParser for
// MODE_DEFAULT, NWmcCopyPaste::ParseSegments<TString> for any other mode.
TSiteTreeBuilder::TSiteTreeBuilder(ETreeBuildMode mode, size_t DefaultSliceSize, size_t DefaultCompressFactor, size_t DefaultMinNumOfUrlsInGroup)
    : TreeBuilder(
        DefaultSliceSize,
        DefaultCompressFactor,
        DefaultMinNumOfUrlsInGroup,
        false, // NOTE(review): meaning of this flag is defined by TreeBuilder's constructor, not visible here
        mode == MODE_DEFAULT ? UrlSegmentParser : NWmcCopyPaste::ParseSegments<TString>
    )
    , BuilderState(IDLE)
{
}

// Accounts one document in the per-turbo-source counters.
//
// turboSourceFlags is treated as a bit mask: for every set bit, the counter stored in
// turboSource under that bit's value (1, 2, 4, ...) is incremented by one.
// A zero mask leaves turboSource untouched.
void TSiteTreeBuilder::AddTurbo(TTreeData::TTurboSources &turboSource, size_t turboSourceFlags){
    if (turboSourceFlags == 0) {
        return;
    }

    // Fast path: exactly one bit is set, so the mask itself is the key.
    // The outer parentheses are required: `==` binds tighter than `&`, and without them the
    // test degenerates to `turboSourceFlags & (turboSourceFlags == 1)`.
    if ((turboSourceFlags & (turboSourceFlags - 1UL)) == 0) {
        turboSource[turboSourceFlags]++;
        return;
    }

    // Same width as the mask, so the key can follow every bit without overflowing.
    size_t sourceId = 1;
    while (turboSourceFlags) {
        if (turboSourceFlags & 1UL) {
            turboSource[sourceId]++;
        }
        sourceId <<= 1;
        turboSourceFlags >>= 1;
    }
}

// Feeds one document (msg) into the tree builder and into the matching user patterns.
//
// The first call moves the builder from IDLE to BUILDING; in any state other than BUILDING
// the call is ignored.
//
// Tree part: a fresh TTreeData describing this single document is handed to
// TreeBuilder.AddUrl together with msg.path(). Every HTTP code / excluded URL status
// present in msg is recorded in it with the value 1.
// User pattern part: for every pattern accepted by the matcher, the pattern's accumulated
// Data is incremented in place (document count, per-code and per-status counters).
// Search-base counters (docs on search, turbo sources, new/gone diff) are accounted the
// same way for both parts.
void TSiteTreeBuilder::AddPath(const proto::urltree::RecordSourceInfo &msg) {
    if (BuilderState == IDLE) {
        BuilderState = BUILDING;
    }

    if (BuilderState != BUILDING) {
        return;
    }

    // Applies the search_base_acceptance part of msg to `target`.
    const auto accountSearchBase = [this, &msg](auto &target) {
        if (!msg.has_search_base_acceptance()) {
            return;
        }

        const auto &acceptance = msg.search_base_acceptance();

        if (acceptance.from_jupiter()) {
            target.DocsOnSearch++;
            if (msg.turbo_source_flags()) {
                this->AddTurbo(target.TurboSource, msg.turbo_source_flags());
            }
        }

        if (acceptance.has_search_diff_status()) {
            const auto diffStatus = acceptance.search_diff_status();
            if (diffStatus == proto::urltree::SEARCH_DIFF_STATUS_NEW) {
                target.DocsSearchDiffNew++;
            } else if (diffStatus == proto::urltree::SEARCH_DIFF_STATUS_GONE) {
                target.DocsSearchDiffGone++;
            }
        }
    };

    const int urlInfoCount = msg.url_info_size();

    TTreeData data;
    data.NumOfDocs = 1;

    for (int idx = 0; idx < urlInfoCount; ++idx) {
        const auto &urlInfo = msg.url_info(idx);
        // Assigned, not incremented: a code repeated inside one message still counts once here.
        data.HttpCodes[urlInfo.http_code()] = 1;
        if (urlInfo.has_jupiter_url_status_excluded()) {
            data.ExcludedUrlStatuses[urlInfo.jupiter_url_status_excluded()] = 1;
        }
    }

    accountSearchBase(data);

    TreeBuilder.AddUrl(msg.path(), data);

    if (UserPatternMatcher.Get() == nullptr || !UserPatternMatcher->Matches(msg.path())) {
        return;
    }

    // Accepted is a [first, second) range of indices into UserPatternMatcher->Patterns.
    const auto &accepted = UserPatternMatcher->Matcher->Accepted;
    for (const size_t* hit = accepted.first; hit < accepted.second; ++hit) {
        TUserPattern &pattern = UserPatternMatcher->Patterns[*hit];
        pattern.Data.NumOfDocs++;

        for (int idx = 0; idx < urlInfoCount; ++idx) {
            const auto &urlInfo = msg.url_info(idx);
            pattern.Data.HttpCodes[urlInfo.http_code()]++;
            if (urlInfo.has_jupiter_url_status_excluded()) {
                pattern.Data.ExcludedUrlStatuses[urlInfo.jupiter_url_status_excluded()]++;
            }
        }

        accountSearchBase(pattern.Data);
    }
}

// Replaces the builder's user pattern matcher with a new one built from `filters`.
// AddPath consults this matcher for every path it processes, so statistics are collected
// only for paths added after this call.
void TSiteTreeBuilder::SetupUserPartitions(const TVector<TString> &filters) {
    UserPatternMatcher.Reset(new TUserPatternMatcher(filters));
}

// Finishes the build and writes the resulting tree into siteStructure.
//
// Does nothing unless the builder is in the BUILDING state; otherwise it switches to
// COMPLETED, completes the tree and saves it through TTreeRecordWriter.
// If siteStructure is still empty afterwards, a single stub root record "/" is added
// (node id ROOT_NODE_ID, parent id 0). Its data is constructed from the total number of
// URLs and, when the builder has a root node, copies that node's counters.
void TSiteTreeBuilder::Commit(TVector<TTreeRecord> &siteStructure) {
    if (BuilderState != BUILDING) {
        return;
    }

    BuilderState = COMPLETED;

    TreeBuilder.CompleteBuild();

    TTreeRecordWriter writer(siteStructure);
    TTreeSaver<TBuilderNode, TTreeRecordWriter> saver(writer);
    saver.Save(TreeBuilder.GetRoot().Get());

    if (!siteStructure.empty()) {
        return;
    }

    // Nothing was saved: make sure callers still get a root record.
    TTreeData stubData(TreeBuilder.GetNumOfUrls());

    auto &&root = TreeBuilder.GetRoot();
    if (root) {
        const auto &rootData = root->GetData();
        stubData.HttpCodes = rootData.HttpCodes;
        stubData.ExcludedUrlStatuses = rootData.ExcludedUrlStatuses;
        stubData.DocsOnSearch = rootData.DocsOnSearch;
        stubData.DocsSearchDiffGone = rootData.DocsSearchDiffGone;
        stubData.DocsSearchDiffNew = rootData.DocsSearchDiffNew;
        stubData.TurboSource = rootData.TurboSource;
    }

    siteStructure.push_back(TTreeRecord("/", stubData, ROOT_NODE_ID, 0));
}

} //namespace NWebmaster
