#include "facet_builder.h"

#include <dict/dictutil/str.h>

#include <library/cpp/logger/global/global.h>
#include <saas/rtyserver/search/fastarchive.h>

#include <util/string/builder.h>

void TFacetBuilder::OnBuildProperty(const char* name, const char* value) {
    TStringBuf property(name);
    if (!ForcedProperties.contains(property) && IgnoreProperty(property))
        return;

    if (value && *value) {
        TPropertyFacetHelper& helper = Result[property];
        ++helper.TimesUsed;

        if (helper.size() >= helper.MaxCateg)
            return;

        for (int c = 0, s = 0; true; c++) {
            char cv = *(value + c);
            if (cv == '\t' || cv == 0) {
                TString current(value + s, value + c);
                ReplaceAnyOf(current, ":;", ' ');
                helper[current]++;
                s = c + 1;
            }
            if (cv == 0)
                break;
        }
    }
}

// Builds a facet collector.
//
// ad                   - archive data accessor, stored in AD (used by ProcessArchive()).
// fa                   - fast archive, stored in FA; ProcessDoc() checks it for
//                        null before use, so it may be null.
// mode                 - facet collection mode, stored in Mode.
// forcedProperties     - property specs registered in ForcedProperties.
// forcedFastProperties - property specs registered in ForcedFastProperties.
// ignoredProperties    - names copied into IgnoredProperties.
// propertyPrefix       - prefix stored in PropertyPrefix (checked by IgnoreProperty()).
//
// The forced property specs are parsed by PreprocessProperties(), which splits
// each entry on ':' and may also fill per-property options in Result.
TFacetBuilder::TFacetBuilder(const IArchiveData& ad, const IFastArchive* fa, FacetMode mode,
                             const TVector<TString>& forcedProperties,
                             const TVector<TString>& forcedFastProperties,
                             const TVector<TString>& ignoredProperties,
                             const TString& propertyPrefix)
    : AD(ad)
    , FA(fa)
    , IgnoredProperties(ignoredProperties.begin(), ignoredProperties.end())
    , PropertyPrefix(propertyPrefix)
    , ProcessedDocCount(0)
    , Mode(mode)
{
    // Runs in the constructor body, i.e. after all members (including Result,
    // which PreprocessProperties() writes to) have been initialized.
    PreprocessProperties(forcedProperties, ForcedProperties);
    PreprocessProperties(forcedFastProperties, ForcedFastProperties);
}

// Serializes the collected facets into searcher properties.
//
// For every property in Result a "facet_<name>" entry is set whose value is a
// sequence of "<category>:<count>;" items. When the property's Top equals
// DefaultTop all categories are written in container order; otherwise the
// categories are sorted by descending count and at most Top of them are
// written. In INTERLEAVING mode a property that was used in fewer documents
// than were processed is skipped unless it is forced (regular or fast).
//
// Additionally sets "rty_allfacets" to the comma-joined list of emitted
// property names and "RTYServer.FastFacets.debug" to the comma-joined list of
// those marked Fast; each is set only when its list is non-empty.
void TFacetBuilder::FillProperties(TSearcherProps* props) const {
    using TFacetPair = std::pair<ui32, TStringBuf>;

    TVector<TString> emittedNames;
    TVector<TString> emittedFastNames;

    for (const auto& entry : Result) {
        const TString& name = entry.first;
        const TPropertyFacetHelper& helper = entry.second;

        if (INTERLEAVING == Mode && helper.TimesUsed < ProcessedDocCount) {
            const bool forced = ForcedProperties.contains(name) || ForcedFastProperties.contains(name);
            if (!forced)
                continue;
        }

        emittedNames.push_back(name);
        if (helper.Fast) {
            emittedFastNames.push_back(name);
        }

        TStringBuilder result;
        if (helper.Top != DefaultTop) {
            // Collect (count, category) pairs and order them by descending count.
            TVector<TFacetPair> facets;
            facets.reserve(helper.size());
            for (const auto& category : helper) {
                facets.emplace_back(category.second, category.first);
            }
            Sort(facets.begin(), facets.end(), [](const TFacetPair& lhs, const TFacetPair& rhs) -> bool {
                return lhs.first > rhs.first;
            });

            const size_t limit = Min(facets.size(), (size_t)helper.Top);
            for (size_t pos = 0; pos < limit; ++pos) {
                const TFacetPair& facet = facets[pos];
                result << facet.second << ':' << facet.first << ';';
            }
        } else {
            for (const auto& category : helper) {
                result << category.first << ':' << category.second << ';';
            }
        }
        props->Set(TString::Join("facet_", name), result);
    }

    if (!emittedNames.empty()) {
        props->Set("rty_allfacets", JoinStrings(emittedNames, ","));
    }
    if (!emittedFastNames.empty()) {
        props->Set("RTYServer.FastFacets.debug", JoinStrings(emittedFastNames, ","));
    }
}

// Collects facet data for one document and increments ProcessedDocCount.
//
// The regular archive is read when there are forced properties or the mode is
// not SINGLE. The fast archive is read only in SINGLE mode, when forced fast
// properties exist and a fast archive (FA) is available.
void TFacetBuilder::ProcessDoc(ui32 docId, const NGroupingAttrs::TDocsAttrs& /*da*/) {
    const bool singleMode = (Mode == SINGLE);

    const bool needArchive = !ForcedProperties.empty() || !singleMode;
    if (needArchive) {
        ProcessArchive(docId);
    }

    const bool needFastArchive = !ForcedFastProperties.empty() && singleMode && FA;
    if (needFastArchive) {
        ProcessFastArchive(docId);
    }

    ++ProcessedDocCount;
}

// Maps a textual mode name to FacetMode: "all" -> ALL,
// "interleaving" -> INTERLEAVING, anything else -> SINGLE.
// The comparison is exact (case-sensitive).
TFacetBuilder::FacetMode TFacetBuilder::DetermineMode(const TString& mode) {
    if (mode == "all") {
        return ALL;
    }
    if (mode == "interleaving") {
        return INTERLEAVING;
    }
    return SINGLE;
}

// Loads the document's extended info from the archive, uncompresses it and
// feeds it to a TDocDescr, passing this object to ConfigureDocInfos()
// (presumably that is what triggers the OnBuildProperty() callbacks - confirm
// against TDocDescr).
//
// Only std::bad_alloc is handled here: it is logged and the document is
// skipped. Any other exception propagates to the caller.
void TFacetBuilder::ProcessArchive(ui32 docId) {
    try {
        TDocDescr descr;
        const TBlob extInfo = AD.GetDocExtInfo(docId)->UncompressBlob();
        // UseBlob() is called with an unsigned int size, so narrow explicitly.
        descr.UseBlob(extInfo.Data(), static_cast<unsigned int>(extInfo.Size()));
        descr.ConfigureDocInfos(this);
    } catch (const std::bad_alloc& e) {
        ERROR_LOG << "Incorrect memory allocation: " << e.what() << Endl;
    }
}

// Collects facet data for one document from the fast archive.
//
// Does nothing if the fast archive has no entry for docId. For every property
// of the document that is either listed in ForcedFastProperties or not
// rejected by IgnoreProperty(), the property is marked as Fast, its usage
// counter is bumped and, unless it already holds MaxCateg (or more) distinct
// categories, each of its values is counted as a category.
//
// FA is dereferenced unconditionally; the caller (ProcessDoc) checks it.
void TFacetBuilder::ProcessFastArchive(ui32 docId) {
    const IFastArchiveDoc* fastDoc = FA->GetDoc(docId);
    if (fastDoc == nullptr)
        return;

    for (ui32 idx = 0; idx < fastDoc->GetPropertiesCount(); ++idx) {
        const TStringBuf propName = fastDoc->GetPropertyName(idx);

        const bool accepted = ForcedFastProperties.contains(propName) || !IgnoreProperty(propName);
        if (!accepted)
            continue;

        TPropertyFacetHelper& helper = Result[propName];
        helper.Fast = true;
        ++helper.TimesUsed;

        // The category limit is checked once per property, before counting.
        if (helper.size() < helper.MaxCateg) {
            const auto& propValues = fastDoc->GetPropertyValues(idx);
            for (const TStringBuf& propValue : propValues) {
                ++helper[propValue];
            }
        }
    }
}

// Returns true when a (non-forced) property must not be collected: always in
// SINGLE mode, when the name is listed in IgnoredProperties, or when the name
// does not start with PropertyPrefix.
bool TFacetBuilder::IgnoreProperty(const TStringBuf& property) const {
    if (SINGLE == Mode) {
        return true;
    }
    if (IgnoredProperties.contains(property)) {
        return true;
    }
    return !property.StartsWith(PropertyPrefix);
}

void TFacetBuilder::PreprocessProperties(const TVector<TString>& original, TSet<TString>& target) {
    for (auto&& i : original) {
        auto splitted = SplitString(i, ":", 0, KEEP_EMPTY_TOKENS);
        const TString& name = splitted[0];
        target.insert(name);

        if (splitted.size() > 1)
            TryFromString(splitted[1], Result[name].Top);

        if (splitted.size() > 2)
            TryFromString(splitted[2], Result[name].MaxCateg);
    }
}

// Returns the facet spec without its options: everything before the first
// ':' or the whole string when it contains no ':'.
TString TFacetBuilder::StripFacetOptions(const TString& facet) {
    const size_t colonPos = facet.find(':');
    if (colonPos == TString::npos) {
        return facet;
    }
    return TString(facet.data(), colonPos);
}
