#include "common_indexers_config.h"

#include "config.h"
#include "get_value.h"
#include "grouping_config.h"

#include <saas/rtyserver/logging/rty_index.h>
#include <saas/util/logging/exception_process.h>

#include <robot/library/oxygen/indexer/processor/protos/config.pb.h>

#include <ysite/parser/xml/xml_yndx_transform.h>

#include <library/cpp/html/zoneconf/ht_conf.h>
#include <library/cpp/mediator/global_notifications/system_status.h>

#include <util/folder/dirut.h>
#include <util/string/split.h>
#include <util/string/vector.h>

namespace NRTYServer {

// Constructs the common indexer configuration with its built-in defaults.
//
// Owner is initialised from `owner`; Init(const TYandexConfig::Directives&)
// later reads Owner.DaemonConfig and Owner.ConfigPatcher through it.
// Every other member initialised here is subsequently passed to GET_VALUE in
// Init(const TYandexConfig::Directives&), so these values are the starting
// point before the configuration file is consulted.
// NOTE(review): members not listed here (e.g. DocsCountLimit, IndexLog,
// DefaultCharset) presumably get their defaults from in-class initialisers in
// the header — confirm there.
TCommonIndexersConfig::TCommonIndexersConfig(const TRTYServerConfig& owner)
    : Owner(owner)
    , Enabled(true)
    , PauseOnStart(false)
    , UseSlowUpdate(false)
    , TokenizeUrl(true)
    , MadeUrlAttributes(true)
    , RejectDuplicates(false)
    , SaveDeletedDocuments(false)
    , ProtocolType("default")
    , TimestampControlEnabled(true)
    , StoreUpdateData(false)
    , StoreTextToArchive(true)
    , UseHTML5Parser(false)
{}

// Defaulted destructor, defined out of line in this translation unit.
// NOTE(review): presumably kept here so that members whose types may only be
// forward-declared in the header (e.g. the holder behind GroupingConfig) are
// destroyed where the full type is visible — confirm against the header.
TCommonIndexersConfig::~TCommonIndexersConfig() = default;

// Initialises the configuration from a whole config section.
//
// Child sections are handled first, in this order:
//   * "HttpOptions"       - mandatory; its directives initialise ServerOptions.
//   * "ZonesToProperties" - optional; its directives are parsed into ZonesToProperties.
//   * "TextArchiveParams" - optional; the section itself initialises TextArchiveParams.
// Afterwards the section's own directives are handed to the
// Init(const TYandexConfig::Directives&) overload.
//
// A missing "HttpOptions" child, or a yexception raised while initialising
// TextArchiveParams, is reported through AssertCorrectConfig.
void TCommonIndexersConfig::Init(const TYandexConfig::Section* section) {
    const TYandexConfig::TSectionsMap children = section->GetAllChildren();

    // The mandatory child comes first so that its absence is reported before anything else is touched.
    const auto iter = children.find("HttpOptions");
    AssertCorrectConfig(iter != children.end(), "Common indexer section must contain HttpOptions section");
    ServerOptions.Init(iter->second->GetDirectives());

    if (const auto zonesIt = children.find("ZonesToProperties"); zonesIt != children.end()) {
        ZonesToProperties.Parse(zonesIt->second->GetDirectives());
    }

    if (const auto archiveIt = children.find("TextArchiveParams"); archiveIt != children.end()) {
        try {
            TextArchiveParams.Init(*archiveIt->second);
        } catch (yexception&) {
            // Convert the exception into a config assertion carrying the original message.
            AssertCorrectConfig(false, "%s", CurrentExceptionMessage().data());
        }
    }

    Init(section->GetDirectives());
}

// Initialises the flat (non-section) settings from `directives`.
//
// Most members are filled through GET_VALUE(Name).
// NOTE(review): GET_VALUE comes from get_value.h; it presumably reads the
// directive called Name from `directives` into the member of the same name —
// confirm there. The parameter name `directives` is kept for that reason.
//
// Besides plain reads this function:
//   * parses NGrammZones (comma-separated entries) into NGrammZonesVector;
//   * validates the HTML and XML parser config files by configuring a
//     throw-away configurator with each of them;
//   * starts the index log via Owner.DaemonConfig;
//   * normalises RecognizeLibraryFile ("NOTSET" -> empty, relative -> resolved
//     against the current working directory) and throws yexception when the
//     resulting path is empty or does not exist;
//   * builds GroupingConfig and loads OxygenOptions.
void TCommonIndexersConfig::Init(const TYandexConfig::Directives& directives) {
    GET_VALUE(HtmlParserConfigFile);
    GET_VALUE(ProtocolType);
    GET_VALUE(TimestampControlEnabled);
    GET_VALUE(StoreUpdateData);

    // Each comma-separated entry is parsed on its own; entries are keyed by zone name,
    // so a later entry with the same name overwrites an earlier one.
    GET_VALUE(NGrammZones);
    for (const TString& i : SplitString(NGrammZones, ",")) {
        TNGrammZoneInfo info;
        AssertCorrectConfig(info.Parse(i), "Incorrect NGrammZonesConfiguration");
        NGrammZonesVector[info.Name] = info;
    }

    GET_VALUE(StoreTextToArchive);
    GET_VALUE(UseHTML5Parser);
    GET_VALUE(DocsCountLimit);

    if (!!HtmlParserConfigFile) {
        // Validation only: the configurator is discarded once it has accepted the file.
        // TODO: think on created parser reusage
        TRY
            THtConfigurator htmlProbe;
            htmlProbe.Configure(HtmlParserConfigFile.data());
        CATCH_AND_RETHROW("while parsing Html parser config")
    }

    GET_VALUE(XmlParserConfigFile);
    if (!!XmlParserConfigFile) {
        // Validation only: the configurator is discarded once it has accepted the file.
        // TODO: think on created parser reusage
        TRY
            NXmlParser::TXmlConfigurator xmlProbe;
            xmlProbe.Configure(XmlParserConfigFile.data());
        CATCH_AND_RETHROW("while parsing Xml parser config")
    }

    // IndexLogLevel is only assigned when the directive is actually present.
    GET_VALUE(IndexLog);
    if (int level; directives.GetValue("IndexLogLevel", level)) {
        IndexLogLevel = level;
    }
    Owner.DaemonConfig.StartLogging<TRTYIndexLog>(IndexLog, IndexLogLevel);

    GET_VALUE(DocProperty);
    GET_VALUE(RecognizeLibraryFile);

    // "NOTSET" is the textual form of "no recognizer library" (ToString emits it for an empty path).
    const bool recognizerDisabled = (RecognizeLibraryFile == "NOTSET");
    if (recognizerDisabled) {
        RecognizeLibraryFile = "";
    } else {
        if (TFsPath(RecognizeLibraryFile).IsRelative()) {
            resolvepath(RecognizeLibraryFile, NFs::CurrentWorkingDirectory() + GetDirectorySeparator());
        }
        // The existence check is skipped for an empty path thanks to short-circuiting.
        const bool recognizerUsable = !!RecognizeLibraryFile && TFsPath(RecognizeLibraryFile).Exists();
        if (!recognizerUsable) {
            ythrow yexception() << "Morphology configuration is incorrect " << RecognizeLibraryFile;
        }
    }

    // Only the exact value "msk" switches the flag on; any other present value switches it off.
    if (TString dateAttrsMode; directives.GetValue("StoreDateAttrs", dateAttrsMode)) {
        StoreDateAttrsMsk = (dateAttrsMode == "msk");
    }

    GET_VALUE(OxygenOptionsFile);
    GET_VALUE(UseSlowUpdate);
    GET_VALUE(TokenizeUrl);
    GET_VALUE(MadeUrlAttributes);
    GET_VALUE(Enabled);
    GET_VALUE(PauseOnStart);
    GET_VALUE(DefaultCharset);
    GET_VALUE(DefaultLanguage);
    GET_VALUE(DefaultLanguage2);
    GET_VALUE(RejectDuplicates);
    GET_VALUE(SaveDeletedDocuments);

    // IndexDate is only assigned when the directive is actually present.
    if (int date; directives.GetValue("IndexDate", date)) {
        IndexDate = date;
    }

    GET_VALUE(NoNeededFreqKeys);
    GET_VALUE(BackwardCompatibleTokenization);

    GroupingConfig = MakeHolder<TGroupingConfig>(directives);
    // The preprocessor is passed along only when a config patcher is attached to the owner.
    OxygenOptions = ::GetOxygenOptions(
        OxygenOptionsFile,
        Owner.ConfigPatcher ? Owner.ConfigPatcher->GetPreprocessor() : nullptr);
}

// Writes this configuration to `so` as a "<Common> ... </Common>" block.
//
// The nested HttpOptions / ZonesToProperties / TextArchiveParams sections come
// first, followed by "Key : value" lines. IndexLogLevel and IndexDate are
// printed only when they hold a value, GroupingConfig only when it is set, and
// an empty RecognizeLibraryFile is printed as "NOTSET".
// Key spelling (including the three keys written without a space before the
// colon) is reproduced exactly as emitted before.
void TCommonIndexersConfig::ToString(TStringOutput& so) const {
    so << "<Common>" << Endl;

    // Nested sub-sections, each rendered by its own ToString().
    so << ServerOptions.ToString("HttpOptions") << Endl;
    so << ZonesToProperties.ToString("ZonesToProperties") << Endl;
    so << TextArchiveParams.ToString("TextArchiveParams") << Endl;

    // An empty path is written back as the "NOTSET" marker that Init() understands.
    const TString recognizeLibraryShown = !!RecognizeLibraryFile ? RecognizeLibraryFile : TString("NOTSET");
    so << "HtmlParserConfigFile : " << HtmlParserConfigFile << Endl;
    so << "XmlParserConfigFile : " << XmlParserConfigFile << Endl;
    so << "IndexLog : " << IndexLog << Endl;
    so << "DocProperty : " << DocProperty << Endl;
    so << "RecognizeLibraryFile : " << recognizeLibraryShown << Endl;
    so << "OxygenOptionsFile : " << OxygenOptionsFile << Endl;

    if (IndexLogLevel) {
        so << "IndexLogLevel : " << *IndexLogLevel << Endl;
    }
    if (!!GroupingConfig) {
        GroupingConfig->ToStringStream(so);
    }

    so << "Enabled : " << Enabled << Endl
       << "PauseOnStart : " << PauseOnStart << Endl
       << "UseSlowUpdate : " << UseSlowUpdate << Endl
       << "TokenizeUrl : " << TokenizeUrl << Endl
       << "MadeUrlAttributes : " << MadeUrlAttributes << Endl;

    so << "DefaultCharset : " << NameByCharset(DefaultCharset) << Endl
       << "DefaultLanguage : " << NameByLanguage(DefaultLanguage) << Endl
       << "DefaultLanguage2 : " << NameByLanguage(DefaultLanguage2) << Endl
       << "StoreDateAttrs : " << (StoreDateAttrsMsk ? "msk" : "") << Endl;

    so << "ProtocolType : " << ProtocolType << Endl
       << "TimestampControlEnabled : " << TimestampControlEnabled << Endl
       << "StoreUpdateData : " << StoreUpdateData << Endl
       << "NGrammZones : " << NGrammZones << Endl
       << "StoreTextToArchive: " << StoreTextToArchive << Endl
       << "UseHTML5Parser: " << UseHTML5Parser << Endl
       << "DocsCountLimit: " << DocsCountLimit << Endl
       << "RejectDuplicates : " << RejectDuplicates << Endl
       << "SaveDeletedDocuments : " << SaveDeletedDocuments << Endl;

    if (IndexDate) {
        so << "IndexDate : " << *IndexDate << Endl;
    }

    so << "NoNeededFreqKeys : " << NoNeededFreqKeys << Endl
       << "BackwardCompatibleTokenization : " << BackwardCompatibleTokenization << Endl;

    so << "</Common>" << Endl;
}

bool TCommonIndexersConfig::TNGrammZoneInfo::Parse(const TString& info) {
    TVector<TString> splitInfo = SplitString(info, "=", 0, KEEP_EMPTY_TOKENS);
    if (splitInfo.size() <= 1) {
        return false;
    }

    Name = splitInfo[0];

    if (!Name)
        return false;

    if (splitInfo.size() > 1 && !TryFromString(splitInfo[1], NBase))
        return false;

    return true;
}

}
