#include <util/digest/fnv.h>
#include <util/generic/size_literals.h>
#include <util/string/join.h>

#include <library/cpp/containers/comptrie/comptrie_trie.h>
#include <library/cpp/containers/comptrie/comptrie_builder.h>

#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>
#include <robot/jupiter/protos/acceptance.pb.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/processors/turbo/library/types.h>
#include <wmconsole/version3/processors/turbo/protos/turbo.pb.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/yt/transfer_manager.h>
#include <wmconsole/version3/wmcutil/url.h>

#include "config.h"
#include "stats.h"

using namespace NJupiter;

namespace NWebmaster {
namespace NTurbo {

// Input tags: each one pairs a row type with a numeric tag id. They are passed to `.Input(...)`
// when an operation is configured and to `reader.GetRow(...)` / `reader.IsCurrentTable(...)`
// inside the tagged mappers and reducers below.
TInputTag<TAcceptanceUrlForWebMasterRecord> JupiterSearchUrlsInputTag  (1);
TInputTag<NProto::TurboUrl> PreparedTurboUrlsInputTag                  (2);
TInputTag<NProto::TurboUrlSample> TurboUrlSamplesInputTag              (4);

// Output tags: each one pairs a row type with a numeric tag id. They are passed to `.Output(...)`
// when an operation is configured and to `writer.AddRow(...)` inside the tagged jobs below.
TOutputTag<NProto::TurboUrl> PreparedTurboUrlsOutputTag                (1);
TOutputTag<NProto::TurboUrlSample> TurboUrlSamplesOutputTag            (2);
TOutputTag<NProto::TurboDomainStats> TurboDomainStatsOutputTag         (3);

// File-local constants: table attribute names, column names and the source-name-to-flag mapping.
namespace {

// Table attributes
// Set on the prepared turbo urls table to the modification time of the history table it was built from.
const char *ATTR_STATS_SOURCE_DATE = "stats_source_date";
// Set on the samples table to the modification time of the turbo urls table used for the last evaluation.
const char *ATTR_TURBO_URLS_DATE = "turbo_urls_date";
// Set on the samples table to the Jupiter state used for the last evaluation.
const char *ATTR_SEARCH_URLS_DATE = "search_urls_date";
// Transfer manager task id; stored on both the source and the destination copy of the turbo urls table.
const char *ATTR_TRANSFER_ID = "transfer_id";

// Source stats table fields
const char *FIELD_RAW_STATS_KEY = "key";
const char *FIELD_RAW_STATS_ACTION = "action";
const char *FIELD_RAW_STATS_META = "meta";
// Fields read from inside the "meta" node
const char *FIELD_RAW_STATS_META_SOURCE = "source";
const char *FIELD_RAW_STATS_META_FEED = "feed_url";
const char *FIELD_RAW_STATS_META_TEST = "test";
const char *FIELD_RAW_STATS_META_ORIGINAL_URL = "original_url";

// Prepared stats table fields
const char *FIELD_PREPARED_DOMAIN = "Domain";
const char *FIELD_PREPARED_HOST = "Host";
const char *FIELD_PREPARED_PATH = "Path";
const char *FIELD_PREPARED_FEED = "Feed";
const char *FIELD_PREPARED_SOURCE = "Source";
const char *FIELD_PREPARED_ACTION = "Action";
const char *FIELD_PREPARED_COUNT = "Count";
const char *FIELD_SOURCE_FLAG = "SourceFlag";
// Columns of the url samples table (FIELD_SAMPLE_DOMAIN has the same value as FIELD_PREPARED_DOMAIN)
const char *FIELD_SAMPLE_DOMAIN = "Domain";
const char *FIELD_SAMPLE_URL = "Url";

// Turbo page actions
const char *ACTION_MODIFY = "modify";

// Maps the source name found in the raw record meta to its flag constant (the constants are
// declared outside this file). Rows whose source is not listed here are dropped by the mappers;
// the flags of one url are OR-ed together in TTurboSearchUrlsReducer.
static const THashMap<TString,ui64> SOURCE_FLAG_CONVERTER = {
        {"autoparser", AUTOPARSER}, // autoparser
        {"autoparser_button", AUTOPARSER_BUTTON}, //disabled autoparser
        {"rss", RSS},
        {"yml", YML},
        {"api", API},
        {"yml_listing", YML_LISTING},
        {"auto_morda", AUTO_MORDA}
    };
}

/**
 * Mapper that turns raw turbo history rows into prepared turbo url rows.
 *
 * A row is emitted only when all of the following hold:
 *  - its action is ACTION_MODIFY;
 *  - its meta is not marked as a test record;
 *  - its key can be split into host and path;
 *  - its original url (with the "turboofferid" cgi parameter filtered out) can be split into
 *    host and path, and its host has the same prefix-less domain as the key host;
 *  - its source is listed in SOURCE_FLAG_CONVERTER.
 *
 * Output columns: Host, Path, Feed, Source, SourceFlag.
 */
struct TTurboStatsToPreparedMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    TTurboStatsToPreparedMapper() = default;

    void Do(TReader *input, TWriter *output) override {
        const THashSet<TString> ignoredCgiParams = {"turboofferid"};

        // These buffers live for the whole job and are overwritten row by row.
        NYT::TNode preparedRow;
        TString urlHost, urlPath;
        TString recordHost, recordPath;

        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &rawRow = input->GetRow();

            const TString action = NYTUtils::GetNodeFieldOrDefault<TString>(rawRow, FIELD_RAW_STATS_ACTION, "");
            if (action != ACTION_MODIFY) {
                continue;
            }

            const NYT::TNode &meta = rawRow[FIELD_RAW_STATS_META];
            const bool isTestRecord = NYTUtils::GetNodeFieldOrDefault<bool>(meta, FIELD_RAW_STATS_META_TEST, false);
            if (isTestRecord) {
                continue;
            }

            // The record key itself must be a well-formed url.
            const TString recordKey = NYTUtils::GetNodeFieldOrDefault<TString>(rawRow, FIELD_RAW_STATS_KEY, "");
            if (!NUtils::SplitUrl(recordKey, recordHost, recordPath)) {
                continue;
            }

            const TString originalUrl = NUtils::FilterCgiParams(ignoredCgiParams,
                NYTUtils::GetNodeFieldOrDefault<TString>(meta, FIELD_RAW_STATS_META_ORIGINAL_URL, ""));
            if (!NUtils::SplitUrl(originalUrl, urlHost, urlPath)) {
                continue;
            }
            // The original url has to belong to the same domain as the key.
            if (NUtils::GetDomainWithoutPrefix(recordHost) != NUtils::GetDomainWithoutPrefix(urlHost)) {
                continue;
            }

            const TString source = NYTUtils::GetNodeFieldOrDefault<TString>(meta, FIELD_RAW_STATS_META_SOURCE, "");
            const TString feed = NYTUtils::GetNodeFieldOrDefault<TString>(meta, FIELD_RAW_STATS_META_FEED, "");
            const auto knownSource = SOURCE_FLAG_CONVERTER.find(source);
            if (knownSource == SOURCE_FLAG_CONVERTER.end()) {
                continue;
            }

            preparedRow[FIELD_PREPARED_HOST] = urlHost;
            preparedRow[FIELD_PREPARED_PATH] = urlPath;
            preparedRow[FIELD_PREPARED_FEED] = feed;
            preparedRow[FIELD_PREPARED_SOURCE] = source;
            preparedRow[FIELD_SOURCE_FLAG] = knownSource->second;
            output->AddRow(preparedRow);
        }
    }
};

REGISTER_MAPPER(TTurboStatsToPreparedMapper)

/**
 * Mapper for the statistics that do not involve Jupiter data.
 *
 * Test records, records whose original url cannot be parsed and records with a source missing
 * from SOURCE_FLAG_CONVERTER are skipped. For every remaining record one row is written with the
 * columns Domain, Path (path plus "?query" when a query is present), Feed (empty for the "api"
 * source), Source and Action (copied from the raw row as is).
 */
struct TTurboStatsMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    TTurboStatsMapper() = default;

    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &rawRow = input->GetRow();
            const NYT::TNode &meta = rawRow[FIELD_RAW_STATS_META];

            const bool isTestRecord = NYTUtils::GetNodeFieldOrDefault<bool>(meta, FIELD_RAW_STATS_META_TEST, false);
            if (isTestRecord) {
                continue;
            }

            TString originalUrl = NYTUtils::GetNodeFieldOrDefault<TString>(meta, FIELD_RAW_STATS_META_ORIGINAL_URL, "");
            THttpURL url;
            if (!NUtils::ParseUrl(url, originalUrl)) {
                continue;
            }

            const TString source = NYTUtils::GetNodeFieldOrDefault<TString>(meta, FIELD_RAW_STATS_META_SOURCE, "");
            const TString feed = NYTUtils::GetNodeFieldOrDefault<TString>(meta, FIELD_RAW_STATS_META_FEED, "");
            if (SOURCE_FLAG_CONVERTER.find(source) == SOURCE_FLAG_CONVERTER.end()) {
                continue;
            }

            // Path column keeps the query string, if there is one.
            const TStringBuf urlPath = url.GetField(THttpURL::FieldPath);
            const TStringBuf urlQuery = url.GetField(THttpURL::FieldQuery);
            TString pathWithQuery(urlPath);
            if (!urlQuery.empty()) {
                pathWithQuery += "?";
                pathWithQuery += urlQuery;
            }

            // The feed is blanked for the "api" source.
            const TString feedColumn = (source == "api") ? TString() : feed;

            NYT::TNode statsRow;
            statsRow[FIELD_PREPARED_DOMAIN] = NUtils::FixDomainPrefix(url.GetField(THttpURL::FieldHost));
            statsRow[FIELD_PREPARED_PATH] = pathWithQuery;
            statsRow[FIELD_PREPARED_FEED] = feedColumn;
            statsRow[FIELD_PREPARED_SOURCE] = source;
            statsRow[FIELD_PREPARED_ACTION] = rawRow[FIELD_RAW_STATS_ACTION];
            output->AddRow(statsRow);
        }
    }
};

REGISTER_MAPPER(TTurboStatsMapper)

/**
 * Deduplicating reducer: the same original_url may occur several times in the source data,
 * so duplicates are removed before the statistics are aggregated.
 */
struct TTurboStatsUniqueReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    TTurboStatsUniqueReducer() = default;

    // Writes the first row of the key group and does not read the rest of it.
    // The reduce key is set by the caller; ProcessTurboStats reduces by Domain, Path, Source.
    void Do(TReader *input, TWriter *output) override {
        const NYT::TNode &firstRowOfGroup = input->GetRow();
        output->AddRow(firstRowOfGroup);
    }
};

REGISTER_REDUCER(TTurboStatsUniqueReducer)

/**
 * Aggregating reducer for the statistics that do not involve Jupiter data.
 *
 * For every key group (ProcessTurboStats reduces by Domain, Source, Feed, Action) it writes one
 * row that carries those four columns from the first row of the group plus Count, the number of
 * rows in the group.
 */
struct TTurboStatsReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    TTurboStatsReducer() = default;

    void Do(TReader *input, TWriter *output) override {
        // Remember the first row before the reader is advanced.
        NYT::TNode groupHead = input->GetRow();

        size_t rowsInGroup = 0;
        while (input->IsValid()) {
            ++rowsInGroup;
            input->Next();
        }

        NYT::TNode aggregated;
        aggregated[FIELD_PREPARED_DOMAIN] = groupHead[FIELD_PREPARED_DOMAIN];
        aggregated[FIELD_PREPARED_SOURCE] = groupHead[FIELD_PREPARED_SOURCE];
        aggregated[FIELD_PREPARED_FEED] = groupHead[FIELD_PREPARED_FEED];
        aggregated[FIELD_PREPARED_ACTION] = groupHead[FIELD_PREPARED_ACTION];
        aggregated[FIELD_PREPARED_COUNT] = rowsInGroup;
        output->AddRow(aggregated);
    }
};

REGISTER_REDUCER(TTurboStatsReducer)

void PreprocessTurboStats() {
    const TConfig &config = TConfig::CInstance();
    NYT::IClientPtr client = NYT::CreateClient(config.MR_TURBO_SERVER_HOST);
    NYT::ITransactionPtr tx = client->StartTransaction();

    auto sourceModTime = NYTUtils::GetModificationTime(client, config.TABLE_HISTORY_STATIC);
    auto targetTableSourceTime = NYTUtils::GetAttrOrDefault<time_t>(client, config.TABLE_TURBO_URLS, ATTR_STATS_SOURCE_DATE, 0);
    if (sourceModTime <= targetTableSourceTime) {
        LOG_INFO("Skipping update of preprocessed turbo stats table");
        return;
    }

    NYT::TRichYPath input(config.TABLE_HISTORY_STATIC);
    input.Schema(NYT::TTableSchema()
                      .AddColumn(FIELD_RAW_STATS_KEY, NYT::EValueType::VT_STRING)   // host of feed/autoparsed site
                      .AddColumn(FIELD_RAW_STATS_META, NYT::EValueType::VT_STRING)   // url path
                      .AddColumn(FIELD_RAW_STATS_ACTION, NYT::EValueType::VT_STRING)
    );

    NYT::TRichYPath output(config.TABLE_TURBO_URLS);
    output.Schema(NYT::TTableSchema()
                      .AddColumn(FIELD_PREPARED_HOST, NYT::EValueType::VT_STRING)   // host of feed/autoparsed site
                      .AddColumn(FIELD_PREPARED_PATH, NYT::EValueType::VT_STRING)   // url path
                      .AddColumn(FIELD_PREPARED_FEED, NYT::EValueType::VT_STRING)   // url path
                      .AddColumn(FIELD_PREPARED_SOURCE, NYT::EValueType::VT_STRING)
                      .AddColumn(FIELD_SOURCE_FLAG, NYT::EValueType::VT_UINT64)
    );

    TMapCmd<TTurboStatsToPreparedMapper>(tx)
        .Input<NYT::TNode>(input)
        .Output<NYT::TNode>(output)
        .Do();

    TSortCmd<NYT::TNode>(tx)
        .Input<NYT::TNode>(output)
        .Output<NYT::TNode>(output)
        .By({FIELD_PREPARED_HOST, FIELD_PREPARED_PATH})
        .Do();

    NYTUtils::SetAttr(tx, config.TABLE_TURBO_URLS, ATTR_STATS_SOURCE_DATE, sourceModTime);
    NYTUtils::RemoveAttr(tx, config.TABLE_TURBO_URLS, ATTR_TRANSFER_ID);
    tx->Commit();
}

void TransferTurboHostsSearchUrls() {
    const TConfig &config = TConfig::CInstance();

    NYT::IClientPtr destClient = NYT::CreateClient(config.MR_JUPITER_SERVER_HOST);
    NYT::IClientPtr sourceClient = NYT::CreateClient(config.MR_TURBO_SERVER_HOST);

    TString destTable = config.TABLE_TURBO_URLS;
    TString sourceTable = config.TABLE_TURBO_URLS;

    LOG_INFO("Copying %s:%s to %s:%s", config.MR_TURBO_SERVER_HOST.c_str(), sourceTable.c_str(),
        config.MR_JUPITER_SERVER_HOST.c_str(), destTable.c_str());

    auto sourceTableTransferId =
        NYTUtils::GetAttrOrDefault<TString>(sourceClient, sourceTable, ATTR_TRANSFER_ID, "");
    auto destTableTransferId =
        NYTUtils::GetAttrOrDefault<TString>(destClient, destTable, ATTR_TRANSFER_ID, "");

    if (!sourceTableTransferId.Empty() && sourceTableTransferId == destTableTransferId) {
        LOG_INFO("Skipping transfer of turbo hosts search urls table");
        return;
    }
    TTransferManager tm(TConfigBase::GetYTToken());
    TString awaitTransferId;
    if (!sourceTableTransferId.Empty()) {
        // not empty and not equal to dest - maybe transfer still running?
        TTransferManager::TTaskInfo transferInfo;
        if (tm.GetTaskInfo(sourceTableTransferId, transferInfo)) {
            switch (transferInfo.State) {
                case TTransferManager::TTaskInfo::RUNNING:
                    awaitTransferId = sourceTableTransferId;
                    break;
                case TTransferManager::TTaskInfo::FAILED:
                    LOG_WARN("Prev task %s failed, going to start new one", sourceTableTransferId.data());
                    break;
                case TTransferManager::TTaskInfo::COMPLETED:
                    LOG_ERROR("Inconsistent state: tm task %s completed, but target table annotated with %s",
                              sourceTableTransferId.data(), destTableTransferId.data());
                    break;
                default:
                    ythrow yexception() << "Unknown transfer state " << (int) transferInfo.State;
            }
        }
    }
    if (awaitTransferId.Empty()) {
        awaitTransferId =
            tm.PostTask(config.MR_TURBO_SERVER_HOST, sourceTable, config.MR_JUPITER_SERVER_HOST, destTable);
        NYTUtils::SetAttr(sourceClient, sourceTable, ATTR_TRANSFER_ID, awaitTransferId);
    }
    LOG_INFO("Waiting for transfer manager task %s", awaitTransferId.data());
    tm.WaitForTaskDone(awaitTransferId);
    NYTUtils::SetAttr(destClient, destTable, ATTR_TRANSFER_ID, awaitTransferId);
}

// generating turbo hosts variations
struct TTurboUrlsGenerationMapper : public TTaggedMapper {
public:
    TTurboUrlsGenerationMapper() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        const TVector<TString> prefixes = {"http://", "https://", "http://www.", "https://www.", "http://m.", "https://m."};
        for (; reader.IsValid(); reader.Next()) {
            NProto::TurboUrl turboUrl = reader.GetRow(PreparedTurboUrlsInputTag);
            turboUrl.SetOriginalHost(turboUrl.GetHost());
            TString domain = NUtils::GetDomainWithoutPrefix(turboUrl.GetHost());
            for (const TString &prefix : prefixes) {
                turboUrl.SetHost(prefix + domain);
                writer.AddRow(turboUrl, PreparedTurboUrlsOutputTag);
            }
        }
    }
};
REGISTER_MAPPER(TTurboUrlsGenerationMapper)

//turbo search urls join
struct TTurboSearchUrlsReducer : public TTaggedReducer {
public:
    TTurboSearchUrlsReducer() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        NProto::TurboUrl turboUrl;
        THashSet<TString> sources;
        TString domain, url, feed;
        bool searchable = false;
        i64 urlStatus = -1; // NOTHING_FOUND
        uint64_t sourceFlags = 0;
        for (; reader.IsValid(); reader.Next()) {
            if (reader.IsCurrentTable(JupiterSearchUrlsInputTag)) {
                TAcceptanceUrlForWebMasterRecord searchUrl = reader.GetRow(JupiterSearchUrlsInputTag);
                searchable = searchUrl.GetIsSearchable();
                urlStatus = searchUrl.GetUrlStatus();
            } else {
                turboUrl = reader.GetRow(PreparedTurboUrlsInputTag);
                if (domain.Empty()) {
                    domain = NUtils::GetDomainWithoutPrefix(turboUrl.GetHost());
                    url = turboUrl.GetOriginalHost() + turboUrl.GetPath();
                }
                sources.insert(turboUrl.GetSource());
                sourceFlags |= SOURCE_FLAG_CONVERTER.at(turboUrl.GetSource());
                if (feed.Empty()) {
                    feed = turboUrl.GetFeed(); // save only one feed
                }
            }
        }
        if (!domain.Empty()) {
            NProto::TurboUrlSample sample;
            sample.SetDomain(domain);
            sample.SetUrl(url);
            sample.SetFeed(feed);
            sample.SetSource(JoinSeq(",", sources));
            sample.SetIsSearchable(searchable);
            sample.SetUrlStatus(urlStatus);
            sample.SetSourceFlags(sourceFlags);
            writer.AddRow(sample, TurboUrlSamplesOutputTag);
        }
    }
};
REGISTER_REDUCER(TTurboSearchUrlsReducer)

struct TTurboSearchUrlsUniqueReducer : public TTaggedReducer {
public:
    TTurboSearchUrlsUniqueReducer() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        NProto::TurboUrlSample result = reader.GetRow(TurboUrlSamplesInputTag);
        reader.Next();

        for (; reader.IsValid(); reader.Next()) {
            const NProto::TurboUrlSample &sample = reader.GetRow(TurboUrlSamplesInputTag);
            // any url with normal status is better
            if (sample.GetIsSearchable() || result.GetUrlStatus() == -1) {
                result.SetUrlStatus(sample.GetUrlStatus());
                result.SetIsSearchable(sample.GetIsSearchable());
            }
            result.SetSourceFlags(result.GetSourceFlags() | sample.GetSourceFlags());
        }
        writer.AddRow(result, TurboUrlSamplesOutputTag);
    }
};
REGISTER_REDUCER(TTurboSearchUrlsUniqueReducer)

struct TTurboSearchStatsReducer : public TTaggedReducer {

public:
    TTurboSearchStatsReducer() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        TString domain;
        TString turboPath, lastTurboPath, lastNotAutoparserPath;
        TString turboSource;
        uint64_t turboUrls = 0;
        uint64_t turboSearchUrls = 0;
        uint64_t turboSearchUrlsNoAP = 0;
        uint64_t turboSourceFlags;
        THashMap<uint64_t, uint64_t> turboSourceCount;
        for (; reader.IsValid(); reader.Next()) {
            NProto::TurboUrlSample sample = reader.GetRow(TurboUrlSamplesInputTag);
            if (domain.Empty()) {
                domain = sample.GetDomain();
            }
            turboUrls++;
            if (!sample.GetIsSearchable()) {
                continue;
            }
            turboPath = sample.GetUrl();
            turboSourceFlags = sample.GetSourceFlags();
            if ((turboSourceFlags & (turboSourceFlags - 1)) == 0){
                turboSourceCount[turboSourceFlags]++;
            } else {
                int sourceId = 1;
                while (turboSourceFlags) {
                    if (turboSourceFlags & 1UL) {
                        turboSourceCount[sourceId]++;
                    }
                    sourceId <<= 1;
                    turboSourceFlags >>= 1;
                }
            }

            if (turboPath != lastNotAutoparserPath) {
                if ((turboSourceFlags & AUTOPARSER) == 0 && (turboSourceFlags & AUTOPARSER_BUTTON) == 0) {
                    turboSearchUrlsNoAP++;
                    lastNotAutoparserPath = turboPath;
                }
            }
            if (turboPath != lastTurboPath) {
                turboSearchUrls++;
                lastTurboPath = turboPath;
            }
        }
        NProto::TurboDomainStats stats;
        stats.SetDomain(domain);
        stats.SetTurboUrls(turboUrls);
        stats.SetTurboSearchUrls(turboSearchUrls);
        stats.SetTurboSearchUrlsNoAP(turboSearchUrlsNoAP);
        stats.SetAutoparsedUrls(0);
        if (turboSourceCount.contains(AUTOPARSER)) {
            stats.SetAutoparsedUrls(turboSourceCount.at(AUTOPARSER));
        }
        stats.SetAutoparsedButtonUrls(0);
        if (turboSourceCount.contains(AUTOPARSER_BUTTON)) {
            stats.SetAutoparsedButtonUrls(turboSourceCount.at(AUTOPARSER_BUTTON));
        }
        stats.SetRssUrls(0);
        if (turboSourceCount.contains(RSS)) {
            stats.SetRssUrls(turboSourceCount.at(RSS));
        }
        stats.SetYmlUrls(0);
        if (turboSourceCount.contains(YML)) {
            stats.SetYmlUrls(turboSourceCount.at(YML));
        }
        stats.SetApiUrls(0);
        if (turboSourceCount.contains(API)) {
            stats.SetApiUrls(turboSourceCount.at(API));
        }
        stats.SetYmlListingUrls(0);
        if (turboSourceCount.contains(YML_LISTING)) {
            stats.SetYmlListingUrls(turboSourceCount.at(YML_LISTING));
        }
        stats.SetAutoMordaUrls(0);
        if (turboSourceCount.contains(AUTO_MORDA)) {
            stats.SetAutoMordaUrls(turboSourceCount.at(AUTO_MORDA));
        }
        writer.AddRow(stats, TurboDomainStatsOutputTag);
    }
};

REGISTER_REDUCER(TTurboSearchStatsReducer)

/**
 * Joins the prepared turbo urls with the Jupiter search urls and builds the url samples table
 * (config.TABLE_TURBO_URLS_SEARCH_SAMPLES) and the per-domain stats table
 * (config.TABLE_TURBO_URLS_SEARCH_STATS) on the Jupiter cluster.
 *
 * The evaluation is skipped when neither input is newer than what the ATTR_TURBO_URLS_DATE and
 * ATTR_SEARCH_URLS_DATE attributes of the samples table record. All operations run in one
 * transaction that is committed at the end; on the skip path it is left uncommitted.
 */
void EvaluateTurboStats() {
    const TConfig &config = TConfig::CInstance();

    NYT::IClientPtr client = NYT::CreateClient(config.MR_JUPITER_SERVER_HOST);
    NYT::ITransactionPtr tx = client->StartTransaction();

    // search urls table
    TString searchUrlsTablePath = GetJupiterAcceptanceInProdTable(tx);
    TString turboUrlsTablePath = config.TABLE_TURBO_URLS;

    TString samplesTablePath = config.TABLE_TURBO_URLS_SEARCH_SAMPLES;
    TString statsTablePath = config.TABLE_TURBO_URLS_SEARCH_STATS;

    // check for changes
    auto destTableTurboDate = NYTUtils::GetAttrOrDefault<time_t>(tx, samplesTablePath, ATTR_TURBO_URLS_DATE, 0);
    auto destTableSearchDate = NYTUtils::GetAttrOrDefault<TString>(tx, samplesTablePath, ATTR_SEARCH_URLS_DATE, "");
    auto tableTurboDate = NYTUtils::GetModificationTime(tx, turboUrlsTablePath);
    auto tableSearchDate = GetJupiterStateFromPath(searchUrlsTablePath);
    // Both inputs must be unchanged for the skip; a change in either one triggers a full re-evaluation.
    if (tableTurboDate <= destTableTurboDate && tableSearchDate <= destTableSearchDate) {
        LOG_INFO("Skipping evaluation of search turbo stats");
        return;
    }

    auto turboUrlsTable = TTable<NProto::TurboUrl>(tx, turboUrlsTablePath);
    // Intermediate table with the host variations, stored next to the turbo urls table.
    auto turboUrlsIntmTable = TTable<NProto::TurboUrl>(tx, turboUrlsTablePath + ".intm");
    // Only the columns needed for the join are selected from the search urls table.
    auto searchUrlsTable = TTable<TAcceptanceUrlForWebMasterRecord>(tx, searchUrlsTablePath)
        .SelectFields({"Host", "Path", "IsSearchable", "UrlStatus"});


    auto samplesTable = TTable<NProto::TurboUrlSample>(tx, samplesTablePath);
    auto statsTable = TTable<NProto::TurboDomainStats>(tx, statsTablePath);

    // Step 1: expand every turbo url into its host variations.
    LOG_INFO("Generating turbo urls with schema/www/m variations");
    TMapCmd<TTurboUrlsGenerationMapper>(tx)
        .Input(turboUrlsTable, PreparedTurboUrlsInputTag)
        .Output(turboUrlsIntmTable, PreparedTurboUrlsOutputTag)
        .Do();

    // Step 2: sort the variations by Host, Path, the key of the join below.
    // NOTE(review): the log text mentions url samples, but this step sorts the intermediate
    // turbo urls table - the message looks copied from the sort further down.
    LOG_INFO("Sorting turbo search url samples before reduce by Url");
    TSortCmd<NProto::TurboUrl>(tx, turboUrlsIntmTable)
        .By({FIELD_PREPARED_HOST, FIELD_PREPARED_PATH})
        .Do();

    // Step 3: join by Host, Path; the search urls table is the foreign input and the variations
    // table is the primary one. Each joined group yields at most one sample.
    LOG_INFO("Joining turbo url with jupiter search urls");
    TReduceCmd<TTurboSearchUrlsReducer>(tx)
        .Input(searchUrlsTable.AsForeign(), JupiterSearchUrlsInputTag)
        .Input(turboUrlsIntmTable.AsPrimary(), PreparedTurboUrlsInputTag)
        .Output(samplesTable, TurboUrlSamplesOutputTag)
        .JoinBy({FIELD_PREPARED_HOST, FIELD_PREPARED_PATH})
        .ReduceBy({FIELD_PREPARED_HOST, FIELD_PREPARED_PATH})
        .DataSizePerJob(8_GB)
        .CpuLimit(0.4)
        .Do();

    // Step 4: collapse the samples produced by different host variations of the same original url.
    LOG_INFO("Sorting turbo search url samples before reduce by Url");
    TSortCmd<NProto::TurboUrlSample>(tx, samplesTable)
        .By({FIELD_SAMPLE_URL})
        .Do();

    // The samples table is both the input and the output of this reduce.
    LOG_INFO("Reducing turbo search urls by original url");
    TReduceCmd<TTurboSearchUrlsUniqueReducer>(tx)
        .Input(samplesTable, TurboUrlSamplesInputTag)
        .Output(samplesTable, TurboUrlSamplesOutputTag)
        .ReduceBy({FIELD_SAMPLE_URL})
        .Do();

    // Step 5: sort by Domain, Url so that the stats reducer sees equal urls of a domain next to each other.
    LOG_INFO("Sorting turbo search url samples for stats aggregation");
    TSortCmd<NProto::TurboUrlSample>(tx, samplesTable)
        .By({FIELD_SAMPLE_DOMAIN, FIELD_SAMPLE_URL})
        .Do();

    // calc stats
    LOG_INFO("Reducing turbo search url stats");
    TReduceCmd<TTurboSearchStatsReducer>(tx)
        .Input(samplesTable, TurboUrlSamplesInputTag)
        .Output(statsTable, TurboDomainStatsOutputTag)
        .ReduceBy({FIELD_SAMPLE_DOMAIN})
        .Do();

    TSortCmd<NProto::TurboDomainStats>(tx, statsTable)
        .By({FIELD_SAMPLE_DOMAIN})
        .Do();

    // Record which versions of the inputs the results were built from; read back by the check
    // at the top of this function and by CheckForNewJupiterBase.
    NYTUtils::SetAttr(tx, samplesTablePath, ATTR_TURBO_URLS_DATE, tableTurboDate);
    NYTUtils::SetAttr(tx, samplesTablePath, ATTR_SEARCH_URLS_DATE, tableSearchDate);
    tx->Commit();
}

bool CheckForNewJupiterBase() {
    const TConfig &config = TConfig::CInstance();

    NYT::IClientPtr client = NYT::CreateClient(config.MR_JUPITER_SERVER_HOST);
    NYT::ITransactionPtr tx = client->StartTransaction();

    // search urls table
    TString searchUrlsTable = GetJupiterAcceptanceInProdTable(tx);
    TString samplesTablePath = config.TABLE_TURBO_URLS_SEARCH_SAMPLES;

    // check for changes
    auto destTableSearchDate = NYTUtils::GetAttrOrDefault<TString>(tx, samplesTablePath, ATTR_SEARCH_URLS_DATE, "");
    auto tableSearchDate = GetJupiterStateFromPath(searchUrlsTable);
    if (tableSearchDate <= destTableSearchDate) {
        LOG_INFO("No fresh jupiter data. Skipping");
        return false;
    }
    return true;
}

/**
 * Entry point of the search turbo stats processing; the arguments are ignored.
 *
 * When CheckForNewJupiterBase reports fresh Jupiter data, the prepared turbo urls are rebuilt,
 * transferred to the Jupiter cluster and evaluated, in that order.
 *
 * @return always 0
 */
int ProcessSearchTurboStats(int, const char **) {
    if (!CheckForNewJupiterBase()) {
        return 0;
    }

    PreprocessTurboStats();
    TransferTurboHostsSearchUrls();
    EvaluateTurboStats();
    return 0;
}

/**
 * Entry point of the turbo stats processing that does not involve Jupiter data; the arguments
 * are ignored.
 *
 * Reads config.TABLE_HISTORY_STATIC on the turbo cluster, removes duplicate records and writes
 * the number of records per (Domain, Source, Feed, Action) to config.TABLE_TURBO_URLS_STATS.
 * All operations run in one transaction that is committed at the end.
 *
 * @return always 0
 */
int ProcessTurboStats(int, const char **) {
    const TConfig &config = TConfig::CInstance();
    NYT::IClientPtr client = NYT::CreateClient(config.MR_TURBO_SERVER_HOST);
    NYT::ITransactionPtr tx = client->StartTransaction();
    LOG_INFO("Processing turbo stats (without Jupiter)");

    // Raw history table
    NYT::TRichYPath input(config.TABLE_HISTORY_STATIC);
    input.Schema(NYT::TTableSchema()
                      .AddColumn(FIELD_RAW_STATS_KEY, NYT::EValueType::VT_STRING)
                      .AddColumn(FIELD_RAW_STATS_META, NYT::EValueType::VT_STRING)
                      .AddColumn(FIELD_RAW_STATS_ACTION, NYT::EValueType::VT_STRING)
    );

    // Deduplicated records; stored next to the turbo urls table with the ".intm" suffix
    NYT::TRichYPath intermediate(config.TABLE_TURBO_URLS + ".intm");
    intermediate.Schema(NYT::TTableSchema()
                      .AddColumn(FIELD_PREPARED_DOMAIN, NYT::EValueType::VT_STRING)
                      .AddColumn(FIELD_PREPARED_PATH, NYT::EValueType::VT_STRING)
                      .AddColumn(FIELD_PREPARED_SOURCE, NYT::EValueType::VT_STRING)
                      .AddColumn(FIELD_PREPARED_FEED, NYT::EValueType::VT_STRING)
                      .AddColumn(FIELD_PREPARED_ACTION, NYT::EValueType::VT_STRING)
    );

    // Resulting statistics: one row per (Domain, Source, Feed, Action) with the record count
    NYT::TRichYPath output(config.TABLE_TURBO_URLS_STATS);
    output.Schema(NYT::TTableSchema()
                      .AddColumn(FIELD_PREPARED_DOMAIN, NYT::EValueType::VT_STRING)
                      .AddColumn(FIELD_PREPARED_SOURCE, NYT::EValueType::VT_STRING)
                      .AddColumn(FIELD_PREPARED_FEED, NYT::EValueType::VT_STRING)
                      .AddColumn(FIELD_PREPARED_ACTION, NYT::EValueType::VT_STRING)
                      .AddColumn(FIELD_PREPARED_COUNT, NYT::EValueType::VT_UINT64)
    );

    // Step 1: map raw rows to (Domain, Path, Feed, Source, Action) and keep one row per
    // (Domain, Path, Source) - TTurboStatsUniqueReducer writes only the first row of a group.
    LOG_INFO("Processing history_static and removing duplicates");
    TMapReduceCmd<TTurboStatsMapper, TTurboStatsUniqueReducer>(tx)
        .Input<NYT::TNode>(input)
        .Output<NYT::TNode>(intermediate)
        .ReduceBy({FIELD_PREPARED_DOMAIN, FIELD_PREPARED_PATH, FIELD_PREPARED_SOURCE})
        .Do();

    // Step 2: sort in place by the key of the aggregation below.
    LOG_INFO("Sorting before aggregation");
    TSortCmd<NYT::TNode>(tx)
        .Input<NYT::TNode>(intermediate)
        .Output<NYT::TNode>(intermediate)
        .By({FIELD_PREPARED_DOMAIN, FIELD_PREPARED_SOURCE, FIELD_PREPARED_FEED, FIELD_PREPARED_ACTION})
        .Do();

    // Step 3: count the rows of every (Domain, Source, Feed, Action) group.
    LOG_INFO("Aggregating stats");
    TReduceCmd<TTurboStatsReducer>(tx)
        .Input<NYT::TNode>(intermediate)
        .Output<NYT::TNode>(output)
        .ReduceBy({FIELD_PREPARED_DOMAIN, FIELD_PREPARED_SOURCE, FIELD_PREPARED_FEED, FIELD_PREPARED_ACTION})
        .Do();

    // Step 4: sort the result table in place by the same columns.
    LOG_INFO("Sorting result (chunk optimization)");
    TSortCmd<NYT::TNode>(tx)
        .Input<NYT::TNode>(output)
        .Output<NYT::TNode>(output)
        .By({FIELD_PREPARED_DOMAIN, FIELD_PREPARED_SOURCE, FIELD_PREPARED_FEED, FIELD_PREPARED_ACTION})
        .Do();

    LOG_INFO("Finished successfully");
    tx->Commit();
    return 0;
}

} //namespace NTurbo
} //namespace NWebmaster

