
#include <saas/tools/distcl/lib/inddoc.h>

#include <saas/api/clientapi.h>
#include <saas/library/sharding/rules/urlhash.h>
#include <saas/util/hex.h>

#include <yweb/protos/indexeddoc.pb.h>
#include <yweb/realtime/distributor/client/distclient.h>

#include <library/cpp/getopt/last_getopt.h>
#include <library/cpp/getopt/modchooser.h>
#include <library/cpp/regex/pcre/regexp.h>
#include <library/cpp/protobuf/json/proto2json.h>

#include <google/protobuf/text_format.h>
#include <util/stream/file.h>
#include <util/string/cast.h>

using namespace NLastGetopt;

int main_move(int argc, const char **argv) {
    TOpts options = TOpts::Default();
    options.AddHelpOption();
    options.AddVersionOption();
    options.AddLongOption('i', "input-stream", "stream name").RequiredArgument("STRING").Required();
    options.AddLongOption('o', "output-stream", "stream name").RequiredArgument("STRING").Required();
    options.AddLongOption('a', "age", "max age to get").RequiredArgument("NUM").Required();
    options.AddLongOption('c', "count", "max document to get").RequiredArgument("NUM").Optional();
    options.SetFreeArgsNum(2);
    options.SetFreeArgTitle(0, "source", "the source distributor(s)");
    options.SetFreeArgTitle(1, "destination", "the destination distributor(s)");
    TOptsParseResult res(&options, argc, argv);

    {
        NRealTime::TDistributorClient input(res.GetFreeArgs()[0]);
        NRealTime::TDepositDistributorClient output(res.GetFreeArgs()[1]);

        input.SetAge(res.Get<ui32>("age"));
        input.SetStream(res.Get<TString>("input-stream"));

        const ui64 limit = res.Has("count") ? res.Get<ui64>("count") : Max<ui64>();
        ui64 count = 0;
        while (count < limit) {
            TString data;
            ui64 key;
            TVector<TString> attrs;
            while (input.GetNextRawRecord(data, nullptr, &key, &attrs) && (count < limit)) {
                NRTYServer::TMessage message;
                if (!message.ParseFromString(data)) {
                    Cerr << "skipped incorrect record" << Endl;
                    continue;
                }
                NRealTime::TIndexedDocDeposit deposit;
                deposit.SetDoc(data);
                deposit.SetUrlKey(key);
                deposit.SetUrlId(NSaas::GetActionDescription(message));
                deposit.SetVersion(NSaas::GetActionVersion(message));
                deposit.AddStream(res.Get<TString>("output-stream"));
                for (auto&& value : attrs) {
                    deposit.AddAttrs(value);
                }

                TString error;
                if (!output.Deposit(deposit)) {
                    Cerr << "Could not deposit " << NSaas::GetActionDescription(message) << ": " << error << Endl;
                } else {
                    count += 1;
                }
            }
            Sleep(TDuration::Seconds(1));
        }
    }

    return EXIT_SUCCESS;
}

// Cross-option validation for the "find" mode: exactly one of --age and
// --timestamp has to be present on the command line (both missing and both
// given are rejected).
//
// Returns true when the options are consistent. Otherwise a TUsageException
// is raised and immediately handed to res.HandleError(); false is returned
// if HandleError() comes back at all.
bool ValidateOptsEx(TOptsParseResult& res) {
    if (res.Has("age") != res.Has("timestamp"))
        return true;

    // HandleError() is called from inside the handler, presumably because it
    // reports the in-flight exception - confirm against last_getopt.
    try {
        throw TUsageException() << "Exactly one of the following options should be provided: -a (--age) or -t (--timestamp)";
    } catch (...) {
        res.HandleError();
        return false;
    }
}

int main_find(int argc, const char** argv) {
    TIndDocOutput::TOptions inddocOpts;
    THolder<TIndDocOutput> inddoc;

    TOpts options = TOpts::Default();
    options.AddHelpOption();
    options.AddVersionOption();
    options.AddLongOption('c', "server", "distributor").RequiredArgument("SERVER").Required();
    options.AddLongOption('s', "stream", "distributor stream").RequiredArgument("STREAM").Required();
    options.AddLongOption('a', "age", "max age to get (in seconds)").OptionalArgument("NUM");
    options.AddLongOption('t', "timestamp", "max age to get (as distributor's timestamp)").OptionalArgument("TIME");
    options.AddLongOption('f', "format", "format to display output: (signature|binary|hr|json|inddoc)").Optional().DefaultValue("signature");
    options.AddLongOption("stop-at", "stop processing at the given distributor timestamp").OptionalArgument("TIME");
    options.AddLongOption("docs-dir", "output directory for inddoc files").StoreResult(&inddocOpts.OutputDir).DefaultValue(".");
    options.AddLongOption("docs-begin", "inddoc index range begin").StoreResult(&inddocOpts.NumBegin).DefaultValue("1");
    options.AddLongOption("docs-end", "inddoc index range end").StoreResult(&inddocOpts.NumEnd).DefaultValue("1000");
    options.SetFreeArgsNum(1);
    options.SetFreeArgTitle(0, "regexp", "regular expression to match");
    TOptsParseResult res(&options, argc, argv);
    if (!ValidateOptsEx(res))
        return EXIT_FAILURE;

    NRealTime::TDistributorClient input(res.Get<TString>("server"));
    input.SetSignaturesOnly(true);
    input.SetStream(res.Get<TString>("stream"));
    if (res.Has("age"))
        input.SetAge(res.Get<ui32>("age"));
    else
        input.SetAge((int)time(0) - res.Get<ui32>("timestamp") + 1);

    ui64 maxDistTime = res.GetOrElse<ui64>("stop-at", 0);

    TRegExMatch regexp;
    regexp.Compile(res.GetFreeArgs()[0]);

    const TString& format = res.Get<TString>("format");
    if (format == "inddoc") {
        inddoc = MakeHolder<TIndDocOutput>(inddocOpts);
    }

    while (true) {
        NRealTime::TDistributorClient::TSignature signature;
        while (input.GetNextDocSignature(signature)) {
            if (!!maxDistTime && signature.DistTime > maxDistTime)
                return EXIT_SUCCESS;

            if (!regexp.Match(signature.Id.data())) {
                continue;
            }

            if (format == "signature") {
                Cout << signature.DistTime << '\t' << signature.Id << '\t' << signature.Version << Endl;
                continue;
           }

            TString data;
            if (!input.SyncFetch(signature, data)) {
                Cerr << "cannot fetch data for " << signature.Id << ':' << signature.Version;
                continue;
            }
            if (format == "binary") {
                Cout << data;
            }

            NRTYServer::TMessage message;
            if (!message.ParseFromString(data)) {
                Cerr << "cannot parse data for " << signature.Id << ':' << signature.Version;
                continue;
            }
            if (format == "hr") {
                TString hr;
                Y_VERIFY(google::protobuf::TextFormat::PrintToString(message, &hr));
                Cout << hr << Endl;
                continue;
            }
            if (format == "inddoc") {
                if (!message.GetDocument().HasIndexedDoc())
                    continue; //skip TAction::atDelete and other non-document kinds

                auto fileName = inddoc->NextFile();
                if (!fileName.Defined())
                    return EXIT_SUCCESS; //requested number of documents was stored

                Cerr << *fileName << ": " << signature.Id;
                inddoc->Write(*fileName, message.GetDocument());
                continue;
            }
            if (format == "json") {
                Cout << NProtobufJson::Proto2Json(message) << Endl;
                continue;
            }

            throw yexception() << "unknown format: " << format;
        }
        Sleep(TDuration::Seconds(1));
    }

    return EXIT_SUCCESS;
}

int main_urlhash(int argc, const char** argv) {
    TOpts options = TOpts::Default();
    options.AddHelpOption();
    options.AddVersionOption();

    options.SetFreeArgsNum(1);
    options.SetFreeArgTitle(0, "url", "URL string to calculate hash from");
    TOptsParseResult res(&options, argc, argv);

    auto hash = NSaas::TUrlShardingRule::GetUrlShard(res.GetFreeArgs()[0], NSearchMapParser::SearchMapShards);
    Cout << "URL:          " << res.GetFreeArgs()[0] << Endl;
    Cout << "Shard index:  " << hash << " (0x" << Ui64ToHex(hash, 4) << ")" << Endl;

    return EXIT_SUCCESS;
}



int main(int argc, const char** argv) {
    TModChooser modChooser;
    modChooser.AddMode("move", main_move, "transfer documents from one distributor to another");
    modChooser.AddMode("find", main_find, "match document url by regexp");
    modChooser.AddMode("urlhash", main_urlhash, "calculate url hash and sharding");

    try {
        return modChooser.Run(argc, argv);
    } catch (...) {
        Cerr << "An exception has occurred: " << CurrentExceptionMessage() << Endl;
        return  EXIT_FAILURE;
    }
}
