#include <travel/avia/analytics/flight_timetable_comparison/proto/flight_info.pb.h>

#include <library/cpp/getopt/last_getopt.h>
#include <library/cpp/logger/global/global.h>
#include <library/cpp/scheme/scheme.h>

#include <mapreduce/yt/client/client.h>

#include <util/datetime/base.h>
#include <util/generic/hash.h>
#include <util/generic/string.h>
#include <util/string/builder.h>
#include <util/generic/yexception.h>
#include <util/system/guard.h>
#include <util/stream/file.h>
#include <util/thread/pool.h>

namespace {
    // Parses an ISO 8601 date/time string.
    // Returns the zero instant (TInstant::FromValue(0)) when the text cannot be parsed.
    TInstant ParseDateTime(TStringBuf buf) {
        TInstant parsed = TInstant::FromValue(0);
        const bool ok = TInstant::TryParseIso8601(buf, parsed);
        return ok ? parsed : TInstant::FromValue(0);
    }

}

// Identifier of a station (point). Station ids read from the thread JSON and
// the departure/arrival station ids of flights are stored in this type.
using TPointId = i64;

// Bidirectional mapping between route names and compact numeric route ids.
// Ids are handed out sequentially, starting from 0, in order of first registration.
class TRouteStorage {
public:
    using TRouteId = ui32;
    // Sentinel id returned by GetRouteId for unknown routes.
    static const TRouteId NotFound = -1;
    // Sentinel name returned by GetRouteById for unknown ids.
    static const TString RouteNotFound;

    TRouteStorage() = default;

    // Returns the id of `route`, registering the route first if it is unknown.
    // May be called from several threads at once: the lookup and the insertion
    // are both done under the lock, because an unlocked lookup could race with
    // another thread that is inserting into (and possibly rehashing) the map.
    TRouteId GetOrCreateRouteId(const TString& route) {
        TGuard<TSpinLock> guard(Lock);
        const auto iter = Storage.find(route);
        if (iter != Storage.end()) {
            return iter->second;
        }

        const auto newId = static_cast<TRouteId>(Storage.size());
        Storage[route] = newId;
        RouteById[newId] = route;
        return newId;
    }

    // Returns the id of `route`, or NotFound when the route was never registered.
    // Not synchronized: must not run concurrently with GetOrCreateRouteId.
    TRouteId GetRouteId(const TString& route) const {
        const auto iter = Storage.find(route);
        if (iter == Storage.end()) {
            return NotFound;
        }

        return iter->second;
    }

    // Returns the route name for `routeId`, or RouteNotFound for an unknown id.
    // Not synchronized: must not run concurrently with GetOrCreateRouteId.
    const TString& GetRouteById(TRouteId routeId) const {
        const auto iter = RouteById.find(routeId);
        if (iter == RouteById.end()) {
            return RouteNotFound;
        }

        return iter->second;
    }

    // Number of registered routes.
    size_t Size() const {
        return Storage.size();
    }

private:
    THashMap<TString, TRouteId> Storage;
    THashMap<TRouteId, TString> RouteById;
    // Guards Storage and RouteById inside GetOrCreateRouteId.
    TSpinLock Lock;
};

const TString TRouteStorage::RouteNotFound = "";

// Lookup key of a thread: a route id combined with a date.
struct TKey {
    // Initialized explicitly so that a default-constructed key never carries
    // an indeterminate route id.
    TRouteStorage::TRouteId RouteId = TRouteStorage::NotFound;
    TInstant Date;

    TKey() = default;

    TKey(TRouteStorage::TRouteId routeId, TInstant date)
        : RouteId(routeId)
        , Date(date)
    {
    }

    // Copy operations are left to the compiler: both members are plain values.

    bool operator==(const TKey& other) const {
        return (RouteId == other.RouteId) && (Date == other.Date);
    }
};

struct TAviaFlightInfo {
    TInstant DepartureDay = TInstant::FromValue(0);
    TInstant Departure = TInstant::FromValue(0);
    TInstant Arrival = TInstant::FromValue(0);
    TRouteStorage::TRouteId RouteId = TRouteStorage::NotFound;
    TPointId DeparturePointId = 0;
    TPointId ArrivalPointId = 0;
    TString QueryId = "";
    ui64 Unixtime = 0;

    TAviaFlightInfo() = default;
    bool operator==(const TAviaFlightInfo& other) const {
        return DepartureDay == other.DepartureDay && Departure == other.Departure &&
               Arrival == other.Arrival && RouteId == other.RouteId && DeparturePointId == other.DeparturePointId &&
               ArrivalPointId == other.ArrivalPointId && QueryId == other.QueryId && Unixtime == other.Unixtime;
    }

    TAviaFlightInfo(TInstant departureDay, TInstant departure, TInstant arrival, TRouteStorage::TRouteId routeId, TPointId departurePointId, TPointId arrivalPointId, TString queryId, ui64 unixtime)
        : DepartureDay(departureDay)
        , Departure(departure)
        , Arrival(arrival)
        , RouteId(routeId)
        , DeparturePointId(departurePointId)
        , ArrivalPointId(arrivalPointId)
        , QueryId(queryId)
        , Unixtime(unixtime)
    {
    }

    void FillValue(NSc::TValue& value, const TRouteStorage* routeStorage) const {
        value["route"] = routeStorage->GetRouteById(RouteId);
        value["avia_arrival"] = Arrival.ToRfc822String();
        value["avia_departure"] = Departure.ToRfc822String();
        value["arrive_to"] = ArrivalPointId;
        value["depart_from"] = DeparturePointId;
        value["unixtime"] = Unixtime;
        value["query_id"] = QueryId;
    }
};

// Hash for TAviaFlightInfo: XOR of the hashes of all fields that take part in
// TAviaFlightInfo::operator==.
template <>
struct THash<TAviaFlightInfo> {
    using TTimeHasher = THash<TInstant>;
    using TPointHasher = THash<TPointId>;
    using TRouteHasher = THash<TRouteStorage::TRouteId>;
    using TQueryHasher = THash<TString>;
    using TUnixtimeHasher = THash<ui64>;

    // const-qualified (like THash<TKey>) so the hasher can be invoked through
    // a const hasher object.
    size_t operator()(const TAviaFlightInfo& flightInfo) const {
        auto timeHasher = TTimeHasher();
        auto pointHasher = TPointHasher();
        auto routeHasher = TRouteHasher();
        return timeHasher(flightInfo.DepartureDay) ^ timeHasher(flightInfo.Departure) ^ timeHasher(flightInfo.Arrival) ^
               routeHasher(flightInfo.RouteId) ^ pointHasher(flightInfo.DeparturePointId) ^
               pointHasher(flightInfo.ArrivalPointId) ^ TQueryHasher()(flightInfo.QueryId) ^ TUnixtimeHasher()(flightInfo.Unixtime);
    }
};

// Prints a key as "TKey{RouteId=..., Date=...}".
IOutputStream& operator<<(IOutputStream& out, const TKey& key) {
    out << "TKey{RouteId=" << key.RouteId;
    out << ", Date=" << key.Date;
    out << "}";
    return out;
}

struct TTimeInfo {
    TInstant Arrival;
    TInstant Departure;
    static constexpr TInstant Empty = TInstant::FromValue(0);
    TTimeInfo(TInstant arrival, TInstant departure)
        : Arrival(arrival)
        , Departure(departure)
    {
    }

    TTimeInfo() = default;

    TTimeInfo(const TTimeInfo&) = default;

    TTimeInfo& operator=(const TTimeInfo&) = default;

    bool operator==(const TTimeInfo& other) const {
        return (Arrival == other.Arrival) && (Departure == other.Departure);
    }
};

// Hash for TKey: XOR of the date hash and the route id hash.
template <>
struct THash<TKey> {
    size_t operator()(const TKey& key) const {
        const auto dateHash = THash<TInstant>()(key.Date);
        const auto routeHash = THash<TRouteStorage::TRouteId>()(key.RouteId);
        return dateHash ^ routeHash;
    }
};

// Raised by TRaspThreadInfo::AddFromThread when a station already has an
// arrival (or departure) time and another one is being added.
class TTimeConflictException: public yexception {
};

// Raised by TRaspThreadInfo::FindSegmentInfo when a requested point is not
// among the stations of the thread.
class TPointNotFoundExcepion: public yexception {
};

// Arrival/departure times of a single thread, indexed by station id.
class TRaspThreadInfo {
public:
    TRaspThreadInfo() = default;

    // Builds the station table from the "stations" array of `thread`.
    // Each station contributes its "id" and the "time" fields of its
    // "arrival" and "departure" nodes; a missing or unparsable time is stored
    // as the zero instant.
    TRaspThreadInfo(const NSc::TValue& thread)
        : StationsById()
    {
        const NSc::TArray& stations = thread.Get(TStringBuf("stations")).GetArray();
        for (const NSc::TValue& station : stations) {
            TPointId stationId = station.Get(TStringBuf("id")).GetIntNumber();
            StationsById[stationId] = TTimeInfo{
                ParseDateTime(GetTime(station, TStringBuf("arrival"))),
                ParseDateTime(GetTime(station, TStringBuf("departure")))};
        }
    }

    // Merges the stations of another record of the same thread into this one.
    // A non-null "arrival"/"departure" node fills the corresponding time.
    // Throws TTimeConflictException when that time is already set.
    void AddFromThread(const NSc::TValue& thread) {
        const NSc::TArray& stations = thread.Get(TStringBuf("stations")).GetArray();
        for (const NSc::TValue& station : stations) {
            const TPointId stationId = station.Get(TStringBuf("id")).GetIntNumber();
            TTimeInfo& known = StationsById[stationId];

            if (!station.Get(TStringBuf("arrival")).IsNull()) {
                if (known.Arrival != TTimeInfo::Empty) {
                    ythrow TTimeConflictException() << "Arrive two times at station "sv << stationId;
                }

                // Read the nested "time" field exactly as the constructor does;
                // parsing the "arrival" node itself does not see that field.
                known.Arrival = ParseDateTime(GetTime(station, TStringBuf("arrival")));
            }

            if (!station.Get(TStringBuf("departure")).IsNull()) {
                if (known.Departure != TTimeInfo::Empty) {
                    ythrow TTimeConflictException() << "Depart two times from station "sv << stationId;
                }

                known.Departure = ParseDateTime(GetTime(station, TStringBuf("departure")));
            }
        }
    }

    // Returns the times of the segment fromPointId -> toPointId: the arrival
    // at the destination station and the departure from the origin station.
    // Throws TPointNotFoundExcepion when either station is unknown.
    TTimeInfo FindSegmentInfo(TPointId fromPointId, TPointId toPointId) const {
        auto fromIter = StationsById.find(fromPointId);
        if (fromIter == StationsById.end()) {
            ythrow TPointNotFoundExcepion() << "From point not found " << fromPointId;
        }

        auto toIter = StationsById.find(toPointId);
        if (toIter == StationsById.end()) {
            ythrow TPointNotFoundExcepion() << "To point not found " << toPointId;
        }

        return TTimeInfo(toIter->second.Arrival, fromIter->second.Departure);
    }

private:
    THashMap<TPointId, TTimeInfo> StationsById;

    // Returns station[direction]["time"] as a string, or "" when it is absent.
    static TStringBuf GetTime(const NSc::TValue& station, const TStringBuf direction) {
        return station.Get(direction).Get(TStringBuf("time")).GetString("");
    }
};

// Prints a time pair as "TTimeInfo(arrival=..., departure=...)" using RFC 822 strings.
IOutputStream& operator<<(IOutputStream& out, const TTimeInfo& timeInfo) {
    const auto arrivalText = timeInfo.Arrival.ToRfc822String();
    const auto departureText = timeInfo.Departure.ToRfc822String();
    out << "TTimeInfo(arrival=" << arrivalText << ", departure=" << departureText << ")";
    return out;
}

// Map from (route id, departure day) to the thread information for that day.
// AddThreadInfo serializes its own modifications with an internal lock; the
// inherited THashMap interface is not synchronized.
class TRaspThreadStorage: public THashMap<TKey, TRaspThreadInfo> {
public:
    // Adds one parsed thread record for the route `routeId`.
    // Records whose "departure_day" is shorter than 10 characters or cannot be
    // parsed are skipped with a warning. A record whose key is already present
    // is merged into the stored entry and logged as duplicated; a time conflict
    // during the merge is logged and not propagated.
    void AddThreadInfo(const NSc::TValue& parsedLine, TRouteStorage::TRouteId routeId) {
        TInstant departureDay;
        TStringBuf departureDayStr = parsedLine["departure_day"].GetString();
        if (departureDayStr.size() < 10) {
            WARNING_LOG << "Empty departure day" << Endl;
            return;
        }
        // Pass the view itself: going through data() would re-measure the
        // string as a C string and ignore the length carried by the view.
        if (!TInstant::TryParseIso8601(departureDayStr, departureDay)) {
            WARNING_LOG << "Bad departure day " << departureDayStr << Endl;
            return;
        }

        TKey key{routeId, departureDay};
        TGuard<TSpinLock> guard{Lock};
        auto iter = (*this).find(key);
        if (iter != (*this).end()) {
            try {
                iter->second.AddFromThread(parsedLine);
                TString s = TStringBuilder() << "[DUPLICATED] " << parsedLine.ToJson() << Endl;
                ERROR_LOG << s;
            } catch (const TTimeConflictException& e) {
                ERROR_LOG << e.what() << " while processing " << key << Endl;
            }
        } else {
            (*this)[key] = TRaspThreadInfo(parsedLine);
        }
    }

private:
    // Guards the lookup-and-modify sequence in AddThreadInfo.
    TSpinLock Lock;
};

// Settings for FillStorage.
struct TFillOptions {
    // Progress is logged every ReadBreaker lines; 0 disables progress logging.
    int ReadBreaker = 0;
    // When true, lines whose route is not already registered are dropped
    // instead of registering a new route.
    bool FilterRoutes = false;
    // Number of worker threads passed to the thread pool.
    int ThreadCount = 2;
    // Queue size limit passed to the thread pool.
    int QueueSize = 500000;

    TFillOptions(int readBreaker, bool filterRoutes, int threadCount)
        : ReadBreaker{readBreaker}
        , FilterRoutes{filterRoutes}
        , ThreadCount{threadCount}
    {
    }
};

struct TFillTask: public IObjectInQueue {
    TRaspThreadStorage& Storage;
    TRouteStorage* RouteStorage;
    TString Line;
    TFillOptions& Opts;

    TFillTask(TRaspThreadStorage& storage, TRouteStorage* routeStorage, TString line, TFillOptions& opts)
        : Storage(storage)
        , RouteStorage(routeStorage)
        , Line(line)
        , Opts(opts)
    {
    }

    void Process(void*) override {
        const NSc::TValue parsedLine = NSc::TValue::FromJson(Line);

        TRouteStorage::TRouteId routeId;
        if (Opts.FilterRoutes) {
            routeId = RouteStorage->GetRouteId(TString{parsedLine["number"].GetString()});
            if (routeId == TRouteStorage::NotFound) {
                return;
            }
        } else {
            routeId = RouteStorage->GetOrCreateRouteId(TString{parsedLine["number"].GetString()});
        }

        Storage.AddThreadInfo(parsedLine, routeId);
    }
};

// Reads `inp` line by line and schedules a TFillTask for every line on a
// thread pool with opts.ThreadCount workers and a queue of opts.QueueSize.
// Logs the number of scheduled lines every opts.ReadBreaker lines (if non-zero).
// NOTE(review): no explicit Stop() is called; the pool is destroyed when the
// function returns - presumably its destructor drains the queue, confirm.
void FillStorage(IInputStream& inp, TRaspThreadStorage& storage, TRouteStorage* routeStorage, TFillOptions opts) {
    TString currentLine;
    size_t bytesRead = inp.ReadLine(currentLine);

    TThreadPool queue(TThreadPool::TParams().SetBlocking(true).SetCatching(false));
    queue.Start(opts.ThreadCount, opts.QueueSize);

    int scheduled = 0;
    for (; bytesRead != 0; bytesRead = inp.ReadLine(currentLine)) {
        queue.SafeAddAndOwn(THolder(new TFillTask(storage, routeStorage, currentLine, opts)));
        ++scheduled;

        const bool reportProgress = opts.ReadBreaker && (scheduled % opts.ReadBreaker == 0);
        if (reportProgress) {
            INFO_LOG << "Done: " << scheduled << Endl;
        }
    }
}

// Values of the command-line options of the tool.
// Numeric members carry the same defaults that ParseArgs registers, so an
// instance is never left with indeterminate values.
struct TCommandLineArguments {
    // --new-file: file the new threads are read from.
    TString NewFileName;
    // --old-file: file the old threads are read from.
    TString OldFileName;
    // --yt-flight-path: YT table the flights are read from.
    TString YtFlightPath;
    // --yt-limit: stop reading the YT table after more than this many counted rows; 0 means no limit.
    int YtLimit = 0;
    // --read-breaker: progress logging period (in lines) while reading thread files.
    int ReadBreaker = 500000;
    // --thread-count: number of worker threads used while reading thread files.
    int ThreadCount = 7;
    // --output: file that receives the "Not found in future" lines.
    TString Output;
    // --report: file that receives the comparison report.
    TString Report;
};

// Parses the command line into TCommandLineArguments.
// --new-file and --old-file are required as well: main opens both files
// unconditionally, so omitting them could only fail later when opening a file
// with an empty name.
TCommandLineArguments ParseArgs(int argc, const char** argv) {
    auto opts = NLastGetopt::TOpts::Default();

    TCommandLineArguments args;
    opts.AddLongOption("new-file").StoreResult(&args.NewFileName).Required();
    opts.AddLongOption("old-file").StoreResult(&args.OldFileName).Required();
    opts.AddLongOption("yt-flight-path").StoreResult(&args.YtFlightPath).Required();
    opts.AddLongOption("yt-limit").StoreResult(&args.YtLimit).DefaultValue(0);
    opts.AddLongOption("read-breaker").StoreResult(&args.ReadBreaker).DefaultValue(500000);
    opts.AddLongOption("thread-count").StoreResult(&args.ThreadCount).DefaultValue(7);
    opts.AddLongOption("output").StoreResult(&args.Output).Required();
    opts.AddLongOption("report").StoreResult(&args.Report).Required();

    // Constructing the parse result performs the parsing and fills `args`
    // through the StoreResult bindings above.
    NLastGetopt::TOptsParseResult result(&opts, argc, argv);

    return args;
}

// Counters produced by Compare, one per comparison outcome.
struct TStat {
    // Flights whose times differ from the stored thread.
    int Difference{0};
    // Flights whose times match the stored thread.
    int Same{0};
    // Flights with no stored thread for their key.
    int NotFound{0};
    // Flights whose departure or arrival point is missing in the stored thread.
    int PointError{0};

    TStat() = default;
};

// Writes one JSON report line to `outp`.
//   reason       - value of the "reason" field.
//   flightInfo   - optional flight record; adds its fields when present.
//   raspInfo     - optional thread times; adds "rasp_arrival"/"rasp_departure" when present.
//   routeStorage - used to translate the flight's route id into a route name.
// When both records are present, "arrival_difference" and
// "departure_difference" hold the signed number of seconds (flight minus
// thread): negative when the flight time is earlier than the thread time.
void WriteComparisonResult(IOutputStream& outp, TStringBuf reason, const TAviaFlightInfo* flightInfo, const TTimeInfo* raspInfo, const TRouteStorage* routeStorage) {
    // Subtracting two TInstant values goes through TDuration, which holds an
    // unsigned value and so cannot carry the sign of the difference. Compute
    // the difference on signed microseconds instead; for non-negative results
    // this gives the same whole-second value as before.
    const auto signedSeconds = [](TInstant lhs, TInstant rhs) -> i64 {
        const i64 deltaUs = static_cast<i64>(lhs.MicroSeconds()) - static_cast<i64>(rhs.MicroSeconds());
        return deltaUs / 1000000;
    };

    NSc::TValue report;
    report["reason"] = reason;
    if (flightInfo) {
        flightInfo->FillValue(report, routeStorage);
    }

    if (raspInfo) {
        report["rasp_arrival"] = raspInfo->Arrival.ToRfc822String();
        report["rasp_departure"] = raspInfo->Departure.ToRfc822String();
    }

    if (raspInfo && flightInfo) {
        report["arrival_difference"] = signedSeconds(flightInfo->Arrival, raspInfo->Arrival);
        report["departure_difference"] = signedSeconds(flightInfo->Departure, raspInfo->Departure);
    }

    outp << report.ToJson() << Endl;
}

// Compares every flight in `toCheck` against the threads in `storage` and
// writes one report line per flight to `outp`:
//   "not_found"       - no thread for the flight's (route, departure day);
//   "point_not_found" - the thread lacks the departure or arrival point;
//   "equal"           - arrival and departure both match;
//   "difference"      - at least one of them differs.
// Returns the number of flights in each category.
TStat Compare(TRaspThreadStorage& storage, THashSet<TAviaFlightInfo>& toCheck, IOutputStream& outp, TRouteStorage* routeStorage) {
    TStat stat;
    int processed = 0;
    for (const auto& flight : toCheck) {
        const TKey lookupKey{flight.RouteId, flight.DepartureDay};
        const auto found = storage.find(lookupKey);
        if (found != storage.end()) {
            const TRaspThreadInfo& thread = found->second;
            try {
                TTimeInfo threadTimes = thread.FindSegmentInfo(flight.DeparturePointId, flight.ArrivalPointId);
                const bool sameArrival = threadTimes.Arrival == flight.Arrival;
                const bool sameDeparture = threadTimes.Departure == flight.Departure;
                if (sameArrival && sameDeparture) {
                    ++stat.Same;
                    WriteComparisonResult(outp, TStringBuf("equal"), &flight, &threadTimes, routeStorage);
                } else {
                    ++stat.Difference;
                    WriteComparisonResult(outp, TStringBuf("difference"), &flight, &threadTimes, routeStorage);
                }
            } catch (const TPointNotFoundExcepion&) {
                WriteComparisonResult(outp, TStringBuf("point_not_found"), &flight, nullptr, routeStorage);
                ++stat.PointError;
            }
        } else {
            ++stat.NotFound;
            WriteComparisonResult(outp, TStringBuf("not_found"), &flight, nullptr, routeStorage);
        }

        ++processed;
        if (processed % 200000 == 0) {
            INFO_LOG << "Read: " << processed << Endl;
        }
    }

    return stat;
}

int main(int argc, const char** argv) {
    InitGlobalLog2Console();

    TRouteStorage routeStorage;

    auto args = ParseArgs(argc, argv);
    THashSet<TAviaFlightInfo> toCheck;
    TFileOutput outp{args.Output};
    {
        TRaspThreadStorage newStorage;
        {
            INFO_LOG << "Read new threads from " << args.NewFileName << Endl;
            TFile file(args.NewFileName, EOpenModeFlag::RdOnly);
            TFileInput inp{file};
            TFillOptions opts{args.ReadBreaker, false, args.ThreadCount};
            FillStorage(inp, newStorage, &routeStorage, opts);
        }

        INFO_LOG << "Storage size: " << newStorage.size() << Endl;
        INFO_LOG << "Route storage size: " << routeStorage.Size();

        {
            INFO_LOG << "Read flights from partners from table " << args.YtFlightPath << Endl;
            NYT::Initialize(argc, argv);
            auto ytClient = NYT::CreateClient("hahn");
            auto reader = ytClient->CreateTableReader<NAvia::NProto::TAviaFlightInfo>(args.YtFlightPath);
            INFO_LOG << "Read YT table: " << args.YtFlightPath << Endl;

            int ytBreak = args.YtLimit;
            int ytCounter = 0;
            for (; reader->IsValid(); reader->Next()) {
                auto& row = reader->GetRow();
                const TString& route = row.GetRoute();
                const TString& departureTimestamp = row.GetDepartureTimestamp();
                TRouteStorage::TRouteId routeId = routeStorage.GetRouteId(route);
                if (routeId == TRouteStorage::NotFound) {
                    // We don't now actual timetable;
                    outp << "Not found in future: " << route << ", " << departureTimestamp << Endl;
                    continue;
                }

                TInstant departureDate;
                if (!TInstant::TryParseIso8601(TStringBuf(departureTimestamp.data(), 10), departureDate)) {
                    WARNING_LOG << "Bad date: " << departureTimestamp << Endl;
                    continue;
                }

                TKey key{routeId, departureDate};
                auto iter = newStorage.find(key);
                if (iter == newStorage.end()) {
                    outp << "Not found in future: " << route << ", " << departureTimestamp << Endl;
                    continue;
                }

                TRaspThreadInfo& raspInfo = iter->second;
                auto departurePoint = static_cast<TPointId>(row.GetDepartureStationId());
                auto arrivalPoint = static_cast<TPointId>(row.GetArrivalStationId());
                try {
                    TTimeInfo raspTimeInfo = raspInfo.FindSegmentInfo(departurePoint, arrivalPoint);
                    const TString& arrival = row.GetArrivalTimestamp();
                    TInstant arrivalTime = ParseDateTime(arrival);
                    TInstant departureTime = ParseDateTime(departureTimestamp);
                    if ((raspTimeInfo.Arrival == arrivalTime) && (raspTimeInfo.Departure == departureTime)) {
                        toCheck.insert(TAviaFlightInfo{departureDate, departureTime, arrivalTime, routeId, departurePoint,
                                                       arrivalPoint, row.GetQueryId(), row.GetUnixtime()});
                    }
                } catch (TPointNotFoundExcepion& r) {
                    ERROR_LOG << r.what() << " for key " << key << Endl;
                }

                ++ytCounter;
                if (ytCounter % 200000 == 0) {
                    INFO_LOG << "Read: " << ytCounter << Endl;
                }
                if (ytBreak && ytCounter > ytBreak) {
                    break;
                }
            }
        }
    }

    TRaspThreadStorage oldStorage;
    {
        INFO_LOG << "Read old threads from " << args.OldFileName << Endl;
        TFile file(args.OldFileName, EOpenModeFlag::RdOnly);
        TFileInput inp{file};
        FillStorage(inp, oldStorage, &routeStorage, {args.ReadBreaker, true, args.ThreadCount});
    }

    TFileOutput report{args.Report};
    TStat result = Compare(oldStorage, toCheck, report, &routeStorage);
    INFO_LOG << "Stat: " << Endl;
    INFO_LOG << "Not found: " << result.NotFound << Endl;
    INFO_LOG << "Same: " << result.Same << Endl;
    INFO_LOG << "Differ: " << result.Difference << Endl;
    INFO_LOG << "Point Error: " << result.PointError << Endl;

    return 0;
}
