#include "chunk_cleaner.h"

#include <util/generic/hash.h>
#include <util/generic/algorithm.h>
#include <util/system/fs.h>
#include <util/system/file.h>

using namespace NCleaner;
using namespace NHistDb;

// Stores the signal prefixes and the backup flag, then builds the Aho-Corasick
// searcher over the lines of excludedFile. When excludedFile is empty no file
// is opened and the searcher is built from an empty pattern set.
TFilter::TFilter(const TVector<TString>& prefixes, const TString& excludedFile, bool backup)
    : Prefixes(prefixes)
    , Backup(backup)
{
    TDefaultAhoCorasickBuilder patterns;

    const bool hasExcludedFile = !excludedFile.empty();
    if (hasExcludedFile) {
        TFileInput lines(excludedFile);
        TString pattern;
        // Every line of the file becomes one pattern, all with the same value 0.
        // NOTE(review): blank lines are passed through as empty patterns —
        // confirm that this is intended.
        while (lines.ReadLine(pattern)) {
            patterns.AddString(pattern, 0);
        }
    }

    ExcludedSearcher.Reset(new TDefaultMappedAhoCorasick(patterns.Save()));
}

void TFilter::Start(const TStringBuf chunkPath) {
    auto inputHeaderFilePath{chunkPath + HeaderSuffix};
    TFile inputHeaderFile{inputHeaderFilePath, EOpenModeFlag::RdOnly | EOpenModeFlag::Seq};
    TString headerFileContent{TFileInput(inputHeaderFile).ReadAll()};
    TStringBuf inputHeader{headerFileContent};
    inputHeader.Skip(VersionOffset);

    const auto inputDataFilePath{chunkPath + DataSuffix};

    TSomethingFormat inputFormat{inputHeader};
    TSnappyFile inputData{inputDataFilePath, ESnappyMode::READ, inputFormat.GetBlocks()};

    const auto outputHeaderFilePath{inputHeaderFilePath + TempSuffix};
    TFileOutput outputHeaderFile{TFile{outputHeaderFilePath, EOpenModeFlag::CreateAlways | EOpenModeFlag::WrOnly}};

    const auto outputDataFilePath{inputDataFilePath + TempSuffix};
    TSnappyFile outputData{outputDataFilePath};

    CleanChunk(inputFormat, inputData, outputData, outputHeaderFile);

    if (Backup) {
        NFs::Rename(inputHeaderFilePath, inputHeaderFilePath + BackupSuffix);
        NFs::Rename(inputDataFilePath, inputDataFilePath + BackupSuffix);
    }

    NFs::Rename(outputHeaderFilePath, inputHeaderFilePath);
    NFs::Rename(outputDataFilePath, inputDataFilePath);
}

bool TFilter::MatchSignalPrefix(const TString& signalName) const {
    return AnyOf(Prefixes, [&signalName](const TStringBuf prefix) {
        return signalName.StartsWith(prefix);
    });
;}

// Copies every record of the input chunk into the output chunk, dropping the
// values whose signal name matches one of the configured prefixes unless the
// exclusion searcher reports a hit for that name. Values of signals that do
// not match any prefix are always kept.
//
// inputFormat      - parsed header of the input chunk; supplies the time range
//                    and the record iterator.
// inputData        - data file the records are read from.
// outputData       - data file the filtered records are written to; it is
//                    finished here.
// outputHeaderFile - receives CurrentVersion followed by the dumped output
//                    format; it is finished here.
//
// Throws yexception("no timestamps") when the input format has no first or
// no last record time.
void TFilter::CleanChunk(
    TSomethingFormat& inputFormat,
    NHistDb::TSnappyFile& inputData,
    NHistDb::TSnappyFile& outputData,
    TFileOutput& outputHeaderFile) {
    using namespace NZoom::NValue;
    using namespace NZoom::NSignal;
    using TSignalValue = std::pair<NZoom::NSignal::TSignalName, TValue>;

    // Bind the accessors' results before calling Get(): if they are returned
    // by value, taking a pointer straight out of the temporary would leave it
    // dangling by the time it is dereferenced. auto&& keeps a temporary alive
    // for the whole scope and is a plain reference otherwise.
    auto&& firstRecordTimeHolder = inputFormat.FirstRecordTime();
    auto&& lastRecordTimeHolder = inputFormat.LastRecordTime();
    const auto firstRecordTimePtr = firstRecordTimeHolder.Get();
    const auto lastRecordTimePtr = lastRecordTimeHolder.Get();

    if (firstRecordTimePtr == nullptr || lastRecordTimePtr == nullptr) {
        ythrow yexception() << "no timestamps";
    }

    const unsigned long firstRecordTime = *firstRecordTimePtr;
    const unsigned long lastRecordTime = *lastRecordTimePtr;

    // The range end is exclusive, hence +1 to include the last record.
    auto iterator = inputFormat.IterateRecords(xrange(firstRecordTime, lastRecordTime + 1), inputData.GetInputStream());

    TSomethingFormat outputFormat;

    // Keep/drop verdict per prefix-matching signal name, so the exclusion
    // searcher runs once per distinct name and each value costs one lookup.
    THashMap<TString, bool> keepByPrefixedName;

    while (iterator.Next()) {
        TSomethingFormat::TIteratorRow row = std::move(iterator.Get());
        NZoom::NRecord::TRecord& record = std::get<2>(row);

        TVector<TSignalValue> filteredPairs;
        filteredPairs.reserve(record.Len());

        for (TSignalValue& signalValue : record.GetValues()) {
            const TSignalName& signal = signalValue.first;
            const auto& signalName = signal.GetName();

            bool keep = true;
            if (MatchSignalPrefix(signalName)) {
                const auto cached = keepByPrefixedName.find(signalName);
                if (cached != keepByPrefixedName.end()) {
                    keep = cached->second;
                } else {
                    // A prefixed signal survives only if the exclusion list hits it.
                    keep = ExcludedSearcher->AhoContains(signalName);
                    keepByPrefixedName.emplace(signalName, keep);
                }
            }

            if (keep) {
                // `row` is a local owned here, so the value can be moved out of it.
                filteredPairs.emplace_back(signal, std::move(signalValue.second));
            }
        }

        NZoom::NRecord::TRecord filteredRecord{std::move(filteredPairs)};
        outputFormat.WriteRecord(
            std::get<0>(row),
            std::get<1>(row),
            filteredRecord,
            outputData.GetOutputStream());
    }

    // The data stream has to be finished before its blocks are stored in the format.
    outputData.Finish();

    outputFormat.SaveBlocks(outputData.Blocks());

    outputHeaderFile.Write(CurrentVersion.data(), CurrentVersion.size());
    outputHeaderFile.Write(outputFormat.Dump());
    outputHeaderFile.Finish();
}
