#include <saas/tools/repack/params.pb.h>

#include <kernel/multipart_archive/multipart.h>

#include <library/cpp/getoptpb/getoptpb.h>
#include <library/cpp/logger/global/global.h>
#include <library/cpp/protobuf/json/proto2json.h>
#include <library/cpp/json/json_value.h>
#include <library/cpp/json/json_writer.h>

#include <util/folder/path.h>
#include <util/generic/hash.h>
#include <util/random/random.h>
#include <util/string/split.h>

#include <type_traits>

// Resets the output directory: force-deletes whatever currently lives at
// `outputDir`, then creates the directory (with parents) anew.
void RecreateOutputdir(const TFsPath& outputDir) {
    // Drop the previous contents first so stale files are not mixed in.
    outputDir.ForceDelete();

    // Create the directory chain again.
    outputDir.MkDirs();
}

template <typename TCallback>
void TraverseDirectory(const TFsPath& path, TCallback&& callback) {
    if (!path.Exists() || path.IsSymlink()) {
        return;
    }
    if (path.IsFile()) {
        callback(path);
    } else if (path.IsDirectory()) {
        TVector<TFsPath> children;
        path.List(children);
        for (auto& c : children) {
            TraverseDirectory(c, callback);
        }
    } else {
        ERROR_LOG << "Unknown object: " << path << Endl;
    }
}

// Returns the sum of `TFileStat::Size` over every regular file reachable
// from `path` via TraverseDirectory (which skips symlinks).
ui64 GetFileSizeRecursive(const TFsPath& path) {
    ui64 accumulatedSize = 0;

    auto addFileSize = [&accumulatedSize](const TFsPath& file) {
        TFileStat fileInfo;
        file.Stat(fileInfo);
        accumulatedSize += fileInfo.Size;
    };
    TraverseDirectory(path, addFileSize);

    return accumulatedSize;
}

template <typename TInputArchive, typename TOutputArchive>
std::pair<ui32, ui64> Repack(TInputArchive& input, TOutputArchive& output, ui32 maxDocs) {
    auto inputIterator = input.CreateIterator();
    ui32 docsRead = 0;
    ui64 bytesRead = 0;
    for (; inputIterator->IsValid() && (maxDocs == 0 || docsRead < maxDocs); inputIterator->Next()) {
        auto blob = inputIterator->GetDocument();
        output.PutDocument(blob, docsRead, false /*doPrepare*/);
        ++docsRead;
        bytesRead += blob.Size();
        if (docsRead % 100000 == 0) {
            INFO_LOG << "Processed " << docsRead << " so far." << Endl;
        }
    }
    output.Flush();
    INFO_LOG << "Total docs processed: " << docsRead << Endl;
    return {docsRead, bytesRead};
}

// Draws `sampleSize` random doc ids (with repetition) from [0, totalDocs).
//
// Returns an empty sample when `totalDocs` is 0: the range [0, 0) contains no
// doc id to draw, so RandomNumber must not be asked for one.
TVector<ui32> GenerateSample(ui32 sampleSize, ui32 totalDocs) {
    if (totalDocs == 0) {
        return {};
    }

    TVector<ui32> sample(Reserve(sampleSize));
    for (ui32 i = 0; i < sampleSize; ++i) {
        sample.push_back(RandomNumber<ui32>(totalDocs));
    }
    return sample;
}

template <typename TArchive>
std::pair<double, double> DoMeasureReadLatency(TArchive& archive, const TVector<ui32>& sample) {
    ui64 totalBlobSize = 0;
    const TInstant start = TInstant::Now();
    for (auto docId : sample) {
        auto blob = archive.GetDocument(docId);
        totalBlobSize += blob.Size();
    }
    const TInstant finish = TInstant::Now();
    const double latency = double((finish - start).MicroSeconds()) / sample.size();
    const double averReadBytes = totalBlobSize / sample.size();
    return {latency, averReadBytes};
}

// Runs DoMeasureReadLatency twice over the same sample and returns the
// result of the second run only.
template <typename TArchive>
auto MeasureReadLatency(TArchive& archive, const TVector<ui32>& sample) {
    // Warm-up pass: its result is intentionally discarded.
    DoMeasureReadLatency(archive, sample);

    // The pass that is actually reported.
    auto measured = DoMeasureReadLatency(archive, sample);
    return measured;
}

namespace NRTYArchive {
    // Renders the compression-related settings of `config` as a single
    // ';'-separated string: the compression type, then the parameters that
    // apply to that type, then ";PART_LIMIT=<PartSizeLimit>".
    //
    // Throws (via Y_ENSURE) for a compression type not handled below.
    TString PrintCompressionConfig(const NRTYArchive::TMultipartConfig& config) {
        const auto& compression = config.CompressionParams;

        TStringStream fingerprint;
        fingerprint << config.Compression;

        switch (config.Compression) {
            case IArchivePart::RAW: {
                // No extra parameters for raw parts.
                break;
            }
            case IArchivePart::COMPRESSED: {
                fingerprint << ";" << compression.Algorithm;
                fingerprint << ";" << compression.Level;
                break;
            }
            case IArchivePart::COMPRESSED_EXT: {
                const auto& ext = compression.ExtParams;
                fingerprint << ";" << ext.CodecName;
                fingerprint << ";" << ext.BlockSize;
                fingerprint << ";" << ext.LearnSize;
                break;
            }
            default: {
                Y_ENSURE(false, "Unsupported compression type " << config.Compression);
            }
        }

        fingerprint << ";PART_LIMIT=" << config.PartSizeLimit;
        return fingerprint.Str();
    }

    // Type-erased "assign this field from its textual value" operation.
    struct IFieldSetter {
        // Parses `strValue` and stores the result into the bound field.
        virtual void Set(TStringBuf strValue) = 0;

        virtual ~IFieldSetter() = default;
    };

    // Shared handle to a setter.
    using TFieldSetterPtr = TSimpleSharedPtr<IFieldSetter>;

    // IFieldSetter bound to one data member (`field`) of one particular
    // `TConfig` object. Only a reference to the object is stored, so the
    // object must outlive the setter.
    template <typename TConfig, typename T>
    struct TFieldSetter : IFieldSetter {
        TFieldSetter(TConfig& config, T TConfig::* field)
            : Target(config)
            , Member(field)
        {
        }

        // Converts `strValue` with FromString<T> and assigns it to the bound
        // member; a conversion failure propagates as FromString's exception.
        void Set(TStringBuf strValue) final {
            Target.*Member = FromString<T>(strValue);
        }

    private:
        TConfig& Target;
        T TConfig::* Member;
    };

    // Factory that deduces TConfig and T from the arguments and returns the
    // matching TFieldSetter wrapped in a shared pointer.
    template <typename TConfig, typename T>
    TFieldSetterPtr CreateSetter(TConfig& config, T TConfig::* field) {
        using TSetter = TFieldSetter<TConfig, T>;
        return MakeSimpleShared<TSetter>(config, field);
    }

    #define DEFINE_SETTER(config, field) {"Output"#field, CreateSetter(config, &std::remove_reference<decltype(config)>::type::field)}

    struct TMultipartConfigurator {
        TMultipartConfigurator(TMultipartConfig& config) {
            Fields = THashMap<TString, TFieldSetterPtr>{
                DEFINE_SETTER(config, PartSizeLimit),
                DEFINE_SETTER(config, Compression),
                DEFINE_SETTER(config.CompressionParams, Algorithm),
                DEFINE_SETTER(config.CompressionParams, Level),
                DEFINE_SETTER(config.CompressionParams.ExtParams, CodecName),
                DEFINE_SETTER(config.CompressionParams.ExtParams, BlockSize),
                DEFINE_SETTER(config.CompressionParams.ExtParams, LearnSize)
            };
        }

        bool HasField(TStringBuf field) const {
            return Fields.contains(field);
        }

        void Set(TStringBuf field, TStringBuf strValue) {
            if (Fields.contains(field)) {
                Fields.at(field)->Set(strValue);
            }
        }

    private:
        THashMap<TString, TFieldSetterPtr> Fields;
    };

    #undef DEFINE_SETTER

    // Enumerates every combination of output settings requested in `params`
    // and calls `callback(config, fingerprint)` once per distinct fingerprint
    // (as produced by PrintCompressionConfig).
    //
    // Each parameter whose JSON key is known to TMultipartConfigurator forms
    // one "dimension"; its string value is split by params.GetDelimiter()
    // into the list of values to try. Values that fail to parse are logged
    // and skipped. If no dimension is present at all, the callback is invoked
    // once with the default configuration.
    template <typename TCallback>
    void GenerateOutputConfigs(const NRepack::TParams& params, TCallback&& callback) {
        TMultipartConfig outputConfig;
        outputConfig.ReadContextDataAccessType = IDataAccessor::MEMORY_MAP;
        outputConfig.PartSizeLimit = 67108864;                       // 64 * 2^20
        outputConfig.CompressionParams.ExtParams.LearnSize = 262144; // 256 * 2^10
        TMultipartConfigurator configurator(outputConfig);

        NJson::TJsonValue jsonConfig;
        NProtobufJson::Proto2Json(params, jsonConfig);
        Y_ENSURE(jsonConfig.IsMap());

        // (parameter name, candidate values) for every configurable parameter.
        TVector<std::pair<TString, TVector<TString>>> dimensions;
        for (const auto& [key, value] : jsonConfig.GetMapSafe()) {
            if (configurator.HasField(key)) {
                dimensions.emplace_back(key,
                    StringSplitter(value.GetStringRobust()).SplitByString(params.GetDelimiter()).SkipEmpty().ToList<TString>());
            }
        }

        // Nothing to vary: the product over zero dimensions is exactly one
        // configuration (the defaults). Also avoids indexing an empty vector.
        if (dimensions.empty()) {
            const auto fingerprint = PrintCompressionConfig(outputConfig);
            callback(outputConfig, fingerprint);
            return;
        }

        // choices[d] is the index of the value currently applied for
        // dimension d, or -1 if that dimension has not been entered yet.
        TVector<int> choices(dimensions.size(), -1);
        THashSet<TString> seenChoices;

        // Iterative backtracking over the cartesian product of dimensions.
        int currentDimension = 0;
        while (currentDimension >= 0) {
            const auto& dimension = dimensions[currentDimension];
            const auto& values = dimension.second;

            // Advance to the next value of this dimension that parses.
            size_t choice = static_cast<size_t>(choices[currentDimension] + 1);
            for (; choice < values.size(); ++choice) {
                try {
                    configurator.Set(dimension.first, values[choice]);
                    break;
                } catch (...) {
                    ERROR_LOG << "Unable to set dimension " << dimension.first << " = " << values[choice] << Endl;
                }
            }

            if (choice >= values.size()) {
                // Dimension exhausted: reset it and backtrack.
                choices[currentDimension] = -1;
                --currentDimension;
                continue;
            }

            // Remember the index actually applied, so values skipped above
            // are not retried (and re-enumerated) on the next visit.
            choices[currentDimension] = static_cast<int>(choice);

            if (currentDimension + 1 < static_cast<int>(dimensions.size())) {
                ++currentDimension;
            } else {
                // Innermost dimension: one complete configuration is ready.
                auto fingerprint = PrintCompressionConfig(outputConfig);
                const bool inserted = seenChoices.insert(fingerprint).second;
                if (inserted) {
                    callback(outputConfig, fingerprint);
                }
            }
        }
    }
}


// Entry point of the repack tool.
//
// Parses NRepack::TParams from the command line, opens the input archive and,
// for every output configuration produced by GenerateOutputConfigs:
//   1. recreates the output directory and repacks the input into it
//      (unless SkipRepacking is set and the directory already exists);
//   2. reopens the result read-only and measures random-read latency on a
//      fixed sample of doc ids;
//   3. prints one statistics line to stdout in the requested format.
//
// Returns 0 on success, 1 if any exception escapes (its message is logged).
int main(int argc, const char* argv[]) {
    NGetoptPb::TGetoptPbSettings getoptSettings;
    getoptSettings.DumpConfig = false;
    NRepack::TParams params = NGetoptPb::GetoptPbOrAbort(argc, argv, getoptSettings);
    if (params.GetVerbose()) {
        InitGlobalLog2Console(TLOG_INFO);
    } else {
        InitGlobalLog2Console(TLOG_ERR);
    }
    try {
        using namespace NRTYArchive;

        if (params.GetDelimiter().empty()) {
            params.SetDelimiter(",");
        }

        TMultipartConfig inputConfig;
        inputConfig.ReadContextDataAccessType = IDataAccessor::MEMORY_MAP;
        inputConfig.Compression = FromString<IArchivePart::TType>(params.GetInputCompression());
        inputConfig.CompressionParams.Algorithm = FromString<IArchivePart::TConstructContext::TCompressionParams::TAlgorithm>(params.GetInputAlgorithm());

        const auto inputArchivePath = TFsPath(params.GetInputDir()) / params.GetInputPrefix();
        const auto outputArchivePath = TFsPath(params.GetOutputDir()) / params.GetOutputPrefix();

        auto inputArchive = TArchiveOwner::Create(inputArchivePath, inputConfig, 0, true);

        // Repack() treats a limit of 0 as "no limit"; mirror that here so the
        // sampling range is not collapsed to [0, 0) when no limit is given.
        const auto maxDocs = params.GetInputMaxDocs();
        const auto docsInArchive = inputArchive->GetDocsCount(false);
        const ui32 inputSize = (maxDocs == 0) ? docsInArchive : Min(docsInArchive, maxDocs);

        // The same sample is reused for every output configuration.
        const auto sample = GenerateSample(params.GetTestReadDocs(), inputSize);

        if (params.GetStatFormat() == NRepack::TSV_WITH_HEADER) {
            Cout << "compression"
                << "\tunpacked_size"
                << "\tdoc_count"
                << "\tpacked_size"
                << "\tcompression_ratio"
                << "\tlatency_us"
                << "\taver_read_bytes"
                << Endl;
        }

        GenerateOutputConfigs(params, [&](const TMultipartConfig& outputConfig, const TString& outputFingerprint) {
            ui32 docCount = 0;
            ui64 docSize = 0;

            const bool skipRepack = params.GetSkipRepacking() && NFs::Exists(params.GetOutputDir());
            if (skipRepack) {
                INFO_LOG << "Skip repacking" << Endl;
            } else {
                INFO_LOG << "Recreate directory " << params.GetOutputDir() << Endl;
                RecreateOutputdir(params.GetOutputDir());

                INFO_LOG << "Repack archive " << PrintCompressionConfig(inputConfig) << " into " << outputFingerprint << Endl;
                INFO_LOG << "Input: " << inputArchivePath << Endl;
                INFO_LOG << "Output: " << outputArchivePath << Endl;

                // Scoped so the writable archive is destroyed before the
                // result is reopened read-only below.
                auto outputArchive = TArchiveOwner::Create(outputArchivePath, outputConfig, 0, false);

                const auto [docs, bytes] = Repack(*inputArchive, *outputArchive, maxDocs);
                docCount = docs;
                docSize = bytes;
            }

            auto readonlyArchive = TReadOnlyArchiveOwner::Create(outputArchivePath, outputConfig);
            if (skipRepack) {
                // Nothing was repacked, so take the count from the archive.
                // The unpacked size stays 0 in this mode.
                docCount = readonlyArchive->GetAllDocsCount();
            }

            const ui64 outputSize = GetFileSizeRecursive(params.GetOutputDir());
            double latencyUs = 0;
            double averReadBytes = 0;
            // Packed size as a percentage of the unpacked size (0 if unknown).
            const double compressionRatio = docSize ? double(outputSize) * 100 / docSize : 0;

            if (!sample.empty()) {
                INFO_LOG << "Measure read latency" << Endl;
                const auto [latency, averRead] = MeasureReadLatency(*readonlyArchive, sample);
                latencyUs = latency;
                averReadBytes = averRead;
            }

            switch (params.GetStatFormat()) {
                case NRepack::EOutputFormat::TSV_WITH_HEADER:
                case NRepack::EOutputFormat::TSV:
                    Cout << outputFingerprint
                        << "\t" << docSize
                        << "\t" << docCount
                        << "\t" << outputSize
                        << "\t" << compressionRatio
                        << "\t" << latencyUs
                        << "\t" << averReadBytes;
                    break;
                case NRepack::EOutputFormat::TSKV:
                    Cout << "compression=" << outputFingerprint
                        << "\tunpacked_size=" << docSize
                        << "\tdoc_count=" << docCount
                        << "\tpacked_size=" << outputSize
                        << "\tcompression_ratio=" << compressionRatio
                        << "\tlatency_us=" << latencyUs
                        << "\taver_read_bytes=" << averReadBytes;
                    break;
            }
            Cout << Endl;
        });

        return 0;

    } catch (...) {
        ERROR_LOG << CurrentExceptionMessage() << Endl;
        return 1;
    }
}
