#include "dir_digest.h"

#include <library/cpp/digest/md5/md5.h>
#include <library/cpp/logger/global/global.h>

#include <sys/stat.h>

#include <util/folder/iterator.h>
#include <util/folder/path.h>
#include <util/stream/format.h>
#include <util/stream/str.h>
#include <util/string/cast.h>
#include <util/system/file.h>

#include <util/generic/maybe.h>


// Calculates a cheap MD5 digest describing the directory tree rooted at `path`.
//
// The traversal uses FTS_LOGICAL with entries sorted by name. For every regular file
// its size and name are fed into the digest; for every directory only its name is.
// The implementation should stay reasonably fast, since it's called for a number
// of directories on every HealthMessage - so no content reading/hashing allowed.
// File timestamps are deliberately excluded - those may change without content changes.
//
// Returns the 32-character digest string, or an empty string (after logging an error)
// if the traversal throws a yexception.
TString DirDigest(const char *path) {
    MD5 digest;

    TDirIterator::TOptions options(FTS_LOGICAL);
    options.SetSortByName();
    try {
        TDirIterator it(path, options);
        while (auto cur = it.Next()) {
            switch (cur->fts_info) {
                case FTS_F:
                    // Regular file: the size goes in first, then the name (shared with directories).
                    digest.Update(&cur->fts_statp->st_size, sizeof cur->fts_statp->st_size);
                    [[fallthrough]];
                case FTS_D:
                    digest.Update(cur->fts_name, cur->fts_namelen);
                    break;
                default:
                    // Every other entry kind does not contribute to the digest.
                    break;
            }
        }
    } catch (const yexception& e) {
        ERROR_LOG << "Failed to calculate digest of " << path << ": " << e.what() << Endl;
        return "";
    }

    char buf[33]; // fixed string length of 32 characters + \0
    return digest.End(buf);
}

// Recursively collects content hash and size for files reachable from `configPath`.
//
// result     - map that receives one entry per accepted file.
// prefix     - relative key accumulated so far; when empty, the file's own name is used as the key.
// configPath - directory to descend into, or a single file to record.
// filter     - optional regex; when set, only files whose name matches are recorded.
// exclude    - optional regex; when set, files whose name matches are skipped.
//
// Both regexes are applied to the last path component only, and only to files
// (directories are always descended into).
void FillDirHashes(TDirHashInfo& result, const TString& prefix, TFsPath configPath, const TRegExMatch* filter, const TRegExMatch* exclude) {
    if (configPath.IsDirectory()) {
        TVector<TFsPath> entries;
        configPath.List(entries);
        for (const TFsPath& entry : entries) {
            // Child key is "<prefix>/<name>", or just "<name>" at the top level.
            const TString childPrefix = (prefix.empty() ? TString() : prefix + "/") + entry.GetName();
            FillDirHashes(result, childPrefix, entry, filter, exclude);
        }
        return;
    }

    const TString fileName = configPath.GetName();
    if (filter && !filter->Match(fileName.data())) {
        return;
    }
    if (exclude && exclude->Match(fileName.data())) {
        return;
    }

    auto& info = result[prefix.empty() ? fileName : prefix];
    info.Hash = MD5::File(configPath.GetPath().data());
    info.Size = TFile(configPath.GetPath(), RdOnly).GetLength();
    DEBUG_LOG << "file info for " << configPath.GetPath() << " : " << info.Hash << "/" << info.Size << Endl;
}

// Returns the subset of `hashes` whose key's last path component passes the regexes.
//
// An entry is kept when `filter` is null or matches the name, and `exclude` is null
// or does not match the name. With both regexes null the input is returned as a copy.
TDirHashInfo FilterDirHashes(const TDirHashInfo& hashes, const TRegExMatch* filter, const TRegExMatch* exclude) {
    const bool nothingToApply = !filter && !exclude;
    if (nothingToApply) {
        return hashes;
    }

    TDirHashInfo kept;
    for (const auto& entry : hashes) {
        const auto baseName = TFsPath(entry.first).GetName();
        if (filter && !filter->Match(baseName.data())) {
            continue;
        }
        if (exclude && exclude->Match(baseName.data())) {
            continue;
        }
        kept.emplace(entry.first, entry.second);
    }
    return kept;
}

// Builds the hash/size map for `path` by running FillDirHashes from an empty prefix.
// `filter` and `exclude` are forwarded unchanged; either may be null.
TDirHashInfo GetDirHashes(const TString& path, const TRegExMatch* filter, const TRegExMatch* exclude) {
    TDirHashInfo collected;
    const TString emptyPrefix;
    FillDirHashes(collected, emptyPrefix, path, filter, exclude);
    return collected;
}

// Compares two hash pools and records every discrepancy in `report` (key -> description).
//
// Each entry of `hashes1` is looked up in `hashes2`; a missing entry, a size mismatch or
// (when sizes agree) a hash mismatch is reported. When `checkInclude1To2` is false the
// reverse inclusion is checked too: entries present only in `hashes2` are reported.
//
// Returns true when `report` is empty after the comparison. Note that `report` is not
// cleared here, so entries already present in it also make the result false.
bool CompareHashes(const TDirHashInfo& hashes1, const TDirHashInfo& hashes2, TMap<TString, TString>& report, bool checkInclude1To2) {
    for (const auto& [name, first] : hashes1) {
        const auto found = hashes2.find(name);
        if (found == hashes2.end()) {
            report[name] = "object found IN FIRST hashes pool but NOT in SECOND";
            continue;
        }

        const auto& second = found->second;
        if (first.Size != second.Size) {
            report[name] = "different size: " + ToString(first.Size) + " != " + ToString(second.Size);
        } else if (first.Hash != second.Hash) {
            report[name] = "different hash: " + first.Hash + " != " + second.Hash;
        }
    }

    if (checkInclude1To2) {
        // Only the one-directional inclusion of the first pool in the second was requested.
        return report.empty();
    }

    for (const auto& entry : hashes2) {
        if (!hashes1.contains(entry.first)) {
            report[entry.first] = "object found IN SECOND hashes pool but NOT in FIRST";
        }
    }
    return report.empty();
}

bool DirsAreEqual(const TString& path1, const TString& path2, TMap<TString, TString>& report, TRegExMatch* filter, TRegExMatch* exclude) {
    TDirHashInfo hashes1 = GetDirHashes(path1, filter, exclude);
    TDirHashInfo hashes2 = GetDirHashes(path2, filter, exclude);
    return CompareHashes(hashes1, hashes2, report);
}

// Populates the size info from the direct children of `filePath`.
// If the path does not exist the object is left empty. Each child is registered
// under its name (see AddInfoUnlocked) with the size reported by TFileStat.
TFilesSizeInfo::TFilesSizeInfo(const TString& filePath) {
    const TFsPath root(filePath);
    if (root.Exists()) {
        TVector<TFsPath> entries;
        root.List(entries);
        for (const auto& entry : entries) {
            AddInfoUnlocked(entry.GetName(), TFileStat{entry.GetPath()}.Size);
        }
    }
}

// Adds the sizes and counts from `other` into this object, entry by entry.
// Names that are not yet present here are created by the map's operator[].
// This object's Mutex is held for the duration of the merge.
// NOTE(review): `other.SizesCounts` is read without taking `other`'s mutex - confirm
// that callers do not modify `other` concurrently.
void TFilesSizeInfo::Merge(const TFilesSizeInfo& other) {
    TGuard<TMutex> g(Mutex);
    // Bind by const reference: a by-value binding would copy every key/value pair.
    for (const auto& [name, otherInfo] : other.SizesCounts) {
        auto& info = SizesCounts[name];
        info.Size += otherInfo.Size;
        info.Count += otherInfo.Count;
    }
}

// Builds a JSON map describing the collected sizes.
//
// For every entry the map contains "<name>" -> size and, when the entry was seen more
// than once, "<name>_count" -> count. Two aggregate keys are added: "__SUM" (total size)
// and "__COUNT" (sum of the counts that were emitted, i.e. of entries with Count > 1).
// When `isHumanReadable` is true, sizes are written as HumanReadableSize(..., SF_BYTES)
// strings; otherwise as raw numbers. The Mutex is held while the entries are read.
NJson::TJsonValue TFilesSizeInfo::GetReport(bool isHumanReadable) const {
    NJson::TJsonValue result(NJson::JSON_MAP);
    auto addSize = [&](const TString& name, ui64 size) {
        if (isHumanReadable) {
            TStringStream out;
            out << HumanReadableSize(size, SF_BYTES);
            result.InsertValue(name, out.Str());
        } else {
            result.InsertValue(name, size);
        }
    };

    TGuard<TMutex> g(Mutex);
    ui64 sumSize = 0;
    ui64 sumCount = 0;
    // Bind by const reference: a by-value binding would copy every key/value pair.
    for (const auto& [name, info] : SizesCounts) {
        addSize(name, info.Size);
        sumSize += info.Size;
        if (info.Count > 1) {
            result.InsertValue(name + "_count", info.Count);
            // NOTE(review): single-occurrence entries are not included in "__COUNT" -
            // confirm this is intended.
            sumCount += info.Count;
        }
    }
    addSize("__SUM", sumSize);
    result.InsertValue("__COUNT", sumCount);
    return result;
}

// Tries to remove a numeric part index that follows the first ".part." in `fileName`.
//
// The component after ".part." (up to the next '.' or the end of the name) must be
// non-empty and consist of decimal digits only. On success the name is returned with
// ".<digits>" removed, e.g. "a.part.12.b" -> "a.part.b" and "a.part.3" -> "a.part".
// Returns Nothing() when there is no ".part.", nothing follows it, or the component
// contains a non-digit character.
TMaybe<TString> TryRMPartNum(const TString& fileName) {
    static const TString marker(".part.");

    const auto markerPos = fileName.find(marker);
    if (markerPos == TString::npos) {
        return Nothing();
    }

    const auto digitsBegin = markerPos + marker.size();
    if (digitsBegin >= fileName.size()) {
        return Nothing();
    }

    // Searching from digitsBegin + 1 makes the component at least one character long;
    // that character is still validated as a digit by the loop below.
    auto digitsEnd = fileName.find('.', digitsBegin + 1);
    if (digitsEnd == TString::npos) {
        digitsEnd = fileName.size();
    }

    for (auto idx = digitsBegin; idx < digitsEnd; ++idx) {
        const auto ch = fileName[idx];
        const bool isDigit = ch >= '0' && ch <= '9';
        if (!isDigit) {
            return Nothing();
        }
    }

    // Head is everything up to (but excluding) the dot that precedes the digits;
    // tail is everything from the dot after the digits (possibly empty).
    const auto headLen = digitsBegin - 1;
    const auto tailLen = fileName.size() - digitsEnd;

    TString stripped;
    stripped.reserve(headLen + tailLen);
    stripped.append(fileName.data(), 0, headLen);
    stripped.append(fileName.data(), digitsEnd, tailLen);
    return TMaybe<TString>(std::move(stripped));
}

void TFilesSizeInfo::AddInfoUnlocked(const TString& fileName, const ui64 size) {
    auto& info = SizesCounts[TryRMPartNum(fileName).GetOrElse(fileName)];
    info.Size += size;
    ++info.Count;
}

// Returns the sum of Size over all collected entries.
// NOTE(review): unlike GetReport, this reads SizesCounts without taking the Mutex -
// confirm callers provide the necessary synchronization.
ui64 TFilesSizeInfo::GetTotalSize() const {
    ui64 sumSize = 0;
    // Iterate by const reference: only one field is read, so copying each entry is wasted work.
    for (const auto& entry : SizesCounts) {
        sumSize += entry.second.Size;
    }
    return sumSize;
}
