#include "split_finder.h"
#include "helpers.h"

#include <yandex/maps/wiki/common/batch.h>
#include <yandex/maps/wiki/revision/commit.h>
#include <yandex/maps/wiki/revision/branch_manager.h>
#include <yandex/maps/wiki/revision/revisionsgateway.h>
#include <maps/libs/common/include/profiletimer.h>
#include <maps/libs/log8/include/log8.h>

#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>

#include <queue>

namespace maps {
namespace wiki {
namespace diffalert {

namespace rev = revision;
namespace rf = rev::filters;
namespace algo = boost::algorithm;

namespace {

// Prefix of the commit attribute keys that carry edit notes. In this file the
// remainder of such a key (after the prefix) is parsed as an object id.
const std::string ATTR_EDIT_NOTES = "edit_notes:";
// Substrings searched for inside an edit-notes attribute value: the first one
// marks the object as created by a split, the second one as modified by a split.
const std::string NOTE_CREATED_SPLIT = "created-split";
const std::string NOTE_MODIFIED_SPLIT = "modified-split";

// Buffer distance applied to the old geometry before testing whether it
// contains the new geometry.
const double GEOMETRY_SPLIT_TOLERANCE = 0.001; // mercator meters

// Batch size passed to common::applyBatchOp when loading revision ids and commits.
const size_t BATCH_LOADING_SIZE = 500;

/**
 * Extracts the object id encoded in an edit-notes attribute key.
 *
 * @param key    attribute key; the text after the ATTR_EDIT_NOTES prefix is
 *               parsed as the object id
 * @param value  attribute value that is searched for `note`
 * @param note   marker substring that must be present in `value`
 * @return the parsed id, or 0 when `value` does not contain `note` or the id
 *         part of `key` cannot be converted (the latter is logged as an error)
 */
rev::DBID tryGetObjectId(const std::string& key, const std::string& value, const std::string& note)
{
    const bool noteIsPresent = value.find(note) != std::string::npos;
    if (noteIsPresent) {
        try {
            const auto idText = key.substr(ATTR_EDIT_NOTES.size());
            return boost::lexical_cast<rev::DBID>(idText);
        } catch (const std::bad_cast&) {
            // boost::bad_lexical_cast derives from std::bad_cast
            ERROR() << "bad edit_notes: `" << key << "'";
        }
    }
    return 0;
}

} // namespace


/**
 * Initializes the finder's members from the arguments; no other work is done here.
 *
 * @param txn           database transaction, later used to load the branch,
 *                      revisions and commits
 * @param branchId      id of the branch that is loaded for the search
 * @param fromCommitId  start of the commit range; the historical snapshot
 *                      used by the search begins at fromCommitId + 1
 * @param toCommitId    end of the commit range
 * @param diffs         per-object diff data keyed by object id; entries get
 *                      their split status (and possibly old object) updated
 *                      by the search
 */
SplitFinder::SplitFinder(
        pqxx::transaction_base& txn,
        revision::DBID branchId,
        revision::DBID fromCommitId,
        revision::DBID toCommitId,
        std::map<revision::DBID, AoiDiffData>& diffs)
    : txn_(txn)
    , branchId_(branchId)
    , fromCommitId_(fromCommitId)
    , toCommitId_(toCommitId)
    , diffs_(diffs)
{
}

/**
 * Runs the whole split search: gathers object ids from the diffs, loads the
 * edit notes of the relevant commits and marks the diffs that were produced
 * by a split. The elapsed time is logged only when the search was performed.
 */
void SplitFinder::run()
{
    // Identical boundaries leave no commits to inspect.
    if (fromCommitId_ == toCommitId_) {
        return;
    }

    ProfileTimer timer;

    collectObjectIdsFromDiffs();

    // The search needs both an old and a new object with suitable geometry.
    if (!oldObjectIds_.empty() && !newObjectIds_.empty()) {
        collectEditNotes();

        checkForSplit();

        INFO() << "Split search duration: " << timer.getElapsedTime() << "s.";
    }
}

void SplitFinder::collectObjectIdsFromDiffs()
{
    auto check = [](const boost::optional<FullyLoadedObjectData>& object) {
        return object
            && !object->geom.isNull()
            && isLinear(object->geom);
    };

    for (const auto& pair : diffs_) {
        auto objectId = pair.first;
        const auto& diff = pair.second;

        if (check(diff.oldObject)) {
            oldObjectIds_.insert(objectId);
        }

        if (check(diff.newObject)) {
            newObjectIds_.insert(objectId);
        }
    }
}

/**
 * Loads edit notes of the commits that touched the given objects and records
 * the split relations found in them.
 *
 * Steps:
 *  1. the ids are remembered in alreadyLoadedObjectIds_;
 *  2. revision ids of these objects (non-relations with defined geometry) are
 *     taken from the historical snapshot (fromCommitId_ + 1 .. toCommitId_),
 *     and their commit ids not yet present in alreadyLoadedCommitIds_ are
 *     collected;
 *  3. for every such commit the "edit_notes:" attributes are scanned: each
 *     object created by a split is mapped to the objects modified by a split
 *     in the same commit (objectIdToSourceCandidateIds_), and modified objects
 *     that belong to oldObjectIds_ go to modifiedBySplitObjectIds_.
 *
 * @param objectIds  ids of the objects to inspect
 * @return ids of split-modified objects that are neither in oldObjectIds_ nor
 *         already loaded; the caller passes them to a further call
 */
TObjectIds SplitFinder::collectEditNotesForObjects(const TObjectIds& objectIds)
{
    if (objectIds.empty()) {
        return {};
    }
    alreadyLoadedObjectIds_.insert(objectIds.begin(), objectIds.end());

    TObjectIds nextObjectIds;

    auto branch = rev::BranchManager(txn_).load(branchId_);
    rev::RevisionsGateway gateway(txn_, branch);
    auto snapshot = gateway.historicalSnapshot(fromCommitId_ + 1, toCommitId_);

    auto objectFilter = rf::ObjRevAttr::isNotRelation()
        && rf::Geom::defined();

    // Commit ids that have not been processed by previous calls.
    TObjectIds commitIds;
    common::applyBatchOp(
        objectIds,
        BATCH_LOADING_SIZE,
        [&](const TObjectIds& idsBatch) {
            auto&& revisionIds = snapshot.revisionIdsByFilter(idsBatch, objectFilter);
            for (const auto& revisionId : revisionIds) {
                const bool isNewCommit =
                    alreadyLoadedCommitIds_.insert(revisionId.commitId()).second;
                if (isNewCommit) {
                    commitIds.insert(revisionId.commitId());
                }
            }
        });

    if (commitIds.empty()) {
        return {};
    }

    common::applyBatchOp(
        commitIds,
        BATCH_LOADING_SIZE,
        [&](const TCommitIds& idsBatch) {
            // Besides the explicit id list the filter also states the id range
            // spanned by the batch.
            auto commitFilter = rf::CommitAttr::id().in(idsBatch)
                && rf::CommitAttr::id() >= (*idsBatch.begin())
                && rf::CommitAttr::id() <= (*idsBatch.rbegin());

            for (const auto& commit : rev::Commit::load(txn_, commitFilter)) {
                TObjectIds createdBySplit;
                TObjectIds modifiedBySplit;

                for (const auto& attr : commit.attributes()) {
                    if (!algo::starts_with(attr.first, ATTR_EDIT_NOTES)) {
                        continue;
                    }
                    if (auto id = tryGetObjectId(attr.first, attr.second, NOTE_CREATED_SPLIT)) {
                        createdBySplit.insert(id);
                    }
                    if (auto id = tryGetObjectId(attr.first, attr.second, NOTE_MODIFIED_SPLIT)) {
                        modifiedBySplit.insert(id);
                    }
                }

                // Every object modified by a split in this commit is a possible
                // source of every object created by a split in the same commit.
                for (auto createdId : createdBySplit) {
                    objectIdToSourceCandidateIds_.emplace(createdId, modifiedBySplit);
                }

                // An object may be created, split and then deleted inside the
                // commit range, so the chain of splits is followed further.
                for (auto modifiedId : modifiedBySplit) {
                    if (oldObjectIds_.count(modifiedId)) {
                        // the original object was split
                        modifiedBySplitObjectIds_.insert(modifiedId);
                        continue;
                    }
                    if (!alreadyLoadedObjectIds_.count(modifiedId)) {
                        // an intermediate object was split
                        nextObjectIds.insert(modifiedId);
                    }
                }
            }
        });

    return nextObjectIds;
}

/**
 * Collects edit notes starting from newObjectIds_ and keeps going with the
 * ids returned by collectEditNotesForObjects() until a call returns nothing.
 */
void SplitFinder::collectEditNotes()
{
    for (auto pendingObjectIds = newObjectIds_;
         !pendingObjectIds.empty();
         pendingObjectIds = collectEditNotesForObjects(pendingObjectIds))
    {
        // all the work is done in the loop header
    }
}

void SplitFinder::checkForSplit()
{
    for (auto objectId : newObjectIds_) {
        auto diffIt = diffs_.find(objectId);
        auto& diff = diffIt->second;

        if (!checkCreatedBySplitObject(diff)) {
            checkModifiedBySplitObject(diff);
        }
    }
}

/**
 * Checks whether the object of `diff` was modified by a split.
 *
 * The object must be listed in modifiedBySplitObjectIds_ and its new geometry
 * must lie inside the old geometry buffered by GEOMETRY_SPLIT_TOLERANCE.
 *
 * @param diff  diff to classify; its splitStatus must be SplitStatus::None.
 *              On success splitStatus becomes SplitStatus::Modified.
 * @return true when the diff was marked as modified by a split
 */
bool SplitFinder::checkModifiedBySplitObject(AoiDiffData& diff)
{
    ASSERT(diff.splitStatus == SplitStatus::None);

    if (modifiedBySplitObjectIds_.count(diff.objectId) == 0) {
        return false;
    }

    const bool hasOldGeom = diff.oldObject && !diff.oldObject->geom.isNull();
    if (!hasOldGeom) {
        WARN() << "Object " << diff.objectId << " is modified by split but has no geometry";
        return false;
    }

    // The old geometry is slightly enlarged before the containment test.
    Geom tolerantOldGeom(diff.oldObject->geom->buffer(GEOMETRY_SPLIT_TOLERANCE));

    const bool newFitsIntoOld =
        tolerantOldGeom->contains(diff.newObject->geom.geosGeometryPtr());
    if (newFitsIntoOld) {
        diff.splitStatus = SplitStatus::Modified;
    }
    return newFitsIntoOld;
}

/**
 * Checks whether the object of `diff` was created by a split.
 *
 * One commit can contain several split objects, and it is impossible to tell
 * which created part corresponds to which source object. So for every created
 * object id the candidate source object ids are collected
 * (objectIdToSourceCandidateIds_).
 * One object can be split many times in different commits, which forms a
 * "tree of splits". It is traversed here breadth-first to find the candidate
 * ids that are present in the original set oldObjectIds_.
 * Then the geometry check is executed for every such candidate: the new
 * geometry must lie inside the candidate's old geometry buffered by
 * GEOMETRY_SPLIT_TOLERANCE.
 *
 * @param diff  diff to classify. On success its splitStatus becomes
 *              SplitStatus::Created and its oldObject is replaced with the
 *              old object of the matched source.
 * @return true when a source object was found
 */
bool SplitFinder::checkCreatedBySplitObject(AoiDiffData& diff)
{
    auto initialIt = objectIdToSourceCandidateIds_.find(diff.objectId);
    if (initialIt == objectIdToSourceCandidateIds_.end()) {
        return false;
    }
    const auto& initialCandidateObjectIds = initialIt->second;

    if (diff.oldObject) {
        WARN() << "Object " << diff.objectId << " is created by split but has old geometry";
        return false;
    }

    // Ids are marked when they are enqueued (not when they are dequeued), so
    // an id reachable through several split paths is expanded only once.
    std::queue<rev::DBID> candidateObjectIdsQueue;
    TObjectIds alreadyQueuedObjectIds;
    for (auto id : initialCandidateObjectIds) {
        if (alreadyQueuedObjectIds.insert(id).second) {
            candidateObjectIdsQueue.push(id);
        }
    }

    TObjectIds resultCandidateObjectIds;

    while (!candidateObjectIdsQueue.empty()) {
        auto candidateId = candidateObjectIdsQueue.front();
        candidateObjectIdsQueue.pop();

        // An original object terminates the search along this path.
        if (oldObjectIds_.count(candidateId)) {
            resultCandidateObjectIds.insert(candidateId);
            continue;
        }

        // An intermediate object: continue with its own source candidates.
        auto sourcesIt = objectIdToSourceCandidateIds_.find(candidateId);
        if (sourcesIt == objectIdToSourceCandidateIds_.end()) {
            continue;
        }
        for (auto newCandidateId : sourcesIt->second) {
            if (alreadyQueuedObjectIds.insert(newCandidateId).second) {
                candidateObjectIdsQueue.push(newCandidateId);
            }
        }
    }

    if (resultCandidateObjectIds.empty()) {
        return false;
    }

    const auto& newGeom = diff.newObject->geom;

    for (auto candidateId : resultCandidateObjectIds) {
        // candidateId belongs to oldObjectIds_, which is built from the keys
        // of diffs_ whose old object has geometry
        auto candidateDiffIt = diffs_.find(candidateId);
        auto& candidateDiff = candidateDiffIt->second;

        const auto& oldGeom = candidateDiff.oldObject->geom;
        Geom bufferedGeom(oldGeom->buffer(GEOMETRY_SPLIT_TOLERANCE));

        if (bufferedGeom->contains(newGeom.geosGeometryPtr())) {
            diff.splitStatus = SplitStatus::Created;
            diff.oldObject = candidateDiff.oldObject;
            return true;
        }
    }

    return false;
}

} // namespace diffalert
} // namespace wiki
} // namespace maps
