from __future__ import unicode_literals

import collections
import copy
import json
import logging
import re

from sandbox import sdk2
from sandbox.common import enum as scenum
from sandbox.common import itertools as scit
from sandbox.common.types import resource as ctr
from sandbox.projects.common import decorators
from sandbox.projects.common import link_builder as lb
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import paths
from sandbox.projects.common import string
from sandbox.projects.common.testenv_client import TEClient
from sandbox.projects.release_machine import resources as rm_res
from sandbox.projects.release_machine import security as rm_sec
from sandbox.projects.release_machine.core import const as rm_const
from sandbox.projects.release_machine.helpers.arcanum_helper import ArcanumApi
from sandbox.projects.release_machine.helpers.svn_helper import SvnHelper
from sandbox.projects.release_machine.helpers.vcs_indexer_client import VcsIndexerClient
from sandbox.projects.release_machine.helpers import wiki_helper as rm_wiki


# Module-level logger, named after this module via ``__name__``
LOGGER = logging.getLogger(__name__)


def get_rm_changelog(changelog_json_resource):
    """
    Load the release machine changelog stored in a changelog resource.

    :param changelog_json_resource:
        Either an object accepted by `sdk2.ResourceData`, or a resource id (int or string)
        which is resolved through `sdk2.Resource[...]` first
    :return:
        Whatever `fu.json_load` returns for the file of the resource
    """
    # `type(u"")` is `unicode` on Python 2 and `str` on Python 3, so text ids are resolved on both
    if isinstance(changelog_json_resource, (int, str, type(u""))):
        changelog_json_resource = sdk2.Resource[changelog_json_resource]
    changelog_path = str(sdk2.ResourceData(changelog_json_resource).path)
    changelog = fu.json_load(changelog_path)
    # Serializing the whole changelog is not free, do it only when the record is really emitted
    if LOGGER.isEnabledFor(logging.DEBUG):
        LOGGER.debug("Changelog info:\n%s", json.dumps(changelog, indent=2))
    return changelog


def get_changelog_resource(component_name, major_release_num, minor_release_num=0):
    """
    Look up the changelog resource of a particular component release.

    :param component_name: expected value of the "component" resource attribute
    :param major_release_num: expected value of the "major_release_num" resource attribute
    :param minor_release_num: expected value of the "minor_release_num" resource attribute (0 by default)
    :return: result of `.first()` on the query for READY `RELEASE_MACHINE_CHANGELOG` resources
    """
    wanted_attrs = {
        "component": component_name,
        "major_release_num": major_release_num,
        "minor_release_num": minor_release_num,
    }
    query = sdk2.Resource.find(
        state=ctr.State.READY,
        type=rm_res.RELEASE_MACHINE_CHANGELOG,
        attrs=wanted_attrs,
    )
    return query.first()


def build_changelog_lines(resource_name, major_release_num, minor_release_num):
    """
    Build human readable changelog lines for a release.

    Every revision produces at most one line: the tickets which are not mentioned in the
    commit message yet, followed by the commit message itself, all joined with spaces.

    :param resource_name: passed to `get_changelog_resource` as the component name
    :param major_release_num: major release number of the changelog resource
    :param minor_release_num: minor release number of the changelog resource
    :return: list of lines; empty when the changelog resource is not found
    """
    resource = get_changelog_resource(resource_name, major_release_num, minor_release_num)
    if not resource:
        LOGGER.warning("Changelog resource not found, skip building changelog lines")
        return []

    lines = []
    seen_revisions = set()
    for release_item_changes in get_rm_changelog(resource).get("all_changes", []):
        for change in release_item_changes.get("changes", []):
            revision = change["revision"]
            if revision in seen_revisions:
                # the same revision may be listed for several release items
                continue
            seen_revisions.add(revision)
            missing_tickets = [
                ticket for ticket in change["startrek_tickets"]
                if ticket not in change["commit_message"]
            ]
            lines.append(' '.join(missing_tickets + [change["commit_message"]]))
    return lines


def deduplicate_changelog_by_changes(all_changes):
    """
    Group release items which have exactly the same set of revisions in their changes.

    :param all_changes:
        List of dicts, one per release item. Every dict must have the "changes" key with
        a list of dicts, each of them having the "revision" key. The input is not modified.
    :return:
        List of groups `{"release_items": [...], "changes": [...]}` sorted by the number of
        release items in descending order (groups of the same size keep the order of their
        first appearance). "release_items" holds deep copies of the input dicts without the
        "changes" key, "changes" is the changes list of the first release item of the group.
    """
    # Groups are keyed by the revisions tuple itself rather than by its hash, so a hash
    # collision can never merge unrelated release items. The ordered mapping keeps the
    # result deterministic on every interpreter version.
    groups = collections.OrderedDict()
    for release_item_info in all_changes:
        release_item_info_copy = copy.deepcopy(release_item_info)
        release_item_changes = release_item_info_copy.pop("changes")
        revisions_key = tuple(sorted(str(change["revision"]) for change in release_item_changes))
        group = groups.get(revisions_key)
        if group is None:
            groups[revisions_key] = {
                "release_items": [release_item_info_copy],
                "changes": release_item_changes,
            }
        else:
            group["release_items"].append(release_item_info_copy)
    return sorted(groups.values(), key=lambda group: -len(group["release_items"]))


def get_important_changes(release_item_changes, commit_importance_threshold, important_changes_limit):
    # type: (list, int, int) -> (list, bool)
    """
    Select important changes of one release item.

    A change is important when its "commit_importance" is not lower than the threshold
    or when its "added" flag is falsy.

    :param release_item_changes:
        Changelog for one release item (an item of changelog["all_changes"])
    :param commit_importance_threshold:
        Ignore commits with importance level below this threshold
    :param important_changes_limit:
        Limit the number of elements in output

    :return:
        a list of important changes + a boolean value (whether or not the list of important changes
        has been trimmed according to the :param important_changes_limit: value)
    """

    def _is_important(change):
        if change["commit_importance"] >= commit_importance_threshold:
            return True
        return not change["added"]

    selected = [change for change in release_item_changes if _is_important(change)]
    trimmed = len(selected) > important_changes_limit
    if trimmed:
        selected = selected[:important_changes_limit]
    return selected, trimmed


class Reasons(scenum.Enum):
    # Identifiers of the commit filters of this module: every `CommitFilter` subclass below
    # exposes one of these members as its `REASON`.
    # NOTE(review): members are declared as None; presumably `scenum.Enum` substitutes
    # the actual values (member names?) - confirm in `sandbox.common.enum`.
    PATHS = None
    MARKER = None
    REVIEW = None
    TESTENV = None
    DEFAULT = None
    BANNED_AUTHORS = None


class ChangeLogEntry(object):
    """
    Changelog item representation.

    Combines the raw VCS info of one revision with the importance, reasons and patches
    (extra data) which commit filters produced for it.
    """
    # Captures ticket-like keys ("QUEUE-123") located at the start of the text or after a non-word character
    STARTREK_REGEXP = re.compile(r"(?:^|\W)([A-Z]+-[0-9]+)")
    # Arcanum client shared by all entries, created on first use in `_arcanum_api`
    _ARCANUM_API = None

    class Attrs(scenum.Enum):
        # Keys of the dict built by `ChangeLogEntry.to_dict`.
        # NOTE(review): `lower_case()` / `preserve_order()` presumably make the member values
        # lower-cased names and keep the declaration order - confirm in `sandbox.common.enum`.
        scenum.Enum.lower_case()
        scenum.Enum.preserve_order()

        REVISION = None
        DATE = None
        REVIEW_IDS = None
        COMMIT_AUTHOR = None
        STARTREK_TICKETS = None
        SUMMARY = None
        COMMIT_MESSAGE = None
        TE_PROBLEMS = None
        TE_PROBLEM_OWNER = None
        COMMIT_IMPORTANCE = None
        REVISION_PATHS = None
        ADDED = None
        REASONS = None
        COMMIT_AUTHOR_ORIG = None

    def __init__(
        self, vcs_info, importance, reasons,
        patches=None,
    ):
        """
        :param vcs_info:
            Dict with revision info. Keys read by this class: "revision", "author", "msg",
            "added", "date" and the optional "paths"
        :param importance: importance level of the commit
        :param reasons: iterable of reasons why the commit is in the changelog
        :param patches:
            Optional iterable of dicts with extra data (falsy items are skipped). The dicts are
            merged with `dict.update`, so later items override the same keys of earlier ones.
            Keys read by this class: "reviews", "problems", "problem_owners"
        """
        self.__vcs_info = vcs_info
        self.__importance = importance
        self.__reasons = reasons
        self.__patches = {}
        if patches:
            for p in patches:
                if p:
                    self.__patches.update(p)

    def __lt__(self, other):
        """ Entries are ordered by revision number """
        return int(self.vcs_info["revision"]) < int(other.vcs_info["revision"])

    def __eq__(self, other):
        if not isinstance(other, ChangeLogEntry):
            # let Python try the reflected comparison instead of failing with AttributeError
            return NotImplemented
        return (
            self.vcs_info["revision"] == other.vcs_info["revision"] and
            self.vcs_info["author"] == other.vcs_info["author"] and
            self.vcs_info["msg"] == other.vcs_info["msg"] and
            # `review_ids` and `reasons` are lists built from sets: their order carries no meaning
            set(self.review_ids) == set(other.review_ids) and
            self.importance == other.importance and
            sorted(self.problems.keys()) == sorted(other.problems.keys()) and
            self.problem_owners == other.problem_owners and
            set(self.reasons) == set(other.reasons)
        )

    def __ne__(self, other):
        # Python 2 does not derive `!=` from `__eq__`, so it has to be defined explicitly
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    @property
    def vcs_info(self):
        """ Raw revision info passed to the constructor """
        return self.__vcs_info

    @property
    def importance(self):
        """ Importance level passed to the constructor """
        return self.__importance

    @decorators.memoized_property
    def reasons(self):
        """ Reasons passed to the constructor, as a list """
        return list(self.__reasons)

    @decorators.memoized_property
    def commit_author(self):
        """
        Author of the commit. For commits made by "robot*" / "zomb-*" users the login found in the
        commit message by `rm_const.DIFF_RESOLVER_RE` is preferred, unless it is a robot as well.
        """
        if self.__vcs_info["author"].startswith(("robot", "zomb-")):
            diff_resolver = rm_const.DIFF_RESOLVER_RE.search(self.__vcs_info["msg"])
            if diff_resolver:
                better_author = diff_resolver.group(1)
                if not better_author.startswith("robot"):
                    return better_author
        return self.__vcs_info["author"]

    @decorators.memoized_property
    def commit_author_orig(self):
        """The real original commit author, no tricky changes"""
        return self.__vcs_info["author"]

    @decorators.memoized_property
    def review_ids(self):
        """ Review ids found in the commit message plus the ids from the "reviews" patch (no duplicates) """
        review_ids = {int(i) for i in rm_const.REVIEW_RE.findall(self.__vcs_info["msg"])}
        patched_review_ids = self.__patches.get("reviews", {})
        for patched_review_id in patched_review_ids.keys():
            review_ids.add(patched_review_id)
        return list(review_ids)

    def st_tickets(self, arcanum_api=None):
        """
        Collect tickets related to the commit: the ones mentioned in the commit message and
        the ones attached to its reviews.

        :param arcanum_api: client used to request reviews; the shared class-level client by default
        :return: list of ticket keys without duplicates, in arbitrary order
        """
        st_tickets = set(self.STARTREK_REGEXP.findall(self.__vcs_info["msg"]))
        if arcanum_api is None:
            arcanum_api = self._arcanum_api()
        for review_id in self.review_ids:
            if review_id not in self.__patches.get("reviews", {}):
                # tickets of this review are unknown yet, ask Arcanum
                review_request = arcanum_api.get_review_request(review_id)
                tickets_from_review = set(i["name"] for i in review_request["bugsClosed"])
            else:
                tickets_from_review = set(self.__patches["reviews"][review_id])
            LOGGER.info("Got tickets from review %s: %s", review_id, tickets_from_review)
            st_tickets |= tickets_from_review
        LOGGER.info("Total tickets for revision %s: %s", self.vcs_info["revision"], st_tickets)
        return list(st_tickets)

    @property
    def problems(self):
        """ Content of the "problems" patch, empty dict when there is no such patch """
        return self.__patches.get("problems", {})

    @decorators.memoized_property
    def problem_owners(self):
        """ Owners from the "problem_owners" patch plus the diff resolver found in the commit message """
        # work on a copy: the list inside the patch belongs to whoever built the patch
        problem_owners = list(self.__patches.get("problem_owners") or [])
        diff_resolver_matcher = rm_const.DIFF_RESOLVER_RE.search(self.__vcs_info["msg"])
        if diff_resolver_matcher:
            diff_resolver = diff_resolver_matcher.group(1)
            if diff_resolver not in problem_owners:
                problem_owners.append(diff_resolver)
        return problem_owners

    @decorators.memoized_property
    def summary(self):
        """ This is commit message, cleaned up for convenience """
        summary = self.__vcs_info["msg"].replace(rm_const.Urls.STARTREK, "")
        summary = rm_const.REVIEW_RE.sub(" ", summary)
        summary = rm_const.DIFF_RESOLVER_RE.sub("", summary)
        summary = rm_const.DEVEXP_RE.sub("", summary)  # RMDEV-1866
        return summary.strip()

    @classmethod
    def _arcanum_api(cls):
        """ Return the Arcanum client stored on the class, creating it on the first call """
        if cls._ARCANUM_API is None:
            cls._ARCANUM_API = ArcanumApi(token=rm_sec.get_rm_token(None))
        return cls._ARCANUM_API

    @decorators.memoized_property
    def has_unresolved_problems(self):
        """ True when any item of the "problems" patch has a falsy "resolved" flag """
        for problem_info in self.problems.values():
            for p in problem_info:
                if not p["resolved"]:
                    return True
        return False

    def to_dict(self, arcanum_api=None):
        """
        Build a dict representation of the entry, keyed by `ChangeLogEntry.Attrs` members.

        :param arcanum_api: passed to `st_tickets`, which may request reviews through it
        """
        return {
            ChangeLogEntry.Attrs.ADDED: self.vcs_info["added"],
            ChangeLogEntry.Attrs.REVISION: self.vcs_info["revision"],
            ChangeLogEntry.Attrs.DATE: self.vcs_info["date"],
            ChangeLogEntry.Attrs.REVIEW_IDS: self.review_ids,
            ChangeLogEntry.Attrs.COMMIT_AUTHOR: self.commit_author,
            ChangeLogEntry.Attrs.COMMIT_AUTHOR_ORIG: self.commit_author_orig,
            ChangeLogEntry.Attrs.STARTREK_TICKETS: self.st_tickets(arcanum_api=arcanum_api),
            ChangeLogEntry.Attrs.SUMMARY: string.all_to_unicode(self.summary),
            ChangeLogEntry.Attrs.COMMIT_MESSAGE: string.all_to_unicode(self.vcs_info["msg"]),
            ChangeLogEntry.Attrs.TE_PROBLEMS: self.problems,
            ChangeLogEntry.Attrs.TE_PROBLEM_OWNER: self.problem_owners,
            ChangeLogEntry.Attrs.COMMIT_IMPORTANCE: self.importance,
            ChangeLogEntry.Attrs.REVISION_PATHS: self.vcs_info.get("paths", []),
            ChangeLogEntry.Attrs.REASONS: self.reasons,
        }

    @property
    def mark(self):  # todo: remove after RMDEV-596
        return self.importance

    def __repr__(self):
        # NOTE: goes through `to_dict`, hence through `st_tickets`, which may request reviews
        # from Arcanum. Use `short_repr` when only a cheap description is needed.
        return str(self.to_dict())

    def short_repr(self):
        """ Short description built only from the data which is already in memory """
        return "ChangeLogEntry(rev={}, added={}, reasons={}, importance={})".format(
            self.vcs_info["revision"],
            self.vcs_info["added"],
            self.reasons,
            self.importance,
        )


class ChangeLogMaster(object):
    """
    Builds the changelog between a baseline ("production") state and a release candidate:
    gets both logs from the VCS indexer, maps them to initial revisions, keeps the
    revisions present only on one side and rates them with commit filters.
    """

    def __init__(
        self,
        first_rev,
        prod_released_path, prod_released_revision,
        candidate_path, candidate_revision,
        filters,
    ):
        """
        :param first_rev: first revision of the intervals requested from the VCS indexer
        :param prod_released_path:
            Baseline path; everything after "@" is dropped and the result is passed
            through `string.left_strip(..., "arcadia:/arc")`
        :param prod_released_revision: last revision of the baseline interval
        :param candidate_path: candidate path, processed the same way as the baseline one
        :param candidate_revision: last revision of the candidate interval
        :param filters: list of commit filters; `[DefaultFilter()]` is used when it is empty or None
        """
        self.__first_rev = first_rev
        self.__prod_path = string.left_strip(prod_released_path.split("@")[0], "arcadia:/arc")
        self.__prod_rev = prod_released_revision
        self.__cand_path = string.left_strip(candidate_path.split("@")[0], "arcadia:/arc")
        self.__cand_rev = candidate_revision
        self.__filters = filters or [DefaultFilter()]
        LOGGER.info("Init ChangeLogMaster:")
        LOGGER.info("First revision: %s", first_rev)
        LOGGER.info("Baseline: %s@%s", self.__prod_path, self.__prod_rev)
        LOGGER.info("Candidate: %s@%s", self.__cand_path, self.__cand_rev)
        # Log the filters which are really going to be applied (the argument may be empty)
        LOGGER.info("Filters: %s", self.__filters)
        self._vcs_indexer = VcsIndexerClient()

    def get_changelog(self):
        """
        :return: list of `ChangeLogEntry` for revisions which differ between baseline and candidate
            and have non-zero importance
        """
        prod_vcs_info = self.get_modified_changelog(self.__prod_path, self.__prod_rev, "production")
        cand_vcs_info = self.get_modified_changelog(self.__cand_path, self.__cand_rev, "candidate")
        xored_log = self.xor_log(prod_vcs_info, cand_vcs_info)
        filtered_log_entries = list(self.apply_filters(xored_log))
        LOGGER.info("Got %d entries after filtering", len(filtered_log_entries))
        LOGGER.debug("Filtered entries:\n%s", "\n".join([i.short_repr() for i in filtered_log_entries]))
        return filtered_log_entries

    def get_modified_changelog(self, path, rev, changelog_type):
        """
        :param path: base path of the log
        :param rev: last revision of the log
        :param changelog_type: label used only in the log message
        :return: dict built by `_modify_changelog` from the full log of the path
        """
        full_log = self._get_full_changelog(path, rev)
        LOGGER.info("Got %s log: (%d) revisions", changelog_type, len(full_log))
        return self._modify_changelog(full_log)

    def _get_full_changelog(self, path, rev):
        """ Request revisions from the first revision up to `rev` for `path` from the VCS indexer """
        return self._vcs_indexer.interval_info(self.__first_rev, rev, base_path=path)

    def _modify_changelog(self, changelog):
        """
        Map non-trunk revisions to trunk
        :param changelog: List(vcs_info)
        :return: dict {initial revision: vcs_info}
        """
        LOGGER.info("Modifying changelog")
        trunk_changelog = {}
        for vcs_info in changelog:
            for init_rev in SvnHelper.get_initial_revs(vcs_info):
                if -init_rev.rev in trunk_changelog:
                    # remove rollbacked commits
                    del trunk_changelog[-init_rev.rev]
                elif init_rev.rev not in trunk_changelog:
                    if init_rev.rev == vcs_info["revision"]:
                        init_vcs_info = vcs_info
                    else:
                        # info of the initial revision is unknown yet, it is requested below
                        init_vcs_info = None
                    trunk_changelog[init_rev.rev] = init_vcs_info
        no_info_revs = [r for r, val in trunk_changelog.items() if val is None]
        LOGGER.info("Found %d revisions with initial ones", len(no_info_revs))
        if no_info_revs:
            # potentially could be obtained later, after xor
            # will decrease amount of required revisions
            # but it is not very relevant for batch requests
            init_vcs_infos = []
            for chunk in scit.chunker(no_info_revs, 100):
                init_vcs_infos.extend(self._vcs_indexer.batch_info(map(abs, chunk)))
            # NOTE(review): assumes `batch_info` returns exactly one item per requested revision,
            # in the request order - confirm against `VcsIndexerClient`
            for rev, rev_info in zip(no_info_revs, init_vcs_infos):
                trunk_changelog[rev] = rev_info
        return trunk_changelog

    @staticmethod
    def xor_log(prod_log, cand_log):
        """
        Yield infos of revisions which are present only in one of the logs.

        Revisions present only in `prod_log` come first with `added = rev < 0`, then the ones
        present only in `cand_log` with `added = rev > 0`. The yielded dicts are the values
        of the given logs, modified in place.
        NOTE(review): negative keys presumably stand for rollbacks of trunk revisions
        (see `_modify_changelog`) - confirm against `SvnHelper.get_initial_revs`.

        :param prod_log: dict with {revision: Dict(vcs_info)}
        :param cand_log: dict with {revision: Dict(vcs_info)}
        :return: generator of Dict(vcs_info) with added 'added' field
        :raises TypeError: when info of a differing revision is not a dict
        """
        prod_log_keys = set(prod_log.keys())
        cand_log_keys = set(cand_log.keys())
        removed_revs = prod_log_keys - cand_log_keys
        LOGGER.info("Got %d removed revisions", len(removed_revs))
        LOGGER.debug("Removed revisions: %s", removed_revs)
        for rev in removed_revs:
            if not isinstance(prod_log[rev], dict):
                raise TypeError("Info for revision {} is {}. Should be dict!".format(rev, prod_log[rev]))
            prod_log[rev]["added"] = rev < 0
            yield prod_log[rev]

        added_revs = cand_log_keys - prod_log_keys
        LOGGER.info("Got %d added revisions", len(added_revs))
        LOGGER.debug("Added revisions: %s", added_revs)
        for rev in added_revs:
            if not isinstance(cand_log[rev], dict):
                raise TypeError("Info for revision {} is {}. Should be dict!".format(rev, cand_log[rev]))
            cand_log[rev]["added"] = rev > 0
            yield cand_log[rev]

    def apply_filters(self, changelog):
        """
        Rate every revision with all the filters and yield entries with non-zero importance.

        :param changelog: iterable of Dict(vcs_info)
        :return: generator of `ChangeLogEntry`
        """
        LOGGER.info("Apply filters: %s", self.__filters)

        reasons_counter = collections.defaultdict(int)

        for index, vcs_info in enumerate(changelog):

            if index % 500 == 0:
                LOGGER.info("Filtering %sth item", index)

            rated_filters = [(f, f.get_importance(vcs_info)) for f in self.__filters]
            # `max` handles negative importance levels (RMDEV-3219)
            importance = max(sum(rate for _, rate in rated_filters), 0)

            if not importance:
                # the commit is dropped, so there is no need to request patches and build an entry
                continue

            triggered_filters = [commit_filter for commit_filter, rate in rated_filters if rate]
            reasons = {f.REASON for f in triggered_filters}

            cle = ChangeLogEntry(
                vcs_info,
                importance=importance,
                reasons=reasons,
                patches=[f.get_patch(vcs_info) for f in triggered_filters],
            )

            for reason in reasons:
                reasons_counter[reason] += 1

            yield cle

        LOGGER.info("Filter reasons: %s", dict(reasons_counter))


class CommitFilter(object):
    """
    Base class for commit filter, used in ChangeLogMaster.

    Subclasses implement `_get_importance` and may override `get_patch`.
    """
    REASON = None
    MAX_IMPORTANCE = 10

    def __str__(self):
        return str(type(self).__name__)

    __repr__ = __str__

    def _get_importance(self, vcs_info):
        """ Raw (not capped) importance of the commit, to be implemented by subclasses """
        raise NotImplementedError

    def get_importance(self, vcs_info):
        """ Importance of the commit, capped from above by `MAX_IMPORTANCE` """
        raw_importance = self._get_importance(vcs_info)
        if raw_importance < self.MAX_IMPORTANCE:
            return raw_importance
        return self.MAX_IMPORTANCE

    def get_patch(self, vcs_info):
        """ Extra data for the changelog entry of the commit, nothing by default """
        return {}


class PathsFilter(CommitFilter):
    """ Rates a commit by the importance of the paths it touches """
    REASON = Reasons.PATHS

    def __init__(self, paths):
        """
        Gets paths and their importances
        :param paths: List(Tuple(int, str | Path)).
        """
        LOGGER.debug("PathsFilter with paths: %s", paths)
        self._paths = paths

    def _get_importance(self, vcs_info):
        """
        If any path in vcs_info is important: include revision into changelog with max importance found
        If all paths have importance = 0: not include this revision into changelog
        Revisions without "paths" in vcs_info get importance 0.
        """
        importance = 0
        # "paths" is optional in vcs_info (`ChangeLogEntry.to_dict` reads it with a default as well)
        for _, p in vcs_info.get("paths") or []:
            # keep only the part of the path starting from "arcadia/"
            # (the result is an empty string when there is no such part)
            p = "".join(p.partition("arcadia/")[1:])
            importance = max(self._get_single_path_importance(p), importance)
        return importance

    def _get_single_path_importance(self, p):
        """
        If path is not important -> all its subpaths are not important and will be ignored.
        If path is important, some of its subpaths could be not important.
        Ex:
            /not/important/path -> all paths like /not/important/path/subpath are not important
            /important/path could be important, but /important/path/not/important/subpaths could be not important

        The configured paths are checked in the order they were given: a matching path with
        zero importance resets the importance collected from the previous ones.
        """
        importance = 0
        for i, path in self._paths:
            if paths.is_sub_path_of(path, p):
                if i:  # got path in filters with non-zero importance => try to increase importance
                    importance = max(importance, i)
                else:  # path 'p' is not important because it is inside the folder with zero importance
                    importance = 0
        return importance


class MarkerFilter(CommitFilter):
    """ Rates a commit by the marker patterns found in its message """
    REASON = Reasons.MARKER

    def __init__(self, markers):
        """
        Gets compiled regexp patterns with their importances
        :param markers: List(Tuple(int, SRE_Pattern))
        """
        LOGGER.debug("MarkerFilter with markers: %s", markers)
        self._markers = markers

    def _get_importance(self, vcs_info):
        """ Sum of importances of all the markers whose pattern is found in the commit message """
        return sum(
            weight
            for weight, pattern in self._markers
            if pattern.search(vcs_info["msg"]) is not None
        )


class TestenvFilter(CommitFilter):
    """ Rates a commit by the TestEnv problems registered for its revision """
    REASON = Reasons.TESTENV

    def __init__(self, testenv_dbs):
        """
        Loads problems of every given database and groups them by revision
        :param testenv_dbs: List(str)
        """
        LOGGER.debug("TestenvFilter with databases: %s", testenv_dbs)
        self._revisions = collections.defaultdict(list)
        for db_name in testenv_dbs:
            for row in TEClient.get_te_problems(te_db=db_name)["rows"]:
                self._revisions[int(row["test_diff/revision2"])].append(row)

    def _get_importance(self, vcs_info):
        """
        0 when the revision has no problems, when all of them are auto resolved,
        or when all of them are disabled / not stable. Otherwise 3 when some problem
        belongs to a test with "RESPONSES" in its name, and 2 in the remaining cases.
        """
        jobs = self._revisions.get(vcs_info["revision"])
        if not jobs:
            return 0

        has_manual_resolution = any(
            not job["resolve_comment"].startswith("auto resolved:") for job in jobs
        )
        if not has_manual_resolution:
            return 0

        has_meaningful_diff = any(
            not ("disabled" in job["comment"] or "diff is not stable:" in job["comment"])
            for job in jobs
        )
        if not has_meaningful_diff:
            return 0

        for job in jobs:
            if "RESPONSES" in job["test_name"]:
                return 3  # SEARCH-2829
        return 2

    def get_patch(self, vcs_info):
        """
        :return: dict with owners of the problems of the revision ("problem_owners") and
            the problems grouped by their resolve comment ("problems")
        """
        owners = []
        grouped = collections.defaultdict(list)
        for job in self._revisions.get(vcs_info["revision"], []):
            owners.append(job["owner"])
            # problems without a resolve comment are collected under a dedicated key
            comment_key = job["resolve_comment"].strip() or "!!empty!!"
            grouped[comment_key].append({
                "te_diff_id": job["test_diff_id"],
                "resolved": job["is_resolved"] == "yes",
            })

        patch = {}
        patch["problem_owners"] = owners
        patch["problems"] = grouped
        return patch


class ReviewFilter(CommitFilter):
    """ Marks commits which belong to submitted review requests of the given review groups """
    REASON = Reasons.REVIEW

    def __init__(self, review_groups, first_rev, last_rev):
        """
        Requests review requests of the groups right away (see `get_revisions`)
        :param review_groups: List(str)
        :param first_rev: int
        :param last_rev: int
        """
        LOGGER.debug("ReviewFilter with review_groups: %s", review_groups)
        self._review_groups = review_groups
        self._first_rev = first_rev
        self._last_rev = last_rev
        self._revisions = self.get_revisions()

    @decorators.memoized_property
    def _arcanum_api(self):
        return ArcanumApi(token=rm_sec.get_rm_token(None))

    def get_revisions(self):
        """
        :return: dict {revision of the first commit of a review request: review request};
            review requests without commits are skipped
        """
        result = {}
        review_ids = self.get_review_ids()
        for review_id in review_ids:
            LOGGER.debug("Process review request: %s", review_id)
            request = self._arcanum_api.get_review_request(review_id)
            if request["commits"]:
                result[int(request["commits"][0]["committedAtRevision"])] = request
        return result

    def get_review_ids(self):
        """
        :return: set of ids of submitted reviews of every review group, taken from the Arcanum
            dashboard for the configured revisions range (at most 500 reviews are requested per group)
        """
        review_ids = set()
        for group in self._review_groups:
            for review in self._arcanum_api.get_dashboard(params={
                "repo": "arc",
                "view": "raw",
                "reviewGroup": group,
                "status": "submitted",
                "limit": 500,
                "fromRevision": self._first_rev,
                "toRevision": self._last_rev,
            })["reviews"]:
                review_ids.add(int(review["id"]))
        LOGGER.info("Got %s Pull Requests: %s", len(review_ids), review_ids)
        return review_ids

    def _get_importance(self, vcs_info):
        """ 1 for revisions which belong to a known review request, 0 otherwise """
        return 1 if vcs_info["revision"] in self._revisions else 0

    def get_patch(self, vcs_info):
        """
        :return: dict {"reviews": {review id: names of the tickets closed by the review}},
            or an empty dict for revisions without a known review request
        """
        review = self._revisions.get(vcs_info["revision"])
        if review is None:
            # nothing to add, the same as the default `CommitFilter.get_patch` does
            return {}
        return {
            "reviews": {review["id"]: [i["name"] for i in review["bugsClosed"]]}
        }


class DefaultFilter(CommitFilter):
    """ Gives importance 1 to every commit; `ChangeLogMaster` falls back to it when no filters are given """
    REASON = Reasons.DEFAULT

    def _get_importance(self, vcs_info):
        # constant importance, `vcs_info` is not inspected
        return 1


class BannedAuthorsFilter(CommitFilter):
    """
    `BannedAuthorsFilter(["user_1", "user_2"])` bans any commit made by either user_1 or user_2.
    Commits made by these authors will NOT appear in changelog
    """
    REASON = Reasons.BANNED_AUTHORS

    def __init__(self, usernames):
        """
        :param usernames: container of banned author logins
        """
        self._usernames = usernames

    def _get_importance(self, vcs_info):
        """ -100 for commits of banned authors, 0 for all the others """
        is_banned = vcs_info.get("author", "") in self._usernames
        return -100 if is_banned else 0


def format_release_items_info(release_items, changelog):
    """
    Format a table with one row for the release candidate and one row per baseline release item.

    :param release_items:
        Iterable of dicts; items with a falsy "release_item" value are skipped, the others
        must have "first_revision", "baseline_rev_trunk" and "baseline_revision" keys
    :param changelog:
        Dict with "major_release_num" and "minor_release_num" keys and optional
        "candidate_rev_trunk" and "candidate_revision" keys
    :return: result of `rm_wiki.format_table` for the built header and rows
    """
    header = [
        "",
        "Release item",
        "Major N",
        "Minor N",
        "First revision",
        "Trunk revision",
        "Branch or tag revision",
        "Resource link",
    ]

    candidate_row = ["**Candidate**", "", changelog["major_release_num"], changelog["minor_release_num"], ""]
    for changelog_key in ("candidate_rev_trunk", "candidate_revision"):
        candidate_row.append(lb.revision_link(changelog.get(changelog_key, ""), link_type=lb.LinkType.wiki))
    candidate_row.append("")
    rows = [candidate_row]

    for release_item in release_items:
        item = release_item.get("release_item")
        if not item:
            continue
        baseline_row = ["**Baseline**"]
        for item_key in ("resource_name", "major_release", "minor_release"):
            baseline_row.append(item.get(item_key, "?"))
        for revision_key in ("first_revision", "baseline_rev_trunk", "baseline_revision"):
            baseline_row.append(lb.revision_link(release_item[revision_key], link_type=lb.LinkType.wiki))
        baseline_row.append(lb.resource_wiki_link(item.get("id")))
        rows.append(baseline_row)

    return rm_wiki.format_table(header, rows)
