# -*- coding: utf-8 -*-
import datetime
import requests
from collections import namedtuple
import logging
import time
import urllib3.util

# Default age below which a "pending" status still counts as checked
# (see GithubHelper.is_statuses_checked).
PENDING_DELTA = datetime.timedelta(hours=3)

# check_rate_limit() reports a problem when fewer API requests than this remain.
DANGEROUS_API_RATE_LIMIT = 50

# Ids of GitHub organizations whose members are treated as trusted.
TRUSTED_ORG_IDS = {
    7409213,   # yandex
    28471076,  # altinity
    54801242,  # clickhouse
}

# A pull request carrying any of these labels may be tested.
OK_TEST_LABEL = {
    "can be tested",
    "release",
    "pr-documentation",
    "pr-doc-fix",
}
# A pull request carrying this label must not be tested.
DO_NOT_TEST_LABEL = "do not test"

# Minimal stand-in for a pull request: only carries a ``number``.
FakePR = namedtuple("FakePR", ["number"])

# Individual trusted contributors who are not in any trusted organization.
# Can be changed at runtime: we append users that we learned to be in
# a trusted org, to save GitHub API calls.
# Logins are compared with exact string equality, so an entry must match the
# GitHub login character for character (no stray whitespace).
trusted_contributors = {
    "achimbab",
    "adevyatova",   # DOCSUP
    "Algunenano",   # Raúl Marín, Tinybird
    "AnaUvarova",   # DOCSUP
    "anauvarova",   # technical writer, Yandex
    "annvsh",       # technical writer, Yandex
    "atereh",       # DOCSUP
    "azat",
    "bharatnc",     # Newbie, but already with many contributions.
    "bobrik",       # Seasoned contributor, Cloudflare
    "BohuTANG",
    "damozhaeva",   # DOCSUP
    "den-crane",
    "gyuton",       # DOCSUP; technical writer, Yandex
    "hagen1778",    # Roman Khavronenko, seasoned contributor
    "hczhcz",
    "hexiaoting",   # Seasoned contributor
    "ildus",        # adjust, ex-pgpro
    "javisantana",  # a Spanish ClickHouse enthusiast, ex-Carto
    "ka1bi4",       # DOCSUP
    "kirillikoff",  # DOCSUP
    "kitaisreal",   # Seasoned contributor
    "kreuzerkrieg",
    "lehasm",       # DOCSUP
    "michon470",    # DOCSUP
    "MyroTk",       # Tester in Altinity
    "myrrc",        # Michael Kot, Altinity
    "nikvas0",
    "nvartolomei",
    "olgarev",      # DOCSUP
    "otrazhenia",   # Yandex docs contractor
    "pdv-ru",       # DOCSUP
    "podshumok",    # cmake expert from QRator Labs
    "s-mx",         # Maxim Sabyanin, former employee, present contributor
    "sevirov",      # technical writer, Yandex
    "spongedu",     # Seasoned contributor
    "ucasFL",       # Amos Bird's friend
    "vdimir",       # Employee
    "vzakaznikov",
    "YiuRULE",
    "zlobober",     # Developer of YT
}


def pr_is_by_trusted_user(pr_info):
    """Tell whether the author of the pull request is trusted.

    The author is trusted when their login is listed in
    ``trusted_contributors`` or when one of their organizations has an id in
    ``TRUSTED_ORG_IDS``. In the second case the login is added to
    ``trusted_contributors`` so that later checks need no organization lookup.

    :param pr_info: object exposing the PyGithub pull request as ``pygithub_pr``.
    :return: True if the author is trusted, False otherwise.
    """
    pull = pr_info.pygithub_pr
    login = pull.user.login
    if login in trusted_contributors:
        logging.info("User '{}' is trusted".format(login))
        return True

    # Not on the explicit list; fall back to the organization membership.
    logging.info("User '{}' is not trusted".format(login))

    for org in pull.user.get_orgs():
        org_id = org.id
        if org_id not in TRUSTED_ORG_IDS:
            logging.info("Org '{}' is not trusted".format(org_id))
            continue

        logging.info("Org '{}' is trusted; will mark user {} as trusted".format(org_id, login))
        trusted_contributors.add(login)
        return True

    return False


def pr_can_be_tested(pr_info):
    """Decide whether the pull request described by ``pr_info`` may be tested.

    Labels are consulted first, in priority order: 'force tests' (test),
    'do not test' (skip), 'can be tested' (test). Without any of them the
    pull request is tested only if its author is trusted.

    :param pr_info: object with ``labels`` and ``number`` attributes.
    :return: True if the pull request may be tested, False otherwise.
    """
    # (label, verdict, log message) - the first label present wins.
    label_rules = (
        ('force tests', True,
         "PR #{} will be tested because it is labeled 'force tests'"),
        ('do not test', False,
         "PR #{} will not be tested because it is labeled 'do not test'"),
        ('can be tested', True,
         "PR #{} will be tested because it is labeled as 'can be tested'"),
    )
    for label, verdict, message in label_rules:
        if label in pr_info.labels:
            logging.info(message.format(pr_info.number))
            return verdict

    if not pr_is_by_trusted_user(pr_info):
        logging.info("PR #{} will not be tested because it does not satisfy any condition for testing".format(pr_info.number))
        return False

    logging.info("PR #{} will be tested because it is by a trusted user".format(pr_info.number))
    return True


class GithubHelper(object):
    def __init__(self, token):
        """Create the PyGithub client for ``token`` and start with empty caches."""
        # PyGithub is imported here rather than at module level.
        from github import Github

        retry_policy = urllib3.util.Retry(5, backoff_factor=0.1)
        self.gh = Github(token, retry=retry_policy)

        # Caches keyed by pull request number.
        self.pr_commit_cache = {}
        self.pr_files_cache = {}
        self.can_be_tested_pulls_cache = {}
        self.do_not_test_pulls_cache = {}
        self.yandex_users_pr_cache = {}

        # Pull requests already selected by get_open_pull_requests().
        self.fresh_prs_cache = set()

    def get_repo(self, name):
        for repo in self.gh.get_user().get_repos():
            if repo.full_name == name:
                return repo
        raise Exception("Repository {} not found for user".format(name))

    def get_oauth_header(self):
        # bad code
        return self.gh._Github__requester._Requester__authorizationHeader

    def _filter_statuses(self, statuses):
        """
        Squash statuses to latest state
        1. context="first", state="success", update_time=1
        2. context="second", state="success", update_time=2
        3. context="first", stat="failure", update_time=3
        =========>
        1. context="second", state="success"
        2. context="first", stat="failure"
        """
        filt = {}
        for status in sorted(statuses, key=lambda x: x.updated_at):
            filt[status.context] = status
        return filt.values()

    def _can_be_tested_label(self, pull):
        if pull.number not in self.can_be_tested_pulls_cache:
            pr_labels = set([l.name for l in pull.get_labels()])
            self.can_be_tested_pulls_cache[pull.number] = len(OK_TEST_LABEL.intersection(pr_labels)) > 0

        return self.can_be_tested_pulls_cache[pull.number]

    def _do_not_test_label(self, pull):
        if pull.number not in self.do_not_test_pulls_cache:
            self.do_not_test_pulls_cache[pull.number] = (DO_NOT_TEST_LABEL in [l.name for l in pull.get_labels()])

        return self.do_not_test_pulls_cache[pull.number]

    def is_trusted_pr(self, pull_request):
        if pull_request.number not in self.yandex_users_pr_cache:
            if pull_request.user.login in trusted_contributors:
                self.yandex_users_pr_cache[pull_request.number] = True
            else:
                for org in pull_request.user.get_orgs():
                    if org.id in TRUSTED_ORG_IDS:
                        self.yandex_users_pr_cache[pull_request.number] = True
                        break
                else:
                    self.yandex_users_pr_cache[pull_request.number] = False
        return self.yandex_users_pr_cache[pull_request.number]

    def get_filtered_statuses(self, commit):
        return self._filter_statuses(commit.get_statuses())

    def is_commit_checked(self, commit, context, pending_delta=PENDING_DELTA):
        """
        Is commit has status in stat "success" or "failure"
        """
        statuses = self.get_filtered_statuses(commit)
        return self.is_statuses_checked(statuses, context, pending_delta)

    def is_statuses_checked(self, statuses, context, pending_delta=PENDING_DELTA):
        """
        Is commit has status in stat "success" or "failure"
        """
        now = datetime.datetime.utcnow()
        for status in statuses:
            if context == status.context:
                logging.info(
                    "Check '%s' state '%s' updated at '%s' (%s ago, pending delta %s)",
                    context, status.state, status.updated_at, now - status.updated_at, pending_delta)
                if status.state in ("failure", "success"):
                    return True
                elif status.state == "pending" and now - status.updated_at < pending_delta:
                    return True
        return False

    def is_commit_marked_by_status(self, commit, context):
        statuses = commit.get_statuses()
        for status in statuses:
            if context in status.context:
                return True
        return False

    def get_commit_status(self, commit, context):
        for status in self.get_filtered_statuses(commit):
            if context in status.context:
                return status
        return None

    def get_all_unchecked_commits_since(self, since, repo, check_context):
        # Apparently 'since' doesn't work in the expected way and brings old commits
        # from branches that were recently merged. The commit.last_modified field also
        # doesn't work properly in PyGithub: https://github.com/PyGithub/PyGithub/issues/1064
        # We need to be careful here to not pull in old commits, because we may then
        # for example clobber the website by publishing an obsolete version.
        # A proper solution would be to use --first-parent to fetch commits
        # from master, but GitHub API doesn't allow this, so it is going to be
        # complicated.
        commits_orig = [x for x in repo.get_commits(since=since)]
        commits_filtered = [x for x in commits_orig if x.commit.committer.date > since]
        commits_unchecked = [x for x in commits_filtered if not self.is_commit_checked(x, check_context)]
        commits_sorted = sorted(commits_unchecked, key=lambda x: x.commit.committer.date)
        logging.info(
            "Unchecked commits since %s for '%s': by date '%s', filtered '%s', unchecked '%s', sorted '%s'",
            str(since), str(check_context), str(commits_orig), str(commits_filtered), str(commits_unchecked), str(commits_sorted))
        return commits_sorted

    def get_all_commits_since(self, since, repo):
        # See the comment above for get_all_unchecked_commits_since.
        commits_orig = [x for x in repo.get_commits(since=since)]
        commits_filtered = [x for x in commits_orig if x.commit.committer.date > since]
        commits_sorted = sorted(commits_filtered, key=lambda x: x.commit.committer.date)
        logging.info(
            "Commits since %s: by date '%s', filtered '%s', sorted '%s'",
            str(since), str(commits_orig), str(commits_filtered), str(commits_sorted))
        return commits_sorted

    def _get_paginated_last_commit(self, pull, repo):
        if pull.number not in self.pr_commit_cache:
            last_page = min((pull.commits - 1) / 30 + 1, 9)
            commits = requests.get(pull.commits_url + "?page={}".format(last_page), headers={"Authorization": self.get_oauth_header()}).json()
            if not isinstance(commits, list):
                logging.info("Strange answer from github url '%s': '%s' seems like we banned", str(pull.commits_url), str(commits))
                time.sleep(10)
                return None
            latest_commit_sha = commits[-1]["sha"]  # get latest commit
            commit = repo.get_commit(sha=latest_commit_sha)
            self.pr_commit_cache[pull.number] = commit

        return self.pr_commit_cache[pull.number]

    def get_open_pull_requests(self, since, repo):
        if not self.fresh_prs_cache:
            pulls_open = repo.get_pulls(state="open", sort="date")
            for pull in pulls_open:
                if pull.updated_at >= since:
                    pull_is_ok = self._can_be_tested_label(pull)
                    if not pull_is_ok:
                        pull_is_ok = self.is_trusted_pr(pull)

                    if pull_is_ok and not self._do_not_test_label(pull):
                        self.fresh_prs_cache.add(pull)

        result = []
        for pull in list(self.fresh_prs_cache):
            logging.info("Looking at pull: %s", pull.number)
            latest_commit = self._get_paginated_last_commit(pull, repo)
            if latest_commit is None:
                continue

            result.append((pull, latest_commit))
        return result

    def filter_checked_pull_requests(self, new_pulls, check_context, repo, pending_delta=PENDING_DELTA):
        result = []
        for pull, latest_commit in new_pulls:
            if not self.is_commit_checked(latest_commit, check_context, pending_delta):  # is it checked?
                result.append((pull, latest_commit))
        return result

    def check_pr_is_able_to_test(self, pull_request):
        return (not self._do_not_test_label(pull_request)
                and (self._can_be_tested_label(pull_request)
                     or self.is_trusted_pr(pull_request)))

    def get_rest_api_limit(self):
        rest, _ = self.gh.rate_limiting
        return rest

    def check_rate_limit(self):
        """Check that enough GitHub API requests remain.

        :return: ``(True, "Everything is ok")`` when at least
            ``DANGEROUS_API_RATE_LIMIT`` requests remain; otherwise
            ``(False, message)`` where the message reports the remaining and
            total request counts and the reset time.
        """
        remaining, limit = self.gh.rate_limiting
        if remaining >= DANGEROUS_API_RATE_LIMIT:
            return (True, "Everything is ok")

        # The reset timestamp is rendered via fromtimestamp(), i.e. in local time.
        reset_at = datetime.datetime.fromtimestamp(self.gh.rate_limiting_resettime)
        message = "There are only {} GitHub api requests from {} for this hour, there will be more after {}".format(
            remaining, limit, reset_at.strftime("%Y-%m-%d %H:%M:%S"))
        return (False, message)

    def get_fake_pr(self):
        """Return a ``FakePR`` placeholder whose ``number`` is 0."""
        return FakePR(number=0)

    def get_commit_datetime_str(self, commit):
        return commit.raw_data['commit']['committer']['date']

    def get_commit_datetime(self, commit):
        datetime_str = self.get_commit_datetime_str(commit)
        return datetime.datetime.strptime(datetime_str, "%Y-%m-%dT%H:%M:%SZ") + datetime.timedelta(hours=3)

    def get_commit_changed_files(self, commit):
        if 'files' in commit.raw_data:
            return [f['filename'] for f in commit.raw_data['files']]
        return []

    def get_pr_changed_files(self, pull_request):
        if pull_request.number == 0:
            return []
        if pull_request.number not in self.pr_files_cache:
            self.pr_files_cache[pull_request.number] = [f.filename for f in pull_request.get_files()]

        return self.pr_files_cache[pull_request.number]

    def is_merge_commit(self, commit):
        if 'committer' in commit.raw_data and commit.raw_data['committer'] and 'login' in commit.raw_data['committer']:
            return commit.raw_data['committer']['login'] == 'web-flow'
        return False

    def get_changed_files_between_commits(self, repo, commit_old_sha, commit_new_sha):
        diff_between_commits = repo.compare(commit_old_sha, commit_new_sha)
        if 'files' in diff_between_commits.raw_data:
            return [f['filename'] for f in diff_between_commits.raw_data['files']]
        return []

    def get_first_parent_commit(self, commit):
        return commit.parents[0]
