import logging
from time import mktime, gmtime
from datetime import datetime
import re
from requests import get

from sandbox.projects.SecDis.data_types import Item, ItemType
from sandbox.projects.SecDis.Collectors import BaseCollector


class Github(object):
    """Small client for the commit endpoints of the GitHub API at ``base_url``.

    Every request is authenticated by sending ``client_id`` and
    ``client_secret`` as query parameters.
    """
    base_url = 'https://api.github.yandex-team.ru'

    def __init__(self, owner, repo, client_id, client_secret):
        """Remember the repository coordinates and the API credentials."""
        self.owner = owner
        self.repo = repo
        self.client_id = client_id
        self.client_secret = client_secret

    def get_commit_list(self, since=0, branch='master'):
        """Return details of the commits on ``branch`` made after ``since``.

        :param since: numeric timestamp (anything ``int()`` accepts); it is
            converted to the ``since`` query parameter of the request.
        :param branch: value sent as the ``sha`` query parameter and stored
            in the ``branch`` field of every returned commit.
        :return: list of dicts as built by ``__get_commit``, in the order the
            API returned them. If a page request answers with a status other
            than 200, paging stops and the commits gathered so far are
            returned.
        :raises RuntimeError: if the details of a single commit cannot be
            fetched.
        """
        url = '{}/repos/{}/{}/commits'.format(self.base_url, self.owner, self.repo)
        # Extracts the URL of the next page from the "Link" response header.
        pat = re.compile(r'<([^<>]*?)>; rel=\"next\"')
        # NOTE(review): fromtimestamp() renders *local* time although the
        # string is suffixed with 'Z'. The caller in this file produces its
        # timestamp with mktime(gmtime()), which looks like the matching
        # inverse shift -- confirm before changing either side on its own.
        since = datetime.fromtimestamp(int(since)).strftime('%Y-%m-%dT%H:%M:%SZ')
        payload = {
            'client_id': self.client_id,
            'client_secret': self.client_secret,
            'sha': branch,
            'since': since,
            'per_page': 100,
        }

        result = []
        while True:
            # The same payload is sent again with every follow-up page URL.
            r = get(url, params=payload)
            if r.status_code != 200:
                # The result is truncated here, so make it visible in the log.
                logging.warning('[get_commit_list] Github %s/%s status_code=%s',
                                self.owner, self.repo, r.status_code)
                break
            json_data = r.json()
            logging.info('[get_commit_list] Github %s/%s len=%s',
                         self.owner, self.repo, len(json_data))
            # The listing lacks the fields collected below (stats, files), so
            # every commit is fetched individually.
            commit_urls = [c['url'] for c in json_data]
            result.extend(self.__get_commit(commit_url, branch) for commit_url in commit_urls)

            m = pat.search(r.headers.get('Link', ''))
            if not m:
                break
            url = m.group(1)

        return result

    def __get_commit(self, commit_url, branch_name):
        """Fetch one commit and reduce it to a flat dict.

        :param commit_url: API URL of the commit.
        :param branch_name: stored unchanged in the ``branch`` field.
        :return: dict with the keys ``author``, ``url``, ``html_url``,
            ``stats``, ``date``, ``files``, ``branch``, ``owner``, ``repo``
            and ``vcs``.
        :raises RuntimeError: if the API answers with a status other than 200.
        """
        payload = {
            'client_id': self.client_id,
            'client_secret': self.client_secret,
        }
        r = get(commit_url, params=payload)
        if r.status_code != 200:
            # Fail with the commit URL and status instead of an opaque
            # KeyError/ValueError raised while picking apart an error body.
            # commit_url is reported rather than r.url, because the latter
            # carries the credentials in its query string.
            raise RuntimeError(
                '[get_commit] Github {}/{} status_code={} url={}'.format(
                    self.owner, self.repo, r.status_code, commit_url))
        json_data = r.json()

        author = json_data['author']
        if author:
            login = author['login']
        else:
            # No account attached to the commit: fall back to the local part
            # of the author's e-mail address.
            email = json_data['commit']['author']['email']
            login = email.split('@')[0]

        return {
            'author': login,
            'url': json_data['url'],
            'html_url': json_data['html_url'],
            'stats': json_data['stats'],
            'date': json_data['commit']['author']['date'],
            'files': [f['filename'] for f in json_data['files']],
            'branch': branch_name,
            'owner': self.owner,
            'repo': self.repo,
            'vcs': 'github'
        }


class CommitCollector(BaseCollector):
    """ Service Security Discovery: commits by repo collector task """
    collector_name = 'commit'
    # Consumes repository items and produces commit items.
    input_types = (ItemType.REPO,)
    output_types = (ItemType.COMMIT,)

    def get_commit_list(self, repo_url, since=0, branch='master'):
        """Return the commits of the repository at ``repo_url``.

        Only URLs on ``https://github.yandex-team.ru/`` are handled. The owner
        and repository name are taken from the first two path segments; the
        repository name may contain dots, and a trailing ``.git`` is dropped.

        :param repo_url: web URL of the repository.
        :param since: timestamp passed through to ``Github.get_commit_list``.
        :param branch: branch name passed through to
            ``Github.get_commit_list``.
        :return: list of commit dicts, or an empty list when the URL is not
            supported or owner/repo cannot be extracted from it.
        """
        if not repo_url.startswith('https://github.yandex-team.ru/'):
            logging.info('[get_commit_list] repo {} not supported'.format(repo_url))
            return []

        m = re.search(r'https://github\.yandex-team\.ru/([\w\-]+)/([\w.\-]+)', repo_url)
        if m is None:
            # Right host, but no "<owner>/<repo>" path to query.
            logging.info('[get_commit_list] repo {} not supported'.format(repo_url))
            return []

        owner = m.group(1)
        repo = m.group(2)
        # Clone-style URLs end with ".git", which is not part of the name.
        if repo.endswith('.git') and len(repo) > len('.git'):
            repo = repo[:-len('.git')]

        logging.info('Retrieving Github {}/{} since {}'.format(owner, repo, since))
        client = Github(owner, repo, self.github_client_id, self.github_client_secret)
        return client.get_commit_list(since, branch)

    def on_execute(self):
        """Collect new commits for every input repository item.

        Reads the GitHub credentials from the vault, fetches the commits made
        since the timestamp stored in the auxiliary data (10 days back when
        none is stored), adds one COMMIT item per commit, then stores the
        timestamp taken at the start of this run as the new ``since``.
        """
        self.github_client_id = self.get_vault('GithubClientIdSecDis')
        self.github_client_secret = self.get_vault('GithubClientSecretSecDis')

        auxiliary = self.load_auxiliary()
        logging.info("auxiliary %s", auxiliary)
        if auxiliary is None:
            auxiliary = dict()

        # NOTE(review): mktime() treats the UTC struct from gmtime() as local
        # time, so this is not a plain epoch value. Github.get_commit_list
        # converts it back with the local-time datetime.fromtimestamp(), which
        # appears to undo the shift -- confirm before changing either side.
        cur_ts = int(mktime(gmtime()))
        prev_since = auxiliary.get('since', cur_ts - 10*24*3600)

        # Taken before any request is made, so commits that land while this
        # run is in progress fall into the next run's window.
        since = cur_ts
        item_list = [Item.unserialize(item) for item in self.Parameters.item_list]
        project_id = self.Parameters.project_id

        for item in item_list:
            repo_url = item.get_value()
            ancestor_id = item.get_id()

            for commit in self.get_commit_list(repo_url, prev_since):
                # The commit date is passed twice positionally, and all commit
                # fields are forwarded as keyword arguments.
                new_item = Item(ItemType.COMMIT, project_id, commit['url'], ancestor_id,
                                commit['date'], commit['date'], **commit)
                self.add_result(new_item, ancestor_id)

        self.save_auxiliary({'since': since})

        self.save_result()
