# -*- coding: utf-8 -*-
import logging
import requests
import json


class PRInfo:
    """Information about a pull request at one particular commit.

    On construction the object collects:
      * the PR number, commit sha and label names from the PyGithub objects;
      * the latest commit status for every check context;
      * the list of changed files and the docker images list, both
        downloaded over HTTP from the test reports storage.

    Attributes:
        pygithub_pr: the PR object passed to the constructor.
        pygithub_commit: the commit object passed to the constructor.
        number: PR number (may be 0 for the FakePR that stands for master).
        sha: sha of the commit.
        labels: set of label names; empty when the PR number is falsy.
        statuses: dict(check name -> pygithub CommitStatus), latest per check.
        changed_files: list of changed file paths, or None if the list could
            not be downloaded (repo was not cloned yet).
        docker_images: parsed content of images.json, or None if it could
            not be downloaded.
    """

    # URL template for files stored under the "repo" directory of a given
    # PR number and commit sha: (number, sha, file name).
    _REPO_ARTIFACT_URL = 'https://clickhouse-test-reports.s3.yandex.net/{}/{}/repo/{}'

    def __init__(self, pygithub_pr, pygithub_commit):
        """Gather PR info from GitHub and from the test reports storage.

        Args:
            pygithub_pr: object with `number` and `labels` attributes
                (each label must have a `name`).
            pygithub_commit: object with a `sha` attribute and a
                `get_statuses()` method.

        Raises:
            Whatever `requests.get` or `json.loads` raise; such errors are
            not handled here.
        """
        # Keep in mind that we may get FakePR with number = 0 for master. Not
        # every field makes sense for it.
        self.pygithub_pr = pygithub_pr
        self.pygithub_commit = pygithub_commit
        self.number = pygithub_pr.number
        self.sha = pygithub_commit.sha
        # Labels are only read when the PR number is non-zero.
        if pygithub_pr.number:
            self.labels = {label.name for label in pygithub_pr.labels}
        else:
            self.labels = set()
        # Statuses of checks: dict(check name -> pygithub CommitStatus).
        self.statuses = {}
        # None if repo was not cloned yet
        self.changed_files = None
        self.docker_images = None

        logging.info("Gathering info for PR #%s @ %s", self.number, self.sha)
        logging.info("Labels %s", ', '.join(self.labels))

        self._load_statuses()
        self._load_changed_files()
        self._load_docker_images()

    def _artifact_url(self, file_name):
        """Return the URL of `file_name` in the repo directory of this PR and commit."""
        return self._REPO_ARTIFACT_URL.format(self.number, self.sha, file_name)

    def _load_statuses(self):
        """Fill `self.statuses` with the most recent status of every check.

        Get check statuses from GitHub. Squash statuses to latest state:
            1. context="first", state="success", update_time=1
            2. context="second", state="success", update_time=2
            3. context="first", state="failure", update_time=3
            =========>
            1. context="second", state="success"
            2. context="first", state="failure"
        """
        # Before we sort the statuses by date, repack them into a normal
        # python list from PyGitHub paginated list, because it is not clear
        # how the latter behaves when wrapped into sorted() (it might make a
        # lot of API calls).
        github_statuses = list(self.pygithub_commit.get_statuses())
        github_statuses.sort(key=lambda status: status.updated_at)
        # Statuses are visited oldest first, so a later status for the same
        # context overwrites the earlier one.
        for status in github_statuses:
            self.statuses[status.context] = status
        logging.info("Statuses %s", ', '.join(
            "{}: {}".format(status.context, status.state)
            for status in self.statuses.values()))

    def _load_changed_files(self):
        """Download the list of changed files into `self.changed_files`.

        The list is prepared by the repo cloner task. We use it to determine
        which checks to run (e.g. docs checks only run if some docs files
        were changed). We don't request this from GitHub because its API is
        slow, and also may be inconsistent with the repo snapshot prepared by
        the repo cloner task.
        """
        changed_files_path = self._artifact_url('ci-changed-files.txt')
        # NOTE(review): no timeout is passed, so a stalled connection blocks
        # here indefinitely; consider adding one.
        response = requests.get(changed_files_path)
        if not response.ok:
            logging.info(
                "Changed files not ready: downloaded list from %s, status_code %s",
                changed_files_path, response.status_code)
            return

        # Last line is empty because of the final line break. Have to filter it out.
        self.changed_files = [line for line in response.text.split('\n') if line]
        logging.info(
            "Changed files: downloaded list from %s, status_code %s: %s items (first 20 shown): [\n%s\n]",
            changed_files_path, response.status_code, len(self.changed_files),
            '\n'.join(self.changed_files[:20]))

    def _load_docker_images(self):
        """Download images.json and store its parsed content in `self.docker_images`."""
        docker_images_path = self._artifact_url('images.json')
        # NOTE(review): no timeout is passed, so a stalled connection blocks
        # here indefinitely; consider adding one.
        response = requests.get(docker_images_path)
        if not response.ok:
            logging.info(
                "Docker images list not ready: tried to download from %s, status_code %s",
                docker_images_path, response.status_code)
            return

        # Store the result on the instance: assigning to a local variable
        # would leave `self.docker_images` as None forever.
        self.docker_images = json.loads(response.text)
        logging.info(
            "Docker images list: downloaded from %s, status_code %s: %s",
            docker_images_path, response.status_code, json.dumps(self.docker_images))

    def get_json_dict(self):
        """Return a JSON-serializable dict describing this PR.

        Returns:
            dict with keys 'number', 'sha', 'labels' (label names as a sorted
            list, so that the output is deterministic) and 'changed_files'
            (list of paths, or None if it was not downloaded).
        """
        return {
            'number': self.number,
            'sha': self.sha,
            'labels': sorted(self.labels),
            'changed_files': self.changed_files
        }
