# -*- coding: utf-8 -*-
import logging
from sandbox import sdk2
from sandbox.sdk2.path import Path
import datetime
import os
import sandbox.common.types.resource as ctr
from sandbox.projects.clickhouse.util.git_helper import GitHelper
import sandbox.common.types.client as ctc

from sandbox.projects.clickhouse.BaseOnCommitTask.base import BaseOnCommitTask, PostStatuses
from sandbox.projects.clickhouse.resources import CLICKHOUSE_REPO, CLICKHOUSE_REPO_NO_SUBMODULES

from sandbox.projects.clickhouse.util.task_helper import has_changes_in_submodules, compress_fast

import subprocess

# Name of the pull-request label that ClickhouseRepoCloner.process() adds when
# has_changes_in_submodules() reports changes, and removes again when the
# label is present but no submodule changes are reported.
SUBMODULE_CHANGED_LABEL = "submodule changed"


class ClickhouseRepoCloner(BaseOnCommitTask):
    """Clone the repository for a commit and publish the snapshot.

    ``process`` clones the repo (via ``GitHelper``), records the list of
    changed files, bumps the version and stores two archives as Sandbox
    resources: one without submodules (``CLICKHOUSE_REPO_NO_SUBMODULES``)
    and one with submodules checked out (``CLICKHOUSE_REPO``). Selected
    files are additionally uploaded to S3.
    """

    class Requirements(BaseOnCommitTask.Requirements):
        cores = 1
        ram = 8192  # 8GiB or less
        client_tags = ctc.Tag.MULTISLOT

        class Caches(sdk2.Requirements.Caches):
            pass  # means that task do not use any shared caches

    @staticmethod
    def get_context_name():
        """Return the context name used for commit statuses of this task."""
        return "Clone repo"

    @staticmethod
    def order():
        """Return the ordering key of this task (1)."""
        return 1

    @classmethod
    def get_resources(cls, commit, repo, pull_request):
        """Return ``True`` when no READY ``CLICKHOUSE_REPO`` exists for the commit.

        Returns ``None`` when such a resource is already present.
        ``repo`` and ``pull_request`` are accepted but not used here.
        """
        existing = sdk2.Resource.find(
            CLICKHOUSE_REPO,
            state=ctr.State.READY,
            attrs=dict(commit=commit.sha)
        ).limit(1).first()
        return None if existing else True

    def _is_resource_exists(self, res_type, commit):
        """Tell whether a READY resource of ``res_type`` exists for ``commit.sha``."""
        found = sdk2.Resource.find(
            res_type,
            state=ctr.State.READY,
            attrs=dict(commit=commit.sha)
        ).limit(1).first()
        return found is not None

    def post_statuses(self):
        """Return ``PostStatuses.NEVER``."""
        return PostStatuses.NEVER

    def _update_version(self, version, pull_request):
        """Seed ``version.tweak`` from the newest master resource and bump it.

        Looks up the most recent READY ``CLICKHOUSE_REPO`` resource with
        ``pr_number=0``. If it carries a non-empty version string, the fourth
        dot-separated component becomes ``version.tweak``; when the string
        has no fourth component the tweak is reset to 0. If no resource is
        found, ``version`` is left as received.

        ``pull_request`` is accepted but not used here.

        Returns the result of ``version.tweak_update()``.
        Raises ``ValueError`` if the fourth component is not an integer.
        """
        latest = sdk2.Resource.find(
            CLICKHOUSE_REPO,
            state=ctr.State.READY,
            attrs=dict(pr_number=0)
        ).order(-CLICKHOUSE_REPO.id).limit(1).first()

        if not latest:
            logging.info("Will try to get revesion from repo")
        else:
            logging.info("Found resource with some version %s", latest.id)
            if latest.version:
                logging.info("Resource has version %s", latest.version)
                parts = latest.version.split(".")
                # Only read the tweak when it is really there: shorter
                # version strings used to blow up with IndexError.
                version.tweak = int(parts[3]) if len(parts) > 3 else 0

        return version.tweak_update()

    def _check_docs_update(self, commit):
        """Return ``True`` if the commit touches docs, website, README or CHANGELOG.

        A changed file matches when its path contains ``docs/`` or
        ``website/`` anywhere, or is exactly ``README.md`` / ``CHANGELOG.md``.
        """
        for changed_file in self.gh_helper.get_commit_changed_files(commit):
            if ('docs/' in changed_file or 'website/' in changed_file
                    or changed_file in ['README.md', 'CHANGELOG.md']):
                logging.info("Found docs change in files %s", changed_file)
                return True
        return False

    def get_version_type(self, pull_request):
        """Return the version type passed to ``update_version_local`` ("testing")."""
        return "testing"

    @staticmethod
    def _to_text(output):
        """Return subprocess output as ``str`` on both Python 2 and Python 3.

        ``check_output`` yields ``bytes`` on Python 3; comparing ``str(bytes)``
        with a sha never matches, so decode first.
        """
        if isinstance(output, str):
            return output
        return output.decode("utf-8", "replace")

    def _sync_submodule_label(self, commit, pull_request):
        """Add or remove SUBMODULE_CHANGED_LABEL so it reflects the PR's state."""
        if has_changes_in_submodules(self.gh_helper, commit, pull_request):
            logging.info("Found changes in submodules, creating label")
            pull_request.add_to_labels(SUBMODULE_CHANGED_LABEL)
        elif SUBMODULE_CHANGED_LABEL in [label.name for label in pull_request.labels]:
            logging.info("Submodule change removed")
            pull_request.remove_from_labels(SUBMODULE_CHANGED_LABEL)

    def _write_archive_resource(self, resource_type, description, resource_path, archive_name,
                                commit, pull_request, version, docs_change, is_merge_commit):
        """Create a resource of ``resource_type`` and fill it from ``archive_name``.

        The resource is created at ``resource_path`` with the commit, date,
        PR number, revision, version, docs-change and merge-commit attributes,
        then the archive bytes are copied into it and it is marked ready.
        """
        resource = resource_type(
            self,
            description,
            resource_path,
            commit=commit.sha,
            date=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            pr_number=pull_request.number,
            revision=version.revision,
            version=version.get_version_string(),
            docs_change=docs_change,
            is_merge_commit=is_merge_commit,
        )
        resource_data = sdk2.ResourceData(resource)
        resource_data.path.write_bytes(Path(archive_name).read_bytes())
        resource_data.ready()

    def process(self, commit, repo, pull_request):
        """Clone ``repo`` at ``commit`` and publish the resulting archives.

        Steps, in order: sync the submodule label on real PRs, clone, verify
        a merge base with origin/master exists, write ``ci-changed-files.txt``,
        upload a few repo files to S3, update the version, publish the
        archive without submodules, update submodules, publish the archive
        with submodules.

        Returns a ``(state, description, None)`` tuple; ``state`` is
        "success", "failure" or "exception".

        Raises ``Exception`` when the merge commit is not ready yet or the
        merge base cannot be determined, ``subprocess.CalledProcessError``
        when a checked git command fails, and re-raises whatever
        ``update_submodules`` raised after reporting an "error" status.
        """
        logging.info("Starting clone of repo %s with commit %s", repo.name, commit)
        work_dir = "./" + repo.name
        os.mkdir(work_dir)
        githelper = GitHelper(self, repo, work_dir, self.task_owner, self.user_email, self.ssh_key)
        # PR number 0 is the fake PR used for the master branch: no labels there.
        if pull_request.number != 0:
            self._sync_submodule_label(commit, pull_request)

        logging.info("Cloning repo")

        is_merged = githelper.clone_repo(pull_request, commit)

        if is_merged is True:
            logging.info("Finished clone, merged with master or master itself")
        elif is_merged is False:
            if pull_request.number != 0 and not any(
                    label.name == "release" for label in pull_request.get_labels()):
                commit.create_status(state="failure", context="Merge with master", description="Cannot fetch mergecommit")
            else:
                logging.info("Would not mark as unmergeable PR because of release")
            logging.info("Finished clone, can't merge with master, checkouted from branch")
        else:
            logging.info("Github hasn't prepared merge-commit yet. Will try next time")
            raise Exception("GitHub's pull/{}/merge is not yet ready".format(pull_request.number))

        subprocess.call('git --version', shell=True)

        # A sanity check. We have to be able to determine the merge base because
        # we'll need it to build the list of changed files.
        # Strip before testing: a lone newline (or, on Python 3, the repr of
        # empty bytes) must not pass for a merge base.
        merge_base = subprocess.check_output(
            'cd "{}" && git merge-base origin/master HEAD'.format(work_dir), shell=True).strip()
        if not merge_base:
            raise Exception("Cannot determine merge base")

        # The following commands only print diagnostics into the task log;
        # their exit codes are deliberately ignored.
        subprocess.call(
            ('set -x && cd "{}" && echo "origin/master is $(git rev-parse origin/master), '
             'HEAD is $(git rev-parse HEAD), merge_base is $(git merge-base origin/master HEAD)"').format(work_dir),
            shell=True)
        subprocess.call('set -x && cd "{}" && git log -1 origin/master'.format(work_dir), shell=True)
        subprocess.call('set -x && cd "{}" && git log -1 HEAD'.format(work_dir), shell=True)

        # Try to find the commit in master closest to the commit we're testing,
        # and tag it as 'ci-reference'. This will allow us to determine which
        # files have changed in a PR, using only the repository snapshot. The
        # benefits are that the snapshot is immutable, and we won't have to use
        # the slow GitHub API which is also always out of quota.
        # The PR branch may be either pull/*/head which is the author's branch,
        # or pull/*/merge, which is head merged with some master automatically
        # by GitHub. We will use the merge base with master as a reference for
        # testing.
        # TODO this is broken for backports, but to a "false positive" side, i.e.
        # it's going to compare to master and show more changed files that there
        # actually are.
        #
        # For master branch with fake PR == 0 we just use diff between two latest commits.
        if pull_request.number == 0:
            subprocess.call(
                'set -x && cd "{}"; git diff --name-only HEAD~1 HEAD | tee ci-changed-files.txt'.format(work_dir),
                shell=True)
            # While we're at it, check that this commit is on the first-parent
            # chain from origin/master. It is impossible to check through github
            # api, so we can fetch some commit from merged branch that grows
            # from very old master, and this leads to problems such as
            # clobbering the site with old version of docs. Checking the time is
            # not sufficient, because the commit itself may be very recent, but
            # it grows from an old version.
            # In case the commit is not first-parent, we will refuse to clone
            # the repo, so other checks won't run as well.
            try:
                first_parent_match = self._to_text(subprocess.check_output(
                    'cd "{}"; git rev-list --first-parent -100 origin/master | grep $(git rev-parse HEAD) ||:'.format(work_dir),
                    shell=True, stderr=subprocess.STDOUT)).rstrip()
            except subprocess.CalledProcessError as e:
                logging.info("Error checking first parent for %s: %s", str(commit.sha), str(e.output))
                raise

            if not first_parent_match:
                return "failure", "The commit {} is not found on the first parent chain from origin/master".format(str(commit)), None
            if first_parent_match != str(commit.sha):
                # Truncated to 139 characters, as in the original code
                # (presumably a status description length limit — TODO confirm).
                return "exception", "Wrong match for first parent: '{}', expected '{}'".format(first_parent_match, str(commit.sha))[:139], None
        else:
            subprocess.call(
                'set -x && cd "{}" && git log -1 $(git merge-base origin/master HEAD)'.format(work_dir),
                shell=True)
            subprocess.call(
                'set -x && cd "{}" && git diff --name-only $(git merge-base origin/master HEAD) HEAD | tee ci-changed-files.txt'.format(work_dir),
                shell=True)

        for relative_path in ('ci-changed-files.txt', 'tests/ci/ci_config.json', 'docker/images.json'):
            self.upload_repo_file_to_s3(pull_request, commit, os.path.join(work_dir, relative_path))

        version = githelper.get_version_from_repo()
        logging.info("Received version %s", version.get_version_string())
        version = self._update_version(version, pull_request)
        githelper.update_version_local(commit, version, version_type=self.get_version_type(pull_request))

        is_merge_commit = self.gh_helper.is_merge_commit(commit)
        docs_change = self._check_docs_update(commit)
        if not self._is_resource_exists(CLICKHOUSE_REPO_NO_SUBMODULES, commit):
            logging.info("Starting archive cloned repo")
            compress_fast(work_dir, "clickhouse_no_subs.tar.gz")
            logging.info("Archive of cloned repo is ready")
            logging.info("Writing archive to resource")
            self._write_archive_resource(
                CLICKHOUSE_REPO_NO_SUBMODULES,
                "Clickhouse repo with commit {} ".format(commit.sha),
                "./ClickHouseNoSubs.tar.gz",
                "clickhouse_no_subs.tar.gz",
                commit, pull_request, version, docs_change, is_merge_commit,
            )
            logging.info("Write of Sandbox resource finished")

            self.upload_repo_file_to_s3(pull_request, commit, "clickhouse_no_subs.tar.gz")
            logging.info("Upload to S3 finished")
        else:
            logging.info("Submodules resource already exists")

        logging.info("Updating submodules")
        sync_log_name = "submodules_sync"
        update_log_name = "submodules_update"
        try:
            githelper.update_submodules(pull_request, sync_log_name, update_log_name)
        except Exception as ex:
            logging.info("Cannot update submodules with exception %s", ex)
            err_log_path = str(self.log_path(update_log_name + ".err.log"))
            logging.info("Abs err path %s", err_log_path)
            if os.path.exists(err_log_path):
                # Use the commit passed to process(), as everywhere else in
                # this method, rather than relying on self.commit.
                s3_path_prefix = "/".join([
                    str(pull_request.number),
                    commit.sha,
                    self.get_context_name().lower().replace(' ', '_'),
                ])
                logging.info("Path exists, will upload to s3")
                url = self.s3_client.upload_test_report_to_s3(err_log_path, s3_path_prefix + "/submodules_init.err.log")
                logging.info("Uploaded to %s", url)
                commit.create_status(state="error", context=self.get_context_name(), description="Cannot clone submodules", target_url=url)
            else:
                commit.create_status(state="error", context=self.get_context_name(), description="Cannot clone submodules (no logs found)")

            # Bare raise keeps the original traceback intact.
            raise

        logging.info("Update finished")

        logging.info("Starting archive repo with submodules")
        compress_fast(work_dir, "clickhouse.tar.gz")
        logging.info("Archive of repo with submodules is ready")

        if not self._is_resource_exists(CLICKHOUSE_REPO, commit):
            logging.info("Writing archive to resource")
            self._write_archive_resource(
                CLICKHOUSE_REPO,
                "Clickhouse repo with commit {}".format(commit.sha),
                "./ClickHouse.tar.gz",
                "clickhouse.tar.gz",
                commit, pull_request, version, docs_change, is_merge_commit,
            )
            logging.info("Write finished")
        else:
            logging.info("Repo resource already exists")

        return "success", "Repo was cloned", None

    def upload_repo_file_to_s3(self, pull_request, commit, full_path):
        """Upload ``full_path`` to S3 under ``<pr number>/<sha>/repo/<basename>``.

        Does nothing (apart from logging) when the file does not exist.
        """
        if not os.path.exists(full_path):
            logging.info("File %s doesn't exist, will not upload anything", full_path)
            return
        s3_path = "/".join([str(pull_request.number), commit.sha, "repo", os.path.basename(full_path)])
        url = self.s3_client.upload_test_report_to_s3(full_path, s3_path)
        logging.info("Uploaded file %s to s3, url %s", full_path, url)