# -*- coding: utf-8 -*-
from collections import Counter
import datetime
import logging
import random
import re

from sandbox import sdk2
from sandbox.common.types import resource as ctr
from sandbox.sandboxsdk import environments
import sandbox.common.types.misc as ctm
import sandbox.common.types.task as ctt

from sandbox.projects.clickhouse.BaseOnCommitTask.base import PostStatuses
from sandbox.projects.clickhouse.util.docker_image_helper import DockerImageHelper
from sandbox.projects.clickhouse.util.pr_info import PRInfo
from sandbox.projects.clickhouse.util.github_helper import pr_is_by_trusted_user

from sandbox.projects.clickhouse.ClickhouseBuildLauncher import ClickhouseBuildLauncher
# from sandbox.projects.clickhouse.ClickhouseSpecialBuildLauncher import ClickhouseSpecialBuildLauncher
# from sandbox.projects.clickhouse.ClickhouseIntegrationTests import ClickhouseIntegrationTestsRelease
# from sandbox.projects.clickhouse.ClickhouseIntegrationTests import ClickhouseIntegrationTestsAsan
# from sandbox.projects.clickhouse.ClickhouseIntegrationTests import ClickhouseIntegrationTestsTsan
# from sandbox.projects.clickhouse.ClickhouseIntegrationTests import ClickhouseIntegrationTestsMsan
# from sandbox.projects.clickhouse.ClickhouseIntegrationTestsFlakyCheck import ClickhouseIntegrationTestsFlakyCheck
# from sandbox.projects.clickhouse.ClickhouseStyle import ClickhouseStyle
from sandbox.projects.clickhouse.ClickhouseKeeperJepsen import ClickhouseKeeperJepsen
from sandbox.projects.clickhouse.ClickhousePullRequestPatcher import ClickhousePullRequestPatcher
# from sandbox.projects.clickhouse.ClickhouseStressTest import ClickhouseStressTestAsan
# from sandbox.projects.clickhouse.ClickhouseStressTest import ClickhouseStressTestTsan
# from sandbox.projects.clickhouse.ClickhouseStressTest import ClickhouseStressTestMsan
# from sandbox.projects.clickhouse.ClickhouseStressTest import ClickhouseStressTestUBsan
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatelessTest import ClickhouseStatelessFunctionalTestRelease
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatelessTest import ClickhouseStatelessFunctionalTestReleaseDatabaseOrdinary
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatelessTest import ClickhouseStatelessFunctionalTestReleaseDatabaseReplicated
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatelessTest import ClickhouseStatelessFunctionalTestAsan
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatelessTest import ClickhouseStatelessFunctionalTestDebug
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatelessTest import ClickhouseStatelessFunctionalTestUBsan
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatelessTest import ClickhouseStatelessFunctionalTestTsan
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatelessTest import ClickhouseStatelessFunctionalTestPolymorphicParts
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatelessTest import ClickhouseStatelessFunctionalTestMsan
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatelessTestFlakyCheck import ClickhouseStatelessFunctionalTestFlakyCheck
# from sandbox.projects.clickhouse.ClickhousePVSStudioCheck import ClickhousePVSStudioCheck
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatefulTest import ClickhouseFunctionalStatefulTestRelease
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatefulTest import ClickhouseFunctionalStatefulTestReleaseDatabaseOrdinary
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatefulTest import ClickhouseFunctionalStatefulTestReleaseDatabaseReplicated
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatefulTest import ClickhouseFunctionalStatefulTestAsan
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatefulTest import ClickhouseFunctionalStatefulTestUBSan
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatefulTest import ClickhouseFunctionalStatefulTestDebug
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatefulTest import ClickhouseFunctionalStatefulTestTsan
# from sandbox.projects.clickhouse.ClickhouseFunctionalStatefulTest import ClickhouseFunctionalStatefulTestMsan
# from sandbox.projects.clickhouse.ClickhouseFuzzer import ClickhouseFuzzerDebug
# from sandbox.projects.clickhouse.ClickhouseFuzzer import ClickhouseFuzzerASan
# from sandbox.projects.clickhouse.ClickhouseFuzzer import ClickhouseFuzzerMSan
# from sandbox.projects.clickhouse.ClickhouseFuzzer import ClickhouseFuzzerTSan
# from sandbox.projects.clickhouse.ClickhouseFuzzer import ClickhouseFuzzerUBSan
# from sandbox.projects.clickhouse.ClickhousePerformanceComparison import ClickhousePerformanceComparison
# from sandbox.projects.clickhouse.ClickhouseGlibcCompatibilityCheck import ClickhouseGlibcCompatibilityCheck
# from sandbox.projects.clickhouse.ClickhouseDocsCheck import ClickhouseDocsCheck
# from sandbox.projects.clickhouse.ClickhouseSplitBuildSmokeTest import ClickhouseSplitBuildSmokeTest
# from sandbox.projects.clickhouse.ClickhouseSqlancerTest import ClickhouseSqlancerTest
from sandbox.projects.clickhouse.ClickhouseDockerHubPush import ClickhouseDockerHubPush
# from sandbox.projects.clickhouse.ClickhouseTestflows import ClickhouseTestflows
# from sandbox.projects.clickhouse.ClickhouseFastTest import ClickhouseFastTest
from sandbox.projects.clickhouse.util.github_helper import GithubHelper
from sandbox.projects.clickhouse.util.task_helper import get_nda_task_url
import sandbox.projects.common.binary_task as binary_task

# Registry of the checks that this trigger starts for a commit: maps the
# GitHub status context name of a check (``get_context_name()``) to the task
# class implementing it. Entries that are commented out are not started by
# this trigger.
# Fast test and repo cloner are run separately.
ON_COMMIT_TASKS = {
    ClickhouseBuildLauncher.get_context_name(): ClickhouseBuildLauncher,
    # ClickhouseSpecialBuildLauncher.get_context_name(): ClickhouseSpecialBuildLauncher,
    # ClickhouseIntegrationTestsRelease.get_context_name(): ClickhouseIntegrationTestsRelease,
    # ClickhouseIntegrationTestsAsan.get_context_name(): ClickhouseIntegrationTestsAsan,
    # ClickhouseIntegrationTestsMsan.get_context_name(): ClickhouseIntegrationTestsMsan,
    # ClickhouseIntegrationTestsTsan.get_context_name(): ClickhouseIntegrationTestsTsan,
    # ClickhouseStyle.get_context_name(): ClickhouseStyle,
    # ClickhouseStressTestAsan.get_context_name(): ClickhouseStressTestAsan,
    # ClickhouseStressTestTsan.get_context_name(): ClickhouseStressTestTsan,
    # ClickhouseStatelessFunctionalTestRelease.get_context_name(): ClickhouseStatelessFunctionalTestRelease,
    # ClickhouseStatelessFunctionalTestReleaseDatabaseOrdinary.get_context_name(): ClickhouseStatelessFunctionalTestReleaseDatabaseOrdinary,
    # ClickhouseStatelessFunctionalTestReleaseDatabaseReplicated.get_context_name(): ClickhouseStatelessFunctionalTestReleaseDatabaseReplicated,
    # ClickhouseStatelessFunctionalTestAsan.get_context_name(): ClickhouseStatelessFunctionalTestAsan,
    # ClickhouseStatelessFunctionalTestDebug.get_context_name(): ClickhouseStatelessFunctionalTestDebug,
    # ClickhouseStatelessFunctionalTestUBsan.get_context_name(): ClickhouseStatelessFunctionalTestUBsan,
    # ClickhouseStatelessFunctionalTestPolymorphicParts.get_context_name(): ClickhouseStatelessFunctionalTestPolymorphicParts,
    # ClickhousePVSStudioCheck.get_context_name(): ClickhousePVSStudioCheck,
    # ClickhouseFunctionalStatefulTestRelease.get_context_name(): ClickhouseFunctionalStatefulTestRelease,
    # ClickhouseFunctionalStatefulTestReleaseDatabaseOrdinary.get_context_name(): ClickhouseFunctionalStatefulTestReleaseDatabaseOrdinary,
    # ClickhouseFunctionalStatefulTestReleaseDatabaseReplicated.get_context_name(): ClickhouseFunctionalStatefulTestReleaseDatabaseReplicated,
    # ClickhouseFunctionalStatefulTestAsan.get_context_name(): ClickhouseFunctionalStatefulTestAsan,
    # ClickhouseFunctionalStatefulTestUBSan.get_context_name(): ClickhouseFunctionalStatefulTestUBSan,
    # ClickhouseFunctionalStatefulTestDebug.get_context_name(): ClickhouseFunctionalStatefulTestDebug,
    # ClickhouseFunctionalStatefulTestTsan.get_context_name(): ClickhouseFunctionalStatefulTestTsan,
    # ClickhouseFunctionalStatefulTestMsan.get_context_name(): ClickhouseFunctionalStatefulTestMsan,
    # ClickhouseFuzzerDebug.get_context_name(): ClickhouseFuzzerDebug,
    # ClickhouseFuzzerASan.get_context_name(): ClickhouseFuzzerASan,
    # ClickhouseFuzzerMSan.get_context_name(): ClickhouseFuzzerMSan,
    # ClickhouseFuzzerTSan.get_context_name(): ClickhouseFuzzerTSan,
    # ClickhouseFuzzerUBSan.get_context_name(): ClickhouseFuzzerUBSan,
    # ClickhouseDocsCheck.get_context_name(): ClickhouseDocsCheck,
    # ClickhousePerformanceComparison.get_context_name(): ClickhousePerformanceComparison,
    # ClickhouseGlibcCompatibilityCheck.get_context_name(): ClickhouseGlibcCompatibilityCheck,
    # ClickhouseStatelessFunctionalTestTsan.get_context_name(): ClickhouseStatelessFunctionalTestTsan,
    # ClickhouseUnitTest.get_context_name(): ClickhouseUnitTest,
    # ClickhouseUnitTestClang.get_context_name(): ClickhouseUnitTestClang,
    # ClickhouseUnitTestAsan.get_context_name(): ClickhouseUnitTestAsan,
    # ClickhouseUnitTestTsan.get_context_name(): ClickhouseUnitTestTsan,
    # ClickhouseUnitTestMsan.get_context_name(): ClickhouseUnitTestMsan,
    # ClickhouseUnitTestUbsan.get_context_name(): ClickhouseUnitTestUbsan,
    ClickhouseDockerHubPush.get_context_name(): ClickhouseDockerHubPush,
    # ClickhouseIntegrationTestsFlakyCheck.get_context_name(): ClickhouseIntegrationTestsFlakyCheck,
    # ClickhouseStatelessFunctionalTestFlakyCheck.get_context_name(): ClickhouseStatelessFunctionalTestFlakyCheck,
    # ClickhouseSplitBuildSmokeTest.get_context_name(): ClickhouseSplitBuildSmokeTest,
    # ClickhouseStatelessFunctionalTestMsan.get_context_name(): ClickhouseStatelessFunctionalTestMsan,
    # ClickhouseStressTestMsan.get_context_name(): ClickhouseStressTestMsan,
    # ClickhouseStressTestUBsan.get_context_name(): ClickhouseStressTestUBsan,
    # ClickhouseTestflows.get_context_name(): ClickhouseTestflows,
    ClickhouseKeeperJepsen.get_context_name(): ClickhouseKeeperJepsen,
    # ClickhouseSqlancerTest.get_context_name(): ClickhouseSqlancerTest,
}

# Target URL attached to the "Description check" commit status.
PR_DESCRIPTION_URL = "https://github.com/yandex/ClickHouse/blob/master/.github/PULL_REQUEST_TEMPLATE.md"

# Context name and target URL of the "marker" commit status, which tracks
# whether all checks for a commit have been started.
MARKER_CHECK_TEXT = "Marker check"
MARKER_CHECK_URL = "https://clickhouse.tech/docs/en/development/continuous-integration/"
# Author login used to find the tasks of a pull request that must be stopped.
ROBOT_CLICKHOUSE_STAFF_LOGIN = 'robot-clickhouse'
# Description of the failed marker status posted when the changed docker
# images of a PR cannot be determined; such PRs are not processed any further.
TOO_OLD_PR_DESCR = "Too old PR, merge with master"


def should_run_checks_for_pr(pr_info):
    """Decide whether the individual checks of a PR have to be looked at.

    If the answer is negative the PR can be skipped entirely.

    Args:
        pr_info: object exposing ``statuses`` (a mapping from check context
            name to a status object with ``state`` and ``description``) and
            ``labels`` (a collection of label names).

    Returns:
        A ``(should_run, reason)`` tuple, where ``reason`` is a human-readable
        explanation of the decision.
    """
    state_counts = Counter(status.state for status in pr_info.statuses.values())
    failed_checks = state_counts.get("failure", 0)
    succeeded_checks = state_counts.get("success", 0)
    total_checks = len(pr_info.statuses)

    logging.info("Check statuses: {}, {} total".format(str(state_counts), total_checks))

    # Stop processing if all checks finished.
    marker_check_status = pr_info.statuses.get(MARKER_CHECK_TEXT)
    if marker_check_status and marker_check_status.state == 'success':
        # We can't stop processing the PR just because the marker check has
        # succeeded, because there might be some pending tasks which we'll have
        # to restart ('pending' or 'exception' status). Let's check for them.
        if failed_checks + succeeded_checks == total_checks:
            return False, "All checks finished ({} failed, {} succeeded)".format(failed_checks, succeeded_checks)
    elif marker_check_status and marker_check_status.state == 'failure' and marker_check_status.description == TOO_OLD_PR_DESCR:
        return False, "Too old PR, won't run anything for it"

    # Only PRs carrying one of the forcing labels are processed. Labels are
    # sorted so that the reason text is deterministic.
    force_labels = set(['force tests', 'release', 'jepsen-test']).intersection(pr_info.labels)
    if force_labels:
        return True, "Labeled '{}'".format(', '.join(sorted(force_labels)))

    # NOTE: the former 'do not test' / 'can be tested' / trusted-user /
    # "too many failed checks" rules used to follow this point, but they were
    # unreachable because both branches of the label test returned. They have
    # been removed; reintroduce them before this return if PRs without a
    # forcing label should be processed again.
    return False, "Not run for non release PRs"


def get_new_clickhouse_images_names(image_names):
    """Return the image names with ``yandex/clickhouse-`` replaced by ``clickhouse/``.

    Every occurrence of the old substring is replaced in each name; names that
    do not contain it are returned unchanged. The result is a new list.
    """
    old_part = 'yandex/clickhouse-'
    new_part = 'clickhouse/'

    renamed = []
    for image_name in image_names:
        renamed.append(image_name.replace(old_part, new_part))
    return renamed


class ClickhousePullRequestTrigger(binary_task.LastBinaryTaskRelease, sdk2.Task):
    """
    Task polling github api for pull requests and running checks
    """

    class Parameters(sdk2.Task.Parameters):
        # Input parameters of the trigger task.
        with sdk2.parameters.Group("GitHub parameters") as gh_block:
            # List of vault keys holding GitHub tokens; the token with the
            # largest remaining REST rate limit is picked by get_best_token().
            github_token_key = sdk2.parameters.List("Token for github interactions")
            github_repo = sdk2.parameters.String("GitHub repo name")

        with sdk2.parameters.Group("Check parameters") as check_block:
            # Only PRs updated within this many hours are processed.
            check_commit_time_window = sdk2.parameters.Integer("Hours to check pull requests in past")

        with sdk2.parameters.Group("S3 parameters") as s3_block:
            # Passed through unchanged to the child check tasks.
            s3_access_key_id = sdk2.parameters.String("S3 key id")
            s3_access_key = sdk2.parameters.String("S3 key")

        with sdk2.parameters.Group("Other parameters") as other_block:
            ext_params = binary_task.binary_release_parameters(stable=True)

    class Requirements(sdk2.Task.Requirements):
        # Execution requirements of the trigger task itself.
        # NOTE(review): DNS64 is presumably needed to reach external hosts
        # such as GitHub - confirm.
        dns = ctm.DnsType.DNS64
        cores = 1

        class Caches(sdk2.Requirements.Caches):
            # Deliberately empty override.
            # NOTE(review): presumably declares that the task needs no caches
            # - confirm against the Sandbox SDK documentation.
            pass

    @property
    def binary_executor_query(self):
        """Query describing the binary executor resource to look up.

        The resource must be in the READY state, have the
        ``CLICKHOUSE_SANDBOX_BINARY`` task type attribute and be released with
        the release type selected in the task parameters.
        """
        release_type = self.Parameters.binary_executor_release_type
        attributes = {"task_type": "CLICKHOUSE_SANDBOX_BINARY", "released": release_type}
        return {"attrs": attributes, "state": [ctr.State.READY]}

    # GitHub status context name under which run_fake_checks() publishes the
    # result of the PR description check.
    DESCRIPTION_CHECK_TEXT = "Description check"

    def stop_checks_for_pr(self, pull_request_number, reason, commits_to_keep=None):
        """Stop the active tasks started for a pull request.

        Looks up at most 100 tasks authored by the robot, owned by this
        task's owner, whose ``pull_request_number`` input parameter matches
        and which are still executing, waiting or queued, and stops each one
        whose commit is not listed in ``commits_to_keep``. Stopping is best
        effort: a failure to stop one task is logged and the others are still
        processed.

        Args:
            pull_request_number: number of the pull request.
            reason: human-readable reason, used for logging only.
            commits_to_keep: optional collection of commit SHAs whose tasks
                must not be stopped. Defaults to "keep nothing".
        """
        # Do not use a mutable object as the default argument value.
        if commits_to_keep is None:
            commits_to_keep = ()

        commit_message = '' if not commits_to_keep else ' excluding checks for commits {}'.format(' '.join(commits_to_keep))
        logging.info("Will stop checks for PR #{}{}. Reason '{}'".format(pull_request_number, commit_message, reason))

        # Reference for find():
        # https://sandbox.yandex-team.ru/docs/html/sdk2.internal.html?highlight=task%20find#sdk2.internal.task.Task.find
        tasks = sdk2.Task.find(
            author=ROBOT_CLICKHOUSE_STAFF_LOGIN,
            owner=self.owner,
            input_parameters={"pull_request_number": pull_request_number},
            status=(ctt.Status.EXECUTING, ctt.Status.WAIT_TASK, ctt.Status.ENQUEUED, ctt.Status.ENQUEUING, ctt.Status.ASSIGNED),
            children=True,
        ).limit(100)

        logging.info("Found %ld tasks with same pull request", tasks.count)

        for task in tasks:
            logging.info("Looking at the existing task id %s", task.id)

            params = task.Parameters
            if params.commit_sha in commits_to_keep:
                continue

            logging.info("Stopping task %s for prev commit %s from PR #%s", task.id, params.commit_sha, pull_request_number)
            try:
                task.stop()
            except Exception:
                # Best effort: keep going, but record what went wrong instead
                # of silently swallowing every error.
                logging.warning("Something went wrong stopping task %s", task.id, exc_info=True)

    def check_pr_description(self, pull_request):
        description = pull_request.body

        lines = [line for line in map(lambda x: x.strip(), description.split('\n') if description else [])]
        lines = [re.sub(r'\s+', ' ', l) for l in lines]

        category = ''
        entry = ''

        i = 0
        while i < len(lines):
            if re.match(r'(?i)^[>*_ ]*change\s*log\s*category', lines[i]):
                i += 1
                if i >= len(lines):
                    break
                # Can have one empty line between header and the category
                # itself. Filter it out.
                if not lines[i]:
                    i += 1
                    if i >= len(lines):
                        break
                category = re.sub(r'^[-*\s]*', '', lines[i])
                i += 1

                # Should not have more than one category. Require empty line
                # after the first found category.
                if i >= len(lines):
                    break
                if lines[i]:
                    second_category = re.sub(r'^[-*\s]*', '', lines[i])
                    result_status = "More than one changelog category specified: '" + category + "', '" + second_category + "'"
                    return result_status[:140]

            elif re.match(r'(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)', lines[i]):
                i += 1
                # Can have one empty line between header and the entry itself. Filter it out.
                if i < len(lines) and not lines[i]:
                    i += 1
                # All following lines until empty one are the changelog entry.
                entry_lines = []
                while i < len(lines) and lines[i]:
                    entry_lines.append(lines[i])
                    i += 1
                entry = ' '.join(entry_lines)
                # Don't accept changelog entries like '...'.
                entry = re.sub(r'[#>*_.\- ]', '', entry)
            else:
                i += 1

        if not category:
            return 'Changelog category is empty'

        # Filter out the PR categories that are not for changelog.
        if re.match(r'(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)', category):
            return ''

        if not entry:
            return "Changelog entry required for category '{}'".format(category)

        return ''

    def get_best_token(self):
        """Pick the GitHub token with the largest remaining REST rate limit.

        Every vault key listed in the ``github_token_key`` parameter is
        resolved to a token and its remaining rate limit is queried. The key
        of the winning token is remembered in ``self.best_github_token_key``
        (it is later handed to child tasks); when no token has any quota
        left, this attribute keeps its default value.

        Returns:
            The token with the largest positive remaining rate limit. If no
            token has quota left, the token of the last listed key. If no
            keys are configured at all, the token stored under the default
            vault key.
        """
        max_rest_rate = 0
        best_token = ''
        last_token = None
        self.best_github_token_key = "clickhouse-robot-github-token"
        for vault_key in self.Parameters.github_token_key:
            last_token = sdk2.Vault.data(vault_key)
            gh_helper = GithubHelper(last_token)
            rate_limit = gh_helper.get_rest_api_limit()
            logging.info(
                "Remaining rate limit for '%s' is %s (max %s)",
                vault_key, rate_limit, max_rest_rate)
            if rate_limit > max_rest_rate:
                max_rest_rate = rate_limit
                best_token = last_token
                self.best_github_token_key = vault_key

        if best_token:
            return best_token

        if last_token is None:
            # No vault keys are configured. Previously this path crashed with
            # an UnboundLocalError; fall back to the default key, which is
            # also the one reported in self.best_github_token_key.
            logging.info(
                "No GitHub token keys configured, falling back to '%s'",
                self.best_github_token_key)
            return sdk2.Vault.data(self.best_github_token_key)

        return last_token

    def shuffle_checks_with_order(self, checks):
        baskets = {}
        for check_name, check in checks.iteritems():
            check_order = check.order()
            if check_order not in baskets:
                baskets[check_order] = []
            baskets[check_order].append((check_name, check))

        result = []
        orders = baskets.keys()
        orders.sort()
        for order in orders:
            random.shuffle(baskets[order])
            result += baskets[order]

        return result

    def run_fake_checks(self, pr_infos):
        """Publish the "Description check" status for the given PRs.

        PRs labeled 'release' or 'pr-backport' are skipped. For every other
        PR the description is validated with ``check_pr_description`` and a
        commit status is created when none exists yet or when the existing
        one no longer reflects the result. This includes the transition from
        "failure" back to "success" after the description has been fixed.

        Args:
            pr_infos: iterable of PR info objects exposing ``labels``,
                ``statuses``, ``pygithub_pr`` and ``pygithub_commit``.
        """
        for pr_info in pr_infos:
            if 'release' in pr_info.labels or 'pr-backport' in pr_info.labels:
                continue

            # Truncated to fit the length limit of a status description.
            description_report = self.check_pr_description(pr_info.pygithub_pr)[:139]
            if description_report:
                state, description = "failure", description_report
            else:
                state, description = "success", "Description OK"

            commit_status = pr_info.statuses.get(self.DESCRIPTION_CHECK_TEXT, None)
            if commit_status and commit_status.state == state and commit_status.description == description:
                # The published status is already up to date.
                continue

            pr_info.pygithub_commit.create_status(
                state=state, context=self.DESCRIPTION_CHECK_TEXT, description=description, target_url=PR_DESCRIPTION_URL)

    def finish_markers(self, pr_infos):
        """Update the pending marker check of every PR.

        For a PR whose marker check is pending, the set of tasks that have to
        run is compared with the statuses already present on the commit. When
        nothing is missing the marker check becomes "success"; when at most
        three tasks are missing, their names are shown in the pending marker
        description. Otherwise the marker is left untouched.
        """
        for pr_info in pr_infos:
            logging.info("Checking for finish PR #%s @ %s", str(pr_info.number), pr_info.sha)

            # Marker check can only go to success from pending. Ensure this.
            marker = pr_info.statuses.get(MARKER_CHECK_TEXT)
            if not (marker and marker.state == 'pending'):
                logging.info("PR #{} @ {} doesn't have pending Marker check".format(pr_info.number, pr_info.sha))
                continue

            # Statuses currently present on the commit.
            reported_checks = set(pr_info.statuses.keys())

            # Statuses we do not care about: the marker itself, the patcher
            # (might have a message about failing to clone submodules, this is
            # OK) and the Yandex synchronization checks started elsewhere.
            ignored_github_checks = set([
                MARKER_CHECK_TEXT,
                ClickhousePullRequestPatcher.get_context_name(),
            ])
            ignored_github_checks.update(
                name for name in reported_checks if name.startswith('Yandex'))
            reported_checks = reported_checks - ignored_github_checks

            # Tasks that are not expected to report: the patcher (it doesn't
            # publish the status to GitHub) and everything that does not have
            # to run for this PR.
            skipped_tasks = set([
                ClickhousePullRequestPatcher.get_context_name(),
            ])
            for context_name, task_class in ON_COMMIT_TASKS.items():
                verdict = task_class.need_to_run(pr_info)
                logging.info(
                    "Task %s for PR %s: need to run %s, description '%s', post statuses %s",
                    context_name, pr_info.number, verdict.need_to_run,
                    verdict.description, verdict.post_statuses)
                if not verdict.need_to_run:
                    skipped_tasks.add(context_name)

            expected_tasks = set(ON_COMMIT_TASKS.keys()) - skipped_tasks

            waiting = expected_tasks - reported_checks
            unexpected = reported_checks - expected_tasks
            logging.info("Waiting for '{}', unexpected '{}', ignored GitHub checks '{}', ignored tasks '{}'".format(waiting, unexpected, ignored_github_checks, skipped_tasks))

            if not waiting:
                pr_info.pygithub_commit.create_status(state="success", context=MARKER_CHECK_TEXT, description="All checks started", target_url=MARKER_CHECK_URL)
            elif len(waiting) <= 3:
                # Show what we're waiting for.
                pending_description = ("Waiting to start " + ', '.join(waiting))[:140]
                pr_info.pygithub_commit.create_status(state="pending", context=MARKER_CHECK_TEXT, description=pending_description, target_url=MARKER_CHECK_URL)

    # Returns whether we should process more checks or it's enough for this run
    # of the trigger. Used to keep the trigger latency down, by limiting the
    # number of tasks started per one trigger run.
    def process_check_for_pr(self, repo, check_context, task_type, pr_info):
        """Start the task of one check for a PR commit, if it has to be started.

        Nothing is started when the check already has a final status, is
        recently pending, does not need to run, lacks resources or docker
        images, or when a suitable task already exists for the commit.

        Args:
            repo: repository object, passed on to ``task_type.get_resources``
                and to the docker image helper.
            check_context: GitHub status context name of the check.
            task_type: task class implementing the check.
            pr_info: PR info object (``number``, ``sha``, ``labels``,
                ``statuses``, ``pygithub_pr``, ``pygithub_commit``).

        Returns:
            False when more than 1000 tasks have been started in this trigger
            run (the caller should stop processing checks), True otherwise.
        """
        now = datetime.datetime.utcnow()
        logging.info("Processing '%s' for PR #%s @ %s", check_context, pr_info.number, pr_info.sha)

        if check_context in pr_info.statuses:
            status = pr_info.statuses[check_context]
            if status.state in ("failure", "success"):
                logging.info("Check '{}' already completed for PR #{} @ {} with status '{}'".format(check_context, pr_info.number, pr_info.sha, status.state))
                return True
            elif status.state == "pending":
                if now - status.updated_at < datetime.timedelta(hours=3):
                    logging.info("Check '{}' is recently pending (since {}, now is {}), will not recheck for PR #{} @ {}'".format(
                        check_context, status.updated_at, now, pr_info.number, pr_info.sha))
                    return True
                logging.info("Check '{}' is pending but too old (since {}, now is {}), will recheck for PR #{} @ {}'".format(
                    check_context, status.updated_at, now, pr_info.number, pr_info.sha))
            # exception status, meaning we have to rerun the check

        logging.info("Checking, do we need this task to run")
        need_to_run_desc = task_type.need_to_run(pr_info)

        if need_to_run_desc.need_to_run:
            logging.info("Need to run task %s for commit %s from PR %s", check_context, pr_info.sha, pr_info.number)
        else:
            logging.info("Not running task %s for commit %s from PR %s because %s", check_context, pr_info.sha, pr_info.number, need_to_run_desc.description)
            # We won't post the green status to GitHub for most of the tasks
            # we didn't run, because it is confusing. The user can see that all
            # checks were started from the marker check description. We make an
            # exception for flaky checks, because the rule for them is less
            # obvious.
            if need_to_run_desc.post_statuses:
                pr_info.pygithub_commit.create_status(state="success", context=check_context, description=need_to_run_desc.description[:140])
            return True

        if not task_type.get_resources(pr_info.pygithub_commit, repo, pr_info.pygithub_pr):
            logging.info("Cannot find resources for task %s", task_type.get_context_name())
            return True

        if task_type.get_images_names():
            # We are migrating from yandex repo to clickhouse repo.
            # For migration period we may have images both in yandex repo and in clickhouse repo
            try:
                changed_images_for_pr, dockerhub_repo_name = self.docker_images_helper.get_changed_images_for_pr(repo, pr_info)
            except Exception as ex:
                # The PR is marked as too old: a failed marker check with this
                # description makes should_run_checks_for_pr() skip the PR.
                logging.info("Got exception during parsing images %s", ex)
                pr_info.pygithub_commit.create_status(state="failure", context=MARKER_CHECK_TEXT, description=TOO_OLD_PR_DESCR)
                return True

            if changed_images_for_pr is None:
                logging.info("Repo still not downloaded, cannot check docker image for task %s", task_type.get_context_name())
                return True

            # Wait for the updated versions of docker images to become ready.
            # We don't do this for Fast Test, because we must run it as soon
            # as possible. The drawback is that it will always use the image
            # from master. If you change this, don't forget to also change
            # the Push To Dockerhub task not to wait for Fast Test.
            if dockerhub_repo_name == 'yandex':
                logging.info("Still old dockerhub yandex/ repo")
                images = task_type.get_images_names()
            else:
                logging.info("New clickhouse/ repo")
                images = get_new_clickhouse_images_names(task_type.get_images_names())

            # Trying new images first and then fallback to older images
            docker_images_with_versions = self.docker_images_helper.get_changed_images_with_versions(
                images, changed_images_for_pr, pr_info.pygithub_pr, pr_info.pygithub_commit, task_type.get_context_name())

            if docker_images_with_versions is None:
                logging.info("Docker images are not ready for task '{}' for PR #{} @ {}".format(check_context, pr_info.number, pr_info.sha))
                return True
        else:
            docker_images_with_versions = {}

        logging.info("Docker images versions %s", docker_images_with_versions)

        # Check if there are any tasks already running for this check for this PR.
        # We also have to check for SUCCESS task status, because the task might have
        # finished after we checked the GitHub check status.
        tasks = sdk2.Task.find(
            task_type,
            input_parameters={"pull_request_number": pr_info.number, "commit_sha": pr_info.sha},
            status=(ctt.Status.EXECUTING, ctt.Status.WAIT_TASK, ctt.Status.ENQUEUED, ctt.Status.ENQUEUING, ctt.Status.ASSIGNED, ctt.Status.FINISHING, ctt.Status.SUCCESS),
            children=True,
        ).limit(1)

        logging.info("Found %ld tasks with same pull request and commit sha", tasks.count)

        if tasks.count > 0:
            task = tasks.first()
            logging.info("Task '{}' id {} status {} updated at {} now {} already exists for PR #{} @ {}".format(
                check_context, task.id, task.status, task.updated, now, pr_info.number, pr_info.sha))
            # Some broken tasks might show "success" status, but actually fail
            # with an error. This shouldn't happen, but it happens and we need a
            # way to restart them. If a task is "succeeded" for a long time already,
            # but didn't post a status to github, and it had to post it, restart
            # it. In other cases, wait for it.
            assert need_to_run_desc.need_to_run
            if (task.status != ctt.Status.SUCCESS
                    or not need_to_run_desc.post_statuses
                    or task.updated > now - datetime.timedelta(hours=1)):
                logging.info("Will wait for the existing task")
                return True

        logging.info("Starting task of type '%s' for commit %s and pull request №%ld", check_context, pr_info.sha, pr_info.number)

        priority = task_type.sandbox_task_priority()
        if 'pr-backport' in pr_info.labels:
            # Backport PRs are a secondary, almost non-interactive workflow,
            # so we don't care about their check latency.
            priority = ctt.Priority(ctt.Priority.Class.SERVICE, ctt.Priority.Subclass.LOW)

        # The new task is created as a child of this trigger task.
        task = task_type(
            self,
            description="Child of ClickhousePullRequestTrigger task {}".format(self.id),
            github_token_key=self.best_github_token_key,
            github_repo=self.Parameters.github_repo,
            s3_access_key_id=self.Parameters.s3_access_key_id,
            s3_access_key=self.Parameters.s3_access_key,
            docker_images_with_versions=docker_images_with_versions,
            commit_sha=pr_info.sha,
            pull_request_number=pr_info.number,
            priority=priority
        )
        logging.info("Created task object")

        task.enqueue()
        logging.info("Enqueued the task")

        if task.post_statuses() == PostStatuses.ALWAYS:
            task.update_status(text="Started", url=get_nda_task_url(task.id), commit=pr_info.pygithub_commit)
            logging.info("Pending status created")

        # Limit the number of tasks started per trigger, so that it doesn't run
        # for too long, and high priority tasks are started soon for updated PRs.
        # NOTE(review): self.started_tasks is not initialized in this method;
        # it is expected to be set by the caller before the first call.
        self.started_tasks += 1
        if self.started_tasks > 1000:
            logging.info("Won't start more tasks in this trigger run to reduce latency (started {} tasks already)".format(self.started_tasks))
            return False

        return True

    def _process_check_star(self, arguments):
        return self.process_check(*arguments)

    def on_execute(self):
        """Scan recently updated open PRs and start the on-commit checks for them.

        Steps, in order:

        1. Enter a virtual environment (installing ``pygithub`` and ``boto3``
           via pip when ``binary_executor_release_type`` is ``"none"``) and
           create the GitHub and docker image helpers.
        2. Return early, writing the reason into the task description, if the
           GitHub rate limit check fails.
        3. Walk open PRs from the most recently updated one, stopping at the
           first PR updated before the cutoff, which is
           ``check_commit_time_window`` hours before now.
        4. Skip PRs labeled neither 'release' nor 'jepsen-test'; for the rest,
           update the 'Marker check' commit status according to
           ``should_run_checks_for_pr`` and collect the PRs that should be
           checked.
        5. For the collected PRs (in random order): run the fake checks, start
           the pull request patcher task, then start the remaining checks from
           ``ON_COMMIT_TASKS`` for the PRs whose changed files are known.
        6. Call ``finish_markers`` for all collected PRs.
        """
        venv = environments.VirtualEnvironment()

        # Everything below runs inside the virtual environment context.
        with venv as venv:
            if self.Parameters.binary_executor_release_type == "none":
                # Non-binary run: the required packages are installed into the
                # virtual environment with pip.
                environments.PipEnvironment('pygithub', venv=venv).prepare()
                environments.PipEnvironment('boto3', venv=venv).prepare()
                logging.debug("Set pip environment for nonbinary release")

            # Set the root logger level to INFO for the rest of the run.
            logging.getLogger().setLevel(logging.INFO)
            github_token = self.get_best_token()
            self.gh_helper = GithubHelper(github_token)
            self.docker_images_helper = DockerImageHelper(self.gh_helper)
            # Nothing is processed if the rate limit check fails; the message
            # is exposed through the task description instead.
            rate_is_ok, message = self.gh_helper.check_rate_limit()
            if not rate_is_ok:
                self.Parameters.description = "[can't run :(] " + message
                return

            # Reset to an empty dict at the start of every run.
            self.changed_images_for_prs = {}

            repo = self.gh_helper.get_repo(self.Parameters.github_repo)

            # NOTE(review): datetime.now() is a naive local time; the comparison
            # with `updated_at` below presumes both are expressed in the same
            # timezone -- confirm what PyGithub returns here.
            date_from = datetime.datetime.now() - datetime.timedelta(hours=self.Parameters.check_commit_time_window)
            logging.info("Will search for pull requests since %s", date_from.strftime("%Y-%m-%d %H:%M:%S"))

            # PRs are requested sorted by update time, newest first, so the
            # loop can stop at the first PR that is older than the cutoff.
            pulls_open = repo.get_pulls(state="open", sort="updated", direction="desc")
            pr_infos = []
            for pygithub_pr in pulls_open:
                if pygithub_pr.updated_at < date_from:
                    logging.info("PR #{} is updated earlier ({}) than cutoff date ({}), will not process more PRs".format(
                        pygithub_pr.number, pygithub_pr.updated_at, date_from))
                    break

                logging.info("Found PR #{}".format(pygithub_pr.number))

                pygithub_commit = repo.get_commit(pygithub_pr.head.sha)
                logging.info("Downloaded latest commit {} for PR #{}".format(pygithub_pr.head.sha, pygithub_pr.number))

                pr_info = PRInfo(pygithub_pr, pygithub_commit)

                # Only PRs labeled 'release' or 'jepsen-test' are processed.
                # All other PRs are skipped early, before the Marker Check
                # status is read or updated for them.
                # NOTE(review): this comment used to describe skipping
                # cherry-pick PRs, which is not what the condition below
                # tests -- confirm the label filter is the intended behavior.
                if 'release' not in pr_info.labels and 'jepsen-test' not in pr_info.labels:
                    logging.info("Will ignore all non-release PRs #{}".format(pr_info.number))
                    continue

                should_run, reason = should_run_checks_for_pr(pr_info)

                logging.info("Should {} run checks for PR #{}: '{}'".format(
                    '' if should_run else 'not ', pr_info.number, reason))

                # Describe in GitHub 'Marker check' status whether we're going
                # to check the PR, and why. This is not just an informational
                # message, but also influences what this trigger does -- e.g. we
                # stop the tasks only if it was pending.
                # Empty state turns into 'failure' or 'pending'; the 'failure'
                # and 'pending' states can turn into each other depending on
                # conditions; and 'success' is a final state we don't touch (set
                # elsewhere). The 'exception' state shouldn't happen, we also
                # don't touch it.
                marker_check = pr_info.statuses.get(MARKER_CHECK_TEXT)
                if not should_run:
                    # Stop the tasks if the check was pending.
                    if marker_check and marker_check.state == "pending":
                        self.stop_checks_for_pr(pr_info.number, reason)

                    # Update the status to failure if it was empty or pending.
                    # Don't touch other statuses to avoid confusion.
                    # The description is truncated to 140 characters.
                    if not marker_check or marker_check.state == "pending":
                        pr_info.pygithub_commit.create_status(state="failure", context=MARKER_CHECK_TEXT, description=reason[:140], target_url=MARKER_CHECK_URL)
                else:
                    # 1) Publish the marker check status if it's not present yet.
                    # 2) Renew the marker check status if the check was stopped,
                    #    but the conditions changed -- e.g. the 'force tests'
                    #    tag was added.
                    if not marker_check or marker_check.state == "failure":
                        pr_info.pygithub_commit.create_status(state="pending", context=MARKER_CHECK_TEXT, description="Starting checks ({})".format(reason)[:140], target_url=MARKER_CHECK_URL)
                        # Presumably the set of SHAs names the commits whose
                        # checks must be left alone, so only checks for older
                        # commits of this PR are stopped -- verify against
                        # stop_checks_for_pr.
                        self.stop_checks_for_pr(pr_info.number, 'Saving sandbox quota', set([pr_info.sha]))

                # Only PRs that should be checked are collected for task startup.
                if not should_run:
                    continue

                pr_infos.append(pr_info)

            if not pr_infos:
                logging.info("No PRs to process")
                return

            # Shuffle the PRs so that we always start from a different point and still
            # make progress if something fails consistently. Same for the order of
            # individual checks -- the dependencies are handled by their
            # get_resources() methods.
            random.shuffle(pr_infos)
            logging.info("Found pull requests: " + ', '.join([str(pr.number) for pr in pr_infos]))
            checks = self.shuffle_checks_with_order(ON_COMMIT_TASKS)

            logging.info("Running fake checks on PRs")
            self.run_fake_checks(pr_infos)
            logging.info("Run of fake checks finished")

            # Counter of tasks started by this trigger run; it starts from zero
            # before any real check is processed.
            self.started_tasks = 0

            # The next special kind of task is Fast Test, which has no dependencies
            # and which we try to start as soon as possible.
            # (Currently disabled.)
            # for pr_info in pr_infos:
            #     self.process_check_for_pr(repo, ClickhouseFastTest.get_context_name(), ClickhouseFastTest, pr_info)

            # The next special task is Repo Cloner -- it prepares the snapshot of
            # repository that is used for all other tasks.
            # The return value is ignored here, so this loop always visits
            # every collected PR.
            for pr_info in pr_infos:
                self.process_check_for_pr(repo, ClickhousePullRequestPatcher.get_context_name(), ClickhousePullRequestPatcher, pr_info)

            # No point in running any other tasks for PRs that don't yet have
            # the repository snapshot. We can tell it by that the list of changed
            # files is not known for the PR.
            pr_infos_with_repo = []
            for pr_info in pr_infos:
                if pr_info.changed_files is None:
                    logging.info("No repository snapshot for PR #{} @ {}".format(pr_info.number, pr_info.sha))
                else:
                    pr_infos_with_repo.append(pr_info)

            # Next, run all other tasks for commits that already have the repository
            # snapshot.
            # A falsy result from process_check_for_pr aborts both loops: no
            # further checks are processed in this run.
            should_continue = True
            for check_context, TaskType in checks:
                for pr_info in pr_infos_with_repo:
                    if not self.process_check_for_pr(repo, check_context, TaskType, pr_info):
                        should_continue = False
                        break

                if not should_continue:
                    break

            # Called for all collected PRs, including those without a
            # repository snapshot and regardless of an early stop above.
            self.finish_markers(pr_infos)
