import os
import logging
import tempfile
import json
import httplib
import hashlib
import yaml
import re
from sandbox import sdk2
from sandbox.projects.stop_leak.base_hector_task.hector_git import BaseHectorGitTask
from sandbox.projects.stop_leak.common import sectools

# Host name of the discovery service (passed to HTTPSConnection, so it is a
# bare host despite the "URL" in the name).
APPSEC_DISCOVERY_URL = "appsec-discovery.sec.yandex-team.ru"
# Request path on that host that the collected objects are POSTed to.
APPSEC_DISCOVERY_ROUTE = "/api/for_collectors/add"
# File name of the parser script; resolved relative to this module's directory.
SCRIPT_NAME = "search_for_actions.py"
# Value sent as "project_id" in the discovery payload.
DISCOVERY_PROJECT_ID = 135
# First "/"-separated segment of a `uses` reference that is trusted no matter
# which ref it points at ("." covers references starting with "./").
SAFE_ACTION_PROVIDERS = [
    "actions", "docker", "github", "golangci", "bazelbuild", "bufbuild", "."
]

# Forty lowercase hexadecimal characters, i.e. a full commit SHA.
SHA_REGEX = r"[a-f0-9]{40}"


def is_unsafe(action):
    """Tell whether a workflow ``uses`` reference should be reported.

    A reference is safe when its first path segment is listed in
    ``SAFE_ACTION_PROVIDERS`` or when the ref after ``@`` is exactly a full
    commit SHA. Everything else (tags, branches, a missing ref) is unsafe.

    Args:
        action: the ``uses`` string, e.g. ``"owner/repo@v1"``.

    Returns:
        True if the reference is neither trusted nor pinned to a SHA.
    """
    provider = action.split("/")[0]
    if provider in SAFE_ACTION_PROVIDERS:
        return False
    # Split on "@" before looking at path segments so that refs containing
    # "/" (branch names) are kept whole; a missing "@" yields an empty ref,
    # which is treated as not pinned instead of raising on None.
    ref = action.partition("@")[2]
    # \Z makes the whole ref match, so a SHA-like prefix followed by extra
    # characters is not mistaken for a pinned commit.
    return re.match(SHA_REGEX + r"\Z", ref) is None


def dict_hash(obj):
    """Return the hex MD5 digest of ``obj`` serialized as key-sorted JSON.

    Sorting the keys makes the digest independent of dict ordering.
    """
    canonical = json.dumps(obj, sort_keys=True)
    digest = hashlib.md5(canonical.encode())
    return digest.hexdigest()


class GHWorkflow:
    """A single workflow file read from disk and parsed as YAML.

    Attributes:
        filename: path the workflow was read from.
        repo_name: repository name supplied by the caller.
        name: file name without directory and extension.
        content: raw text of the file.
        obj: result of ``yaml.safe_load(content)``.
    """

    def __init__(self, filename, repo_name):
        """Read ``filename`` and parse it.

        Errors from ``open`` or ``yaml.safe_load`` propagate to the caller.
        """
        self.filename = filename
        self.repo_name = repo_name
        self.name = os.path.splitext(os.path.basename(filename))[0]
        with open(filename, 'r') as f:
            self.content = f.read()
        self.obj = yaml.safe_load(self.content)

    def unsafe_external_actions(self):
        """Collect every step whose ``uses`` value is flagged by ``is_unsafe``.

        Parts of the document that do not have the expected shape (a
        non-mapping top level, missing or null ``jobs``/``steps``,
        non-mapping jobs or steps) are skipped rather than raising, so one
        odd file does not abort processing of the others.

        Returns:
            A list of dicts with the keys ``workflow_name``, ``container``,
            ``runs-on``, ``step_name``, ``job_name``, ``action`` and
            ``repo_name``.
        """
        findings = []
        # safe_load returns None for an empty document and may return a
        # scalar or list for YAML that is not a mapping.
        if not isinstance(self.obj, dict):
            return findings
        jobs = self.obj.get("jobs")
        if not isinstance(jobs, dict):
            return findings
        for job_name, job in jobs.items():
            if not isinstance(job, dict):
                continue
            steps = job.get("steps")
            if not isinstance(steps, list):
                continue
            for step in steps:
                if not isinstance(step, dict):
                    continue
                uses = step.get("uses")
                if not uses or not is_unsafe(uses):
                    continue
                findings.append({
                    "workflow_name": self.name,
                    # NOTE(review): "container" is looked up on the step;
                    # GitHub's workflow syntax documents it at job level,
                    # so this is presumably always None -- confirm before
                    # changing, since it feeds the object's uuid hash.
                    "container": step.get("container"),
                    "runs-on": job.get("runs-on"),
                    "step_name": step.get("name"),
                    "job_name": job_name,
                    "action": uses,
                    "repo_name": self.repo_name,
                })
        return findings

    def __str__(self):
        """Return the workflow name followed by its raw content."""
        return "{} content:\n{}\n".format(self.name, self.content)


class GHWorkflowFolder:
    """The workflow files found directly inside one folder of a repository.

    Attributes:
        repo_name: repository name supplied by the caller.
        workflows: ``GHWorkflow`` objects for every file that could be loaded.
    """

    def __init__(self, repo_name, actions_folder):
        """Load every regular file in ``actions_folder`` as a workflow.

        Subdirectories are ignored. A file that cannot be read or parsed is
        logged and skipped so the remaining files are still processed.

        Raises:
            OSError: if ``actions_folder`` itself cannot be listed.
        """
        self.repo_name = repo_name
        self.workflows = []
        for entry in os.listdir(actions_folder):
            path = os.path.join(actions_folder, entry)
            if not os.path.isfile(path):
                continue
            try:
                self.workflows.append(GHWorkflow(path, self.repo_name))
            except Exception:
                # Best effort on purpose, but only for ordinary errors:
                # KeyboardInterrupt/SystemExit must still propagate.
                logging.warning("Unable to parse %s from %s action",
                                entry, repo_name, exc_info=True)

    def print_status(self):
        """Print the repository name and each loaded workflow to stdout."""
        print("Repo {} has following workflows:".format(self.repo_name))
        for workflow in self.workflows:
            print(workflow)

    def get_discovery_objects(self):
        """Build one discovery object per unsafe action in every workflow.

        Returns:
            A list of dicts with the keys ``uuid`` (hash of the finding),
            ``type``, ``value``, ``fields`` (the finding itself) and
            ``ancestors``.
        """
        objects = []
        for workflow in self.workflows:
            for action in workflow.unsafe_external_actions():
                objects.append({
                    "uuid": dict_hash(action),
                    "type": "resource",
                    "value": "{} in {}".format(action["action"],
                                               self.repo_name),
                    "fields": action,
                    "ancestors": [],
                })
        return objects


class VertisGHCheker(BaseHectorGitTask):
    """Task that runs hector over GitHub projects and reports unsafe actions.

    ``on_execute`` runs the hector command with the parser script appended,
    then reads the directory the script was given (``results_tmp``), turns
    the workflow files found there into discovery objects and POSTs them to
    the discovery service.
    """
    vcs = 'github'
    hector_command = 'gh'
    use_ssh = True
    parse_stdout = False
    # hardcoded
    gh_url = "https://api.github.com/"
    results_path = ''
    script_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), SCRIPT_NAME)
    # Filled in on first use by _results_tmp_dir(); creating the directory
    # here would run mkdtemp on every import of the module and share one
    # directory between all task instances.
    results_tmp = ''
    # Seconds to wait on the discovery service before giving up.
    discovery_timeout = 60

    class Parameters(BaseHectorGitTask.Parameters):
        repository_name = sdk2.parameters.String('Repository name', default='')
        projects_name = sdk2.parameters.String('Project name',
                                               default='YandexClassifieds')
        github_url = sdk2.parameters.String('Github base url',
                                            default='https://api.github.com/')

    def _results_tmp_dir(self):
        """Return the scratch directory for parser output, creating it once."""
        if not self.results_tmp:
            self.results_tmp = tempfile.mkdtemp()
        return self.results_tmp

    def _hector_env(self):
        """Return the base environment plus ``GH_TOKEN`` read from the vault."""
        env = super(VertisGHCheker, self)._hector_env()
        logging.info('setting ghtoken env')
        env['GH_TOKEN'] = sdk2.Vault.data(self.owner, 'STOP_LEAK_GITHUB_TOKEN')
        return env

    def _prepare_env(self):
        """Delegate to the base class; no extra preparation is done here."""
        super(VertisGHCheker, self)._prepare_env()

    def _hector_command(self):
        """Build the hector command line and the parser command line.

        The parser command returned by the base class is discarded and
        replaced with an invocation of the local script, which receives the
        scratch directory as its only argument.

        Returns:
            A ``(hector_cmd, parser_cmd)`` tuple of argument lists.
        """
        hector_cmd, _ = super(VertisGHCheker, self)._hector_command()
        parser_cmd = ['--', 'python', self.script_path, self._results_tmp_dir()]
        hector_cmd.append('--projects=' + self.Parameters.projects_name)
        hector_cmd.append('--gh-url=' + self.Parameters.github_url)
        hector_cmd.append('--size-limit=0')
        hector_cmd.append('--private-only')
        return hector_cmd, parser_cmd

    def _submit_to_discovery(self):
        """POST the unsafe actions found in the scratch directory.

        Every subdirectory of the scratch directory is treated as one
        repository (named after the subdirectory). The request is sent even
        when nothing was found. A non-2xx answer is logged as a warning and
        does not raise; connection errors and timeouts propagate.
        """
        output_path = self._results_tmp_dir()
        objects = []
        for repo in os.listdir(output_path):
            repo_path = os.path.join(output_path, repo)
            # GHWorkflowFolder lists its argument, so a stray regular file
            # here would raise; only directories are repositories.
            if not os.path.isdir(repo_path):
                continue
            folder = GHWorkflowFolder(repo, repo_path)
            folder.print_status()
            objects.extend(folder.get_discovery_objects())

        headers = {'Content-type': 'application/json'}
        json_foo = json.dumps([{
            "project_id": DISCOVERY_PROJECT_ID,
            "objects": objects,
        }])
        logging.info("submitting {} to discovery".format(json_foo))
        connection = httplib.HTTPSConnection(APPSEC_DISCOVERY_URL,
                                             timeout=self.discovery_timeout)
        try:
            connection.request('POST', APPSEC_DISCOVERY_ROUTE, json_foo, headers)
            response = connection.getresponse()
            logging.info("Response {}".format(response.status))
            if not 200 <= response.status < 300:
                logging.warning("discovery did not accept the submission: %s %s",
                                response.status, response.read())
        finally:
            # Release the socket even when the request or response fails.
            connection.close()

    def on_execute(self):
        """Run hector with the parser script, then submit what it found.

        Steps: prepare the environment, download hector, create the results
        and work directories (the latter on the ramdrive when one is
        available), resolve the optional state paths, run the combined
        command (under the SSH key when ``use_ssh`` is set) and finally post
        the findings to discovery.
        """
        self._prepare_env()
        self.hector_path = sectools.download('hector',
                                             self.Parameters.hector_version)
        self.results_path = str(self.path('results'))
        os.mkdir(self.results_path)
        logging.info('results path: ' + self.results_path)

        if self.ramdrive:
            self.work_path = str(self.ramdrive.path / 'work')
        else:
            self.work_path = str(self.path('work'))

        os.mkdir(self.work_path)
        logging.info('working path: ' + self.work_path)
        self.state_in_path = None
        if self.Parameters.load_state:
            self.state_in_path = self._load_state()
        self.state_out_path = None
        if self.Parameters.save_state:
            self.state_out_path = self._save_state_path()
        hector_cmd, parser_cmd = self._hector_command()
        cmd = hector_cmd + parser_cmd
        logging.info('Results cmd =' + ' '.join(cmd))
        if self.use_ssh:
            with sdk2.ssh.Key(self, self.owner, 'STOP_LEAK_SSH_KEY'):
                self._run_hector(cmd)
        else:
            self._run_hector(cmd)
        logging.info('saving resources')
        self._submit_to_discovery()
