# coding: utf-8
import os
import json
import logging
import time

import sandbox.common.types.client as ctc

from sandbox import sdk2
from sandbox.sdk2.helpers import subprocess as sp
from sandbox.common.types.misc import DnsType
from sandbox.common.errors import VaultError

from sandbox.projects.stop_leak.common.yahec import HecSender
from sandbox.projects.stop_leak.common import sectools
from sandbox.projects.stop_leak.common import http
from sandbox.projects.stop_leak.common.task_helper import find_state
from sandbox.projects.stop_leak.resource_types import HectorState, HectorResults


class BaseHectorTask(sdk2.Task):
    # Identifier of the scanned VCS: used to look up the latest state, to label
    # the state/results resources and copied into every parsed report.
    vcs = 'base'
    # Hector sub-command; passed right after the binary path on the command line.
    hector_command = 'version'
    # When true, '--use-ssh' is passed to hector and the run is wrapped into the
    # 'STOP_LEAK_SSH_KEY' ssh key context.
    use_ssh = False
    # When true, '--parse-stdout' is passed to hector.
    parse_stdout = True
    # The paths below are placeholders: on_execute() assigns the real values
    # before hector is started.
    hector_path = ''
    work_path = ''
    results_path = ''
    ant_path = ''
    state_in_path = ''
    state_out_path = ''
    # HTTP session used by compromise(); created in on_execute() only when the
    # 'compromized' parameter is set.
    ant_secret_session = None
    # Cache behind the staff_github_repos property (name-mangled, class-private).
    __staff_github_repos = []

    class Requirements(sdk2.Task.Requirements):
        # 6 << 10 == 6144; presumably MiB (i.e. 6 GiB) -- TODO confirm the unit
        ram = 6 << 10
        cores = 16
        # NOTE(review): presumably DNS64 is needed to reach IPv4-only hosts -- confirm
        dns = DnsType.DNS64
        # NOTE(review): looks like this limits execution to Linux clients -- confirm
        client_tags = ctc.Tag.Group.LINUX

    class Parameters(sdk2.Task.Parameters):
        # Version handed to sectools.download('hector', ...)
        hector_version = sdk2.parameters.String('Hector version', default='latest')
        # When false, on_execute() returns before anything is sent through HecSender
        store_results = sdk2.parameters.Bool('Store results in Splunk/YT', default=True)
        # When true, every reported secret is additionally passed to compromise()
        compromized = sdk2.parameters.Bool('Compromized secrets', default=False)
        # Selects the ant-secret flag: '--valid-only' when true, '--validate' otherwise
        valid_only = sdk2.parameters.Bool('Report only validated secrets', default=True)
        # Adds '--count-limit=100' to hector and suspends the task after the run
        debug_mode = sdk2.parameters.Bool('Enable debug mode', default=False)
        # Value of hector's '--concurrency' option
        concurrency = sdk2.parameters.Integer('Concurrency level', default=8)
        # When true, the latest state is looked up and passed as '--state-in'
        load_state = sdk2.parameters.Bool('Load hector state from resources', default=False)
        # When true, hector writes '--state-out' and it is stored as a HectorState resource
        save_state = sdk2.parameters.Bool('Save hector state to resources', default=False)
        # When true, the results directory is archived into a HectorResults resource
        save_to_res = sdk2.parameters.Bool('Save hector results to SB resources', default=False)
        with sdk2.parameters.Output():
            # Set by on_execute(): 'init_at' of the saved state, or the current
            # unix time when no state is saved
            state_id = sdk2.parameters.Integer('Latest since')

    def _prepare_env(self):
        pass

    def _hector_command(self):
        # Hector params
        hector_cmd = [
            self.hector_path,
            self.hector_command,
            '--concurrency=%d' % self.Parameters.concurrency,
            '--work=' + self.work_path,
            '--result=' + self.results_path
        ]
        if self.parse_stdout:
            hector_cmd.append('--parse-stdout')
        if self.use_ssh:
            hector_cmd.append('--use-ssh')
        if self.state_in_path:
            hector_cmd.append('--state-in=%s' % self.state_in_path)
        if self.state_out_path:
            hector_cmd.append('--state-out=%s' % self.state_out_path)
        if self.Parameters.debug_mode:
            hector_cmd.append('--count-limit=100')

        # Ant-secret params
        ant_secret_cmd = [
            '--',
            self.ant_path,
            # '--skip-version-check',
            '--status-code=0',
            '--num-threads=2',
            '--format=hector'
        ]

        if self.Parameters.valid_only:
            ant_secret_cmd.append('--valid-only')
        else:
            ant_secret_cmd.append('--validate')

        return hector_cmd, ant_secret_cmd

    def _hector_env(self):
        """Return the environment for the hector process.

        It is a copy of the current process environment, extended with
        ``ANT_INTERNAL_TOKEN`` when that secret can be read from the Vault;
        a missing secret is only logged.
        """
        process_env = dict(os.environ)
        try:
            internal_token = sdk2.Vault.data('STOP_LEAK_ANT_INTERNAL_TOKEN')
        except VaultError:
            logging.info("can't find AntSecret internal token, tun w/o it")
        else:
            process_env['ANT_INTERNAL_TOKEN'] = internal_token
        return process_env

    def _normalize_author(self, author):
        return author

    def _normalize_owner(self, author):
        return author

    def _parse_report(self, report, repo):
        problem = report.get('problem', {})
        additional = problem.get('additional', {})
        sha1 = ''
        if 'sha1' in additional:
            sha1 = additional['sha1']
            del additional['sha1']

        # backward compatibility
        if 'type' in problem:
            additional['secret_validator'] = problem['type']

        return dict(
            type=problem.get('type', None),
            secret=problem.get('secret', None),
            sha1=sha1,
            url=report.get('url', ''),
            private=repo.get('private', False),
            repo_name=repo.get('name', None),
            ignore=False,
            vcs=self.vcs,
            user=self._normalize_author(report.get('author', '')),
            state_id=self.Parameters.state_id,
            holders=self._normalize_owner(repo.get('owners', [])),
            additional=additional,
            secret_validated=problem.get('validated', False)
        )

    def _run_hector(self, cmd):
        """Run ``cmd`` with the hector environment, piping its output to the 'hector' process log.

        :param cmd: full argv list (hector arguments followed by ant-secret ones).
        :raises: whatever ``sp.check_call`` raises on a non-zero exit status.
        """
        hector_logger = logging.getLogger('hector')
        with sdk2.helpers.ProcessLog(self, logger=hector_logger) as process_log:
            sp.check_call(
                cmd,
                env=self._hector_env(),
                stdout=process_log.stdout,
                stderr=process_log.stderr
            )

    def _load_state(self):
        """Look up the latest stored state for this VCS, log its path and return it."""
        latest_state = find_state(self.vcs)
        logging.info('latest state path: %s', latest_state)
        return latest_state

    def _save_state_path(self):
        return str(self.path('hector_state.json'))

    def _save_state(self):
        """Register the state file as a HectorState resource and return its id.

        :return: the ``init_at`` value from the state JSON, or ``0`` when the
            file is empty or has no such key.
        """
        state_resource = HectorState(
            self,
            'Hector %s state' % self.vcs,
            self.state_out_path,
            state_type=self.vcs
        )
        sdk2.ResourceData(state_resource)

        with open(self.state_out_path, 'rt') as state_file:
            raw_state = state_file.read().strip()

        # An empty state file carries no timestamp
        if not raw_state:
            return 0
        return json.loads(raw_state).get('init_at', 0)

    def _save_to_res(self):
        """Pack the results directory into ``results.tar.gz`` and publish it.

        Nothing is done when the results directory is missing or contains no
        visible entries. The archive holds the directory's entries with paths
        relative to it and is registered as a HectorResults resource.

        :raises: whatever ``sp.check_call`` raises when ``tar`` fails.
        """
        if not os.path.isdir(self.results_path):
            return

        # Same selection the shell glob `*` made: hidden entries are left out.
        entries = sorted(
            name for name in os.listdir(self.results_path) if not name.startswith('.')
        )
        if not entries:
            return

        results_archive_path = str(self.path('results.tar.gz'))

        with sdk2.helpers.ProcessLog(self, logger=logging.getLogger('create results archive')) as pl:
            # Explicit argv + cwd instead of a shell string: paths with spaces
            # or shell metacharacters can no longer break or alter the command.
            sp.check_call(
                ['tar', 'cvzf', results_archive_path] + entries,
                cwd=self.results_path,
                stderr=pl.stdout,
                stdout=pl.stdout
            )
        sdk2.ResourceData(
            HectorResults(
                self,
                'Hector %s results' % self.vcs,
                results_archive_path,
                results_type=self.vcs
            )
        )

    @property
    def staff_github_repos(self):
        """List of ``(staff_login, github_account)`` pairs fetched from the Staff API.

        The GitHub account name is lower-cased. All result pages are walked by
        following ``links.next``; a non-empty result is cached on the instance
        and returned as-is on later accesses.
        """
        cached = self.__staff_github_repos
        if cached:
            return cached

        session = http.requests_retry_session()
        session.headers.update({'Authorization': 'OAuth ' + sdk2.Vault.data('STOP_LEAK_STAFF_TOKEN')})

        pairs = []
        next_url = 'https://staff-api.yandex-team.ru/v3/persons?_fields=login,accounts&accounts.type=github'
        while next_url:
            page = session.get(next_url).json()
            for person in page.get('result', []):
                login = person.get('login', 'unknown')
                # Keep only github accounts that actually carry a value
                pairs.extend(
                    (login, account.get('value').lower())
                    for account in person.get('accounts', [])
                    if account.get('type') == 'github' and account.get('value')
                )

            # No 'links' section or no 'next' link means this was the last page
            next_url = (page.get('links', None) or {}).get('next', None)

        self.__staff_github_repos = pairs
        return pairs

    def compromise(self, secret):
        """POST ``secret`` to the ant-secret 'touch-known' endpoint.

        The request goes through ``self.ant_secret_session`` and carries the
        VCS identifier and the task id as metadata.

        :param secret: value sent in the ``secret`` field of the JSON body.
        """
        send = self.ant_secret_session.post
        payload = {
            'secret': secret,
            'meta': {
                'vcs': self.vcs,
                'task_id': self.id,
            },
        }
        send('https://ant.sec.yandex-team.ru/api/v1/touch-known', json=payload)

    def on_execute(self):
        """Task entry point: run hector and publish what it found.

        Steps, in order:

        1. prepare the environment and download the hector / ant-secret binaries;
        2. create the results and work directories (work goes to the ramdrive
           when one is available);
        3. resolve the optional input/output state paths and run hector,
           inside the ssh key context when ``use_ssh`` is set;
        4. suspend in debug mode, optionally archive the results and save the
           state, then set the ``state_id`` output parameter;
        5. unless ``store_results`` is off, send every report from the
           ``*.json`` result files through HecSender and, when ``compromized``
           is set, pass the found secrets to ``compromise()``.
        """
        # --- tools -----------------------------------------------------------
        self._prepare_env()
        self.hector_path = sectools.download('hector', self.Parameters.hector_version)
        self.ant_path = sectools.download_secret_search()

        # --- directories -----------------------------------------------------
        self.results_path = str(self.path('results'))
        os.mkdir(self.results_path)
        logging.info('results path: ' + self.results_path)

        work_dir = (self.ramdrive.path / 'work') if self.ramdrive else self.path('work')
        self.work_path = str(work_dir)
        os.mkdir(self.work_path)
        logging.info('working path: ' + self.work_path)

        # --- state -----------------------------------------------------------
        self.state_in_path = None
        if self.Parameters.load_state:
            self.state_in_path = self._load_state()

        self.state_out_path = None
        if self.Parameters.save_state:
            self.state_out_path = self._save_state_path()

        # --- run hector ------------------------------------------------------
        hector_args, ant_secret_args = self._hector_command()
        cmd = hector_args + ant_secret_args
        if not self.use_ssh:
            self._run_hector(cmd)
        else:
            with sdk2.ssh.Key(self, self.owner, 'STOP_LEAK_SSH_KEY'):
                self._run_hector(cmd)

        if self.Parameters.debug_mode:
            self.suspend()

        if self.Parameters.save_to_res:
            self._save_to_res()

        # The saved state provides the id; without one the current time is used
        if self.state_out_path:
            state_id = self._save_state()
        else:
            state_id = int(time.time())
        self.Parameters.state_id = state_id

        if not self.Parameters.store_results:
            return

        # --- publish results -------------------------------------------------
        mark_compromized = self.Parameters.compromized
        if mark_compromized:
            self.ant_secret_session = http.requests_retry_session()
            self.ant_secret_session.headers.update({'X-Internal-Token': sdk2.Vault.data('STOP_LEAK_ANT_INTERNAL_TOKEN')})

        with HecSender(sdk2.Vault.data('STOP_LEAK_HEC_TOKEN')) as log:
            for file_name in os.listdir(self.results_path):
                if not file_name.endswith('.json'):
                    continue

                with open(os.path.join(self.results_path, file_name), 'rt') as results_file:
                    result = json.load(results_file)

                for report in result.get('result', []):
                    prepared = self._parse_report(report, result.get('repo', {}))
                    log.send(**prepared)
                    if not (mark_compromized and 'secret' in prepared):
                        continue

                    # TODO(buglloc): ugly!
                    # Fall back to the ssh fingerprint when there is no plain secret
                    secret = (
                        prepared.get('secret', None)
                        or prepared.get('additional', {}).get('ssh_fingerprint', None)
                    )
                    if secret:
                        self.compromise(secret)
                    else:
                        logging.info("can't find secret in ant-secret report: %s" % json.dumps(report))
