# coding: utf-8
import os
import json
import logging
import requests

import sandbox.common.types.client as ctc

from sandbox import sdk2
from sandbox.sandboxsdk import environments
from sandbox.sdk2.helpers import subprocess as sp

from sandbox.projects.stop_leak.common.yahec import HecSender
from sandbox.projects.stop_leak.common import sectools
from sandbox.projects.stop_leak.resource_types import AntSecretResults
from sandbox.projects.stop_leak.common import http


# VCS label attached to every report sent by the task and to the ``meta``
# of the "touch-known" requests.
VCS = 'arcadia'


class AuditArcadiaSec(sdk2.Task):
    # Task that runs the ant-secret scanner over an Arcadia trunk source
    # directory and, depending on the parameters, stores the raw output as a
    # resource, sends one report per found secret through HecSender and marks
    # the found secrets via the ant "touch-known" API.

    # HTTP session used by compromise(); created in on_execute() only when the
    # "compromized" parameter is enabled.
    ant_secret_session = None

    class Parameters(sdk2.Task.Parameters):
        # Passed to ant-secret verbatim as ``--excludes=<value>``.
        excludes = sdk2.parameters.String('Excludes', default='.git,.hg,.svn,node_modules,.venv,ut')
        # When disabled, on_execute() returns right after the scan (and the
        # optional resource publication) without sending any reports.
        store_results = sdk2.parameters.Bool('Send results to Splunk', default=True)
        # When enabled, every reported secret is also POSTed via compromise().
        compromized = sdk2.parameters.Bool('Compromise secrets', default=False)
        # When enabled, the task suspends itself right after the scan.
        debug_mode = sdk2.parameters.Bool('Enable debug mode', default=False)
        # When enabled, ``--not-ignore`` is passed to ant-secret.
        not_ignore = sdk2.parameters.Bool('Ignore ".secretsignore" files', default=False)
        # When enabled, the ant-secret output file is published as an
        # AntSecretResults resource.
        save_to_res = sdk2.parameters.Bool('Save ant-secret results in resources', default=False)
        # Selects ``--valid-only`` (enabled) or ``--validate`` (disabled).
        valid_only = sdk2.parameters.Bool('Report only validated secrets', default=True)

    class Requirements(sdk2.Task.Requirements):
        # pyparsing is imported lazily by get_parser() to parse OWNER() macros.
        environments = [
            environments.PipEnvironment('pyparsing', use_wheel=True)
        ]
        client_tags = ctc.Tag.Group.LINUX

    def compromise(self, secret):
        """POST ``secret`` to the ant "touch-known" endpoint.

        The request carries the VCS label and this task's id as ``meta``.
        Requires ``self.ant_secret_session`` to be initialised beforehand
        (on_execute() does that when the "compromized" parameter is set).
        The response is not inspected here.
        """
        self.ant_secret_session.post(
            'https://ant.sec.yandex-team.ru/api/v1/touch-known',
            json={
                'secret': secret,
                'meta': {
                    'vcs': VCS,
                    'task_id': self.id,
                }
            }
        )

    def on_execute(self):
        """Run ant-secret over the Arcadia source directory and process its output.

        Steps, in order: build the ant-secret command line from the task
        parameters, run it with stdout redirected to ``ant-secret.json``,
        optionally suspend (debug mode), optionally publish the output as a
        resource, then - unless ``store_results`` is off - read the output
        line by line and send one report per found secret, optionally
        marking each secret via compromise().
        """
        # The returned path is used as the executable (first element of cmd).
        ant_path = sectools.download_secret_search()

        # NOTE(review): ``--status-code=0`` presumably makes ant-secret exit
        # with 0 even when secrets are found, so check_call() below does not
        # raise on findings - confirm against the ant-secret CLI.
        cmd = [ant_path, '--status-code=0', '--format=jsonlines']
        if self.Parameters.valid_only:
            cmd.append('--valid-only')
        else:
            cmd.append('--validate')
        if self.Parameters.excludes:
            cmd.append('--excludes=' + self.Parameters.excludes)
        if self.Parameters.not_ignore:
            cmd.append('--not-ignore')

        # The source directory is both the scan target (last cmd argument)
        # and the upper bound for the ya.make lookup in get_owners().
        svn_root = sdk2.svn.Arcadia.get_arcadia_src_dir('arcadia:/arc/trunk/arcadia')
        cmd.append(svn_root)

        # The token is handed to the scanner through its environment.
        env = os.environ.copy()
        env['ANT_INTERNAL_TOKEN'] = sdk2.Vault.data('STOP_LEAK_ANT_INTERNAL_TOKEN')

        # stdout (the jsonlines report) goes to the file, stderr to the task's
        # process log.
        ant_out_path = str(self.path('ant-secret.json'))
        with open(ant_out_path, 'wt') as fd:
            with sdk2.helpers.ProcessLog(self, logger=logging.getLogger('ant-secret')) as pl:
                sp.check_call(cmd, env=env, stdout=fd, stderr=pl.stdout)

        if self.Parameters.debug_mode:
            self.suspend()

        if self.Parameters.save_to_res:
            ant_resource = sdk2.ResourceData(
                AntSecretResults(
                    self,
                    'Ant-Secret search results',
                    ant_out_path
                )
            )
            ant_resource.ready()

        if not self.Parameters.store_results:
            return

        mark_compromized = self.Parameters.compromized
        if mark_compromized:
            # Session shared by all compromise() calls made below.
            self.ant_secret_session = http.requests_retry_session()
            self.ant_secret_session.headers.update({'X-Internal-Token': sdk2.Vault.data('STOP_LEAK_ANT_INTERNAL_TOKEN')})

        with HecSender(sdk2.Vault.data('STOP_LEAK_HEC_TOKEN')) as log:
            with open(ant_out_path, 'rt') as results_fd:
                # One JSON document per line; blank lines are skipped.
                for line in results_fd:
                    line = line.strip()
                    if not line:
                        continue

                    result = json.loads(line)
                    path = result['path']
                    # SVN metadata and owners are resolved once per file and
                    # reused for every secret found in it.
                    info = get_svn_info(path)
                    owners = get_owners(path, svn_root)
                    for value in result.get('secrets', []):
                        # line_no is used as a 1-based index into the blame
                        # list; without a positive line number or blame data
                        # the file-level author is reported instead.
                        line_no = value.get('line_no', 0)
                        if line_no > 0 and 'blame' in info:
                            author = info['blame'][line_no - 1]
                        else:
                            author = info['author']

                        # 'sha1' is moved out of ``additional`` into its own
                        # report field.
                        additional = value.get('additional', {})
                        sha1 = ''
                        if 'sha1' in additional:
                            sha1 = additional['sha1']
                            del additional['sha1']

                        # backward compatibility
                        if 'type' in value:
                            additional['secret_validator'] = value['type']

                        report = dict(
                            sha1=sha1,
                            url='{url}?rev={rev}#L{lo}'.format(url=info['url'], rev=info['revision'], lo=line_no),
                            private=False,
                            ignore=value.get('ignore', False),
                            vcs=VCS,
                            user=author,
                            state_id=0,
                            type=value.get('type', None),
                            secret=value.get('secret', None),
                            holders=','.join(owners),
                            additional=additional,
                            secret_validated=value.get('validated', False)
                        )
                        log.send(**report)

                        if mark_compromized:
                            secret = value.get('secret', None)
                            # TODO(buglloc): ugly!
                            # Falls back to the SSH fingerprint when the
                            # report carries no plain secret value.
                            if not secret:
                                secret = additional.get('ssh_fingerprint', None)
                            if not secret:
                                logging.info("can't find secret in ant-secret report")
                            else:
                                self.compromise(secret)


def get_svn_info(path):
    """Collect SVN metadata for a checked-out file.

    :param path: path of the file inside the working copy.
    :return: dict with keys ``url`` (web URL built by ``prepare_url``),
        ``author`` (``'unknown'`` when missing), ``revision`` (``0`` when
        missing) and, only when the blame output is non-empty, ``blame`` -
        a list with one author per blame row, in row order.
    """
    info = sdk2.svn.Arcadia.info(path)
    blame = sdk2.svn.Arcadia.blame(path).strip()
    result = {
        'url': prepare_url(info.get('url', '')),
        'author': info.get('author', 'unknown'),
        'revision': info.get('entry_revision', 0)
    }
    if blame:
        # Callers index this value (``info['blame'][line_no - 1]``), so it has
        # to be a real list: on Python 3 ``map`` returns a lazy iterator that
        # cannot be subscripted.
        authors = []
        for row in blame.split('\n'):
            # Assumes rows look like "<revision> <author> <text>" - the author
            # is the second whitespace-separated field.
            fields = row.split()
            # Keep one entry per row so indices stay aligned with line
            # numbers; a malformed row falls back to the file-level author
            # instead of raising IndexError.
            authors.append(fields[1] if len(fields) > 1 else result['author'])
        result['blame'] = authors
    return result


def prepare_url(url):
    """Turn an ``arcadia:`` SVN URL into its ``a.yandex-team.ru`` web URL.

    Every occurrence of the SVN trunk prefix is substituted; a string without
    that prefix is returned unchanged.
    """
    svn_prefix = 'arcadia:/arc/trunk/arcadia'
    web_prefix = 'https://a.yandex-team.ru/arc/trunk/arcadia'
    return url.replace(svn_prefix, web_prefix)


def get_owners(path, svn_root):
    """Return the owner logins of the nearest ``ya.make`` that declares owners.

    ``ya.make`` files are examined from the directory of ``path`` upwards
    (see ``find_yamake``). The first one with a non-empty ``OWNER()``
    wins: its explicit logins are returned as is; when it lists only groups,
    the groups are expanded to their members via ``get_group_members``.

    :param path: path of the file whose owners are looked up.
    :param svn_root: root of the checkout; the search stops there.
    :return: list of logins, possibly empty.
    """
    for yamake in find_yamake(path, svn_root):
        # ``None`` is the "search is over" sentinel. An empty ya.make is read
        # as '' and must not be mistaken for it, otherwise the search would
        # stop instead of moving on to the parent directory.
        if yamake is None:
            return []

        logins, groups = parse_owners(yamake)
        if not logins and not groups:
            # Skips ya.make w/o owners
            continue

        if logins:
            return logins

        # We have only rb groups
        members = []
        try:
            for group in groups:
                members.extend(get_group_members(group))
        except Exception:
            # Best effort: a failed group lookup must not fail the whole
            # task, so whatever was collected so far is returned.
            logging.warning("can't resolve members of groups %s", groups, exc_info=True)
        return members
    return []


def get_group_members(group_name):
    """Fetch the usernames of the members of the ``group_name`` group.

    Queries the ``rb.yandex-team.ru`` groups API with a 2 second timeout.

    :return: list of non-empty usernames; empty list when the response
        ``stat`` field is not ``'ok'``.
    """
    url = 'http://rb.yandex-team.ru/arc/api/groups/{}/users/'.format(group_name)
    payload = requests.get(url, timeout=2).json()
    if payload.get('stat', 'bad') != 'ok':
        return []

    usernames = []
    for user in payload.get('users', []):
        # Entries without a (non-empty) username are skipped.
        if user.get('username', ''):
            usernames.append(user['username'])
    return usernames


def get_parser():
    """Return the pyparsing grammar for ``OWNER(...)`` macros.

    The grammar is built on the first call and cached as an attribute of this
    function. Inside ``OWNER(`` ... ``)`` it accepts any number of owners,
    each either a bare login (result name ``login``) or a ``<type>:<login>``
    pair (result name ``group`` with ``type`` and ``login`` fields).
    """
    try:
        return get_parser.parser
    except AttributeError:
        # First call: nothing cached yet, build the grammar below.
        pass

    # Imported lazily: pyparsing comes from the task's pip environment.
    import pyparsing as pp

    whitespace = pp.White().suppress()
    name = pp.Regex(r'[\w\-]+')
    kind = pp.Word(pp.alphas)

    bare_login = pp.Group(name)('login')
    typed_group = pp.Group(kind('type') + pp.Suppress(':') + name('login'))('group')
    # The typed form is tried first, then the bare login.
    owner = typed_group | bare_login

    grammar = (
        pp.Suppress('OWNER(') +
        pp.ZeroOrMore(owner + pp.Optional(whitespace)) +
        pp.Suppress(')')
    )
    get_parser.parser = grammar
    return grammar


def find_yamake(path, root):
    """Yield the contents of ``ya.make`` files from ``path`` up towards ``root``.

    Starts in the directory containing ``path`` and moves to the parent
    directory on every step. The text of each ``ya.make`` found on the way is
    yielded; ``root`` itself is never inspected. The generator always ends by
    yielding ``None`` once - when ``root`` is reached or when there is no
    parent directory left.

    Both paths are normalized lexically, so a trailing separator on ``root``
    does not prevent the walk from stopping there.

    :param path: path of a file inside the checkout.
    :param root: directory at which the search stops.
    """
    root = os.path.normpath(root)
    cur_dir = os.path.normpath(os.path.dirname(path) or os.curdir)
    while True:
        if cur_dir == root:
            # Skips arcadia root
            yield None
            return

        # A direct existence check instead of listing the whole directory;
        # it also tolerates directories that do not exist.
        candidate = os.path.join(cur_dir, 'ya.make')
        if os.path.isfile(candidate):
            with open(candidate, 'rt') as f:
                yield f.read()

        parent_dir = os.path.dirname(cur_dir)
        if not parent_dir or parent_dir == cur_dir:
            # Not found: reached the filesystem root or the top of a
            # relative path.
            yield None
            return

        cur_dir = parent_dir


def parse_owners(yamake):
    """Extract owners from every ``OWNER(...)`` macro found in ``yamake``.

    :param yamake: text of a ``ya.make`` file.
    :return: tuple ``(logins, groups)`` - two lists of strings. For group
        owners only the name after the colon is kept; the group type prefix
        is dropped.
    :raises Exception: when the parser yields a token that is neither a
        ``login`` nor a ``group``.
    """
    logins = []
    groups = []
    # scanString yields (tokens, start, end) per match; offsets are unused.
    for tokens, _start, _end in get_parser().scanString(yamake):
        for token in tokens:
            token_type = token.getName()
            if token_type == 'login':
                logins.append(str(token[0]))
            elif token_type == 'group':
                groups.append(str(token['login']))
            else:
                # Interpolate explicitly: Exception does not format its
                # arguments the way logging calls do.
                raise Exception('Unknown owner type: %s' % token_type)
    return logins, groups
