# -*- coding: utf-8 -*-
from sandbox import sdk2
from sandbox.sandboxsdk import environments
from sandbox.sdk2.helpers import subprocess
import logging
import os
import datetime
import json
import uuid
import requests


class YtSecretsSearchVertis(sdk2.Task):
    """Scan recently created YT log tables for secrets and report them to Startrek.

    For every table found under ``target_folder`` whose ``creation_time`` falls
    into the previous calendar day, a YT map operation running ``mapper.py`` is
    started; all operations write into ``output_table``.  The resulting rows are
    then de-duplicated by ``(profile, service)`` and one Startrek issue is
    created per unique pair.  A short summary is written to ``out.json``.
    """

    class Requirements(sdk2.Task.Requirements):
        environments = (
            environments.PipEnvironment("yandex-yt"),
            environments.PipEnvironment("startrek_client", use_wheel=True),
            environments.PipEnvironment("yandex-yt-yson-bindings-skynet"),
        )

    class Parameters(sdk2.Task.Parameters):
        cluster = sdk2.parameters.String('Cluster', default='hahn')
        # NOTE(review): the uuid is generated once, when this class body is
        # executed, not once per task -- tasks created from the same loaded
        # code share the same default path.
        output_table = sdk2.parameters.String('Table for results', default='//tmp/{}'.format(str(uuid.uuid4())))
        target_folder = sdk2.parameters.String('Folder or table to scan',
                                               default='//home/logfeller/logs/vertis-backend-log')
        # Declared as a string parameter; the value is converted with float()
        # at the start of on_execute.
        sampling_rate = sdk2.parameters.String('Sampling rate', default=1.0)

    def on_execute(self):
        """Run the scan over yesterday's tables and file issues for new findings.

        Raises:
            ValueError: if the ``sampling_rate`` parameter is not a valid float
                (raised before any YT state is modified).
        """
        # Imported here rather than at module level; presumably because the
        # packages are only available once the PipEnvironment requirements
        # have been prepared -- TODO confirm.
        import yt.wrapper as yt
        from yt.wrapper import file_commands

        CLUSTER = self.Parameters.cluster
        TMP_TABLE = self.Parameters.output_table
        TARGET_FOLDER = self.Parameters.target_folder
        # Parse once, up front, so a malformed value fails the task before the
        # output table is recreated or any operation is started.
        SAMPLING_RATE = float(self.Parameters.sampling_rate)
        YT_TOKEN = sdk2.Vault.data(self.owner, 'YT_TOKEN')
        ST_TOKEN = sdk2.Vault.data(self.owner, 'ST_OAUTH_TOKEN')
        SCAN_DELTA = 1  # width of the scan window, in days
        table_scheme = [
            {'name': 'id', 'type': 'string'},
            {'name': 'table_name', 'type': 'string'},
            {'name': 'table_owner', 'type': 'string'},
            {'name': 'profile', 'type': 'string'},
            {'name': 'secret', 'type': 'string'},
            {'name': 'entropy', 'type': 'double'},
            {'name': 'column', 'type': 'string'},
            {'name': 'content', 'type': 'string'},
            {'name': 'reported', 'type': 'boolean'},
            {'name': 'misc', 'type': 'string'},
        ]

        def clean_tmp_table():
            """Drop the output table if it can be removed and recreate it with the schema."""
            try:
                yt.remove(path=TMP_TABLE)
            except yt.YtError as err:
                # Best effort: the removal failure is only logged; if the node
                # is still there, the create() below reports the real problem.
                logging.info("Could not remove %s before recreating it: %s", TMP_TABLE, err)
            yt.create("table", TMP_TABLE, attributes={"schema": table_scheme})

        def find_owner_by_servicename(service_name):
            """Return the login responsible for ``service_name`` (currently a fixed login)."""
            return "naumov-al"

        def create_yql_sample(table_name, service_name, context, container_id):
            """Build a sample YQL query that selects the detected log records.

            The query text is only embedded into the issue description; it is
            not executed by this task.
            """
            yql_template = (
                "PRAGMA yt.InferSchema = '1';\r\n"
                "SELECT *\r\n"
                "FROM {cluster}.`{table_name}`\r\n"
                "WHERE\r\n"
                "    `_service` == '{service_name}'\r\n"
                "    AND `_context` == '{context}'\r\n"
                "    AND `_container_id` == '{container_id}'\r\n"
                "LIMIT 1000;\r\n"
            )
            return yql_template.format(cluster=CLUSTER, table_name=table_name, service_name=service_name,
                                       context=context, container_id=container_id)

        def create_issue(service_name, log_record, yt_table_name):
            """Create a Startrek issue in the VSSEC queue describing one finding.

            ``log_record`` is a result row whose ``misc`` value has already
            been decoded into a dict.  Records whose ``secret`` contains
            ``CONVERT_ERROR`` are skipped without creating an issue.
            """
            if "CONVERT_ERROR" in log_record["secret"]:
                return
            import startrek_client

            misc = log_record["misc"]
            secret_type = log_record["profile"]
            assignee = find_owner_by_servicename(service_name)
            context = misc.get("_context", "")
            container_id = misc.get("_container_id", "")
            yql_sample = create_yql_sample(yt_table_name, service_name, context, container_id)
            template = "В логах сервиса %%{service_name}%% обнаружены критичные данные типа %%{secret_type}%%.\r\n" \
                       "Лог скана в YT должен быть доступен в %%{yttable_link}%%\r\n" \
                       "Исполнителем тикета, вероятно, должен быть %%{assignee}%%\r\n" \
                       "Пример YQL-запроса для поиска задетекченных логов:\r\n%%{yql_sample}%%\r\n"\
                       "Данные сервиса: %%{container_info}%%".format(service_name=service_name,
                                                                     secret_type=secret_type,
                                                                     yttable_link=yt_table_name,
                                                                     assignee=assignee,
                                                                     yql_sample=yql_sample,
                                                                     container_info=misc,
                                                                     )
            st = startrek_client.Startrek(token=ST_TOKEN, useragent='sandbox-task')
            ticket = st.issues.create(
                queue='VSSEC',
                summary='Чувствительные данные в логах {service_name}'.format(service_name=service_name),
                description=template,
                type='task',
                followers=[
                    'naumov-al'
                ]
            )
            logging.info("Created issue %s for service %s", getattr(ticket, "key", ticket), service_name)

        def run_check(table, mapper_path):
            """Run the mapper over ``table`` with input sampling, writing to the output table."""
            sampling_spec = {'table_reader': {"sampling_rate": SAMPLING_RATE}}
            builder = yt.spec_builders.MapSpecBuilder() \
                .job_io(sampling_spec) \
                .input_table_paths('{}'.format(table)) \
                .output_table_paths(TMP_TABLE) \
                .begin_mapper() \
                .add_file_path(file_commands.LocalFile(mapper_path)) \
                .command('python {0} {1}'.format(os.path.basename(mapper_path), table)) \
                .format('json') \
                .end_mapper()
            yt.run_operation(builder)

        logging.info("Starting work for cluster %s", CLUSTER)
        yt.config["token"] = YT_TOKEN
        yt.config.config['proxy']['url'] = CLUSTER

        # Read the clock once so both window bounds derive from the same
        # instant (two separate reads could straddle midnight).
        # NOTE(review): this is local time, compared as a string against the
        # table's creation_time attribute -- confirm the time zones agree.
        now = datetime.datetime.now()
        TABLE_TIME_EARLIEST = (now - datetime.timedelta(SCAN_DELTA)).strftime("%Y-%m-%dT00:00:00.0Z")
        TABLE_TIME_LATEST = now.strftime("%Y-%m-%dT00:00:00.0Z")

        def created_in_window(obj):
            # A missing attribute becomes "" and therefore never matches,
            # instead of comparing None with a string.
            creation_time = obj.attributes.get("creation_time") or ""
            return TABLE_TIME_EARLIEST < creation_time < TABLE_TIME_LATEST

        tables_list = yt.search(TARGET_FOLDER, node_type=['table'],
                                object_filter=created_in_window,
                                attributes=["account", "owner", "modification_time", "type", "creation_time"])
        mapper_path = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            "mapper.py"
        )
        clean_tmp_table()
        for table in tables_list:
            logging.info("Processing %s", table)
            run_check(table, mapper_path)

        # One issue per unique "<profile>:<service>" pair.
        known_secrets = set()
        rows = yt.read_table(TMP_TABLE, format=yt.JsonFormat(attributes={"encode_utf8": False}))
        rows_count = 0
        for row in rows:
            misc_dict = json.loads(row["misc"])
            service_name = misc_dict["_service"]
            src_table = row["table_name"]
            key_ = "{secret_type}:{service_name}".format(secret_type=row["profile"], service_name=service_name)
            rows_count += 1
            if key_ in known_secrets:
                continue
            known_secrets.add(key_)
            log_record = dict(row, misc=misc_dict)
            create_issue(service_name, log_record, src_table)

        # A sorted list is JSON-serialisable on every Python version and gives
        # stable output.
        unique_secrets = sorted(known_secrets)
        logging.info("unique secrets {unique_secrets_count} from {rows_total_count}".format(
            unique_secrets_count=len(unique_secrets), rows_total_count=rows_count))
        logging.info("%s", unique_secrets)
        logging.info("result table {}".format(TMP_TABLE))
        logging.info("Saving output resource")
        output = {
            "out_table": TMP_TABLE,
            "known_secrets": unique_secrets,
        }
        output_path = "out.json"
        with open(output_path, 'w') as f:
            f.write(json.dumps(output))
        return
