#!/usr/bin/env python

# List of logbroker accounts:
#     https://logbroker.yandex-team.ru/logbroker/accounts?group=all

# Get OAuth token here:
#     https://oauth.yt.yandex.net/
# and put it to ~/.yt/token file.
# or use yt.config:
#     yt.config["token"] = token
# It is required for yt_wrapper and make-quarantine script

# prepare env var:
#     export YT_FILE_STORAGE=//home/<yt_account>/logfeller/yt_files_storage
# so that files are cached in that location instead of //tmp,
# which robots do not have access to.

# prepare env var:
#     YT_PROXY=<cluster>.yt.yandex.net
# where <cluster>="hahn" or another cluster name
# example:
#     YT_PROXY=hahn.yt.yandex.net
# or:
#     yt.wrapper.config.set_proxy("hahn")


import json
import logging
import os
import re
import subprocess as sp


# YT_TOKEN = os.getenv("YT_TOKEN")
# LOGBROKER_TOKEN = os.getenv("LOGBROKER_TOKEN")

# Sample stream name; in the visible part of this file it is only referenced by the usage example
# in the LogborkerRespsFinder docstring.
TEST_YT_STREAM = "hamster-images-blockstat-log"
# Default value of the logbroker_cli_path argument of LogborkerRespsFinder.__init__.
# NOTE(review): this points into one developer's home directory, so callers on other machines
# presumably have to pass logbroker_cli_path explicitly -- confirm.
TEST_LOGBROKER_CLI_PATH = "/home/procenkoeg/arcadia/logbroker/public/client/bin/logbroker"


class LogborkerRespsFinder(object):
    """
    The files we are interested in live in the folder "//home/logfeller/configs/logs" and are named <something>_<cluster>_streams.json.
    For example: "apphost_hahn_streams.json", "common_hahn_streams.json", ...
    Each of these files contains a dict of elements like the following one:
    ```
        "disk-docviewer/ydisk-java-access-log": {
            "topic_path": "disk-docviewer/ydisk-java-access-log",
            "logbroker": "@stream_options:logbroker:common_arnold",
            "indexing_options": "@stream_options:indexing:schema_common",
            "parsing_options": {
                "chunk_splitter_name": "line-break",
                "parser_name": "disk-docviewer-ydisk-java-access-log-with-schema-parser",
                "formatter_name": "yamred_dsv"
            },
            "lifetimes": "@stream_options:lifetimes:arnold_common",
            "deploy_to_nirvactor": true
        }
    ```
    topic_path looks similar to the stream_name that we pass to the find_stream_resps function: stream_name is topic_path with every "/" replaced by "-".

    So, first of all we find all stream files in the folder "//home/logfeller/configs/logs" for the specified cluster.
    Then we download every stream file and build pairs of "topic_path" and "stream_name".
    From the input "stream_name" we determine the "topic_path".
    Then "topic_path" is passed to the logbroker cli as an argument to get details about the corresponding topic.

    Topic details are parsed with regex searches.
    We determine 4 types of responsibles (in order of decreasing priority):
    1) Responsibles which are noted as responsibles in the topic.
    2) Owner of the topic.
    3) Users with permission "ModifyPermissions".
    4) ABC service associated with the topic.

    Example:
        ```
        yt_account = "security"
        REMOTE_TEMP_FILES_DIRECTORY = "//home/{}/logfeller/yt_files_storage".format(yt_account)
        lrf = LogborkerRespsFinder(YT_TOKEN, LOGBROKER_TOKEN, REMOTE_TEMP_FILES_DIRECTORY, logbroker_cli_path=TEST_LOGBROKER_CLI_PATH)
        resps = lrf.find_stream_resps(TEST_YT_STREAM, yt_cluster="hahn")
        ```
    """

    class RespsType:
        """
        Tags telling which part of the logbroker description a list of responsibles was taken from.
        Returned as the second element by _find_topic_resps_imp.
        """
        # nothing was found (or the logbroker cli call failed)
        No = None
        # login taken from the "Owner:" field
        Owner = "owner"
        # logins taken from the "responsible" row
        Responsibles = "responsibles"
        # staff users having the "ModifyPermissions" permission
        ModifyPermissions = "modify_permissions"
        # services taken from the "ABC service" row
        ABCService = "abc_service"

    def __init__(self, yt_token, logbroker_token, remote_temp_files_directory, logbroker_cli_path=TEST_LOGBROKER_CLI_PATH, whitelist=None):
        """
        Remember the logbroker cli settings and configure yt.wrapper.

        yt_token: stored into yt.config["token"].
        logbroker_token: later exported as LOGBROKER_TOKEN for the logbroker cli.
        remote_temp_files_directory: stored into yt.config["remote_temp_files_directory"].
        logbroker_cli_path: path of the logbroker cli executable.
        whitelist: logins that are removed from found responsibles; an empty list when omitted.

        Note: yt.config is an attribute of the imported yt.wrapper module, so the values
        written here are not private to this instance.
        """
        import yt.wrapper as yt

        yt_settings = (
            ("token", yt_token),
            ("remote_temp_files_directory", remote_temp_files_directory),
        )

        self._whitelist = whitelist if whitelist else list()
        self._remote_temp_files_directory = remote_temp_files_directory
        self._logbroker_cli_path = logbroker_cli_path
        self._logbroker_token = logbroker_token

        for option_name, option_value in yt_settings:
            yt.config[option_name] = option_value

    @staticmethod
    def _prep_stream(stream):
        """
        Prepare account and stream_name by topic_paths.

        input:
            {u'indexing_options': u'@stream_options:indexing:schema_common',
             u'lifetimes': u'@stream_options:lifetimes:common',
             u'logbroker': u'@stream_options:logbroker:common_hahn',
             u'parsing_options': {u'chunk_splitter_name': u'line-break',
              u'formatter_name': u'native',
              u'parser_name': u'yc-vpc-config-plane-server-log'},
             u'topic_path': u'yandexcloud/vpc/pre-prod/api/yc-vpc-config-plane-server-log',
             u'use_yql_parser': False}

        output:
            {
                "stream_name": "yandexcloud-vpc-pre-prod-api-yc-vpc-config-plane-server-log",
                "topic_path": "yandexcloud/vpc/pre-prod/api/yc-vpc-config-plane-server-log",
            }
        """

        xmask_enabled = False
        indexing_options = stream.get("topic_path", "")
        if isinstance(indexing_options, dict):
            sensitive_data_scanning_policy = indexing_options.get("sensitive_data_scanning_policy")
            xmask_enabled = sensitive_data_scanning_policy == "xmask"
        else:
            xmask_enabled = indexing_options in [
                "@stream_options:indexing:schema_common_xmask",
                "@stream_options:indexing:schema_fast_xmask"
            ]

        return {
            # "account": stream["topic_path"].split("/", 1)[0],
            "topic_path": stream["topic_path"],
            "stream_name": stream["topic_path"].replace("/", "-"),
            "xmask_enabled": xmask_enabled,
        }

    def _download_one_stream(self, streams_file, yt_cluster="hahn"):
        """
        Read one streams config file from "//home/logfeller/configs/logs" and parse it.

        streams_file: file name inside the configs folder.
        yt_cluster: cluster name passed to yt.config.set_proxy.

        Returns a list with one _prep_stream() result per entry of the JSON dict stored in the file.
        """
        import yt.wrapper as yt
        yt.config.set_proxy(yt_cluster)

        path = "//home/logfeller/configs/logs/" + streams_file
        json_data = json.loads(yt.file_commands.read_file(path).read())

        # Return a real list: on Python 3 map() yields a one-shot iterator, which
        # cannot be measured with len() or iterated twice by the caller.
        return [self._prep_stream(stream) for stream in json_data.values()]

    def download_all_streams(self, yt_cluster="hahn"):
        """
        Collect stream descriptions from every "*_<yt_cluster>_streams.json" file
        found in the //home/logfeller/configs/logs node.

        Returns a single list with the entries of all matching files.
        """
        import yt.wrapper as yt
        yt.config.set_proxy(yt_cluster)

        wanted_suffix = "_{}_streams.json".format(yt_cluster)
        collected = list()

        for node_name in yt.list("//home/logfeller/configs/logs"):
            # skip configs that belong to other clusters or are not stream lists
            if not node_name.endswith(wanted_suffix):
                continue
            collected.extend(self._download_one_stream(node_name, yt_cluster=yt_cluster))

        return collected

    # def download_common_streams(self, yt_cluster="hahn"):
    #     """
    #     Download streams config file in json format.
    #     """
    #     import yt.wrapper as yt

    #     # set cluster proxy
    #     yt.config.set_proxy(yt_cluster)
    #     # set config filename
    #     path = "//home/logfeller/configs/logs/common_{}_streams.json".format(yt_cluster)

    #     # download
    #     resp_stream = yt.file_commands.read_file(path)
    #     res = resp_stream.read()
    #     json_data = json.loads(res)

    #     # prepare account and stream_name by topic_paths
    #     streams = map(self._prep_stream, json_data.values())

    #     return streams

    def _filter_whitelisted_resps(self, resps):
        return list(filter(lambda x: x not in self._whitelist, resps)) if self._whitelist else resps

    @staticmethod
    def _parse_find_resps_with_modify_permission(output):
        """
        Parse output with regex. Find staff users with "ModifyPermissions" permission.
        """

        output_lines = output.split("\n")
        resps = list()

        for line in output_lines:
            m = re.match(r"^\s*([^@].*)@staff.*?ModifyPermissions.*$", line)
            if m:
                resps.append(m.group(1))

        resps = list(set(resps))

        return resps

    @staticmethod
    def _parse_find_owner(output):
        """
        Find topic owner
        Usually: `Owner: bla@staff`
        Sometimes: `Owner: <empty>`
        """

        owner = None
        m = re.search(r"Owner:\t([^@].*)@staff", output)
        if m:
            owner = m.group(1)
        return owner

    @staticmethod
    def _parse_find_responsibles(output):
        """
        Find topic respobsibles:
        ------------------------------------------------
        responsible  | adfox@, zcoder@_   | user_defined
        responsible  | r4start, k-petrov  | user_defined

        "m-messiah@,ivanov-d-s@,ignition@,tolmalev@,akinfold@, "

        """

        m = re.search(r"responsible[^|].*\|([^|].*)\|.*?\n", output)
        if not m:
            return None

        responsibles = m.group(1).split(",")
        responsibles = map(lambda x: x.strip(), responsibles)
        responsibles = filter(lambda x: x != "", responsibles)
        responsibles = filter(lambda x: x != "<empty>", responsibles)

        responsibles_temp = list()
        for resp in responsibles:
            at_sign_idx = resp.find("@")
            if at_sign_idx == -1:
                responsibles_temp.append(resp)
            else:
                responsibles_temp.append(resp[:at_sign_idx])
        responsibles = responsibles_temp

        responsibles = list(set(responsibles))
        return responsibles

    @staticmethod
    def _parse_find_abc_service(output):
        """
        Find assosiated ABC service:
        ------------------------------------------------
        ABC service   | <empty>   | user_defined
        ABC service   | stat2     | user_defined

        """

        m = re.search(r"ABC service[^|].*\|([^|].*)\|.*?\n", output)
        if not m:
            return None

        services = m.group(1).split(",")
        services = map(lambda x: x.strip(), services)
        services = filter(lambda x: x != "", services)
        services = filter(lambda x: x != "<empty>", services)

        # responsibles_temp = list()
        # for resp in responsibles:
        #     at_sign_idx = resp.find("@")
        #     if at_sign_idx == -1:
        #         responsibles_temp.append(resp)
        #     else:
        #         responsibles_temp.append(resp[:at_sign_idx])
        # responsibles = responsibles_temp

        # responsibles = list(set(responsibles))
        return services

    def _prepare_logbroker_env(self):
        """
        Reassign dir for temp files.
        Default //tmp not acceptable by robots and zombies.
        """

        env = os.environ.copy()
        env["LOGBROKER_TOKEN"] = self._logbroker_token
        return env

    def _run_logbroken_describe_on(self, topic_path):
        """
        Runs logbroker's describe command on topic/account
        """

        cmd = [self._logbroker_cli_path, "-s", "logbroker", "schema", "describe", topic_path]
        logging.info("[+] _run_logbroken_describe_on: " + str(cmd))
        output = sp.check_output(cmd, shell=False, env=self._prepare_logbroker_env())
        return output

    def _find_topic_resps_imp(self, topic_path):
        try:
            output = self._run_logbroken_describe_on(topic_path)
        except sp.CalledProcessError:
            logging.info("[+] Exception sp.CalledProcessError on topic: {}.".format(topic_path))
            return None, self.RespsType.No

        # user-defined responsibles
        resps = self._parse_find_responsibles(output)
        if resps:
            resps = self._filter_whitelisted_resps(resps)
            return resps, self.RespsType.Responsibles

        # topic owner
        resps = self._parse_find_owner(output)
        if resps:
            resps = [resps]
            resps = self._filter_whitelisted_resps(resps)
            return resps, self.RespsType.Owner

        # users with modify permission
        resps = self._parse_find_resps_with_modify_permission(output)
        if resps:
            resps = self._filter_whitelisted_resps(resps)
            return resps, self.RespsType.ModifyPermissions

        resp_services = self._parse_find_abc_service(output)
        if resp_services:
            return resp_services, self.RespsType.ABCService

        return None, self.RespsType.No

    @staticmethod
    def _soft_suitable_streams_check(suitable_streams):
        """
        Softly check if there is exectly 1 topic found corresponding specified stream_name.
        Return True on success.

        Return False otherwise and print warning.
        """

        # low probability events
        if len(suitable_streams) == 0:
            # no stream found? parsing error?
            print("[!] NOT FOUND ACCOUNT")
            return False

        elif len(suitable_streams) > 1:
            # multiple streams with the same parsed name found
            print("[!] MULTIPLE ACCOUNTS MATCHED")
            return False

        return True

    def _find_topic_resps(self, topic_path):
        """
        Find respsfor specified topic.
        If no resps were found, we go to parent topic (one slash back).
        For example input: "apphost/shared/prod/event-log". If no resps found we try "apphost/shared/prod/" and etc.
        """

        while topic_path:

            resps, resps_type = self._find_topic_resps_imp(topic_path)

            if resps:
                return resps, resps_type, topic_path

            else:
                parts = topic_path.rsplit("/", 1)
                if len(parts) == 2:
                    topic_path = parts[0]
                else:
                    topic_path = None

        return None, self.RespsType.No, None

    def find_stream_resps(self, yt_stream_name, yt_cluster="hahn", streams=None):
        """
        This method finds owner of specific topic which is corresponds to yt_stream_name.
        If there are no resps found for specified topic we try parent topic (one slash bask).

        Returns None when no resps found or on soft_suitable_streams_check error.
        Otherwise return pair of:
            1) list of owners (users, service, servicerole, department?)
            2) owning reason. (responsibles field, owner field, users with ModifyPermission).
        """

        if not streams:
            streams = self.download_all_streams(yt_cluster=yt_cluster)

        suitable_streams = list(filter(lambda s: s["stream_name"] == yt_stream_name, streams))

        if not self._soft_suitable_streams_check(suitable_streams):
            return None, None, None, None, None

        stream = suitable_streams[0]
        orig_topic_path = stream["topic_path"]
        resps, resps_type, topic_path = self._find_topic_resps(orig_topic_path)

        # fix names like "\tusername"
        if resps:
            resps = list(map(lambda x: x.strip(), resps))

        xmask_enabled = stream["xmask_enabled"]
        return resps, resps_type, topic_path, orig_topic_path, xmask_enabled
