import datetime
import sys
import logging
from sandbox import sdk2
from sandbox.projects.common import apihelpers
from sandbox.projects.logbroker.tasks.BuildLogbrokerCli import LOGBROKER_CLI_LINUX
from sandbox.projects.security.logfeller_quarantine.logbroker_resps_finder import LogborkerRespsFinder
from sandbox.projects.security.logfeller_quarantine.scheduler import Scheduler
from sandbox.projects.security.logfeller_quarantine.solomon_api import SolomonAlerts
from sandbox.projects.security.logfeller_quarantine.quarantine_maker import QuarantineMaker
from sandbox.projects.security.logfeller_quarantine.quarantine_yql import QuarantineYQLQuery
from sandbox.projects.security.logfeller_quarantine.utils import get_cur_timestamp, cached_property
from sandbox.projects.security.logfeller_quarantine.ya_resolve_client import YaResolveClient
from sandbox.projects.stop_leak.common.yahec import HecSender
from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk.channel import channel


"""
We want to build the following structure in YT:

//home
  /<yt_account>  (e.g. infrasec)
    /logfeller
      /streams
        /<yt_stream_name_1>
          /meta.json
          /<stream_time_1>
            /stream
            /stream.unparsed
            /stream.quarantine
          /<stream_time_2>
            /stream
            /stream.unparsed
            /stream.quarantine
        /<yt_stream_name_2>


File meta.json contains the time of 1) the last run and 2) the last successful finish.
If this file does not exist, we assume that the corresponding stream has not been analysed before.
To free up space, we use the Cypress TTL attribute on <stream_time> folders.
"""

"""
Sandbox flow:
0. Run the task every X minutes (for example, X=10).
1. Get all evaluations
2.
  First, analyze evaluations that have no meta file.
  Then, analyze evaluations sorted by time.
3. Send info about selected evaluation to splunk
4. Make quarantine
5. Get logbroker topic resps
6. Create table in yql. Get share url.
7. Send info to splunk about routine finish, send share_url (maybe some result_rows).
"""


# -------------------------
# --- CODE ----------------
# -------------------------


def send_report(report, hec_token):
    """Send ``report`` through ``HecSender``, retrying on any failure.

    The report is expanded into keyword arguments of ``HecSender.send``.
    One initial attempt plus up to 10 retries are made, back to back
    (no delay between attempts).

    Args:
        report: dict of report fields; passed as ``hec_sender.send(**report)``.
        hec_token: token handed to the ``HecSender`` constructor.

    Raises:
        Exception: whatever the last failed attempt raised, re-raised with
            its original traceback once all retries are exhausted.
    """
    retries_left = 10
    while True:
        try:
            # Only the sender calls are guarded; a failure in __enter__,
            # send() or __exit__ all count as a failed attempt.
            with HecSender(hec_token) as hec_sender:
                hec_sender.send(**report)
        except Exception as e:
            if retries_left == 0:
                logging.error("[!] Exception occured: [{}]. No retries left.".format(e))
                # Bare raise keeps the original traceback (``raise e`` resets
                # it on Python 2).
                raise
            logging.warning("[!] Exception occured: [{}]. Retry... {}".format(e, retries_left))
            retries_left -= 1
        else:
            logging.info("[+] hec_sender send_report success.")
            return


def clear_yt_cache(yt_token, remote_temp_files_directory, yt_cluster):
    """Recursively remove the remote temp files directory on a YT cluster.

    Configures the module-level ``yt.wrapper`` client (token, remote temp
    files directory and proxy) and then removes
    ``remote_temp_files_directory`` itself. The removal is best effort: a
    ``YtHttpResponseError`` is logged (with traceback) and not propagated.

    Args:
        yt_token: token stored into ``yt.config["token"]``.
        remote_temp_files_directory: YT path that is both stored into
            ``yt.config["remote_temp_files_directory"]`` and removed.
        yt_cluster: value passed to ``yt.config.set_proxy``.
    """
    import yt.wrapper as yt

    yt.config["token"] = yt_token
    yt.config["remote_temp_files_directory"] = remote_temp_files_directory
    yt.config.set_proxy(yt_cluster)

    # The whole temp directory is removed, not just a "new_cache" subfolder.
    yt_cache_folder = remote_temp_files_directory
    try:
        logging.info("[+] clear_yt_cache. yt_cache_folder: {}".format(yt_cache_folder))
        yt.remove(yt_cache_folder, recursive=True)
    except yt.YtHttpResponseError:
        # Keep the cleanup non-fatal, but record why it failed so that a
        # leftover directory can be diagnosed from the task log.
        logging.warning(
            "[!] clear_yt_cache. Error in removing: {}".format(yt_cache_folder),
            exc_info=True
        )


class LogfellerSecretsQuarantine(sdk2.Task):
    """Sandbox task that processes one Logfeller quarantine evaluation per run.

    A run selects an evaluation (from Solomon alerts via the scheduler, or
    built from the task parameters), creates the quarantine table, sets a TTL
    on the quarantine folder, looks up the stream's "resps" through Logbroker
    and YaResolveClient, runs a YQL query over the quarantine table and sends
    a summary report through ``send_report``.
    """

    class Requirements(sdk2.Task.Requirements):
        environments = [
            # List of required packages
            environments.PipEnvironment("requests"),
            environments.PipEnvironment("yandex-yt"),
            environments.PipEnvironment("yql"),
        ]

        cores = 1
        ram = 2048
        disk_space = 10 * 1024

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Task.Parameters):
        yt_account = sdk2.parameters.String("Use quota of specified yt account.", default="security")   # default="infrasec"
        reanalyze_period = sdk2.parameters.Integer("Period (in seconds) to wait before repeating reanalysis.", default=60 * 60 * 24 * 7 * 1)    # 1 week
        stream_ttl = sdk2.parameters.Integer("TTL time for created stream files (in seconds).", default=60 * 60 * 24 * 7 * 2)   # 2 weeks
        logfeller_stream_name = sdk2.parameters.String("Analyze specific stream in logfeller", default=None)
        logfeller_event_time = sdk2.parameters.String("Analyze specific time slot (ex. '2020-06-06T19:10:00Z')", default=None)
        logfeller_cluster = sdk2.parameters.String("Analyze specific cluster (ex. 'hahn')", default=None)
        logfeller_service = sdk2.parameters.String("Analyze specific service (ex. 'indexing')", default=None)
        evaluations_slicer_value = sdk2.parameters.Integer("Evaluations slicer value (dev only).")
        make_quarantine_resource_id = sdk2.parameters.Integer("Make quarantine resource id", default=1939158532)
        udf_lib_url = sdk2.parameters.String("Udf lib url", default="https://proxy.sandbox.yandex-team.ru/1488003205")
        udf_lib_name = sdk2.parameters.String("Udf lib name", default="ant_secret_udf.so")
        query_version = sdk2.parameters.Integer("YQL query version", default=1)

    @cached_property
    def logfeller_bins_folder(self):
        """Return the local folder that holds the make-quarantine binaries.

        The folder is located inside the Sandbox resource whose id is given
        by the ``make_quarantine_resource_id`` parameter.

        OLD: https://sandbox.yandex-team.ru/task/621662890/resources
        OLD: https://proxy.sandbox.yandex-team.ru/1379773501/

        CUR: https://sandbox.yandex-team.ru/task/720113251/view
        CUR: https://proxy.sandbox.yandex-team.ru/1602303384
        """

        resource = sdk2.Resource[self.Parameters.make_quarantine_resource_id]
        resource_data = sdk2.ResourceData(resource)
        return str(resource_data.path) + "/logfeller/python/logfeller/bin/standalone/make_quarantine"

    @cached_property
    def simple_parser_bin_path(self):
        """Return the path of the ``logfeller-simple-yt-parser`` binary."""
        return self.logfeller_bins_folder + "/logfeller-simple-yt-parser"

    @cached_property
    def make_quarantine_bin_path(self):
        """Return the path of the ``make-quarantine`` binary."""
        return self.logfeller_bins_folder + "/make-quarantine"

    @cached_property
    def logbroker_client(self):
        """Sync the latest ``LOGBROKER_CLI_LINUX`` resource and return its local path."""
        logbroker_resourse = apihelpers.get_last_resource(LOGBROKER_CLI_LINUX)
        logbroker_client_path = channel.task.sync_resource(logbroker_resourse)
        return logbroker_client_path

    def _build_manual_evaluation(self):
        """Build an evaluation from the task parameters, if fully specified.

        Returns:
            A dict with ``create_quarantine_table_command``, ``yt_stream_name``
            and ``yt_cluster`` when stream name, event time, cluster and
            service parameters are all set; ``None`` otherwise.
        """
        service = self.Parameters.logfeller_service
        event_time = self.Parameters.logfeller_event_time
        stream_name = self.Parameters.logfeller_stream_name
        cluster = self.Parameters.logfeller_cluster
        if not (stream_name and event_time and cluster and service):
            return None

        create_quarantine_table_command = "./make-quarantine --service {} --cluster {} --stream-name {} "
        create_quarantine_table_command += "--event-time {} --output-tables-prefix //home/logfeller-dev/ovandriyanov/samples-indexing-{}"
        create_quarantine_table_command = create_quarantine_table_command.format(service, cluster, stream_name, event_time, event_time)
        return {
            "create_quarantine_table_command": create_quarantine_table_command,
            # Take the values straight from the parameters instead of
            # re-parsing them out of the command by token position, which
            # breaks as soon as any value contains a space.
            "yt_stream_name": stream_name,
            "yt_cluster": cluster,
        }

    def on_execute(self):
        """Run one quarantine cycle for a single evaluation.

        Steps, in order: load secrets, fetch Solomon evaluations, choose the
        evaluation for this run, prepare YT folders, record the start time in
        the stream meta, create the quarantine table, set the folder TTL,
        look up resps, run the YQL query, record the finish time, send the
        report and clear the remote temp directory. Returns early (without a
        report) when there is no evaluation to analyze.
        """
        import yt.wrapper as yt

        # load tokens
        SOLOMON_TOKEN = sdk2.Vault.data("SOLOMON_TOKEN")
        YT_TOKEN = sdk2.Vault.data("YT_TOKEN")
        LOGBROKER_TOKEN = sdk2.Vault.data("LOGBROKER_TOKEN")
        YQL_TOKEN = sdk2.Vault.data("procenkoeg", name="YQL_TOKEN")
        HEC_TOKEN = sdk2.Vault.data("ANT_SECRET_HEC_TOKEN")

        yt_account = self.Parameters.yt_account

        REMOTE_TEMP_FILES_DIRECTORY = "//home/{}/logfeller/yt_files_storage".format(yt_account)

        # Passed to the Logbroker resps finder and also excluded from the
        # resolved resps below.
        WHITELIST = [
            "sterh", "abash", "styskin", "tigran", "volozh"
        ]

        # show current python version
        logging.info("[+] Python major version: {}.{}.{}.".format(sys.version_info.major, sys.version_info.minor, sys.version_info.micro))

        # prepare yt
        yt.config["token"] = YT_TOKEN
        yt.config["remote_temp_files_directory"] = REMOTE_TEMP_FILES_DIRECTORY

        # SOLOMON PART
        sa = SolomonAlerts(token=SOLOMON_TOKEN)
        evaluations = sa.get_evaluations()

        if self.Parameters.evaluations_slicer_value:
            evaluations = evaluations[:self.Parameters.evaluations_slicer_value]

        if self.Parameters.logfeller_stream_name:
            wanted_stream_name = self.Parameters.logfeller_stream_name
            evaluations = [x for x in evaluations if x["yt_stream_name"] == wanted_stream_name]

        # SCHEDULER PART
        scheduler = Scheduler(yt_account, evaluations, reanalyze_period=self.Parameters.reanalyze_period)

        # Choose this run evaluation: explicit task parameters win, otherwise
        # let the scheduler pick the next one.
        evaluation = self._build_manual_evaluation()
        if evaluation is None:
            evaluation = scheduler.select_next_evaluation()

        if not evaluation:
            logging.info("[+] No evaluation to analyze found.")
            return

        logging.info("[+] Selected evaluation. cluster: {}.".format(evaluation["yt_cluster"]))
        logging.info("[+] Selected evaluation. stream_name: {}.".format(evaluation["yt_stream_name"]))
        logging.info("[+] Selected evaluation: {}.".format(evaluation))

        yt.config.set_proxy(evaluation["yt_cluster"])

        # Create streams folder if not exists
        logfeller_dir = "//home/{}/logfeller".format(yt_account)
        if not yt.exists(logfeller_dir):
            yt.file_commands.mkdir(logfeller_dir)
        streams_dir = logfeller_dir + "/streams"
        if not yt.exists(streams_dir):
            yt.file_commands.mkdir(streams_dir)

        # Create stream folder if not exists
        stream_folder = scheduler._build_stream_folder(evaluation)
        try:
            yt.file_commands.mkdir(stream_folder)
        except yt.YtHttpResponseError:
            logging.info("[+] Didnt create stream folder {}. Already exists. (Catched YtHttpResponseError).".format(stream_folder))

        # Set meta.json start_time
        quarantine_start_time = get_cur_timestamp()
        scheduler.set_meta_last_start(evaluation, quarantine_start_time)

        # QUARANTINE PART
        qm = QuarantineMaker(
            evaluation, scheduler._build_quarantine_prefix(evaluation),
            REMOTE_TEMP_FILES_DIRECTORY, yt_token=YT_TOKEN,
            make_quarantine_bin_path=self.make_quarantine_bin_path,
            logfeller_simple_yt_parser_bin_path=self.simple_parser_bin_path,
            overwrite_event_time=self.Parameters.logfeller_event_time
        )

        try:
            qm.create_quarantine_table()
        except Exception as e:
            # Best effort: the run continues and tries to pick up the table
            # anyway. KeyboardInterrupt is not an Exception subclass, so it
            # still propagates. Log with traceback so the failure is visible.
            logging.exception("[+] qm.create_quarantine_table() have through and exception. {}.".format(e))

        quarantine_table = qm.get_quarantine_table()
        logging.info("[+] Quarantine table {} were created.".format(quarantine_table))
        qm.remove_non_quarantine_tables()
        logging.info("[+] Non quarantine tables were deleted.")

        # SET QUARANTINE FOLDER TTL
        expiration_timestamp = get_cur_timestamp() + self.Parameters.stream_ttl  # (60 * 60 * 24 * 7 * 2)  # 2 weeks
        # Render the timestamp in UTC: the resulting ISO string carries no
        # zone designator, so local time would shift the TTL by the host's
        # UTC offset.
        # NOTE(review): assumes YT reads a zone-less expiration_time as UTC - confirm.
        expiration_time = datetime.datetime.utcfromtimestamp(expiration_timestamp).isoformat()
        path = scheduler._build_quarantine_folder(evaluation)
        yt.config.set_proxy(evaluation["yt_cluster"])
        yt.file_commands.set_attribute(path, "expiration_time", expiration_time)
        logging.info("[+] Quarantine fodler TTL was set.")

        # LOGBROKER PART
        lrf = LogborkerRespsFinder(
            YT_TOKEN, LOGBROKER_TOKEN, REMOTE_TEMP_FILES_DIRECTORY, logbroker_cli_path=self.logbroker_client, whitelist=WHITELIST
        )
        yt_stream_name = evaluation["yt_stream_name"]
        yt_cluster = evaluation["yt_cluster"]
        logging.info("[+] Logbroker get resps for yt_stream_name={}, yt_cluster={}.".format(yt_stream_name, yt_cluster))
        resps, resps_type, topic_path, orig_topic_path, xmask_enabled = lrf.find_stream_resps(yt_stream_name, yt_cluster=yt_cluster)

        # Second attempt with "@" replaced by "-" when nothing was found for
        # the original stream name.
        if not resps and "@" in yt_stream_name:
            yt_stream_name = yt_stream_name.replace("@", "-")
            logging.info("[+] Logbroker get resps for yt_stream_name={}, yt_cluster={}.".format(yt_stream_name, yt_cluster))
            resps, resps_type, topic_path, orig_topic_path, xmask_enabled = lrf.find_stream_resps(yt_stream_name, yt_cluster=yt_cluster)
        logging.info("[+] Logbroker getting resps finished.")

        # YA RESOLVER PART
        logging.info("[+] YaResolveClient. resps: {}.".format(resps))
        yrc = YaResolveClient()
        ya_resolve_resps = yrc.resolve_many(resps)
        ya_resolve_resps = [x for x in ya_resolve_resps if x not in WHITELIST]
        if not ya_resolve_resps:
            # An empty result is reported as None rather than an empty list.
            ya_resolve_resps = None
        logging.info("[+] YaResolveClient. ya_resolve_resps: {}.".format(ya_resolve_resps))

        # YQL PART
        yql_cache_folder = "//home/{}/logfeller/yql_cache".format(yt_account)
        udf_lib_url = self.Parameters.udf_lib_url
        udf_lib_name = self.Parameters.udf_lib_name
        query_version = self.Parameters.query_version
        qq = QuarantineYQLQuery(YQL_TOKEN, YT_TOKEN, yql_cache_folder, udf_lib_name, udf_lib_url, query_version)
        qq.run_query(yt_table=quarantine_table, yt_cluster=evaluation["yt_cluster"])
        share_url = qq.get_share_url()
        yql_result_count = len(qq.get_result_rows())
        qq.clear_cache(REMOTE_TEMP_FILES_DIRECTORY)
        logging.info("[+] YQL query done.")

        # Set meta.json finish_time
        quarantine_finish_time = get_cur_timestamp()
        scheduler.set_meta_last_finish(evaluation, quarantine_finish_time)
        logging.info("[+] Finish time was saved.")

        # RESULT
        report = {
            # splunk related
            # "index": "temp",
            "task_type": "logfeller_quarantine",

            # evaluation related (.get: manually built evaluations lack most
            # of these keys)
            "yt_cluster": evaluation.get("yt_cluster"),
            "yt_stream_name": evaluation.get("yt_stream_name"),
            "yt_log_type": evaluation.get("yt_log_type"),
            "solomon_alert_eval_time": evaluation.get("eval_timestamp"),
            "solomon_project_id": evaluation.get("project_id"),
            "solomon_alert_id": evaluation.get("alert_id"),
            "solomon_subalert_id": evaluation.get("subalert_id"),
            "quarantine_size": evaluation.get("quarantine_size"),
            "service": evaluation.get("service"),

            # other task related
            "yql_share_url": share_url,
            "yql_result_count": yql_result_count,
            "quarantine_table": quarantine_table,
            "sb_task_start_time": quarantine_start_time,
            "sb_task_finish_time": quarantine_finish_time,
            "logbroker_resps": resps,
            "logbroker_resps_type": resps_type,
            "logbroker_resps_topic": topic_path,
            "logbroker_topic": orig_topic_path,
            "logfeller_xmask_enabled": xmask_enabled,
            "ya_resolve_resps": ya_resolve_resps,
        }
        logging.info("[+] Result was prepared.")

        logging.info("[+] Sending report.")
        send_report(report, HEC_TOKEN)

        logging.info("[+] Clear yt new_cache folder.")
        clear_yt_cache(YT_TOKEN, REMOTE_TEMP_FILES_DIRECTORY, evaluation["yt_cluster"])
