# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals
import re
import logging

try:
    from yt_worker import YtWorker, yt
except ImportError:
    from .yt_worker import YtWorker, yt


logger = logging.getLogger(__name__)


class YtRawMasterAnalysis(YtWorker):
    """Map/reduce analyses over the YT raw master log.

    Two analyses are provided:

    * ``run_user_auth_failed_analysis`` counts "Access denied" errors per
      (user, cluster, access_type) and keeps the groups whose count exceeds
      ``self.threshold``.
    * ``run_user_creation_analysis`` collects the log messages that share a
      ``RequestId`` with a message containing ``<-`` and ``Type: User``.

    ``self.yt``, ``self.source_table`` and ``self.raw_master_folder`` are used
    here but never assigned in this class; presumably they are provided by
    ``YtWorker`` -- confirm against its implementation.
    """

    # Clusters analysed when the caller does not choose any.
    _DEFAULT_CLUSTERS = ("banach", "hahn")

    # Clusters analysed when the caller passes ``clusters_for_analysis=True``.
    _ALL_CLUSTERS = ("banach", "hahn", "freud",
                     "flux", "hume", "locke",
                     "markov", "perelman", "pythia",
                     "seneca-man", "seneca-myt", "seneca-sas",
                     "vanga", "zeno")

    # Patterns are raw strings (no invalid "\s" escapes in normal literals)
    # and are compiled once instead of on every processed record.
    _AUTH_USER_RE = re.compile(r"\n[\s\t]*user[\s\t]*([a-z0-9_-]{2,16})\n")
    _PERMISSION_RE = re.compile(r"\n\s+permission\s+([a-z_\-0-9]+)\W")
    _PATH_RE = re.compile(r"(/(/[^/@ ]*)+)/?")
    _AUTH_ERROR_RE = re.compile(r"Error: ([^:\\]+)")
    _REQUEST_USER_RE = re.compile(r"User:[\s\t]*([a-z0-9_-]{2,16})")
    _REQUEST_ID_RE = re.compile(r"RequestId: ([^\s,;\\]+)")
    _REQUEST_ERROR_RE = re.compile(r"Error: ([^\n]+)")

    def __init__(self, token, clusters_for_analysis=None, source_table=None):
        """Set up the worker and choose the clusters to analyse.

        :param token: forwarded unchanged to ``YtWorker.__init__``.
        :param clusters_for_analysis: ``None`` selects the default clusters,
            a non-empty list selects exactly those clusters, ``True`` selects
            every known cluster. Any other value (empty list, ``False``, ...)
            falls back to the default clusters with a warning; previously it
            left ``self.clusters`` unset and raised ``AttributeError``.
        :param source_table: forwarded unchanged to ``YtWorker.__init__``.
        """
        super(YtRawMasterAnalysis, self).__init__(token=token, source_table=source_table,
                                                  source_path="//logs/yt-raw-master-log/1d/")

        logger.info("Creating node for raw_master ...")
        self.yt.create("map_node", self.raw_master_folder, recursive=True, ignore_existing=True)

        # A (user, cluster, access_type) group is reported only when it has
        # strictly more failures than this.
        self.threshold = 10

        # Filled by run_user_creation_analysis between its two map passes.
        self.request_id_list = list()

        if clusters_for_analysis is True:
            self.clusters = list(self._ALL_CLUSTERS)
        elif isinstance(clusters_for_analysis, list) and len(clusters_for_analysis) > 0:
            self.clusters = clusters_for_analysis
        else:
            if clusters_for_analysis is not None:
                logger.warning("Unsupported clusters_for_analysis value %r, using default clusters",
                               clusters_for_analysis)
            self.clusters = list(self._DEFAULT_CLUSTERS)

        logger.info("Clusters for analysis: %s", self.clusters)
        self.dest_paths = list()

    @staticmethod
    def _has_marker(text, marker):
        """Return True when the text literal *marker* occurs in *text*.

        The marker is encoded to UTF-8 only when *text* is a byte string, so
        the containment test never mixes bytes and text: that mix raises
        ``TypeError`` on Python 3 and may fail on implicit ASCII decoding on
        Python 2.
        """
        if isinstance(text, bytes):
            marker = marker.encode("utf-8")
        return marker in text

    def _apply_clusters_override(self, clusters_for_analysis):
        """Replace ``self.clusters`` when given a non-empty list; otherwise keep it."""
        if isinstance(clusters_for_analysis, list) and len(clusters_for_analysis) > 0:
            self.clusters = clusters_for_analysis

    @yt.aggregator
    def _map_auth_fails(self, records):
        """Yield one row per "Access denied" record of an analysed cluster.

        A record is emitted only when its ``log_message`` contains a user, a
        permission, a path and an ``Error:`` text, the error text contains
        "Access denied" and the record's cluster is in ``self.clusters``.

        :param records: iterable of dict-like rows with ``log_message``,
            ``cluster``, ``iso_eventtime`` and ``log_category`` keys.
        """
        for record in records:
            log_message = record.get("log_message")
            if log_message is None:
                # Nothing to parse; searching None would abort the whole job.
                continue

            user_match = self._AUTH_USER_RE.search(log_message)
            permission_match = self._PERMISSION_RE.search(log_message)
            path_match = self._PATH_RE.search(log_message)
            error_match = self._AUTH_ERROR_RE.search(log_message)

            # The error match is checked too: a message without "Error: ..."
            # used to raise AttributeError on ``.group(1)``.
            if not (user_match and permission_match and path_match and error_match):
                continue

            error_msg = error_match.group(1)
            cluster = record.get("cluster")
            if not self._has_marker(error_msg, "Access denied") or cluster not in self.clusters:
                continue

            # A fresh dict per row: rows never alias each other.
            yield {"user": user_match.group(1),
                   "access_type": permission_match.group(1),
                   "path": path_match.group(1),
                   "cluster": cluster,
                   "time": record.get("iso_eventtime"),
                   "log_category": record.get("log_category"),
                   "error_msg": error_msg}

    def _reduce_auth_fails(self, key, records):
        """Yield the group key plus its row count when it exceeds the threshold.

        :param key: mapping with ``user``, ``cluster`` and ``access_type``.
        :param records: iterable of the rows that share *key*.
        """
        user, cluster, access_type = key["user"], key["cluster"], key["access_type"]

        count = sum(1 for _ in records)
        if count > self.threshold:
            yield {"user": user,
                   "cluster": cluster,
                   "access_type": access_type,
                   "count": count}

    def run_user_auth_failed_analysis(self, clusters_for_analysis=None):
        """Build the ``auth_failed`` table: map, sort, reduce, sort by count.

        :param clusters_for_analysis: optional non-empty list replacing
            ``self.clusters``; any other value keeps the current clusters.
        :return: ``True`` when the source table exists and the pipeline was
            run, ``False`` when the source table does not exist.
        """
        self._apply_clusters_override(clusters_for_analysis)

        if not self.yt.exists(self.source_table):
            logger.warning("Source table %s does not exist, skipping user auth failed analysis",
                           self.source_table)
            return False

        dest_table_auth_failed = self.raw_master_folder + "/auth_failed"
        logger.info("Dest table: %s", dest_table_auth_failed)

        # Map
        logger.info("Running map for user auth failed ...")
        self.yt.run_map(self._map_auth_fails, self.source_table, dest_table_auth_failed)

        # Sort
        logger.info("Running sort by user, cluster and access_type for user auth failed ...")
        self.yt.run_sort(dest_table_auth_failed, sort_by=["user", "cluster", "access_type"])

        # Reduce (the destination table is both input and output)
        logger.info("Running reduce by user,cluster and access_type for user auth failed ...")
        self.yt.run_reduce(self._reduce_auth_fails, dest_table_auth_failed, dest_table_auth_failed,
                           reduce_by=["user", "cluster", "access_type"])

        # Sort result table by count
        logger.info("Running sort by count for user auth failed result table...")
        self.yt.run_sort(dest_table_auth_failed, sort_by="count")

        logger.info("Appending path to dest_paths ...")
        self.dest_paths.append(dest_table_auth_failed)
        return True

    @yt.aggregator
    def _first_map_user_creation(self, records):
        """Yield ``{"request_id": ...}`` for every matching message.

        A message matches when it contains ``<-``, ``Type: User``, a
        ``User:`` name and a ``RequestId:``.

        :param records: iterable of dict-like rows with a ``log_message`` key.
        """
        for record in records:
            log_message = record.get("log_message")
            if log_message is None:
                continue

            request_id_match = self._REQUEST_ID_RE.search(log_message)
            if request_id_match is None or self._REQUEST_USER_RE.search(log_message) is None:
                continue

            if self._has_marker(log_message, "<-") and self._has_marker(log_message, "Type: User"):
                yield {"request_id": request_id_match.group(1)}

    @yt.aggregator
    def _second_map_user_creation(self, records):
        """Yield every ``<-`` / ``->`` message whose RequestId is of interest.

        The ids of interest are taken from ``self.request_id_list``. Each
        emitted row carries ``request_id``, ``cluster``, ``time`` and
        ``log_message``, plus ``user_initiator`` and ``error_msg`` when the
        message contains them.

        :param records: iterable of dict-like rows with ``log_message``,
            ``cluster`` and ``iso_eventtime`` keys.
        """
        # Built once per call so the per-record membership test is O(1).
        wanted_ids = frozenset(self.request_id_list)

        for record in records:
            log_message = record.get("log_message")
            if log_message is None:
                continue

            request_id_match = self._REQUEST_ID_RE.search(log_message)
            if request_id_match is None:
                continue

            request_id = request_id_match.group(1)
            if request_id not in wanted_ids:
                continue

            if not (self._has_marker(log_message, "<-") or self._has_marker(log_message, "->")):
                continue

            # A fresh dict per row, so optional fields of a previous message
            # cannot leak into this one.
            mapped_record = {"request_id": request_id,
                             "cluster": record.get("cluster"),
                             "time": record.get("iso_eventtime"),
                             "log_message": log_message}

            user_match = self._REQUEST_USER_RE.search(log_message)
            if user_match is not None:
                # The key must match the one _reduce_user_creation reads
                # (it used to be misspelled "user_initiatior").
                mapped_record["user_initiator"] = user_match.group(1)

            error_match = self._REQUEST_ERROR_RE.search(log_message)
            if error_match is not None:
                mapped_record["error_msg"] = error_match.group(1)

            yield mapped_record

    @staticmethod
    def _reduce_user_creation(key, records):
        """Merge all rows of one request into a single row.

        ``log_message`` becomes the list of all messages; ``error_msg`` and
        ``user_initiator`` keep the last non-None value seen; ``cluster`` and
        ``time`` keep the values of the last row.

        :param key: mapping with a ``request_id`` entry.
        :param records: iterable of rows produced by the second map.
        """
        reduced_record = {"request_id": key["request_id"],
                          "log_message": list()}

        for record in records:
            # The mapper stores the event time under "time"; "iso_eventtime"
            # is only a fallback for rows that still use the raw log key.
            time = record.get("time")
            if time is None:
                time = record.get("iso_eventtime")

            error_msg = record.get("error_msg")
            if error_msg is not None:
                reduced_record["error_msg"] = error_msg

            user_initiator = record.get("user_initiator")
            if user_initiator is not None:
                reduced_record["user_initiator"] = user_initiator

            log_message = record.get("log_message")
            if log_message is not None:
                reduced_record["log_message"].append(log_message)

            reduced_record.update({"cluster": record.get("cluster"),
                                   "time": time})

        yield reduced_record

    def _read_user_creation_request_id(self, path):
        """Return the ``request_id`` value of every row of the table at *path*."""
        return [row["request_id"] for row in self.yt.read_table(path)]

    def run_user_creation_analysis(self, clusters_for_analysis=None):
        """Build the ``user_creation_log`` table with two map passes and a reduce.

        The first map collects request ids, which are read back into
        ``self.request_id_list``; the second map selects every message of
        those requests; the result is sorted and reduced by ``request_id``.

        :param clusters_for_analysis: optional non-empty list replacing
            ``self.clusters``. NOTE(review): the user creation mappers do not
            filter by cluster, so this value does not affect the result.
        :return: ``True`` when the source table exists and the pipeline was
            run, ``False`` when the source table does not exist.
        """
        self._apply_clusters_override(clusters_for_analysis)

        if not self.yt.exists(self.source_table):
            logger.warning("Source table %s does not exist, skipping user creation analysis",
                           self.source_table)
            return False

        dest_table = self.raw_master_folder + "/user_creation_log"
        logger.info("Dest table for user creation analysis: %s", dest_table)

        # First Map - find request_id for all event of required type
        logger.info("Running first map to find required request_id's ...")
        self.yt.run_map(self._first_map_user_creation, self.source_table, dest_table)

        # Read first step result, write request_id's to list
        logger.info("Reading first map result, writing requests_id's to variable")
        self.request_id_list = self._read_user_creation_request_id(dest_table)

        # Second Map - find all message for our event type by request_id
        logger.info("Running second map to find all message for our request_id's ...")
        self.yt.run_map(self._second_map_user_creation, self.source_table, dest_table)

        # Sort before reduce
        logger.info("Running sort before reduce by request_id ...")
        self.yt.run_sort(dest_table, sort_by="request_id")

        # Reduce results by request_id
        logger.info("Running reduce by request_id ...")
        self.yt.run_reduce(self._reduce_user_creation, dest_table, dest_table, reduce_by="request_id")

        logger.info("Appending path to dest_paths ...")
        self.dest_paths.append(dest_table)
        return True


def main():
    """Create the analysis without an explicit token and run the auth-failure analysis."""
    analysis = YtRawMasterAnalysis(token=None)
    analysis.run_user_auth_failed_analysis()

    # Beware! This analysis takes more than 12 hours.
    # analysis.run_user_creation_analysis()


if __name__ == "__main__":
    main()
