# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals
from datetime import timedelta
import logging

# Import the YtWorker base class and the `yt` handle from the flat module name
# first; if that import fails, fall back to the package-relative module.
try:
    from yt_worker import YtWorker, yt
except ImportError:
    from .yt_worker import YtWorker, yt


# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class YtLogsEnrich(YtWorker):
    """Collapse YT proxy log rows into one enriched row per ``request_id``.

    The pipeline (see ``enrich_yt_proxy_log``) is map -> sort -> reduce,
    followed by a final map that writes the result under an explicit schema.

    The attributes ``yt``, ``current_datetime``, ``dest_path``, ``source_table``,
    ``enrich_table`` and ``yt_spec`` are read here but never assigned in this
    class; presumably they are provided by ``YtWorker`` (TODO confirm).
    """

    def __init__(self, token, source_table=None):
        """Initialise the worker and make sure the destination folder exists.

        :param token: forwarded unchanged to ``YtWorker.__init__``.
        :param source_table: forwarded unchanged to ``YtWorker.__init__``.
        """
        super(YtLogsEnrich, self).__init__(token=token, source_table=source_table)
        # Set auto-remove timeout for enrich logs with analysis folders (current = 7days)
        # NOTE(review): the "+03:00" suffix is appended verbatim, so this assumes
        # current_datetime is a naive datetime expressed in UTC+3 -- confirm.
        self.enrich_log_ttl = (self.current_datetime + timedelta(days=7)).isoformat() + "+03:00"

        # Create subdir based on logs date
        self.yt.create("map_node", self.dest_path,
                       attributes={"expiration_time": self.enrich_log_ttl}, ignore_existing=True)

    # Mapper function for enrichment
    @yt.aggregator
    def _prepare_enrich_for_map(self, records):
        """Project each log row onto the enrichment columns and derive status flags.

        Rows without a ``request_id`` are dropped. For every other row a *new*
        dict is yielded (rows never alias each other) containing the
        pass-through columns plus ``auth_status``, ``done_status`` and
        ``auth_cache_hit``. Each flag is the string ``"True"``/``"False"`` when
        the row's ``message`` matches the corresponding log line, else ``None``.

        :param records: iterable of dict-like rows.
        :return: generator of dicts, one per row that has a ``request_id``.
        """
        passthrough_fields = (
            "request_id", "user", "cluster", "hostname", "origin", "command",
            "parameters", "message", "time", "status_code", "method", "user_agent",
        )
        # (exact log message, derived column, flag value)
        message_flags = (
            ("Client has been authenticated", "auth_status", "True"),
            ("Client has failed to authenticate", "auth_status", "False"),
            ("Done (success)", "done_status", "True"),
            ("Done (failure)", "done_status", "False"),
            ("Authentication cache hit", "auth_cache_hit", "True"),
            ("Authentication cache miss", "auth_cache_hit", "False"),
        )

        for record in records:
            # Rows that cannot be attributed to a request are useless for the reduce step.
            if record.get("request_id") is None:
                continue

            # Build a fresh dict per row; reusing one dict would make every
            # yielded row the same object for any consumer that buffers them.
            mapped_record = dict((field, record.get(field)) for field in passthrough_fields)
            mapped_record["auth_status"] = None
            mapped_record["done_status"] = None
            mapped_record["auth_cache_hit"] = None

            # Additional enrichment for later analysis
            message = mapped_record["message"]
            for text, column, value in message_flags:
                if message == text:
                    mapped_record[column] = value
                    break  # the messages are distinct, at most one can match

            yield mapped_record

    # Reduce function for enrichment
    @staticmethod
    def _prepare_enrich_for_reduce(key, records):
        """Merge all rows that share one ``request_id`` into a single row.

        ``message`` becomes the list of all non-``None`` messages in input
        order. For every other column the last non-``None`` value seen wins;
        columns that are ``None`` in every row are absent from the result.

        :param key: mapping holding the reduce key; ``key["request_id"]`` is read.
        :param records: iterable of dict-like rows for that key.
        :return: generator yielding exactly one merged dict.
        """
        last_value_fields = (
            "user", "cluster", "hostname", "origin", "command", "parameters",
            "time", "status_code", "method", "user_agent",
            "auth_status", "auth_cache_hit", "done_status",
        )
        messages = list()
        reduced_record = {"request_id": key["request_id"], "message": messages}

        for record in records:
            message = record.get("message")
            if message is not None:
                messages.append(message)

            for field in last_value_fields:
                value = record.get(field)
                if value is not None:
                    reduced_record[field] = value

        yield reduced_record

    @staticmethod
    def schema_mapper(records):
        """Stringify the ``message`` column of a row so it fits a string schema.

        Despite its name, ``records`` is a single row: it is indexed by column
        name, modified in place and yielded back.

        :param records: one dict-like row with a ``message`` entry.
        :return: generator yielding that same row with ``message`` passed through ``str``.
        """
        records["message"] = str(records["message"])
        yield records

    # Enrich function
    def enrich_yt_proxy_log(self):
        """Run the full enrichment pipeline over ``self.source_table``.

        Steps: map ``source_table`` into ``enrich_table``, sort it by
        ``request_id``, reduce it in place by ``request_id``, then re-map it in
        place with an explicit schema attached to the output path.

        :return: ``True`` when the pipeline ran, ``False`` when the source
            table does not exist (a warning is logged in that case).
        """
        import yt.yson as yson

        if not self.yt.exists(self.source_table):
            logger.warning("Source table %s does not exist, nothing to enrich", self.source_table)
            return False

        # Map -> Sort -> Reduce (with auto-merge)
        # NOTE(review): "auto-merge" presumably comes from self.yt_spec -- confirm.
        logger.info("Running map ...")
        self.yt.run_map(self._prepare_enrich_for_map, self.source_table, self.enrich_table)
        logger.info("Running sort ...")
        self.yt.run_sort(self.enrich_table, sort_by="request_id")
        logger.info("Running reduce ...")
        # The reduce reads from and writes to the same table.
        self.yt.run_reduce(self._prepare_enrich_for_reduce, self.enrich_table, self.enrich_table,
                           reduce_by="request_id", spec=self.yt_spec)

        # Prepare schema
        schema = [
            {"name": "request_id", "type": "string"},
            {"name": "user", "type": "string"},
            {"name": "cluster", "type": "string"},
            {"name": "hostname", "type": "string"},
            {"name": "origin", "type": "string"},
            {"name": "command", "type": "string"},
            {"name": "parameters", "type": "string"},
            {"name": "message", "type": "string"},
            {"name": "time", "type": "string"},
            {"name": "status_code", "type": "int64"},
            {"name": "method", "type": "string"},
            {"name": "user_agent", "type": "string"},
            {"name": "auth_status", "type": "string"},
            {"name": "auth_cache_hit", "type": "string"},
            {"name": "done_status", "type": "string"}]

        schema = yson.YsonList(schema)
        # NOTE(review): the attribute key is literally "schema"; table schemas
        # usually carry "strict"/"unique_keys" -- confirm this is intended.
        schema.attributes["schema"] = False

        # Format only the attribute prefix and append the path afterwards, so
        # braces inside the table path can never be treated as format fields.
        dest_table = "<schema={0}>".format(self.yt.yson.dumps(schema)) + self.enrich_table
        logger.info("Running map for schematizing result enrich table ...")
        self.yt.run_map(self.schema_mapper, self.enrich_table, dest_table)

        return True


def main():
    """Script entry point: build the enricher with the placeholder token and run it once.

    The boolean result of ``enrich_yt_proxy_log`` is discarded.
    """
    YtLogsEnrich("token_here").enrich_yt_proxy_log()


# Run the enrichment only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
