# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals
import logging
import re

try:
    from yt_worker import YtWorker, yt
except ImportError:
    from .yt_worker import YtWorker, yt


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class YtTokensExposedInParams(YtWorker):
    """Find tokens passed in clear text inside logged command parameters.

    The analysis maps the enriched table to one row per exposed token, sorts
    those rows by ``user`` and ``token`` and reduces them to one row per
    (user, token) pair that lists every flag the token was passed with.
    """

    def __init__(self, token):
        """Set up destination table paths and the detection patterns.

        :param token: forwarded unchanged to ``YtWorker.__init__``.
        """
        super(YtTokensExposedInParams, self).__init__(token=token)

        # Dest table
        # NOTE(review): ``analysis_folder`` is not assigned in this class;
        # presumably it is provided by YtWorker -- confirm.
        self.dest_table = "/".join([self.analysis_folder, "tokens_exposed"])
        self.dest_reduced_table = self.dest_table + "_reduced"
        # Define secret command keys (patterns)
        self.secrets = ["--token", "--yql-token", "--yt-token"]
        # Group 1 is the flag itself (--token / --yt-token / --yql-token); the
        # negative lookahead rejects flags continued by "path" or "file"
        # (optionally preceded by "-" or "_"). Raw literals keep the pattern
        # text identical while avoiding invalid string escapes.
        self.pattern_type = r"(\-\-(yt\-|yql\-)?token)(?!(\-|\_)?(path|file))"
        # Optional leading double quote, then (group 2) a run of characters
        # that are neither a double quote nor a backslash.
        self.pattern_token = r'(")?([^"\\]+)'

    def _iter_exposed_tokens(self, parameters):
        """Yield ``(token_type, token)`` pairs found in a parameters string.

        :param parameters: ``;``-separated parameters string; any value
            without a ``split`` method (e.g. ``None``) yields nothing.
        """
        try:
            params = parameters.split(";")
        except AttributeError:
            return

        for index, value in enumerate(params):
            if not any(secret in value for secret in self.secrets):
                continue

            if "=" in value:
                # Split on the first "=" only, so a token that itself
                # contains "=" is not truncated.
                raw_type, _, raw_token = value.partition("=")
            elif index + 1 < len(params):
                # "--flag;value" form: the token is the next parameter.
                raw_type, raw_token = value, params[index + 1]
            else:
                # The flag is the last parameter: there is no value to read.
                continue

            match_type = re.search(self.pattern_type, raw_type)
            match_token = re.search(self.pattern_token, raw_token)
            if match_type is None or match_token is None:
                continue

            token = match_token.group(2)
            # NOTE(review): presumably this drops file paths and short
            # placeholder values -- confirm the intent of the thresholds.
            if "/" not in token and len(token) > 20:
                yield match_type.group(1), token

    # Map enriched table
    @yt.aggregator
    def _map_enriched_table(self, records):
        """Yield one row per exposed token found in ``records``.

        :param records: iterable of mappings read with ``.get`` for the keys
            ``user``, ``parameters``, ``command``, ``request_id`` and
            ``cluster``.
        :return: generator of dicts with the keys ``user``, ``type``,
            ``token``, ``command``, ``request_id`` and ``cluster``.
        """
        for record in records:
            found = self._iter_exposed_tokens(record.get("parameters"))
            for token_type, token in found:
                # A fresh dict per row: a consumer that keeps references
                # must not see earlier rows overwritten by later ones.
                yield {"user": record.get("user"),
                       "type": token_type,
                       "token": token,
                       "command": record.get("command"),
                       "request_id": record.get("request_id"),
                       "cluster": record.get("cluster")}

    @staticmethod
    def _reduce_results(key, records):
        """Collapse all rows of one (user, token) pair into a single row.

        :param key: mapping holding the ``user`` and ``token`` of the group.
        :param records: rows of the group; only their ``type`` is read.
        :return: generator yielding one dict with ``user``, ``token`` and
            ``type`` (distinct token types in first-seen order).
        """
        token_types = []
        for record in records:
            token_type = record.get("type")
            if token_type not in token_types:
                token_types.append(token_type)

        yield {"type": token_types,
               "user": key["user"],
               "token": key["token"]}

    def run_analysis(self):
        """Run the map, sort and reduce steps over the enriched table.

        :return: path of the reduced destination table, or ``None`` (after
            logging an error) when the enriched table does not exist.
        """
        # NOTE(review): ``yt`` and ``enrich_table`` are not assigned in this
        # class; presumably they are provided by YtWorker -- confirm.
        if not self.yt.exists(self.enrich_table):
            logger.error("Enrich table not exists!")
            return None

        # Map
        self.yt.run_map(self._map_enriched_table,
                        self.enrich_table, self.dest_table)

        # Sort
        self.yt.run_sort(self.dest_table, sort_by=["user", "token"])

        # Reduce
        self.yt.run_reduce(self._reduce_results, self.dest_table,
                           self.dest_reduced_table,
                           reduce_by=["user", "token"])

        return self.dest_reduced_table


def main():
    """Build the analyzer with the placeholder token "token" and run it."""
    YtTokensExposedInParams(token="token").run_analysis()


# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()
