# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals

try:
    from yt_worker import YtWorker, yt
except ImportError:
    from .yt_worker import YtWorker, yt


class YtUsersActivityAnalysis(YtWorker):
    """Count successful commands per (user, cluster) and keep the heavy users.

    For every command listed in ``commands_with_thresholds`` a map -> sort ->
    reduce -> sort pipeline is run over ``self.enrich_table``. The result for
    each command is written to its own table whose path is
    ``self.dest_table + <command>``.

    NOTE(review): ``self.yt``, ``self.analysis_user_activity`` and
    ``self.enrich_table`` are not defined in this file; they are expected to
    be provided by ``YtWorker`` - confirm against that class.
    """

    def __init__(self, token):
        """Initialise the worker and make sure the output node exists.

        :param token: passed through unchanged to ``YtWorker.__init__``.
        """
        super(YtUsersActivityAnalysis, self).__init__(token=token)

        # Create required folder (node)
        self.yt.create("map_node", self.analysis_user_activity,
                       recursive=True, ignore_existing=True)

        # Command and threshold currently being analysed. ``run_analysis``
        # overwrites both before each pipeline run; the mapper and the reducer
        # read them from the instance.
        self.command = ""
        self.threshold = 0
        # Threshold per command: the reducer only emits a (user, cluster) pair
        # whose total count is strictly greater than the threshold.
        self.commands_with_thresholds = {
            "list": 10,
            "exists": 10,
            "erase": 10,
            "remove": 50,
            "move": 10,
            "insert_rows": 10,
            "read_table": 50,
            "write_table": 50,
            "copy": 30,
        }

        # Common path prefix of the result tables; the command name is appended.
        self.dest_table = self.analysis_user_activity + "/top_users_run_"
        # Paths of all result tables produced so far (no duplicates).
        self.dest_tables_paths = []

    # Mapper function
    @yt.aggregator
    def _mapper_for_analysis(self, records):
        """Pre-aggregate matching records into per-(user, cluster) counts.

        A record is counted when its ``command`` equals ``self.command``, its
        ``done_status`` is the string ``"True"``, its ``user`` is set and is
        not ``"cron"``, and its ``cluster`` is set.

        :param records: iterable of dict-like input rows.
        :return: generator of ``{"user", "cluster", "count"}`` dicts, one per
            distinct (user, cluster) pair seen in ``records``.
        """
        count_per_cluster = dict()
        for record in records:
            user = record.get("user")
            cluster = record.get("cluster")

            # Guard clause: skip everything that must not be counted.
            if record.get("command") != self.command:
                continue
            if record.get("done_status") != "True":
                continue
            if user is None or user == "cron" or cluster is None:
                continue

            per_user = count_per_cluster.setdefault(user, {})
            per_user[cluster] = per_user.get(cluster, 0) + 1

        for user, clusters in count_per_cluster.items():
            for cluster, count in clusters.items():
                # Yield a fresh dict for every row. Re-using one mutable dict
                # for all clusters of a user would make every already-yielded
                # row change whenever the consumer keeps a reference to it.
                yield {"user": user, "cluster": cluster, "count": count}

    # Reduce function
    def _reduce_for_analysis(self, key, records):
        """Sum the partial counts of one (user, cluster) key.

        :param key: mapping holding the ``user`` and ``cluster`` of the group.
        :param records: iterable of rows of that group, each with a ``count``.
        :return: generator yielding a single ``{"user", "cluster", "count"}``
            dict when the total is strictly greater than ``self.threshold``,
            and nothing otherwise.
        """
        user, cluster = key["user"], key["cluster"]

        res_count = 0
        for record in records:
            # A missing or None count contributes nothing instead of raising.
            res_count += record.get("count") or 0

        if res_count > self.threshold:
            yield {"user": user, "cluster": cluster, "count": res_count}

    # Main functions
    def run_analysis(self):
        """Run the pipeline for every configured command.

        For each command: map ``self.enrich_table`` into the command's result
        table, sort it by (user, cluster), reduce it in place by the same key
        and finally sort the result by ``count``.

        :return: ``False`` when ``self.enrich_table`` does not exist (nothing
            is run), ``True`` after all commands have been processed.
        """
        if not self.yt.exists(self.enrich_table):
            return False

        group_key = ["user", "cluster"]
        for command, threshold in self.commands_with_thresholds.items():
            # The mapper and the reducer read these from the instance.
            self.command = command
            self.threshold = threshold

            dest_table = self.dest_table + command
            # Calling run_analysis more than once must not list a table twice.
            if dest_table not in self.dest_tables_paths:
                self.dest_tables_paths.append(dest_table)

            self.yt.run_map(self._mapper_for_analysis,
                            self.enrich_table, dest_table)
            self.yt.run_sort(dest_table, sort_by=group_key)
            self.yt.run_reduce(self._reduce_for_analysis, dest_table,
                               dest_table, reduce_by=group_key)
            self.yt.run_sort(dest_table, sort_by="count")
        return True


def main():
    """Build the analysis worker with ``token=None`` and run the analysis.

    The boolean returned by ``run_analysis`` is discarded.
    """
    YtUsersActivityAnalysis(token=None).run_analysis()


# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
