"""
    RUNTIMECLOUD-5789

    This program:
    1. Reads date as input.
    2. Goes to YT API and launches a MapReduce job there. The job selects
       the state-change times of Nanny configuration stages.
    3. Calculates percentiles and histograms for stages.
    4. Uploads them to stat.yandex-team.ru.

    You need STAT_OAUTH_TOKEN and YT_TOKEN in env.
"""
# from __future__ import unicode_literals
from __future__ import print_function

import os
import json
import hashlib
import datetime
import argparse

from collections import namedtuple, defaultdict

import yt.wrapper as yt


# Default YT proxy; used as the default of the --yt-cluster option.
YT_CLUSTER = "hahn.yt.yandex.net"
# Upload API hosts for the beta and production stat installations.
STAT_BETA_FQDN = "upload.stat-beta.yandex-team.ru"
STAT_FQDN = "upload.stat.yandex-team.ru"
# Default of the --stat-oauth-token option; when empty, main() looks
# for STAT_OAUTH_TOKEN in the environment.
STAT_OAUTH_TOKEN = None
# upload_report_to_stat() refuses to upload unless the whoami endpoint
# reports this user name for the token.
STAT_OAUTH_TOKEN_BELONGS_TO_USER = "nanny-robot"
# Report description sent to stat: "name" and "scale" accompany both the
# config and the data upload, "title" and "report_config" go into the config.
STAT_REPORT = {
    "name": "Yandex/Infra/ISS/NannyDeployTimings",
    "title": "Nanny user services deploy timings",
    "scale": "d",
    "report_config": {
        "dimensions": [
            {"fielddate": "date"},
            {"data": "tree"}
        ],
        "measures": [
            {"value": "number"},
        ],
        "graphs": [
            {"fields": "value"},
        ]
    }
}

# NOTE(review): not referenced in the visible part of this file; presumably
# documents the expected order of configuration states - confirm before removing.
TRANSITIONS = {
    None: ["GENERATING"],
    "GENERATING": ["PREPARING", "ACTIVATING"],
    "PREPARING": ["ACTIVATING"],
    "ACTIVATING": ["ACTIVE"],
}
# Only event-log rows with one of these target states are kept by the mapper.
YT_MAPPER_TARGET_STATES = ["CREATED", "PREPARED", "ACTIVE"]
# NOTE(review): not referenced in the visible part of this file.
DATE_TICK_FORMAT = "%d%b-%H:%M"
# Directory where YT job results are cached between runs.
TMP_DIR = "/tmp/"

# One event of a snapshot; "unixtime" comes first so that sorting a list of
# StateChange tuples orders events by time.
StateChange = namedtuple("StateChange", ["unixtime", "current_state"])



def parse_date(date_as_string):
    """ Converts a "YYYY-MM-DD" string into a datetime.date object.

    Raises ValueError when the string does not match that format.
    """

    parsed = datetime.datetime.strptime(date_as_string, "%Y-%m-%d")
    return parsed.date()


def mapper(target_states):
    """ Builds a YT mapper that keeps rows with one of target_states. """

    copied_fields = ("unixtime", "current_state", "snapshot_id", "service_id")

    def filter_events(row):
        """ Emits a trimmed copy of the row if its target state is wanted. """

        target_state = row.get("target_state")
        if target_state not in target_states:
            return

        event = {field: row[field] for field in copied_fields}
        event["target_state"] = target_state
        yield event
    return filter_events


def reducer(key, rows):
    """ YT reducer: measures stage durations of one service snapshot.

    Events are ordered by time; a stage duration is the time between an
    event in that stage and the directly following event, provided the
    following state is one of the stage's accepted successors. Yields a
    single row with the ids, the first event time and the found durations.
    """

    # (stage, states that may directly follow it to count it as finished)
    stage_exits = (
        ("ACTIVATING", ("ACTIVE",)),
        ("PREPARING", ("ACTIVATING", "PREPARED")),
        ("GENERATING", ("ACTIVATING", "PREPARING", "CREATED")),
    )

    events = [StateChange(int(r["unixtime"]), r["current_state"]) for r in rows]
    events.sort()

    timings = {"snapshot_id": key["snapshot_id"],
               "service_id": key["service_id"],
               "unixtime": events[0].unixtime}

    previous = None
    for event in events:
        if previous is not None:
            for stage, followers in stage_exits:
                if previous.current_state == stage and event.current_state in followers:
                    timings[stage] = event.unixtime - previous.unixtime
        previous = event
    yield timings


def run_yt_jobs(date, cluster, use_skynet_python):
    """ Runs the YT MapReduce job for one day and returns its rows.

    Args:
        date: day to process; formatted into the source table path.
        cluster: YT proxy to run the job on.
        use_skynet_python: when true, YT jobs are run with Skynet Python.

    Returns:
        List of rows produced by reducer().
    """

    yt.config.set_proxy(cluster)

    # https://wiki.yandex-team.ru/yt/userdoc/pythonwrapper/#primeryfiltraciimodulejj
    # Fixes error "AttributeError: "module" object has no attribute "openssl_md_meth_names""
    # in YT job.
    def skip_hashlib(module):
        """ Excludes hashlib-related modules from pickling. """
        return "hashlib" not in getattr(module, "__name__", "")

    yt.config["pickling"]["module_filter"] = skip_hashlib

    # https://clubs.at.yandex-team.ru/yt/1851
    if use_skynet_python:
        yt.config["pickling"]["python_binary"] = "/skynet/python/bin/python"

    day_table = "//statbox/nanny-services-event-log/{}".format(date)
    with yt.TempTable(prefix="nanny-event-log-processor-") as temp_table:
        yt.run_map_reduce(mapper=mapper(YT_MAPPER_TARGET_STATES),
                          reducer=reducer,
                          source_table=day_table,
                          destination_table=temp_table,
                          reduce_by=["service_id", "snapshot_id"])
        # Read everything while the temporary table still exists: leaving
        # the "with" block removes it, so a lazily consumed row iterator
        # must not escape this scope.
        return list(yt.read_table(temp_table, format=yt.JsonFormat(), raw=False))


def get_from_cache(file_name):
    """ Loads a cached YT job result from TMP_DIR.

    Returns the parsed JSON content, or None when there is no such file.
    """

    cache_path = os.path.join(TMP_DIR, file_name)
    if not os.path.isfile(cache_path):
        return None

    with open(cache_path) as cache_file:
        return json.load(cache_file)


def get_cache_name(date, cluster):
    """ Calculate cache file name from date and cluster.

    The name contains an MD5 hex digest of the date string followed by the
    cluster name, so every (date, cluster) pair gets its own cache file.
    """

    name = "".join([str(date), cluster])
    # hashlib accepts only bytes on Python 3; on Python 2 a plain str is
    # already bytes and is hashed unchanged.
    if not isinstance(name, bytes):
        name = name.encode("utf-8")
    return "nanny-event-log-{}.json".format(hashlib.md5(name).hexdigest())


def save_to_cache(file_name, content):
    """ Stores a YT job result in the file cache under TMP_DIR. """

    cache_path = os.path.join(TMP_DIR, file_name)
    save_to_file(cache_path, content)


def save_to_file(file_name, content):
    """ Writes content to file_name as JSON, replacing any previous content. """

    with open(file_name, "w") as output:
        json.dump(obj=content, fp=output)


def yield_result_values_percentiles(prefixes_and_data, percentiles_names):
    """ Yields percentiles in key/value form.

    Args:
        prefixes_and_data: dict mapping a stage prefix to a sequence of
            percentile values, ordered like percentiles_names.
        percentiles_names: percentile labels, e.g. [50, 75, 95].

    Yields:
        (key, value) pairs where key is the tab-separated tree path
        "\\tpercentiles\\t<prefix>\\tp<name>\\t" and value is the percentile
        truncated to int.
    """

    # items() instead of iteritems(): works on both Python 2 and Python 3.
    for prefix, percentiles_data in prefixes_and_data.items():
        path_name = "\t{}\t{}\t{}".format("percentiles", prefix, "p")
        for i, percentile_name in enumerate(percentiles_names):
            key = "{path_name}{percentile_name}\t".format(path_name=path_name,
                                                          percentile_name=percentile_name)
            yield key, int(percentiles_data[i])


def yield_result_values_histogram(prefixes_and_data):
    """ Yields histograms in key/value form.

    Args:
        prefixes_and_data: dict mapping a stage prefix to histogram data in
            numpy.histogram layout: [0] - bucket counts, [1] - bucket edges,
            with len([0]) == len([1]) - 1.

    Yields:
        (key, value) pairs where key is the tab-separated tree path
        "\\thistogram\\t<prefix>\\th<left>-<right>\\t" and value is the bucket
        count as int.
    """

    # items() instead of iteritems(): works on both Python 2 and Python 3.
    for prefix, histogram_data in prefixes_and_data.items():
        path_name = "\t{}\t{}\t{}".format("histogram", prefix, "h")

        # https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.histogram.html
        counts, bin_edges = histogram_data[0], histogram_data[1]
        for i, count in enumerate(counts):
            key = "{path_name}{left_bin}-{right_bin}\t".format(path_name=path_name,
                                                               left_bin=bin_edges[i],
                                                               right_bin=bin_edges[i + 1])
            yield key, int(count)



def upload_report_to_stat(stat_fqdn, stat_oauth_token, data):
    """ Creates/updates the report on stat and uploads data to it.

    Steps: verify via the whoami endpoint that the token belongs to
    STAT_OAUTH_TOKEN_BELONGS_TO_USER, upload the report config, upload data.
    The first failing step stops the upload.

    Args:
        stat_fqdn: host name of the stat upload API.
        stat_oauth_token: OAuth token sent in the Authorization header.
        data: JSON-serializable report data, sent as "json_data".

    Returns:
        Dict with "if_success" (bool) and "message" (text: the response
        body of the last HTTP step, or an explanation of the token check
        failure). Network errors raised by requests are not caught.
    """

    # Imported here rather than at module level, presumably because
    # requests is not available in the YT runtime environment.
    import requests

    request_headers = {
        "Authorization": "OAuth {}".format(stat_oauth_token)
    }
    who_am_i_url = "https://{}/_v3/whoami/".format(stat_fqdn)
    report_config_url = "https://{}/_api/report/config".format(stat_fqdn)
    report_data_url = "https://{}/_api/report/data".format(stat_fqdn)

    # Check who am I
    resp = requests.get(who_am_i_url, headers=request_headers)
    if not resp.ok:
        # E.g. an invalid token: report the server answer instead of
        # failing later on a missing "username" key.
        return {"if_success": False, "message": resp.text}

    try:
        username = json.loads(resp.text).get("username")
    except (ValueError, AttributeError):
        # Body is not JSON or not a JSON object: treat as unknown user.
        username = None

    # Check if OAuth token provided belongs to correct user
    if username != STAT_OAUTH_TOKEN_BELONGS_TO_USER:
        return {
            "if_success": False,
            "message": "OAuth token does not belong to user {}, won't try to "
                       "upload.".format(STAT_OAUTH_TOKEN_BELONGS_TO_USER)
        }

    # Upload report config
    resp = requests.post(
        report_config_url,
        headers=request_headers,
        data={
            "json_config": json.dumps(
                {
                    "user_config": STAT_REPORT["report_config"],
                    "title": STAT_REPORT["title"]
                }
            ),
            "name": STAT_REPORT["name"],
            "scale": STAT_REPORT["scale"]
        }
    )
    if not resp.ok:
        return {"if_success": False, "message": resp.text}

    # Upload data
    resp = requests.post(
        report_data_url,
        headers=request_headers,
        data={
            "json_data": json.dumps(data),
            "name": STAT_REPORT["name"],
            "scale": STAT_REPORT["scale"]
        }
    )

    return {"if_success": bool(resp.ok), "message": resp.text}



def _collect_stage_timings(rows):
    """ Groups durations as {stage: {service_id: {start_time: seconds}}}.

    Stages are "generating", "preparing", "activating" and "deploying",
    the latter being the sum of the other three and present only for rows
    that have all of them.
    """

    stage_fields = (("generating", "GENERATING"),
                    ("preparing", "PREPARING"),
                    ("activating", "ACTIVATING"))
    timings = {"generating": defaultdict(dict),
               "preparing": defaultdict(dict),
               "activating": defaultdict(dict),
               "deploying": defaultdict(dict)}

    for row in rows:
        start_time = datetime.datetime.fromtimestamp(row["unixtime"])
        srv_id = row["service_id"]

        found = []
        for stage, field in stage_fields:
            duration = row.get(field)
            # "is not None" rather than truthiness: a stage that took
            # 0 seconds is a valid measurement and must be counted.
            if duration is not None:
                timings[stage][srv_id][start_time] = duration
                found.append(duration)

        # Calculated only if all stages present in the row.
        if len(found) == len(stage_fields):
            timings["deploying"][srv_id][start_time] = sum(found)

    return timings


def _flatten_timings(timings_by_service):
    """ Returns a plain list of durations from {service: {time: seconds}}. """

    return [duration
            for by_start_time in timings_by_service.values()
            for duration in by_start_time.values()]


def main(date,
         yt_cluster,
         try_cache,
         upload_to_stat_beta,
         upload_to_stat,
         stat_oauth_token,
         use_skynet_python):
    """ Main logic.

    Gets stage timings for the date (from the file cache or from a YT job),
    calculates percentiles and histograms and uploads them to stat.

    Args:
        date: day to process.
        yt_cluster: YT proxy to run the job on.
        try_cache: try the file cache before running the YT job.
        upload_to_stat_beta: upload the report to STAT_BETA_FQDN.
        upload_to_stat: upload the report to STAT_FQDN.
        stat_oauth_token: stat OAuth token; when empty, STAT_OAUTH_TOKEN
            from the environment is used.
        use_skynet_python: run YT jobs with Skynet Python.

    Raises:
        SystemExit: always; code 0 on success, 1 when the token is missing
            for a requested upload or when an upload failed.
    """

    if not stat_oauth_token:
        # .get(): a missing variable must lead to the error message below
        # (or be ignored when no upload is requested), not to a KeyError.
        stat_oauth_token = os.environ.get("STAT_OAUTH_TOKEN")

    # Check if OAuth token for stat.yandex-team.ru provided by user
    if (upload_to_stat_beta or upload_to_stat) and not stat_oauth_token:
        print("ERROR: No OAuth token provided. Token must belong "
              "to user {}".format(STAT_OAUTH_TOKEN_BELONGS_TO_USER))
        raise SystemExit(1)

    result = None
    cache_name = get_cache_name(date, yt_cluster)

    if try_cache:
        result = get_from_cache(cache_name)

    if not result:
        print("Running YT job...")
        result = run_yt_jobs(date, yt_cluster, use_skynet_python)
        result = sorted(result, key=lambda x: x["unixtime"])
        save_to_cache(cache_name, result)

    # Here every row of result looks like
    # {
    #   "GENERATING": 20,
    #   "unixtime": 1522185648,
    #   "PREPARING": 34,
    #   "snapshot_id": "9e14b70975a171b78e3b0397980ed10a72bd36d7",
    #   "service_id": "testing_market_mbo_skubd_iva",
    #   "ACTIVATING": 31
    # }
    # Stage keys are absent when the stage was not measured.

    # Importing numpy here because there is no numpy in YT runtime environment.
    import numpy

    percentiles_names = [50, 75, 95, 99, 100]

    # Histogram bins.
    bins = [0, 60, 120, 180, 240, 300, 600, 900, 9999999]  # seconds

    percentiles_prefixes_and_data = {}
    histogram_prefixes_and_data = {}
    for stage, by_service in _collect_stage_timings(result).items():
        times = _flatten_timings(by_service)
        histogram_prefixes_and_data[stage] = numpy.histogram(times, bins=bins)
        if times:
            percentiles_prefixes_and_data[stage] = numpy.percentile(times, percentiles_names)
        else:
            # Percentiles of an empty sample are NaN and cannot be
            # converted to int for the report, so the stage is skipped.
            print("WARNING: no timings for stage {}, skipping its percentiles".format(stage))

    # Result format
    # https://wiki.yandex-team.ru/statbox/Statface/externalreports/#zagruzkadannyx
    # https://wiki.yandex-team.ru/users/feriat/howtostat/
    #
    # {
    #   "values": [
    #     {"fielddate": "2018-03-29", "data": "\thistogram\tgenerating\th60-120\t", "value": 42},
    #     {"fielddate": "2018-03-29", "data": "\tpercentiles\tpreparing\tp75\t", "value": 24}
    #   ]
    # }

    # "date" is an object like "datetime.date(2018, 3, 29)", convert it to string
    fielddate = str(date)
    report_values = []

    # Add percentiles names and values to result
    for key, value in yield_result_values_percentiles(percentiles_prefixes_and_data,
                                                      percentiles_names):
        report_values.append({"fielddate": fielddate, "data": key, "value": value})

    # Add histogram names and values to result
    for key, value in yield_result_values_histogram(histogram_prefixes_and_data):
        report_values.append({"fielddate": fielddate, "data": key, "value": value})

    report_json = {"values": report_values}

    # Upload result to stat API, beta first
    upload_targets = []
    if upload_to_stat_beta:
        upload_targets.append(STAT_BETA_FQDN)
    if upload_to_stat:
        upload_targets.append(STAT_FQDN)

    exit_code = 0
    for stat_fqdn in upload_targets:
        response = upload_report_to_stat(stat_fqdn, stat_oauth_token, report_json)
        print("Uploading to {} result: if_success={}, message={}".format(
            stat_fqdn,
            response["if_success"],
            response["message"].encode("utf-8")))
        if not response["if_success"]:
            exit_code = 1

    # SystemExit directly: the exit() builtin is injected by the site module
    # and is not guaranteed to exist in every runtime.
    raise SystemExit(exit_code)



# Command-line entry point: parse options and hand them to main().
if __name__ == "__main__":
    # The text must be passed as "description": the first positional
    # argument of ArgumentParser is "prog", which would put the whole
    # sentence into the usage line as the program name.
    parser = argparse.ArgumentParser(
        description="Service deployment time percentiles and histogram generator")
    parser.add_argument("--date",
                        help="Date, defaults to yesterday",
                        type=parse_date,
                        default=datetime.date.today() - datetime.timedelta(days=1))
    parser.add_argument("--yt-cluster",
                        help="YT cluster where Nanny logs are stored",
                        default=YT_CLUSTER)
    parser.add_argument("--cache",
                        help="Try to get results from cache",
                        action="store_true")
    # The user name in the help comes from the constant the upload code
    # actually checks the token against, so the two cannot disagree.
    parser.add_argument("--stat-oauth-token",
                        help="Stat OAuth token for {}, or STAT_OAUTH_TOKEN in env".format(
                            STAT_OAUTH_TOKEN_BELONGS_TO_USER),
                        default=STAT_OAUTH_TOKEN)
    parser.add_argument("--upload-to-stat-beta",
                        help="Upload results to stat-beta.yandex-team.ru",
                        action="store_true")
    parser.add_argument("--upload-to-stat",
                        help="Upload results to stat.yandex-team.ru",
                        action="store_true")
    parser.add_argument("--use-skynet-python",
                        help="Use Skynet Python in YT MR job",
                        action="store_true")
    args = parser.parse_args()

    main(date=args.date,
         yt_cluster=args.yt_cluster,
         try_cache=args.cache,
         upload_to_stat_beta=args.upload_to_stat_beta,
         upload_to_stat=args.upload_to_stat,
         stat_oauth_token=args.stat_oauth_token,
         use_skynet_python=args.use_skynet_python)
