from nile.api.v1 import (
    Record,
    clusters,
    statface,
    extractors as ne,
    filters as nf,
    aggregators as na,
    modified_schema,
    with_hints,
    cli
)
from qb2.api.v1 import (
    extractors as se,
    filters as sf,
    typing as qt
)
from datetime import datetime, timedelta
from collections import defaultdict

# Manufacturers that keep their own node (and per-model children) in the report.
# prepare_dict folds every manufacturer not listed here into "other".
manufacturers = {
    "Samsung", "Huawei", "Xiaomi", "Meizu",
    "Lenovo", "Sony", "Asus", "LG",
    "Zte", "BQru", "Fly", "Nokia",
    "Alcatel", "HTC", "LeTV", "bq",
    "Prestigio", "Micromax", "Vertex", "INOI",
    "OnePlus", "DEXP", "Wileyfox", "Digma",
    "Philips", "Irbis", "Oppo", "Oukitel",
    "Blackview", "Motorola", "Highscreen", "Beeline",
}


def get_sl_version(raw_dict):
    """
    Extract the "version" field from a serialized event payload
    :param raw_dict: str, textual dict literal; lowercase true/false are accepted
                     (presumably the payload is JSON-like - confirm against the log format)
    :return: value stored under "version", or "none" when the payload cannot be parsed,
             is not a dict, or has no "version" key
    """
    # Function-scope import keeps this helper self-contained.
    import ast

    try:
        # Turn lowercase booleans into Python literals so the payload parses as a dict literal.
        normalized = raw_dict.replace("false", "False").replace("true", "True")
        # literal_eval accepts literals only, so text coming from event logs can never
        # be executed as code (which a plain eval() would allow).
        event_dict = ast.literal_eval(normalized)
        return event_dict.get("version", "none")
    except Exception:
        # Best-effort extraction: any malformed or non-dict payload maps to "none".
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        return "none"


def get_high_os_version(version):
    """
    Return the major component of a dotted OS version string
    :param version: str, e.g. "7.1.2"
    :return: text before the first dot, or "other" when version is not a string
             (for example None)
    """
    try:
        return version.split(".")[0]
    except (AttributeError, TypeError):
        # Non-string input has no usable split(); interrupts and other
        # unrelated errors are no longer swallowed here.
        return "other"


def add_to_dict(data_dict, os_version, manufacturer, app_trees, model_trees, sl_trees, count):
    """
    Add count to every combination of the given trees, at each os_version / manufacturer
    roll-up level. The roll-ups are written explicitly because stat does not aggregate
    the top levels by itself
    :param data_dict: dict, counters keyed by
                      (os_version, manufacturer, app_tree, model_tree, sl_tree); updated in place
    :param os_version: str
    :param manufacturer: str
    :param app_trees: list(str)
    :param model_trees: list(str)
    :param sl_trees: list(str)
    :param count: int
    :return:
    """
    everything = "_total_"
    # Exact pair first, then manufacturer rolled up, os_version rolled up, both rolled up.
    rollups = (
        (os_version, manufacturer),
        (os_version, everything),
        (everything, manufacturer),
        (everything, everything),
    )

    for model_tree in model_trees:
        for app_tree in app_trees:
            for sl_tree in sl_trees:
                for os_key, manufacturer_key in rollups:
                    data_dict[(os_key, manufacturer_key, app_tree, model_tree, sl_tree)] += count


def prepare_dict(data_dict, manufacturer, model, app_info, sl_version, os_version, count, counted_in_total=False):
    """
    Register one app pair (and its searchlib version pair) in the dict that will be
    published on stat, under every tree node the pair belongs to
    :param data_dict: dict, counters updated in place through add_to_dict
    :param manufacturer: str, replaced by "other" when it is not in manufacturers
    :param model: str, only used when the manufacturer is in manufacturers
    :param app_info: list(str), two app names
    :param sl_version: list(str), searchlib versions matching app_info by position
    :param os_version: str
    :param count: int
    :param counted_in_total: bool, when True the app-total / sl-total node is not incremented
    :return:
    """
    root = "\t_total_\t"

    def branch(parent, child):
        # Paths of a parent node and of the child nested under it.
        return ["\t_total_\t{}\t".format(parent), "\t_total_\t{}\t{}\t".format(parent, child)]

    first_app, second_app = app_info[0], app_info[1]
    first_sl, second_sl = sl_version[0], sl_version[1]

    if manufacturer in manufacturers:
        model_trees = [root] + branch(manufacturer, model)
    else:
        # Unlisted manufacturers share one "other" node and get no per-model level.
        manufacturer = "other"
        model_trees = [root, "\t_total_\t{}\t".format(manufacturer)]

    def register(apps, sls):
        add_to_dict(data_dict, os_version, manufacturer, apps, model_trees, sls, count)

    # Pair in its given order: full detail, then each dimension rolled up in turn.
    direct_apps = branch(first_app, second_app)
    direct_sls = branch(first_sl, second_sl)
    register(direct_apps, direct_sls)
    register(direct_apps, [root])
    register([root], direct_sls)
    if not counted_in_total:
        register([root], [root])

    if first_app != second_app:
        # Different apps: register the pair in reversed order as well.
        reversed_apps = branch(second_app, first_app)
        reversed_sls = branch(second_sl, first_sl)
        register(reversed_apps, reversed_sls)
        register(reversed_apps, [root])
        register([root], reversed_sls)
    elif first_sl != second_sl:
        # Same app twice with different versions: only the reversed version pair is
        # missing, and only under the app total and the app pair node.
        register([root, "\t_total_\t{}\t{}\t".format(first_app, second_app)],
                 branch(second_sl, first_sl))


def get_mapper(table_date):
    """
    Build a mapper that stamps every output record with table_date
    :param table_date: str, written to the fielddate field
    :return: func
    """
    @with_hints(
        output_schema=dict(os_version=str, application=str, sl_version=str, manufacturer=str, model=str, count=int,
                           fielddate=str))
    def mapper(records):
        """
        Expand aggregated rows into per-tree-node counters and emit one record per node
        :param records: rows with os_version, manufacturer, model, count and app_info fields;
                        app_info holds "\\n"-separated entries of the form app<TAB>sl_version
        :return: generator of Record
        """
        counters = defaultdict(int)

        for row in records:
            major = row["os_version"].split(".")[0]
            # Only a single-digit major version is kept; everything else shares one bucket.
            if not (major.isdigit() and len(major) <= 1):
                major = "thrash"

            manufacturer = row["manufacturer"]
            model = row["model"]
            count = row["count"]

            if manufacturer is None or model is None:
                continue

            entries = row["app_info"].split("\n")

            if len(entries) != 2:
                # Not exactly two apps: register each distinct (app, version) entry once
                # against a "more_than_two" placeholder.
                seen = set()
                counted_in_total = False
                for entry in entries:
                    pair = tuple(entry.split("\t"))
                    if pair in seen:
                        continue
                    seen.add(pair)
                    prepare_dict(counters, manufacturer, model,
                                 [pair[0], "more_than_two"], [pair[1], "more_than_two"],
                                 major, count, counted_in_total)
                    # The row must reach the overall total only once.
                    counted_in_total = True
                continue

            first, second = entries[0].split("\t"), entries[1].split("\t")
            prepare_dict(counters, manufacturer, model,
                         [first[0], second[0]], [first[1], second[1]], major, count)

        for (os_key, manufacturer_key, app_key, model_key, sl_key), total in counters.iteritems():
            yield Record(os_version=os_key, manufacturer=manufacturer_key, application=app_key,
                         model=model_key, sl_version=sl_key, fielddate=table_date, count=total)

    return mapper


@with_hints(output_schema=dict(os_version=str, manufacturer=str, model=str,
                               app_info=str, count=int))
def prepare_table_reducer(groups):
    """
    Collapse each group into a list of app<TAB>searchlib-version entries and count how
    many groups share the same (OS version, manufacturer, model, entry list).
    Groups with fewer than two entries or with a missing key field are dropped
    :param groups: iterable of (key, records); key carries OSVersion, Manufacturer and Model,
                   records carry AppID and EventValue
    :return: generator of Record
    """
    combo_counts = defaultdict(int)

    for key, records in groups:
        # The entries are built first, so a group is read through even if it is skipped below.
        entries = [
            "{}\t{}".format(rec['AppID'], get_sl_version(rec['EventValue']))
            for rec in records
        ]

        if any(key[field] is None for field in ("OSVersion", "Manufacturer", "Model")):
            continue
        if len(entries) < 2:
            continue

        combo = (key["OSVersion"], key["Manufacturer"], key["Model"], "\n".join(entries))
        combo_counts[combo] += 1

    for (os_version, manufacturer, model, app_info), total in combo_counts.iteritems():
        yield Record(os_version=os_version, manufacturer=manufacturer, model=model,
                     app_info=app_info, count=total)


@cli.statinfra_job
def make_job(job, options, statface_client):
    """
    Assemble the daily job: select searchlib_dayuse client events for the requested date,
    reduce them to per-group app lists, expand those into report tree nodes and publish
    the result to the Distribution/Adhoc/searchlib_double_dayuse report
    :param job: job object that the tables and operations are attached to
    :param options: run options; the first entry of options.dates is the processed date
    :param statface_client: client handed to the Statface report
    :return: the same job
    """
    table_date = options.dates[0]

    # Passed as a plain string, not a one-element tuple.
    # NOTE(review): confirm this is the form StatfaceReport expects for replace_mask.
    report = statface.report.StatfaceReport(
        path="Distribution/Adhoc/searchlib_double_dayuse",
        scale="daily",
        replace_mask="fielddate",
        client=statface_client
    )

    dayuse_events = job.table("//home/searchlib/squeeze/$date").filter(
        nf.and_(
            nf.equals("EventName", "searchlib_dayuse"),
            nf.equals("EventType", 4),  # EVENT_CLIENT
            nf.equals("EventDate", table_date)
        )
    )

    # Intermediate table: one row per group that reported dayuse from two or more apps.
    app_lists = dayuse_events.groupby("DeviceID", "OSVersion", "Manufacturer", "Model") \
        .sort("AppID") \
        .reduce(prepare_table_reducer)

    aggregated = app_lists.groupby("os_version", "manufacturer", "model", "app_info") \
        .aggregate(count=na.sum("count"))

    aggregated.map(get_mapper(table_date)).publish(report, remote_mode=True)

    return job


# Script entry point: hand control to the command-line runner imported from nile
# (presumably it dispatches the job registered with @cli.statinfra_job - confirm).
if __name__ == "__main__":
    cli.run()
