# -*- coding: utf-8 -*-

import sys
import yt.wrapper as yt
import pandas as pd


# Source table path: the first command-line argument when one is given,
# otherwise the default recommendations export table.
YT_SRC = (
    sys.argv[1]
    if len(sys.argv) > 1
    else "//home/vipplanners/freelance_info/export/recommendations"
)


def main():
    """Export per-freelancer recommendation reports from a YT table to Excel.

    Reads the table at ``YT_SRC`` through the ``hahn`` YT proxy, loads it
    into a pandas DataFrame and groups the rows by
    ``(fl_client_id, fl_name)``.  For every group whose share of exact bad
    shows (summed over the first campaign-level row of each ``cid``) is at
    least 1%, an ``.xlsx`` workbook is written into the current directory
    with two sheets: rows without a ``pid`` greater than ``'0'`` and rows
    with one.

    Groups that have no campaign-level rows, or whose total ``part_shows``
    is zero, are skipped because no share can be computed for them.
    """
    yt_client = yt.YtClient(proxy="hahn")
    table_path = YT_SRC
    # Single-argument form prints the same text under Python 2 and Python 3.
    print("recommendations: %s" % table_path)

    def decode_utf8(raw):
        """Decode a UTF-8 byte string read from the table."""
        return raw.decode("utf-8")

    # Column name -> converter applied to the raw DSV value of that column.
    table_columns = {
        "fl_client_id": str,
        "client_id": str,
        "cid": str,
        "pid": str,
        "login": str,
        "pr_part_bad_shows": float,
        "phrase": decode_utf8,
        "pr_part_bad_shows_share": float,
        "part_shows": int,
        "exact_part_bad_shows_share": float,
        "body_db": decode_utf8,
        "exact_part_bad_shows": float,
        "query": decode_utf8,
        "title_db": decode_utf8,
        "pr_segment_bad_shows": float,
        "fl_name": decode_utf8,
        "campaign_name": decode_utf8,
    }

    def get_it(is_test=False):
        """Yield table rows as dicts with values converted per ``table_columns``.

        With ``is_test`` set, only the first two rows of the table are read.
        """
        # NOTE(review): missing values are requested as the literal 'null'
        # sentinel; the float/int converters would raise ValueError on it --
        # confirm the numeric columns are never missing in the source table.
        path = yt.TablePath(
            table_path,
            start_index=0,
            end_index=2 if is_test else None,
        )
        row_format = yt.SchemafulDsvFormat(
            columns=list(table_columns),
            attributes={
                'missing_value_mode': 'print_sentinel',
                'missing_value_sentinel': 'null',
            },
        )
        for row in yt_client.read_table(path, format=row_format, raw=False):
            yield {key: table_columns[key](val) for (key, val) in row.items()}

    data = pd.DataFrame.from_records(get_it())
    if data.empty:
        # An empty table yields a frame without columns, so the groupby
        # below would fail with KeyError; there is nothing to export anyway.
        return

    # Columns kept in the report, in output order.
    colord = [
        "client_id",
        "login",
        "cid",
        "campaign_name",
        "pid",
        "phrase",
        "query",
        "title_db",
        "body_db",
        "part_shows",
        "pr_part_bad_shows",
        "exact_part_bad_shows",
    ]
    # Sort keys for the rows of each report.
    colsort = [
        "client_id",
        "cid",
        "pid",
        "phrase",
        "query",
    ]
    # Groups whose exact bad-shows share is below this value get no report.
    min_share = 0.01

    for ((flid, flname), flg) in data.groupby(["fl_client_id", "fl_name"]):
        flg = flg[colord].sort_values(colsort)

        # pid is a string column, so this is a lexicographic comparison.
        has_pid = flg["pid"] > "0"
        level_cid = flg[~has_pid].drop("pid", axis=1)
        level_pid = flg[has_pid]

        if level_cid.empty:
            # No campaign-level rows: the totals cannot be computed.
            continue

        # Totals are taken from the first campaign-level row of every cid.
        first_per_cid = level_cid.drop_duplicates(subset="cid")
        part_shows = first_per_cid["part_shows"].sum()
        exact_part_bad_shows = first_per_cid["exact_part_bad_shows"].sum()
        if part_shows == 0:
            # Avoid a division by zero / NaN share leaking into the file name.
            continue

        exact_bad_shows_share = exact_part_bad_shows / part_shows
        if exact_bad_shows_share < min_share:
            continue

        # A '/' in the name would be treated as a directory separator.
        safe_flname = flname.replace(u"/", u"_")
        report_path = "./%.0f%%_%s_%s.xlsx" % (
            100 * exact_bad_shows_share, flid, safe_flname)
        with pd.ExcelWriter(report_path) as xlsx_wr:
            level_cid.to_excel(xlsx_wr, sheet_name=u"Кампании", index=False)
            level_pid.to_excel(xlsx_wr, sheet_name=u"Группы", index=False)

