#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
from yt.yson.yson_types import YsonEntity
import yt.wrapper as yt
import pyexcel

# YT path of the directory holding the source tables; main() reads the table
# at ROOT/<date> and, when no date is given, picks the greatest child name.
ROOT = "//home/videoquality/vh_analytics/underimpressions"

def wrap_yson(obj):
    """Recursively convert a decoded YSON value into plain Python objects.

    Conversion rules, checked in this order:

    * ``bytes`` are decoded as UTF-8; undecodable bytes are replaced.
    * ``list`` items are converted element by element.
    * ``dict`` keys and values are both converted.
    * ``YsonEntity`` becomes ``None``.
    * ``float`` and ``int`` instances are passed through ``float()`` /
      ``int()`` (presumably to drop YSON wrapper subclasses -- confirm
      against the yt.yson types). Note that ``bool`` is an ``int``, so
      booleans come out as ``0`` / ``1``.

    Anything else is returned unchanged.
    """
    if isinstance(obj, bytes):
        text = obj.decode("utf8", errors="replace")
        return text
    if isinstance(obj, list):
        return list(map(wrap_yson, obj))
    if isinstance(obj, dict):
        converted = {}
        for key, value in obj.items():
            # Convert the key first, then the value, one pair at a time.
            plain_key = wrap_yson(key)
            converted[plain_key] = wrap_yson(value)
        return converted
    if isinstance(obj, YsonEntity):
        return None
    # float is tested before int, mirroring the order of the scalar checks.
    for plain_type in (float, int):
        if isinstance(obj, plain_type):
            return plain_type(obj)
    return obj

def out_wrap(obj):
    """Flatten a list into one comma-separated string for a spreadsheet cell.

    Each list item is converted with ``str()`` and the pieces are joined
    with ``","``. Any value that is not a list is returned unchanged.
    """
    if not isinstance(obj, list):
        return obj
    return ",".join(map(str, obj))

# Column name -> Python type of the first non-None value seen for that column.
# Filled in by main() while it scans the downloaded records.
types = dict()

def main():
    """Download an underimpressions table from YT and save it as an .xlsx file.

    Command line:
        --date / -d  Name of the table under ROOT to export. When omitted,
                     the greatest child name returned by ``yt.list(ROOT)``
                     is used.

    The spreadsheet starts with the fixed identifier columns, whose values
    are taken from each record's ``identificator_struct`` field. They are
    followed by the int/float columns in sorted order and then all other
    columns in sorted order. List values are flattened to comma-separated
    strings. The result is written to ``underimpressions_<date>.xlsx`` in
    the current working directory.

    Side effect: the module-level ``types`` dict is filled with the type of
    the first non-None value seen for every column.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--date", "-d")
    args = parser.parse_args()

    # Without an explicit date, fall back to the greatest table name.
    date = args.date if args.date else max(yt.list(ROOT))
    table = ROOT + "/" + date
    print(f"downloading from {table}...")
    recs = [wrap_yson(rec) for rec in yt.read_table(table)]

    # Remember the type of the first non-None value of every column.
    for rec in recs:
        for k, value in rec.items():
            if k not in types and value is not None:
                types[k] = type(value)

    header_row_identificator = ["media_type", "campaign_type", "business_unit", "client_name", "campaign_id", "group_id"]

    # Columns whose name contains "identificator" are exported through the
    # fixed identifier columns above, so they must be left out of the header
    # as well as the data rows; otherwise the header ends up wider than the
    # rows and every following column is shifted.
    data_columns = [k for k in types if "identificator" not in k]
    header_row = sorted(k for k in data_columns if types[k] in (int, float))
    header_row.extend(sorted(
        k for k in data_columns if types[k] not in (int, float)
    ))

    result = [header_row_identificator + header_row]
    for rec in recs:
        # A missing or null identifier struct yields empty identifier cells
        # instead of aborting the whole export.
        identificator = rec.get("identificator_struct") or {}
        row = [identificator.get(k) for k in header_row_identificator]
        row.extend(out_wrap(rec.get(k)) for k in header_row)
        result.append(row)
    pyexcel.save_as(array=result, dest_file_name=f"underimpressions_{date}.xlsx")


# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
