#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
import os
import codecs
import argparse
import json
import yaml
from yql.api.v1.client import YqlClient
from videolog_common import YqlRunner, apply_replacements
from beeline_codegen_data import codegen_vars


def get_metrics_keys(metrics):
    """Return the last token of every metric expression in ``metrics``.

    ``metrics`` is a single string whose expressions are separated by a comma
    immediately followed by a newline.  For each expression the last
    whitespace-separated token is returned (presumably the column alias of an
    ``<expr> AS <alias>`` clause -- verify against the codegen data).

    Blank fragments, which appear for an empty string or a trailing
    separator, are skipped instead of raising ``IndexError``.
    """
    keys = []
    for expression in metrics.split(",\n"):
        tokens = expression.split()
        # str.split() with no argument already discards surrounding
        # whitespace, so the tokens need no further stripping.
        if tokens:
            keys.append(tokens[-1])
    return keys


def generate_bash_lines(d_type, a_type, yt_keys, cvars, config):
    """Build the shell commands that export one YT table.

    Three commands are returned, in order: ``yt read`` of the table into a
    ``.tsv`` file, ``gzip`` of that file, and a ``boto3_upload.py`` call for
    the resulting ``.tsv.gz``.

    Args:
        d_type: Data type part of the table/file name.
        a_type: Aggregation type; when falsy no ``_by_<a_type>`` suffix is
            added to the name.
        yt_keys: Column names, joined with ``;`` and substituted into
            ``cvars["yt_format_string"]``.
        cvars: Mapping providing ``yt_format_string``.
        config: Mapping providing ``output_root``, ``export_name`` and
            ``args["s3_host"]``.

    Returns:
        A list of three command strings.
    """
    read_format = cvars["yt_format_string"].format(";".join(yt_keys))
    root = config["output_root"]
    export = config["export_name"]

    # Table name and local file name share the same stem.
    stem = f"{export}_{d_type}"
    if a_type:
        stem += f"_by_{a_type}"
    table_path = f"{root}/{export}/{stem}"
    tsv_name = f"{stem}.tsv"
    archive_name = f"{stem}.tsv.gz"

    read_cmd = f"yt read --format '{read_format}' {table_path} > {tsv_name}"
    gzip_cmd = f"gzip {tsv_name}"
    upload_cmd = (
        f"python3.6 boto3_upload.py -s {config['args']['s3_host']} -b yaexports"
        f" -f {archive_name} -k {export}/{archive_name} -a"
    )
    return [read_cmd, gzip_cmd, upload_cmd]


# Aggregation types for which main() generates a pivoter via
# PivoterGen.generate() and substitutes the "@[..._select_pivot]" placeholder.
# Aggregation types not listed here get their export commands built directly
# from the grouping keys and metrics instead.
PIVOTING_AGGREGATIONS = ("s2", "geounit", "filial", "s2_month", "s2_month_binary")


class PivoterGen:
    """Collects generated pivoter SQL snippets and export shell commands.

    ``cvars`` is the mapping of codegen variables (pivoter stub, column
    types, grouping keys, metrics) and ``config`` is the run configuration.
    Both are only read by this class.
    """

    def __init__(self, cvars, config):
        self.cvars = cvars
        self.config = config
        # Filled stubs, one per generate() call.
        self.pivoters = []
        # Names listed in the final EXPORT statement.
        self.export = []
        # Shell commands accumulated by generate() and generate_custom().
        self.bash = []

    def generate_signatures(self, grouping_keys, metrics, a_type):
        """Build the typed field lists for one pivoter.

        Returns a 3-tuple ``(input_fields, output_fields, yt_metric_columns)``:

        * ``input_fields`` -- ``"name": type`` for every grouping key and
          every metric;
        * ``output_fields`` -- the same without the ``operator`` key, plus
          the derived columns of every metric;
        * ``yt_metric_columns`` -- every metric name followed by the names of
          its derived columns.

        Derived columns are ``<metric>_other`` (``Bool?``) when ``a_type`` is
        ``"s2_month_binary"``; otherwise ``<metric>_min`` and
        ``<metric>_max`` (typed like the metric) and ``<metric>_position``
        (``Double?``).
        """

        def type_of(column):
            return self.cvars["columns_types"][column]

        def declare(column, type_name):
            return f'"{column}": {type_name}'

        inputs, outputs, yt_columns = [], [], []

        for name in grouping_keys:
            entry = declare(name, type_of(name))
            inputs.append(entry)
            if name == "operator":
                # The operator key is consumed by the pivot and is absent
                # from the output row.
                continue
            outputs.append(entry)

        binary = a_type == "s2_month_binary"
        for name in metrics:
            metric_type = type_of(name)
            if binary:
                derived = [(f"{name}_other", "Bool?")]
            else:
                derived = [
                    (f"{name}_min", metric_type),
                    (f"{name}_max", metric_type),
                    (f"{name}_position", "Double?"),
                ]
            entry = declare(name, metric_type)
            inputs.append(entry)
            outputs.append(entry)
            outputs.extend(declare(column, kind) for column, kind in derived)
            yt_columns.append(name)
            yt_columns.extend(column for column, _ in derived)

        return inputs, outputs, yt_columns

    def generate(self, d_type, a_type):
        """Fill the pivoter stub for ``d_type``/``a_type`` and record it.

        Appends the filled stub to ``self.pivoters``, the pivoter name
        ``$<d_type>_by_<a_type>_pivoter`` to ``self.export`` and the export
        commands for the pivoted table to ``self.bash``.
        """
        binary = a_type == "s2_month_binary"
        sql = (
            self.cvars["pivoter_stub"]
            .replace("@[d_type]", d_type)
            .replace("@[a_type]", a_type)
            .replace(
                "@[pivoter_type]",
                'CAST("binary" as Utf8)' if binary else 'CAST("normal" as Utf8)',
            )
        )

        # Each grouping-key entry contributes its last space-separated token.
        raw_keys = self.cvars[f"{d_type}_by_{a_type}_grouping_keys"]
        all_keys = [chunk.split(" ")[-1] for chunk in raw_keys.split(", ")]
        keys_without_operator = [name for name in all_keys if name != "operator"]

        if binary:
            # The binary pivoter works on a single fixed metric.
            metrics = ["SignalStrength_p50"]
        else:
            metrics = get_metrics_keys(self.cvars[f"{d_type}_metrics"])

        sql = sql.replace("@[keys]", json.dumps(keys_without_operator, indent=4))
        sql = sql.replace("@[metrics]", json.dumps(metrics, indent=4))

        inputs, outputs, yt_metric_columns = self.generate_signatures(
            all_keys, metrics, a_type
        )
        key_types = ", ".join(
            self.cvars["columns_types"][name] for name in keys_without_operator
        )
        for placeholder, value in (
            ("@[input_type_signature]", ", ".join(inputs)),
            ("@[output_type_signature]", ", ".join(outputs)),
            ("@[input_keys_types]", key_types),
        ):
            sql = sql.replace(placeholder, value)

        self.pivoters.append(sql)
        self.export.append(f"${d_type}_by_{a_type}_pivoter")

        columns = keys_without_operator + yt_metric_columns
        if self.config["args"]["no_throughput"]:
            # Drop every column whose name mentions one of these words.
            hidden = ("throughput", "traffic", "quality", "chunks")
            columns = [
                column
                for column in columns
                if not any(word in column for word in hidden)
            ]
        self.bash.extend(
            generate_bash_lines(d_type, a_type, columns, self.cvars, self.config)
        )

    def generate_custom(self, d_type, a_type, yt_keys):
        """Record export commands for a table with an explicit column list."""
        commands = generate_bash_lines(
            d_type, a_type, yt_keys, self.cvars, self.config
        )
        self.bash.extend(commands)

    def output_text(self):
        """Return all pivoters followed by the ``EXPORT`` statement.

        Pivoters are separated by a blank line.  ``EXPORT`` is appended
        directly after the last pivoter with no separator.
        NOTE(review): this presumably relies on the stub ending with a
        newline -- confirm against the stub text.
        """
        body = "\n\n".join(self.pivoters)
        exported = ", ".join(self.export)
        return f"{body}EXPORT {exported}"

    def output_bash(self):
        """Return the accumulated shell commands, one per line."""
        joined = "\n".join(self.bash)
        return f"{joined}\n"


def main():
    """Generate the pivoter SQL and export script, then run the YQL query.

    Steps:

    1. Parse the command line and load the YAML config; the parsed
       arguments are stored in the config under ``"args"``.
    2. Read ``beeline_aggregation_codegen_stub.sql`` and substitute the
       pool, date range, export name and root paths.
    3. For every data type / aggregation type pair, substitute the grouping
       key placeholders, generate pivoters for the pivoting aggregations and
       collect plain export commands for the others.
    4. Write ``beeline_codegen_pivoters.sql`` and the shell script (the
       ``--output_text`` path, defaulting to ``<export_name>.sh``).
    5. Unless ``--dry-run`` is given, submit the query through ``YqlRunner``
       and wait for it.

    With ``--dry-run`` the process exits with status 0 after step 4 and
    neither reads ``YQL_TOKEN`` nor constructs the YQL client.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pool", "-p")
    # The config path is opened unconditionally below; requiring it gives a
    # usage error instead of a TypeError from open(None).
    parser.add_argument("--config", "-c", required=True)
    parser.add_argument("--output_text", "-o")
    parser.add_argument("--s3-host", "-s3", default="s3.mdst.yandex.net")
    parser.add_argument("--dry-run", "-d", action="store_true")
    parser.add_argument("--no-throughput", action="store_true")
    args = vars(parser.parse_args())

    with open(args["config"]) as f:
        config = yaml.safe_load(f)
    # The same dict object is shared, so the default filled in just below is
    # visible through config["args"] as well.
    config["args"] = args

    args["output_text"] = args["output_text"] or f"{config['export_name']}.sh"

    # A dry run never submits the query, so it must not depend on YQL_TOKEN.
    # For a real run the client is still built here, before any file is
    # written, so a missing token fails early.
    yr = None
    if not args["dry_run"]:
        yc = YqlClient(token=os.environ["YQL_TOKEN"])
        yr = YqlRunner(yc, title="beeline aggregation runner | YQL")

    with codecs.open("beeline_aggregation_codegen_stub.sql", "r") as f:
        stub = f.read()
    query = apply_replacements(
        stub,
        [
            ("@[pool]", args["pool"]),
            ("@[date_from]", str(config["date_from"])),
            ("@[date_to]", str(config["date_to"])),
            ("@[export_name]", config["export_name"]),
            ("@[reduced_root]", config["reduced_root"]),
            ("@[output_root]", config["output_root"]),
        ],
    )
    non_pivoted_bash = []
    pivoter_gen = PivoterGen(codegen_vars, config)
    for d_type in ("radio", "video"):
        for a_type in (
            "s2",
            "cell_lac",
            "geounit",
            "filial",
            "ip",
            "s2_month",
            "s2_month_binary",
        ):
            key = f"{d_type}_by_{a_type}_grouping_keys"
            # The monthly s2 aggregations are not produced for video.
            if a_type in ("s2_month", "s2_month_binary") and d_type == "video":
                continue
            # Combinations without grouping keys in the codegen data are
            # skipped silently.
            try:
                grouping_keys = codegen_vars[key]
            except KeyError:
                continue
            # Keep only the last space-separated token of every entry.
            grouping_keys_ = [x.split(" ")[-1] for x in grouping_keys.split(", ")]
            grouping_keys_select = ", ".join(grouping_keys_)
            query = query.replace(f"@[{key}]", grouping_keys)
            query = query.replace(f"@[{key}_select]", grouping_keys_select)
            if a_type in PIVOTING_AGGREGATIONS:
                # Pivoted selects drop the operator column.
                grouping_keys_select_pivot = ", ".join(
                    [x for x in grouping_keys_ if x != "operator"]
                )
                query = query.replace(
                    f"@[{key}_select_pivot]", grouping_keys_select_pivot
                )
            else:
                # Non-pivoted tables are exported as-is: grouping keys
                # followed by the metric columns.
                # NOTE(review): unlike PivoterGen.generate(), no
                # --no-throughput filtering is applied here -- confirm this
                # is intended.
                metrics = get_metrics_keys(codegen_vars[f"{d_type}_metrics"])
                non_pivoted_bash.extend(
                    generate_bash_lines(
                        d_type, a_type, grouping_keys_ + metrics, codegen_vars, config
                    )
                )

        for a_type in ("metrics", "common_conditions", "having"):
            query = query.replace(
                f"@[{d_type}_{a_type}]", codegen_vars[f"{d_type}_{a_type}"]
            )
        for p_type in PIVOTING_AGGREGATIONS:
            if p_type in ("s2_month", "s2_month_binary") and d_type == "video":
                continue
            pivoter_gen.generate(d_type, p_type)
    # The coordinates table has a fixed column list and no aggregation suffix.
    pivoter_gen.generate_custom(
        "coordinates", None, ["s2_level", "s2_value", "coordinates"]
    )

    with open("beeline_codegen_pivoters.sql", "w") as f:
        f.write(pivoter_gen.output_text())
    with open(args["output_text"], "w") as f:
        # "set -e" makes the export script stop at the first failing command.
        f.write(
            "set -e\n\n"
            + pivoter_gen.output_bash()
            + "\n".join(non_pivoted_bash)
            + "\n"
        )

    if args["dry_run"]:
        sys.exit(0)
    yr.run(
        query,
        attachments=[
            "beeline_codegen_pivoters.sql",
            "beeline_pivot.py",
            {
                "path": "arcadia/analytics/videolog/strm-stats/operators_data/v2/prepare_data/helpers.sql"
            },
            {
                "path": "arcadia/analytics/videolog/strm-stats/operators_data/v2/prepare_data/sphere_v2.py"
            },
        ],
        wait=True,
    )


# Run the generator only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()
