import argparse
import logging
import os
import re
from datetime import datetime, timedelta

import yt.wrapper as yt
from crypta.graph.soupy_indevice.lib import (
    build_indevice,
    build_link_count_percentiles,
    cleanup,
    config,
    fast_match,
    indevice_edge_types,
    make_sorted_tables,
    set_output_date_attr,
    soup_tables_from_dir,
    stream_soup_tables,
)
from crypta.graph.soupy_indevice.lib.step import send_indevice_ready_event
from crypta.graph.soupy_indevice.lib.transfer import copy_to_cluster
from crypta.graph.soupy_indevice.lib.misc import set_svn_version
from crypta.graph.soupy_indevice.lib.util import setup_logging


logger = logging.getLogger(__name__)


def soup_tables_from_string(s):
    """Split a comma-separated string of table paths into a list.

    Returns an empty list when ``s`` is empty or ``None``.
    """
    return s.split(",") if s else []


def dump_edges():
    """Print one line per edge type yielded by ``indevice_edge_types()``.

    Each line holds four space-separated names: id1 type, id2 type,
    source type and log source.
    """
    row_format = "{} {} {} {}"
    for edge_type in indevice_edge_types():
        names = (
            edge_type.Id1Type.Name,
            edge_type.Id2Type.Name,
            edge_type.SourceType.Name,
            edge_type.LogSource.Name,
        )
        print(row_format.format(*names))


def make_last_link(output_dir, output_date, output_table):
    """Re-point ``<output_dir>/last`` at ``output_table`` and refresh its companions.

    After the link is (force-)created, ``last_by_id`` and ``last_by_indevice_id``
    are rebuilt via ``make_sorted_tables`` and all three nodes are stamped with
    ``output_date`` via ``set_output_date_attr``.
    """
    last_link = yt.ypath_join(output_dir, "last")
    by_id = last_link + "_by_id"
    by_indevice_id = last_link + "_by_indevice_id"

    yt.link(output_table, last_link, force=True)
    make_sorted_tables(last_link, by_id, by_indevice_id)
    set_output_date_attr(output_date, last_link, by_id, by_indevice_id)


def run_transfer(output_date, output_table, args, kind=None):
    """Copy ``output_table`` to every cluster listed in ``config.REMOTE_YT_CLUSTERS``.

    For each cluster the table is copied to ``<root>[/<kind>]/<output_date>``,
    the ready event is sent, the remote ``last`` link is re-pointed at the new
    table and old tables are cleaned up. A failure on one cluster is logged and
    does not prevent the remaining clusters from being processed.

    Args:
        output_date: Date string used as the remote table name.
        output_table: Path of the local table to copy.
        args: Parsed command-line arguments; only ``args.keep`` is read.
        kind: Optional subdirectory name appended to the cluster root and
            forwarded to the ready event.

    Raises:
        ValueError: If ``config`` has no ``<CLUSTER>_INDEVICE_ROOT`` value for
            one of the clusters.
    """

    def _do_cluster(cluster, root, keep):
        remote_root = yt.ypath_join(root, kind) if kind is not None else root
        remote_table = yt.ypath_join(remote_root, output_date)
        copy_to_cluster(output_table, remote_table, cluster, config.YT_POOL)
        send_indevice_ready_event(output_date, cluster, kind=kind)

        remote_yt = yt.YtClient(proxy="{cluster}.yt.yandex.net".format(cluster=cluster), token=config.YT_TOKEN)
        remote_yt.link(remote_table, yt.ypath_join(remote_root, "last"), force=True)
        cleanup(remote_root, keep, remote_yt)

    for cluster in config.REMOTE_YT_CLUSTERS:
        # A default of None lets a missing attribute reach the explicit check
        # below instead of escaping as an AttributeError.
        root = getattr(config, "{cluster}_INDEVICE_ROOT".format(cluster=cluster.upper()), None)
        if root is None:
            raise ValueError("Unknown cluster root for '{}'".format(cluster))
        try:
            # A missing or zero --keep falls back to 3.
            keep = int(args.keep or 0) or 3
            if cluster == "hume":
                keep = min(3, keep)
            _do_cluster(cluster, root, keep)
        except Exception:
            # Best effort per cluster; KeyboardInterrupt/SystemExit still propagate.
            logger.exception("Failed to copy on '%s'", cluster)


def get_input_tables_and_date(soup_root, soup_tables):
    """Collect the input tables and the output date for a run.

    Tables and date come from ``soup_tables_from_dir(soup_root)`` when
    ``soup_root`` is given. When ``soup_tables`` (a comma-separated string) is
    given, its tables are appended and the date is replaced by today's date.

    Returns:
        A ``(tables, date)`` pair; ``([], "")`` when both inputs are empty.
    """
    if soup_root:
        in_tables, output_date = soup_tables_from_dir(soup_root)
    else:
        in_tables, output_date = [], ""

    if soup_tables:
        explicit_tables = soup_tables_from_string(soup_tables)
        in_tables.extend(explicit_tables)
        # Explicitly listed tables carry no date of their own, so use today.
        output_date = datetime.now().strftime("%Y-%m-%d")

    return in_tables, output_date


def get_last_date_table(root):
    """Find the most recent date-named node directly under ``root``.

    Only names that are exactly of the form ``YYYY-MM-DD`` are considered, so
    nodes such as ``last`` or ``sizes`` are ignored.

    Returns:
        ``(date, path)`` for the greatest date name, or ``(None, None)`` when
        ``root`` does not exist or contains no date-named nodes.
    """
    if not yt.exists(root):
        return None, None

    # Anchor the pattern so that names with a date prefix are not picked up.
    dated_names = [name for name in yt.list(root) if re.match(r"\d{4}-\d{2}-\d{2}$", name)]
    if not dated_names:
        return None, None

    # ISO dates order lexicographically, so the maximum is the latest date.
    date = max(dated_names)
    return date, yt.ypath_join(root, date)


def run_build_indevice(soup_root, soup_tables, output_root, idstorage_root, args):
    """Build the in-device table for one date and publish it.

    The output date comes from the inputs unless ``args.resume_for_date`` is
    set. If the output table already exists and ``args.force_recalc`` is not
    set, nothing is done. Otherwise the table is built, stamped with the svn
    version, the ready event is sent, the ``last`` link is updated and old
    outputs are cleaned up.

    Args:
        soup_root: Directory to take input tables from (may be empty).
        soup_tables: Comma-separated list of extra input tables (may be empty).
        output_root: Directory where dated outputs are written.
        idstorage_root: Passed through to ``build_indevice``.
        args: Parsed command-line arguments; reads ``resume_for_date``,
            ``workdir``, ``force_recalc``, ``collapse_uuids`` and ``keep``.
    """
    in_tables, output_date = get_input_tables_and_date(soup_root, soup_tables)

    if args.resume_for_date is not None:
        output_date = args.resume_for_date

    output_table = yt.ypath_join(output_root, output_date)

    if yt.exists(output_table) and not args.force_recalc:
        logger.warning("%s exists and --force-recalc is not specified: will not recalculate", output_table)
        return

    side_dirs = ["sizes", "bad_edges", "bad_ids"]
    output_sizes, output_bad_edges, output_bad_ids = [
        yt.ypath_join(output_root, name, output_date) for name in side_dirs
    ]

    if args.workdir:
        # A user-supplied workdir is not managed (and not cleaned up) here.
        workdir_root = None
        workdir = args.workdir
    else:
        workdir_root = yt.ypath_join(output_root, "workdir")
        workdir = yt.ypath_join(workdir_root, output_date)

    build_indevice(
        in_tables,
        idstorage_root,
        output_table,
        output_sizes,
        output_bad_edges,
        output_bad_ids,
        workdir=workdir,
        collapse_uuids=args.collapse_uuids,
    )

    set_svn_version(output_table)
    send_indevice_ready_event(output_date, "hahn")
    make_last_link(output_root, output_date, output_table)

    keep = int(args.keep)
    cleanup(output_root, keep)
    for name in side_dirs:
        cleanup(yt.ypath_join(output_root, name), keep)

    if workdir_root is not None:
        cleanup(workdir_root, 3)


def run_build_link_count_percentiles(soup_root, soup_tables, output_root, idstorage_root, keep_count):
    """Build the link-count percentiles table for the current input date.

    The result is written to ``<output_root>/link_count_percentiles/<date>``,
    after which ``cleanup`` is run on that directory with ``keep_count``.
    """
    percentiles_dir = yt.ypath_join(output_root, "link_count_percentiles")
    in_tables, output_date = get_input_tables_and_date(soup_root, soup_tables)

    build_link_count_percentiles(
        in_tables,
        idstorage_root,
        yt.ypath_join(percentiles_dir, output_date),
    )
    cleanup(percentiles_dir, keep_count)


def run_fast_match(args):
    """Run the fast match on top of the latest full in-device table.

    The latest date under ``<soup_dir>/day`` and the latest table under
    ``full_indevice_root`` are looked up; stream soup tables for every day
    after the in-device date up to and including the soup date are fed to
    ``fast_match`` inside a YT transaction. The result is written under
    ``<output_dir>/fast``, published via the ``last`` link, cleaned up and,
    in the ``stable`` environment, transferred to the remote clusters.

    Args:
        args: Parsed command-line arguments; reads ``soup_dir``,
            ``full_indevice_root``, ``output_dir``, ``stream_soup_root``
            and ``keep``.

    Raises:
        ValueError: If no dated soup or in-device table is found, or if the
            latest soup date is older than the latest in-device date.
    """
    date_format = "%Y-%m-%d"
    soup_day_dir = yt.ypath_join(args.soup_dir, "day")

    soup_date, _ = get_last_date_table(soup_day_dir)
    indev_date, indev_tbl = get_last_date_table(args.full_indevice_root)

    # Fail with a clear message instead of an obscure error further down.
    if soup_date is None:
        raise ValueError("No dated soup tables found in '{}'".format(soup_day_dir))
    if indev_date is None:
        raise ValueError("No dated indevice tables found in '{}'".format(args.full_indevice_root))

    soup_dt = datetime.strptime(soup_date, date_format)
    indev_dt = datetime.strptime(indev_date, date_format)

    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if soup_dt < indev_dt:
        raise ValueError("Soup date {} is older than indevice date {}".format(soup_date, indev_date))

    bad_ids_tbl = yt.ypath_join(args.full_indevice_root, "bad_ids", indev_date)
    bad_edges_tbl = yt.ypath_join(args.full_indevice_root, "bad_edges", indev_date)
    sizes_tbl = yt.ypath_join(args.full_indevice_root, "sizes", indev_date)

    output_dir = yt.ypath_join(args.output_dir, "fast")
    output_table = yt.ypath_join(output_dir, soup_date)
    output_dir_sizes = yt.ypath_join(output_dir, "sizes")
    output_dir_bad_ids = yt.ypath_join(output_dir, "bad_ids")
    output_sizes = yt.ypath_join(output_dir_sizes, soup_date)
    output_bad_ids = yt.ypath_join(output_dir_bad_ids, soup_date)

    # Every day strictly after the in-device date, up to and including the soup date.
    days_behind = (soup_dt - indev_dt).days
    dates = [
        (indev_dt + timedelta(days=offset)).strftime(date_format)
        for offset in range(1, days_behind + 1)
    ]

    with yt.Transaction() as tx:
        tbls = stream_soup_tables(args.soup_dir, args.stream_soup_root, dates)
        fast_match(
            tbls,
            dates,
            indev_tbl,
            sizes_tbl,
            bad_ids_tbl,
            bad_edges_tbl,
            output_table,
            output_sizes,
            output_bad_ids,
            tx,
        )

    set_svn_version(output_table)
    send_indevice_ready_event(soup_date, "hahn", kind="fast")
    make_last_link(output_dir, soup_date, output_table)

    keep = int(args.keep)
    for directory in (output_dir, output_dir_sizes, output_dir_bad_ids):
        cleanup(directory, keep)

    if config.ENVIRONMENT == "stable":
        run_transfer(soup_date, output_table, args, kind="fast")


def check_output_table_exists(args):
    """Fail if yesterday's output table is still missing after the deadline hour.

    The check only fires when the current local hour is at or past
    ``args.check_output_table_exists_by``.

    Args:
        args: Parsed command-line arguments; reads ``output_dir`` and
            ``check_output_table_exists_by``.

    Raises:
        Exception: If the deadline hour has passed and
            ``<output_dir>/<yesterday>`` does not exist.
    """
    now = datetime.now()
    yesterday = now - timedelta(days=1)

    # Drop one trailing slash, as main() does for its own copy of the output
    # dir, so that "--output-dir //a/b/" does not yield "//a/b//<date>".
    output_dir = args.output_dir
    if output_dir.endswith("/"):
        output_dir = output_dir[:-1]
    tbl = yt.ypath_join(output_dir, yesterday.strftime("%Y-%m-%d"))

    deadline_hour = int(args.check_output_table_exists_by)
    if now.hour >= deadline_hour and not yt.exists(tbl):
        raise Exception("{} does not exist at {}".format(tbl, now.strftime("%Y-%m-%d %H:%M:%S")))


def check_sparse_date(args):
    """Return 1 when the run's output date is an odd day of the month, else 0.

    The date is taken from ``get_input_tables_and_date`` for ``args.soup_dir``
    and ``args.soup_tables``.
    """
    output_date = get_input_tables_and_date(args.soup_dir, args.soup_tables)[1]
    day_of_month = datetime.strptime(output_date, "%Y-%m-%d").day
    return day_of_month % 2


def main():
    """Parse command-line arguments and dispatch to the selected mode.

    Modes are checked in this order, and the first one that applies runs:
    ``--dump-edges``, ``--link-count-perc``, ``--fast-match``,
    ``--check-output-table-exists-by``, then the regular in-device build
    (skipped by ``--sparse-run`` when ``check_sparse_date`` returns non-zero).
    Every mode except ``--dump-edges`` needs ``--output-dir``; the directory
    is created if it does not exist. YT settings are read from the
    ``YT_PROXY``, ``YT_TOKEN`` and ``YT_POOL`` environment variables.
    """
    setup_logging()

    parser = argparse.ArgumentParser()
    parser.add_argument("--soup-dir")
    parser.add_argument("--soup-tables")
    parser.add_argument("--idstorage-dir")
    parser.add_argument("--output-dir")
    parser.add_argument("--keep")
    parser.add_argument("--dump-edges", action="store_true")
    parser.add_argument("--link-count-perc", action="store_true")
    parser.add_argument("--force-recalc", action="store_true")
    parser.add_argument("--stream-soup-root")
    parser.add_argument("--full-indevice-root")
    parser.add_argument("--fast-match", action="store_true")
    parser.add_argument("--workdir")
    parser.add_argument("--resume-for-date")
    parser.add_argument("--collapse-uuids", action="store_true")
    parser.add_argument("--check-output-table-exists-by")
    parser.add_argument("--sparse-run", action="store_true")
    args = parser.parse_args()

    if args.dump_edges:
        dump_edges()
        return

    soup_root = args.soup_dir
    soup_tables = args.soup_tables
    output_root = args.output_dir
    idstorage_root = args.idstorage_dir

    # Every remaining mode touches the output dir; report a usage error
    # instead of crashing on `None.endswith` below.
    if not output_root:
        parser.error("--output-dir is required unless --dump-edges is given")

    config.YT_PROXY = os.getenv("YT_PROXY")
    config.YT_TOKEN = os.getenv("YT_TOKEN")
    config.YT_POOL = os.getenv("YT_POOL")

    if output_root.endswith("/"):
        output_root = output_root[:-1]

    if not yt.exists(output_root):
        yt.mkdir(output_root, recursive=True)

    if args.link_count_perc:
        run_build_link_count_percentiles(soup_root, soup_tables, output_root, idstorage_root, int(args.keep))
        return

    if args.fast_match:
        run_fast_match(args)
        return

    if args.check_output_table_exists_by:
        check_output_table_exists(args)
        return

    if args.sparse_run and check_sparse_date(args):
        return

    run_build_indevice(soup_root, soup_tables, output_root, idstorage_root, args)
