import datetime
from collections import defaultdict

from crypta.graph.v1.python.utils import utils
from crypta.graph.v1.python.v2.soup import soup_validation
from crypta.graph.v1.python.v2.soup.soup_tables import SoupDumpTable, SoupDailyTable

from crypta.graph.soup.config.python import EDGE_TYPE as edges


def get_rec_edge_type(rec):
    """
    Look up the edge type described by the type columns of a record.

    The ``id1Type``, ``id2Type``, ``sourceType`` and ``logSource`` values of
    ``rec`` are passed, in that order, to ``edges.get_edge_type_by_name``.

    :param rec: mapping that contains the four columns listed above
    :rtype: crypta.graph.soup.config.proto.edge_type.TEdgeTypeExt
    """
    type_columns = ("id1Type", "id2Type", "sourceType", "logSource")
    return edges.get_edge_type_by_name(*(rec[column] for column in type_columns))


def _is_key_weight_ok(edge_type, id1, id2, key_weight_limit=16384):
    """
    Check that the combined length of an edge key fits into a size limit.

    The weight is the sum of the lengths of the four edge type names
    (``Id1Type``, ``Id2Type``, ``SourceType``, ``LogSource``) and of both
    identifiers.

    :param edge_type: object exposing ``Id1Type.Name``, ``Id2Type.Name``,
        ``SourceType.Name`` and ``LogSource.Name``
    :param id1: first identifier; anything supporting ``len()``
    :param id2: second identifier; anything supporting ``len()``
    :param key_weight_limit: maximum allowed weight, inclusive
    :return: True when the weight does not exceed the limit; False when it
        does or when the weight cannot be computed (missing attribute,
        ``None`` identifier, ...)
    """
    try:
        weight = (
            len(edge_type.Id1Type.Name)
            + len(edge_type.Id2Type.Name)
            + len(edge_type.SourceType.Name)
            + len(edge_type.LogSource.Name)
            + len(id1)
            + len(id2)
        )
        return weight <= key_weight_limit
    except Exception:
        # Best effort: a key that cannot be measured is treated as not ok.
        # Deliberately not a bare ``except`` so that KeyboardInterrupt and
        # SystemExit still propagate instead of being reported as a bad key.
        return False


# ===reduce ops===
def reduce_day_activity(edge_key, recs, date, out_table_indexes, key_weight_limit=16384):
    """
    Reduce all records of one edge into a single daily-table record.

    For every input record the hit count is taken from ``hits`` (a missing or
    zero value counts as one hit) and added to the day total. Records that
    carry a truthy ``ts`` additionally contribute to an activity histogram:
    the bucket is the hour of ``datetime.fromtimestamp(ts)`` when
    ``utils.ts_to_date_str(ts)`` equals ``date``, and ``None`` otherwise.

    :param edge_key: reduce key; must provide ``id1``, ``id2`` and the columns
        read by ``get_rec_edge_type``
    :param recs: iterable of records that share ``edge_key``
    :param date: date string the records are expected to belong to
    :param out_table_indexes: mapping from ``edges.tuplize(edge_type)`` (and
        the ``"errors"`` key) to an output table index; when falsy the record
        goes to table 0
    :param key_weight_limit: limit forwarded to ``_is_key_weight_ok``
    :return: generator yielding exactly one ``SoupDailyTable`` record
    """
    edge_type = get_rec_edge_type(edge_key)

    total_hits = 0
    hits_per_bucket = defaultdict(int)
    for rec in recs:
        hits = rec.get("hits") or 1
        total_hits += hits

        ts = rec.get("ts")
        if not ts:
            # Not every source provides a timestamp; such records only count
            # towards the total.
            continue

        moment = datetime.datetime.fromtimestamp(ts)
        # A timestamp that falls outside of the requested date is accounted
        # for separately under the None bucket.
        bucket = None if utils.ts_to_date_str(ts) != date else moment.hour
        hits_per_bucket[bucket] += hits

    if hits_per_bucket:
        activity = utils.default_to_regular(hits_per_bucket)
    else:
        activity = None

    target_table = out_table_indexes[edges.tuplize(edge_type)] if out_table_indexes else 0

    key_fits = _is_key_weight_ok(edge_type, edge_key["id1"], edge_key["id2"], key_weight_limit)
    if not key_fits:
        # NOTE(review): this lookup is not guarded by the falsy check above,
        # so an oversized key combined with an empty/None mapping raises.
        # Confirm callers always pass an "errors" entry.
        target_table = out_table_indexes["errors"]

    yield SoupDailyTable.make_rec(
        edge_key["id1"], edge_key["id2"], edge_type, date, activity, total_hits, target_table
    )


# Name of the service column that normalize_edge() sets on every record it
# emits: True when soup_validation.normalize_rec() returned a record, False
# otherwise. increment_day_activity() and uniq_dump() read it to decide
# whether an edge goes to its regular output table or to the extra last one.
VALID_COL_NAME = "_valid"


def normalize_edge(rec):
    """
    Normalize a record and tag it with a validity flag.

    When ``soup_validation.normalize_rec`` returns a truthy record, that
    record is emitted with ``VALID_COL_NAME`` set to True. Otherwise the
    original ``rec`` is emitted (modified in place) with the flag set to
    False.

    :param rec: input record
    :return: generator yielding exactly one record
    """
    normalized = soup_validation.normalize_rec(rec)
    if normalized:
        out_rec, is_valid = normalized, True
    else:
        out_rec, is_valid = rec, False

    out_rec[VALID_COL_NAME] = is_valid
    # This is called from a MapReduce mapper, so there is always a single
    # output table for both valid and invalid records.
    out_rec["@table_index"] = 0
    yield out_rec


def increment_day_activity(edge_key, recs, out_table_indexes, throw_before_date):
    """
    Merge the dates of all records of one edge into a single dump record.

    Dates are collected from the ``dates`` column (a collection of dates) and
    from the ``date`` column (a single date) of every record. The edge is
    valid only if every record has a truthy ``VALID_COL_NAME`` value.

    * An invalid edge is emitted with table index ``len(out_table_indexes)``.
    * A valid edge is dropped when ``throw_before_date`` is set, at least one
      date was collected and all collected dates are earlier than
      ``throw_before_date``.
    * Any other valid edge is emitted to the table selected by
      ``out_table_indexes`` (table 0 when the mapping is falsy).

    :param edge_key: reduce key; must provide ``id1``, ``id2`` and the columns
        read by ``get_rec_edge_type``
    :param recs: iterable of records that share ``edge_key``
    :param out_table_indexes: mapping from ``edges.tuplize(edge_type)`` to an
        output table index
    :param throw_before_date: dates strictly below this value are considered
        outdated; a falsy value disables the check
    :return: generator yielding zero or one ``SoupDumpTable`` record
    """
    seen_dates = set()
    is_valid = True
    for rec in recs:
        accumulated_dates = rec.get("dates")  # set on records of the "all" table
        if accumulated_dates:
            seen_dates.update(accumulated_dates)

        single_date = rec.get("date")  # set on records of a day table
        if single_date:
            seen_dates.add(single_date)

        # NOTE(review): strict lookup - a record without the flag raises
        # KeyError here, while uniq_dump() uses .get(); confirm intent.
        if is_valid:
            is_valid = rec[VALID_COL_NAME]

    edge_type = get_rec_edge_type(edge_key)

    if not is_valid:
        # Invalid edges go to the table that follows all regular ones.
        yield SoupDumpTable.make_rec(
            edge_key["id1"], edge_key["id2"], edge_type, sorted(seen_dates), len(out_table_indexes)
        )
        return

    target_table = out_table_indexes[edges.tuplize(edge_type)] if out_table_indexes else 0

    # Only meaningful for daily sources: an edge whose every date is older
    # than the threshold is thrown away.
    if throw_before_date and seen_dates and all(day < throw_before_date for day in seen_dates):
        return

    # Records are assumed to be normalized already: validation and
    # normalization are expected to happen in a mapper before this reducer.
    yield SoupDumpTable.make_rec(
        edge_key["id1"], edge_key["id2"], edge_type, sorted(seen_dates), target_table
    )


def uniq_dump(edge_key, recs, out_table_indexes, add_dates=False):
    """
    Collapse all records of one edge into a single dump record.

    For each record, a truthy ``date`` value is collected; otherwise, if
    ``dates`` is a ``list``, its items are collected. The emitted record holds
    the sorted unique dates. The edge is routed to the table selected by
    ``out_table_indexes`` when every record has a truthy ``VALID_COL_NAME``
    value, and to table ``len(out_table_indexes)`` otherwise.

    :param edge_key: reduce key; must provide ``id1``, ``id2`` and the columns
        read by ``get_rec_edge_type``
    :param recs: iterable of records that share ``edge_key``
    :param out_table_indexes: mapping from ``edges.tuplize(edge_type)`` to an
        output table index
    :param add_dates: not used by this function
    :return: generator yielding exactly one ``SoupDumpTable`` record
    """
    edge_type = get_rec_edge_type(edge_key)

    is_valid = True
    collected_dates = []
    for rec in recs:
        if is_valid:
            is_valid = rec.get(VALID_COL_NAME)

        single_date = rec.get("date")
        if single_date:
            collected_dates.append(single_date)
            continue

        many_dates = rec.get("dates")
        if isinstance(many_dates, list):
            collected_dates.extend(many_dates)

    if is_valid:
        target_table = out_table_indexes[edges.tuplize(edge_type)]
    else:
        # Invalid edges go to the table that follows all regular ones.
        target_table = len(out_table_indexes)

    unique_dates = sorted(set(collected_dates))
    yield SoupDumpTable.make_rec(
        id1=edge_key["id1"],
        id2=edge_key["id2"],
        edge_type=edge_type,
        dates=unique_dates,
        table_index=target_table,
    )
