#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import sys
import os
import datetime
import argparse
import time
import json
from nile.api.v1 import clusters
from yql.api.v1.client import YqlClient
from videolog_common import (
    get_cluster,
    YqlRunner,
    apply_replacements,
    get_driver,
    date_range,
    get_date,
    send_mail,
)

# Export the system CA bundle path through REQUESTS_CA_BUNDLE.
# NOTE(review): presumably needed so HTTPS calls made by the imported
# YQL/YT clients verify certificates against the system store - confirm.
os.environ["REQUESTS_CA_BUNDLE"] = "/etc/ssl/certs/ca-certificates.crt"


# Title handed to YqlRunner for every query launched by this script.
TITLE = "Cube v2 Prod | YQL"
# Intermediate per-date tables; each one is produced by "<name>.sql" and
# removed again by process_date once a non-empty sessions table exists.
PRE_TABLES = ["strm_map", "jstracer_map", "redir_map", "money_map"]
# Default root of the cube tree; "--root" overrides it in process_date and
# run_query.
ROOT = "//cubes/video-strm"

# YQL fragment substituted for the "--@[preprocessed]" placeholder when the
# sessions query has to build $preprocessed from the freshly mapped tables.
# The $..._table variables are not defined in this file; presumably the
# query file that contains the placeholder declares them - confirm.
# NOTE(review): the union reads $rtbdsp_map_table while PRE_TABLES names the
# fourth table "money_map"; verify that the query file maps one to the other.
PREPROCESSED_NEW = """
$strm_map = (
    select
    IF(a.androidVsid is not null, a.vsid, s.vsid) as vsid,
    s.* without s.vsid
    from $strm_map_table as s
    left join any $androidvsids_table as a
    on (s.vsid == a.androidVsid)
);
$preprocessed = (
    select * from $strm_map
    union all
    select * from $jstracer_map_table
    union all
    select * from $rtbdsp_map_table
    union all
    select * from $redir_map_table
);
"""
# Substituted for "--@[preprocessed_output]" (unless --disable_preprocessed is
# given) so that the freshly built $preprocessed is also saved to a table.
PREPROCESSED_OUTPUT = "INSERT INTO $output_table_preprocessed WITH TRUNCATE\nSELECT * FROM $preprocessed;"
# Substituted for "--@[preprocessed]" when an already saved preprocessed table
# is reused instead of being rebuilt.
PREPROCESSED_EXISTING = (
    "$preprocessed = select * from $output_table_preprocessed;"
)

# For every intermediate map table: the source folders that must each contain
# a non-empty table named after the date before that map can be launched.
prep_sources = {
    "strm_map": ["//logs/strm-access-log/1d"],
    "jstracer_map": ["//logs/jstracer-log/1d", "//logs/strm-gogol-log/1d"],
    "redir_map": ["//logs/redir-log/1d"],
    "money_map": [
        "//statbox/cooked_logs/bs-dsp-cooked-log/v1/1d",
        "//statbox/cooked_logs/bs-chevent-cooked-log/v2/1d",
    ],
}

# Source folders that must each contain a non-empty "<folder>/<date>" table
# before get_available_dates reports the date as ready for the sessions step.
# The keys are only used as labels in the "requirements missing" message.
sessions_sources = {
    # "antifraud": ["//home/antifraud/export/videohosting/views"],
    "access_log": ["//logs/yandex-access-log/1d"],
    "morda_access_log": ["//logs/morda-access-log/1d"],
    "news_access_log": ["//logs/news-scarab-access-log/1d"],
    # "answers": ["//home/answers/yuid_testids"],
    "zen": ["//logs/zen-events-log/1d"],
    "strm": ["//logs/strm-access-log/1d"],
    "gogol": ["//logs/strm-gogol-log/1d"],
    "hit-log": ["//logs/bs-hit-log/1d"],
    "apphost": ["//logs/vh-apphost-logs/1d"],
    "subscriptions": ["//home/msdata/user-profiles/v1"],
}


def _check_non_empty(yt, table):
    return yt.exists(table) and yt.get_attribute(table, "row_count") > 0


def _check_date_in_folders(yt, folders, date):
    """Return True when every folder has a non-empty table named ``date``.

    Folders are probed in order and probing stops at the first one whose
    ``<folder>/<date>`` table is missing or empty. An empty ``folders``
    collection yields True.
    """
    return all(
        _check_non_empty(yt, "{}/{}".format(parent, date))
        for parent in folders
    )


def get_available_dates(yt, dates):
    """Classify ``dates`` by which pipeline step can be started for them.

    For each date (newest first) the function checks, under ``ROOT``:

    * whether a non-empty ``preprocessed`` table already exists;
    * which ``prep_sources`` entries have all their source tables ready;
    * which ``prep_sources`` entries already have a non-empty map table;
    * which ``sessions_sources`` entries have their source tables ready.

    Progress and missing requirements are reported with ``print``.

    Args:
        yt: client exposing ``exists`` and ``get_attribute``.
        dates: iterable of sortable dates; each is formatted into table paths.

    Returns:
        ``(prep_available, sess_available)``: two lists in descending date
        order. A date is in ``prep_available`` when preprocessing is still
        needed and either every prep source is ready or at least one ready
        source has not been mapped yet. A date is in ``sess_available`` when
        preprocessing is possible or unnecessary and every sessions source
        is ready.
    """
    prep_available = []
    sess_available = []
    for date in sorted(dates, reverse=True):
        date_root = "{}/{}".format(ROOT, date)
        preprocessed_exists = _check_non_empty(
            yt, "{}/preprocessed".format(date_root)
        )
        prep_sources_ready = [
            k
            for k in prep_sources
            if _check_date_in_folders(yt, prep_sources[k], date)
        ]
        prep_sources_mapped = [
            k
            for k in prep_sources
            if _check_non_empty(yt, "{}/{}".format(date_root, k))
        ]
        # Ready-but-unmapped sources. Derived from the two lists above rather
        # than asking the cluster the same questions a second time.
        if preprocessed_exists:
            prep_sources_for_map = []
        else:
            prep_sources_for_map = [
                k for k in prep_sources_ready if k not in prep_sources_mapped
            ]

        need_not_make_prep = preprocessed_exists or len(
            prep_sources_mapped
        ) == len(prep_sources)
        can_make_prep = len(prep_sources_ready) == len(prep_sources)
        if need_not_make_prep:
            print("preprocessed already mapped: {}".format(date))
        elif not can_make_prep:
            missing_prep = sorted(
                x for x in prep_sources if x not in prep_sources_ready
            )
            print(
                "{} preprocessed requirements missing: {}".format(
                    date, ",".join(missing_prep)
                )
            )

        sessions_sources_ready = [
            k
            for k in sessions_sources
            if _check_date_in_folders(yt, sessions_sources[k], date)
        ]
        # Sessions need preprocessed data that either exists or can be built.
        prep_ok = can_make_prep or need_not_make_prep
        can_make_sessions = prep_ok and len(sessions_sources_ready) == len(
            sessions_sources
        )
        if prep_ok and not can_make_sessions:
            missing_sessions = sorted(
                x for x in sessions_sources if x not in sessions_sources_ready
            )
            print(
                "{} sessions requirements missing: {}".format(
                    date, ",".join(missing_sessions)
                )
            )

        # A partially ready date still qualifies, so the maps whose sources
        # have arrived can be launched early.
        if (can_make_prep or prep_sources_for_map) and not need_not_make_prep:
            prep_available.append(date)
        if can_make_sessions:
            sess_available.append(date)
    return prep_available, sess_available


def list_uniq(lst):
    """Return a new list with duplicates dropped, keeping first occurrences.

    Membership is tested against the list built so far (equality based), so
    elements do not need to be hashable.
    """
    unique = []
    for item in lst:
        if item in unique:
            continue
        unique.append(item)
    return unique


def get_dates_to_process(args):
    """Find dates of the last week that still lack sessions and can progress.

    The window runs from eight days ago to yesterday, as produced by
    ``date_range`` (whether the end is inclusive depends on that helper).
    Dates whose ``<ROOT>/<date>/sessions`` node is absent are passed to
    ``get_available_dates``.

    Args:
        args: parsed command-line arguments, forwarded to ``get_cluster``.

    Returns:
        Dates ready for the sessions step followed by dates ready for
        preprocessing, without duplicates.

    Raises:
        SystemExit: with code 0 when no date can be processed.

    NOTE(review): the lookup always uses ``ROOT`` and ignores ``args.root``,
    as the original did - confirm that this is intended.
    """
    cluster = get_cluster(clusters, args)
    yt = get_driver(cluster).client
    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    window_start = yesterday - datetime.timedelta(days=7)
    dates_with_absent_sessions = {
        day
        for day in date_range(window_start, yesterday)
        # Use the shared ROOT constant instead of repeating its value.
        if not yt.exists("{}/{}/sessions".format(ROOT, day))
    }
    print(
        "dates last week with absent sessions: {}".format(
            ", ".join(map(str, sorted(dates_with_absent_sessions)))
        )
    )
    prep_available, sess_available = get_available_dates(
        yt, dates_with_absent_sessions
    )
    if not (sess_available or prep_available):
        print("nothing to do, exiting")
        sys.exit(0)
    return list_uniq(sess_available + prep_available)


def run_query(
    yr,
    date_,
    query_file,
    args,
    attachments=None,
    wait=False,
    type_="pre",
    use_existing_preprocessed=False,
    alice=False,
):
    """Fill in the placeholders of a query file and hand it to ``yr.run``.

    Args:
        yr: runner object; ``yr.run(query, wait=..., attachments=...)`` is
            called with the final query text.
        date_: date object; must support subtracting a ``timedelta``.
        query_file: path of the query template to read.
        args: parsed arguments providing ``pool``, ``weight``, ``root`` and
            (for sessions queries) ``disable_preprocessed``.
        attachments: files to attach; defaults to a fresh ``["common.sql"]``
            list on every call.
        wait: forwarded to ``yr.run``.
        type_: ``"sessions"`` enables the preprocessed/alice substitutions.
        use_existing_preprocessed: for sessions queries, read the saved
            preprocessed table instead of rebuilding it.
        alice: for sessions queries, strip the ``/*alice`` ... ``alice*/``
            markers so the text between them becomes active.

    Returns:
        Whatever ``yr.run`` returns.
    """
    # A list literal as the default would be one object shared by all calls
    # and exposed to the runner, so it is created per call instead.
    if attachments is None:
        attachments = ["common.sql"]

    date = str(date_)
    with open(query_file, "r") as f:
        query = f.read()

    # Applied in this order, one after another.
    substitutions = (
        ("@[date]", date),
        ("@[week_ago]", str(date_ - datetime.timedelta(days=7))),
        ("@[pool]", args.pool),
        ("@[weight]", str(args.weight)),
        ("@[root]", args.root or ROOT),
    )
    for placeholder, value in substitutions:
        query = query.replace(placeholder, value)

    if type_ == "sessions":
        if use_existing_preprocessed:
            query = query.replace("--@[preprocessed]", PREPROCESSED_EXISTING)
            query = query.replace("--@[preprocessed_output]", "")
        else:
            query = query.replace("--@[preprocessed]", PREPROCESSED_NEW)
            # With --disable_preprocessed the placeholder is left in place;
            # it starts with "--", which the templates appear to rely on as
            # an SQL line comment.
            if not args.disable_preprocessed:
                query = query.replace(
                    "--@[preprocessed_output]", PREPROCESSED_OUTPUT
                )
        if alice:
            query = query.replace("/*alice", "")
            query = query.replace("alice*/", "")
    return yr.run(query, wait=wait, attachments=attachments)


def get_locks(yt, folder):
    """Return the set of ``child_key`` values found in ``folder``'s locks.

    Reads the ``locks`` attribute of ``folder`` and collects the
    ``child_key`` of every entry.

    NOTE(review): an entry without a ``child_key`` raises ``KeyError``;
    confirm whether locks taken on the folder itself can appear here.
    """
    child_keys = set()
    for lock in yt.get_attribute(folder, "locks"):
        child_keys.add(lock["child_key"])
    return child_keys


def process_date(date_, args):
    """Advance the cube build for one date by whatever step is possible now.

    Steps, in order:

    1. Make sure the date folder exists under ``args.root`` (or ``ROOT``).
    2. Unless a ``preprocessed`` node exists (or ``--redo`` is set), launch
       the map query of every ``PRE_TABLES`` entry that is missing, is not
       locked, and whose source tables exist and are non-empty. These
       queries are started with ``wait`` left at its default (False).
    3. With ``--wait_mode``, poll every ten minutes until the folder has no
       locks, refreshing the list of existing map tables.
    4. If any map table, ``preprocessed`` or ``sessions`` is locked, stop.
       If all map tables exist, run the sessions reduce and return.
       If none exist but ``preprocessed`` does, run the sessions reduce from
       the saved preprocessed table and continue to step 5.
    5. If a non-empty, unlocked ``sessions`` table exists, remove the map
       tables and ``androidvsids``.

    Args:
        date_: date object to process.
        args: parsed command-line arguments.

    Returns:
        ``str(date_)`` when the sessions reduce ran from freshly mapped
        tables; ``None`` in every other case.

    Raises:
        KeyError: when the ``YQL_TOKEN`` environment variable is not set.
    """
    date = str(date_)
    redo = getattr(args, "redo", False) or False
    yc = YqlClient(token=os.environ["YQL_TOKEN"])
    yc.config.db = None
    yr = YqlRunner(yc, title=TITLE)
    cluster = get_cluster(clusters, args)
    yt = get_driver(cluster).client
    root = args.root or ROOT
    root_folder = "{}/{}".format(root, date)
    if not yt.exists(root_folder):
        print("folder {} does not exist, creating".format(root_folder))
        yt.create("map_node", path=root_folder)
    else:
        print("folder {} already exists, checking".format(root_folder))
    preprocessed_exists = yt.exists("{}/preprocessed".format(root_folder))
    existing_tables = []
    if not preprocessed_exists or redo:
        locks = get_locks(yt, root_folder)
        for table in PRE_TABLES:
            if yt.exists("{}/{}".format(root_folder, table)):
                existing_tables.append(table)
                continue
            if table in locks:
                # Presumably an earlier run is still writing this table.
                print("{} is locked by earlier transaction".format(table))
                continue
            ready = True
            for source_root in prep_sources[table]:
                source_table = "{}/{}".format(source_root, date)
                if not yt.exists(source_table) or not yt.get_attribute(
                    source_table, "row_count"
                ):
                    print(
                        "source table {} for {} does not yet exist".format(
                            source_table, table
                        )
                    )
                    ready = False
                    break
            if not ready:
                continue
            print("launching {}".format(table))
            query_file = "{}.sql".format(table)
            attachments = ["common.sql"]
            if "jstracer" in table:
                attachments.append(
                    {
                        "path": "analytics/videolog/strm-stats/strm_cube_2/stability/quality_report_avglog/quality_report_avglog_common.sql"
                    }
                )
            run_query(yr, date_, query_file, args, attachments=attachments)
    # Single fresh snapshot of the locks after any launches above.
    locks = get_locks(yt, root_folder)
    while args.wait_mode and locks:
        time.sleep(60 * 10)
        locks = get_locks(yt, root_folder)
        existing_tables = [
            x for x in PRE_TABLES if yt.exists("{}/{}".format(root_folder, x))
        ]
    print("locks: {}".format(", ".join(sorted(locks))))
    print("existing_tables: {}".format(", ".join(sorted(existing_tables))))
    alice_table = "//home/alice/dialog/prepared_logs_expboxes/{}".format(date)
    alice_table_exists = yt.exists(alice_table)
    print(
        "alice table {} {}".format(
            alice_table, "exists" if alice_table_exists else "doesn't exist"
        )
    )
    sessions_attachments = ["common.sql", "microsessions_reducer.py"]
    if (
        any(x in locks for x in PRE_TABLES)
        or "preprocessed" in locks
        or "sessions" in locks
    ):
        print("some tables are locked: {}, exiting".format(locks))
        return
    elif len(existing_tables) == len(PRE_TABLES):
        print("launching sessions reduce")
        if not alice_table_exists:
            # The reduce still runs without the alice part; this only
            # notifies the listed addressees.
            send_mail(
                msg="Alice table {} doesn't exist yet: {}".format(
                    alice_table, datetime.datetime.now()
                ),
                addressees=[
                    "pecheny@yandex-team.ru",
                    "nstbezz@yandex-team.ru",
                ],
                from_="strm-cube-monitoring@yandex-team.ru",
                subject="Alice table missing",
            )
        run_query(
            yr,
            date_,
            "sessions_reduce.sql",
            args,
            attachments=sessions_attachments,
            wait=True,
            type_="sessions",
            alice=alice_table_exists,
        )
        # Returns before the cleanup below, so the map tables stay in place.
        return date
    elif not existing_tables and preprocessed_exists:
        print("launching sessions reduce from existing preprocessed")
        # NOTE(review): unlike the branch above, this one falls through to
        # the cleanup and the function returns None - confirm intended.
        run_query(
            yr,
            date_,
            "sessions_reduce.sql",
            args,
            attachments=sessions_attachments,
            wait=True,
            type_="sessions",
            use_existing_preprocessed=True,
            alice=alice_table_exists,
        )
    sessions_table = "{}/sessions".format(root_folder)
    if (
        yt.exists(sessions_table)
        and "sessions" not in locks
        and yt.get_attribute(sessions_table, "row_count", 0)
    ):
        for table in PRE_TABLES + ["androidvsids"]:
            fullpath = "{}/{}".format(root_folder, table)
            if yt.exists(fullpath):
                print("removing tmp tables {}".format(fullpath))
                yt.remove(fullpath)
            else:
                print("tmp table {} already not exists".format(fullpath))
    else:
        print("not all tmp maps are finished, exiting")


def main():
    """Parse the command line and process the requested or pending dates.

    With both ``--from`` and ``--to`` every date yielded by ``date_range``
    is processed and no output files are written. Otherwise the dates come
    from ``get_dates_to_process``; if the last processed date produced a
    sessions table and no custom ``--root`` was given, two JSON files are
    written: the table reference (``--output_mr_table``) and a timestamp
    (``--output_json_timestamp``).

    NOTE(review): only the result of the last ``process_date`` call is kept,
    so an earlier success followed by a ``None`` writes nothing - confirm.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--from", "-f")
    parser.add_argument("--to", "-t")
    parser.add_argument("--pool", default="cubes-video-strm")
    parser.add_argument("--weight", type=int, default=1)
    parser.add_argument("--root")
    parser.add_argument("--redo", action="store_true")
    parser.add_argument("--disable_preprocessed", action="store_true")
    parser.add_argument("--wait_mode", action="store_true")
    parser.add_argument("--output_mr_table", default="mr_table.json")
    parser.add_argument("--output_json_timestamp", default="ts.json")
    args = parser.parse_args()

    # "from" is a Python keyword, so it cannot be read as ``args.from``.
    from_ = getattr(args, "from", None)
    to_ = getattr(args, "to", None)

    processed_date = None

    if from_ and to_:
        for date in date_range(from_, to_):
            process_date(date, args)
    else:
        for date in get_dates_to_process(args):
            processed_date = process_date(date, args)

    if processed_date and args.root is None:
        # No custom root means process_date wrote under ROOT, so the table
        # path is built from the same constant.
        mr_table = {
            "cluster": "hahn",
            "table": "{}/{}/sessions".format(ROOT, processed_date),
        }
        with open(args.output_mr_table, "w") as f:
            json.dump(mr_table, f)
        # NOTE(review): a naive local time is labelled "+03:00"; this is
        # only correct when the host clock runs in that zone.
        timestamp = {
            "userTimestamp": datetime.datetime.now().strftime(
                "%Y-%m-%dT%H:%M:%S+03:00"
            )
        }
        with open(args.output_json_timestamp, "w") as f:
            json.dump(timestamp, f)


# Run the pipeline only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()
