#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import sys
import os
import codecs
import argparse
import copy
import json
import datetime
import requests
from nile.api.v1 import (
    clusters,
    filters as nf,
    extractors as ne,
    aggregators as na,
    Record,
    with_hints,
)
import qb2.api.v1.typing as qt
from yql.api.v1.client import YqlClient
from videolog_common import (
    get_cluster,
    get_driver,
    optionalize_schema,
    YqlRunner,
    apply_replacements,
    read_file,
)


# Not referenced in the code shown in this file.
MEMORY_LIMIT = 16000
# Field name and a filter built with nf.equals(DELETED, False); neither is
# referenced in the code shown in this file.
DELETED = "OptionsDeleted"
NODELETE_FILTER = nf.equals(DELETED, False)
# Input tables. main() substitutes base_table and content_resource_table into
# the YQL templates below; content_type_table is not referenced in the code
# shown in this file.
base_table = "//home/videoquality/strm_meta/base/ContentGroup"
content_type_table = "//home/video-hosting/base/ContentType"
content_resource_table = "//home/videoquality/strm_meta/base/ContentResource"
# Root folder of the job and the table paths derived from it.
# update_global_vars() recomputes job_root and every "{}/...".format(job_root)
# path in this block when main() receives --job_root.
DEFAULT_JOB_ROOT = "//home/videolog/strm_meta/iron_branch"
job_root = DEFAULT_JOB_ROOT
current_table = "{}/current".format(job_root)
full_table = "{}/full".format(job_root)
full_work_table = "{}/full_work".format(job_root)
full_stats_table = "{}/full_stats".format(job_root)
alt_table = "{}/alt".format(job_root)
concat_table = "{}/concat".format(job_root)
# NOTE(review): fields_to_project and chmh are not referenced in the code
# shown in this file; the meaning of the chmh value is unclear -- confirm
# before relying on or removing either.
fields_to_project = "ContentGroupID"
chmh = "43aa0f9f9d7c57bca56cff1ddca64a74"
# Output tables of channels_yql, ontoids_yql and active_content_groups_yql
# respectively (see the replacements passed in main()).
yatv_channels_table = "{}/yatv_channels".format(job_root)
ontoids_table = "{}/onto_ids_data".format(job_root)
acg_table = "{}/active_cgs".format(job_root)
# YQL pragma header. main() replaces "@pool" with the --pool argument and
# hands the result to YqlRunner as its `prefix`.
common_prefix_stub = """
pragma yt.Pool = "@pool";
pragma yt.PoolTrees = "physical";
pragma yt.UseDefaultTentativePoolTrees;
"""
# YQL template; main() replaces BASE_TABLE, CONTENT_RESOURCE_TABLE and
# OUTPUT_TABLE before running it.
#
# Overwrites the output table with (ContentGroupID, special_type) rows, the
# UNION ALL of four sets:
#   * "yatv"             - groups with ContentTypeID == 2 ($channels) that have
#                          a 'channel_type' resource whose Value starts with
#                          'yatv';
#   * "special_project"  - ContentTypeID == 2 groups with
#                          'is_special_project' == '1', excluding every
#                          ContentGroupID present in $yatv (left only join);
#   * "youtube"          - ContentTypeID == 43 groups whose
#                          'content_source_url' contains 'youtube';
#   * "yandex_originals" - ContentTypeID == 2 groups whose 'auto_fields' JSON
#                          has /yandex_own_content equal to true.
#
# $yatv_list is defined but never used in the query, and the
# $youtube_channels part is commented out.
channels_yql = """

$cg_table = "BASE_TABLE";
$cr_table = "CONTENT_RESOURCE_TABLE";
$output_table = "OUTPUT_TABLE";
$yatv_list = AsList("yatv", "yatv@yttv", "yatv@yttv@news");

$yatv = (
    select ContentGroupID, "yatv" as special_type
    from $cr_table
    where ResourceName == 'channel_type' and Value like 'yatv%'
);

$channels = (
    select distinct ContentGroupID from $cg_table
    where ContentTypeID == 2
);

$yatv_release = (
    select a.ContentGroupID as ContentGroupID, special_type
    from $yatv as a
    inner join $channels as b using (ContentGroupID)
);

$special_project = (
    select
        cr.ContentGroupID as ContentGroupID,
        "special_project" as special_type
    from $cr_table as cr
    inner join $channels as ch using (ContentGroupID)
    where cr.ResourceName == 'is_special_project' and cr.Value == '1'
);

$yandex_originals = (
    select
        cr.ContentGroupID as ContentGroupID,
        "yandex_originals" as special_type
    from $cr_table as cr
    inner join $channels as ch using (ContentGroupID)
    where cr.ResourceName == 'auto_fields' and Yson::ConvertToBool(
        Yson::YPath(Yson::ParseJson(Value), "/yandex_own_content")
    ) == true
);

$broadcasts_from_cg = (
    select
        ContentGroupID,
        ParentID
    from $cg_table
    where ContentTypeID == 43
);

$youtube_broadcasts = (
    select
        cr.ContentGroupID as ContentGroupID,
        ParentID,
        "youtube" as special_type
    from $cr_table as cr
    inner join $broadcasts_from_cg as ch using (ContentGroupID)
    where (
        cr.ResourceName == 'content_source_url' and
        cr.Value like '%youtube%'
    )
);

$youtube_broadcasts_release = (
    select
        ContentGroupID,
        special_type
    from $youtube_broadcasts
);

--$youtube_channels = (
--    select
--        ch.ContentGroupID as ContentGroupID,
--        special_type
--    from $youtube_broadcasts as yb
--    inner join $channels as ch
--    on ch.ContentGroupID == yb.ParentID
--);

$sp_only = (
    select
        sp.ContentGroupID as ContentGroupID,
        "special_project" as special_type
    from $special_project as sp
    left only join $yatv as yatv using (ContentGroupID)
);

insert into $output_table with truncate
select * from $yatv_release
union all
select * from $sp_only
union all
select * from $youtube_broadcasts_release
union all
select * from $yandex_originals;
--union all
--select * from $youtube_channels;
"""


# YQL template; main() replaces @ontoids_table and @cr_table.
#
# Takes every row of the content-resource table with ResourceName 'onto_id'
# and Value != "0", LEFT JOINs it by Value against the `key` column of the
# ontodb all_cards_final table, and overwrites @ontoids_table with:
#   ContentGroupID, onto_id,
#   onto_type - list of "<value>/<subvalue>" strings built from the
#               /isa/otype entries of the card JSON (an entry that would be
#               just "/" yields NULL; ListFlatMap presumably drops it --
#               confirm against the YQL docs),
#   onto_tags - list of the "value" strings of the /isa/tags entries.
ontoids_yql = """
pragma yson.DisableStrict;

$all_ontoids = (
    SELECT
        key as onto_id,
        ListFlatMap(
            Yson::ConvertToList(
                Yson::YPath(Yson::ParseJson(value), "/isa/otype")
            ), ($y) -> {
                $value = (
                    (Yson::LookupString($y, "value") ?? "")
                    || '/'
                    || (Yson::LookupString($y, "subvalue") ?? "")
                );
                RETURN IF($value == "/", NULL, $value)
            }
        ) as type_subtype,
        ListFlatMap(
            Yson::ConvertToList(
                Yson::YPath(Yson::ParseJson(value), "/isa/tags")
            ), ($y) -> {
                RETURN Yson::LookupString($y, "value")
            }
        ) as tags
    FROM `//home/dict/ontodb/ver/daily/production/all_cards_final`
);


INSERT INTO `@ontoids_table` WITH TRUNCATE
SELECT
    cms.ContentGroupID as ContentGroupID,
    cms.Value as onto_id,
    ontodb.type_subtype as onto_type,
    ontodb.tags as onto_tags
FROM (
    SELECT *
    FROM `@cr_table`
    WHERE ResourceName == 'onto_id' and Value != "0")  as cms
LEFT JOIN $all_ontoids as ontodb
ON ontodb.onto_id == cms.Value
"""


# YQL template; main() replaces @cg_table, @cr_table and @acg_table.
#
# Overwrites @acg_table with one row per ContentGroupID (plus
# SOME(TMP_OvsServiceFlags)) for content groups that satisfy all of:
#   * Options does not contain 'deleted';
#   * the group has a 'thumbnail' resource whose Value contains '//';
#   * the group has at least one ContentVersionGroup row whose
#     ContentVersionID has an OutputStream row with Data containing '//'.
active_content_groups_yql = """
PRAGMA yt.InferSchema = '1';

$output_stream = (
    SELECT
        ContentVersionID,
        SOME(Data) AS Data
    FROM `//home/video-hosting/base/OutputStream`
    WHERE Data like '%//%'
    GROUP BY ContentVersionID
);

$cvg = (
    SELECT
        ContentGroupID, cvg.ContentVersionID as ContentVersionID
    FROM `//home/video-hosting/base/ContentVersionGroup` as cvg
    INNER JOIN $output_stream as os using (ContentVersionID)
);

$cvg_filtered = (
    SELECT ContentGroupID, MAX(ContentVersionID)
    FROM $cvg
    GROUP BY ContentGroupID
);

INSERT INTO `@acg_table` WITH TRUNCATE
SELECT
    ContentGroup.ContentGroupID as ContentGroupID,
    SOME(ContentGroup.TMP_OvsServiceFlags) as TMP_OvsServiceFlags
FROM
    `@cg_table` as ContentGroup
JOIN
    `@cr_table` as ThumbnailResource
ON
    ContentGroup.ContentGroupID = ThumbnailResource.ContentGroupID
JOIN
    $cvg_filtered as ContentVersionGroup
ON
    ContentGroup.ContentGroupID = ContentVersionGroup.ContentGroupID
WHERE
    ContentGroup.Options not like '%deleted%'
    AND ThumbnailResource.ResourceName = 'thumbnail'
    AND ThumbnailResource.Value like '%//%'
GROUP BY
    ContentGroup.ContentGroupID
"""

# YQL query that main() runs first, with no placeholder replacements.
#
# Rebuilds home/videoquality/strm_meta/base/ContentGroup and
# .../ContentResource: each becomes the current video-hosting base table plus
# those rows of the corresponding vh_old_episodes *_2021_03_01 table whose
# ContentGroupID is absent from the current table (left only join).
# Note that the paths here are hard-coded and do not follow base_table /
# content_resource_table or --job_root.
make_new_base_tables = """

$current_base = "home/video-hosting/base/ContentGroup";
$current_resource = "home/video-hosting/base/ContentResource";

$base_filter_only_absent = (
    select * from `//home/videolog/strm_meta/vh_old_episodes/ContentGroup_2021_03_01` as s
    left only join $current_base as b using (ContentGroupID)
);

$base_concat = (
    select * from $current_base
    union all
    select * from $base_filter_only_absent
);

insert into `home/videoquality/strm_meta/base/ContentGroup` with truncate
select * from $base_concat;

$resource_filter_only_absent = (
    select * from `//home/videolog/strm_meta/vh_old_episodes/ContentResource_2021_03_01` as s
    left only join $current_resource as b using (ContentGroupID)
);

$resource_concat = (
    select * from $current_resource
    union all
    select * from $resource_filter_only_absent
);

insert into `home/videoquality/strm_meta/base/ContentResource` with truncate
select * from $resource_concat;

"""


def update_global_vars(job_root_):
    """Re-point ``job_root`` and every table path derived from it.

    Rebinds the module-level ``job_root`` to ``job_root_`` and recomputes
    each ``<job_root>/<leaf>`` table name so later code sees the new root.
    """
    global job_root, current_table, full_table, full_work_table
    global full_stats_table, alt_table, concat_table
    global yatv_channels_table, ontoids_table, acg_table

    def under_root(leaf):
        # Same result as the "{}/<leaf>".format(job_root) module templates.
        return "{}/{}".format(job_root_, leaf)

    job_root = job_root_
    current_table = under_root("current")
    full_table = under_root("full")
    full_work_table = under_root("full_work")
    full_stats_table = under_root("full_stats")
    alt_table = under_root("alt")
    concat_table = under_root("concat")
    yatv_channels_table = under_root("yatv_channels")
    ontoids_table = under_root("onto_ids_data")
    acg_table = under_root("active_cgs")


# NOTE(review): same text as common_prefix_stub, and not referenced in the
# code shown in this file -- possibly a leftover duplicate; confirm.
header_stub = """
pragma yt.Pool = "@pool";
pragma yt.PoolTrees = "physical";
pragma yt.UseDefaultTentativePoolTrees;
"""


def make_step_query(step=0):
    """Build the query text for one pipeline step.

    Step 0 is rendered from ``step0.sql`` with only ``@root`` substituted.
    Any other step is rendered from ``step1.sql`` with ``@root`` and
    ``@step`` (``"step<N>"``) substituted.
    """
    is_initial = step == 0
    stub = read_file("step0.sql" if is_initial else "step1.sql")
    replacements = {"@root": job_root}
    if not is_initial:
        replacements["@step"] = "step{}".format(step)
    return apply_replacements(stub, replacements)


def get_rc(cluster, table):
    """Return the ``row_count`` attribute of ``table``, or 0 when it is falsy."""
    yt_client = get_driver(cluster).client
    row_count = yt_client.get_attribute(table, "row_count")
    if row_count:
        return row_count
    return 0


def convert_struct(x):
    """Copy the public attributes of ``x`` into a dict.

    Every name from ``dir(x)`` that does not start with an underscore is
    copied; a ``content_type_id`` key holding ``str(ContentTypeID)`` is then
    added. Raises ``KeyError`` when ``x`` has no ``ContentTypeID`` attribute.
    """
    result = {}
    for attr_name in dir(x):
        if attr_name.startswith("_"):
            continue
        result[attr_name] = getattr(x, attr_name)
    result["content_type_id"] = str(result["ContentTypeID"])
    return result


def remap(lst):
    """Apply ``convert_struct`` to each element of a list.

    Anything that is not a ``list`` instance is returned unchanged.
    """
    if isinstance(lst, list):
        return [convert_struct(item) for item in lst]
    return lst


def main():
    """Command-line entry point: run the Iron Branch YQL pipeline.

    Steps, in order:

    1. Parse arguments and create a ``YqlClient`` from the ``YT_PROXY`` and
       ``YQL_TOKEN`` environment variables; when ``--job_root`` is given,
       re-point the module-level table paths via ``update_global_vars``.
    2. Run ``make_new_base_tables``, submit ``ontoids_yql`` and
       ``channels_yql`` with ``wait=False``, then run
       ``active_content_groups_yql``.
    3. Run ``step0.sql``, then ``step1.sql`` repeatedly with an increasing
       step number until ``<job_root>/parents`` has a row count of 0 -- at
       most 7 times unless ``--debug`` is set.
    4. With ``--ugc_debug``, copy the UGC replica tables into a timestamped
       folder and point the final query at the copies.
    5. Run ``ib_reduce.sql`` and print the elapsed time in minutes.

    Raises:
        KeyError: if ``YT_PROXY`` or ``YQL_TOKEN`` is not set.
    """
    # Function-scope import: only needed for the epoch timestamp below.
    import time

    parser = argparse.ArgumentParser()
    parser.add_argument("--debug", action="store_true")
    # NOTE(review): --nodeleted, --no_yql and --title are not read in this
    # function; `args` is passed whole to get_cluster(), which may use them.
    parser.add_argument("--nodeleted", action="store_true")
    parser.add_argument("--no_yql", action="store_true")
    parser.add_argument("--pool", default="pecheny")
    parser.add_argument("--title", default="Iron Branch")
    parser.add_argument("--ugc_debug", action="store_true")
    parser.add_argument("--job_root")
    args = parser.parse_args()
    yql_client = YqlClient(
        # First dot-separated component of YT_PROXY, lower-cased.
        db=os.environ["YT_PROXY"].split(".")[0].lower(),
        token=os.environ["YQL_TOKEN"],
    )
    if args.job_root:
        update_global_vars(args.job_root)
    start_time = datetime.datetime.now()

    cluster = get_cluster(clusters, args)

    common_prefix = apply_replacements(
        common_prefix_stub, {"@pool": args.pool}
    )

    yr = YqlRunner(
        client=yql_client, title="Iron Branch | YQL", prefix=common_prefix
    )

    yr.run(make_new_base_tables)

    # These two are submitted with wait=False -- presumably without blocking
    # on completion; confirm against YqlRunner.run.
    yr.run(
        apply_replacements(
            ontoids_yql,
            {
                "@ontoids_table": ontoids_table,
                "@cr_table": content_resource_table,
            },
        ),
        wait=False,
    )

    yr.run(
        apply_replacements(
            channels_yql,
            {
                "OUTPUT_TABLE": yatv_channels_table,
                "BASE_TABLE": base_table,
                "CONTENT_RESOURCE_TABLE": content_resource_table,
            },
        ),
        wait=False,
    )

    yr.run(
        apply_replacements(
            active_content_groups_yql,
            {
                "@cg_table": base_table,
                "@cr_table": content_resource_table,
                "@acg_table": acg_table,
            },
        )
    )

    print("performing step 0")

    query = make_step_query(step=0)
    yr.run(query)

    # Repeat the step query until the "parents" table is empty. Outside of
    # --debug the loop is capped at max_steps iterations.
    max_steps = 7
    step = 1
    rc = 1

    while rc and (args.debug or step <= max_steps):
        print("performing step {}".format(step))

        query = make_step_query(step=step)
        yr.run(query)

        rc = get_rc(cluster, "{}/parents".format(job_root))
        if not rc:
            print("nothing left, soon to be finished")
        step += 1

    if args.ugc_debug:
        # Snapshot the UGC replica tables into a timestamped folder so the
        # final query reads a frozen copy.
        now = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
        folder_path = "//home/videolog/strm_meta/iron_branch_test/ugc_debug/{}".format(
            now
        )
        copy_query = ["use hahn;"]
        for table in (
            "ugc_channel",
            "ugc_video_meta",
            "ugc_video_file",
            "ugc_video_prod",
        ):
            copy_query.append(
                "insert into `{}` with truncate select * from `{}`;".format(
                    "{}/{}".format(folder_path, table),
                    "//home/video-hosting/ugc_replica/{}".format(table),
                )
            )
        yr.run("\n\n".join(copy_query), title="Iron Branch UGC Copy | YQL")
        ugc_kwargs = {
            "@[channel_data_table]": "{}/ugc_channel".format(folder_path),
            "@[meta_data_table]": "{}/ugc_video_meta".format(folder_path),
            "@[file_data_table]": "{}/ugc_video_file".format(folder_path),
            "@[prod_data_table]": "{}/ugc_video_prod".format(folder_path),
            "@[ugc_debug_table]": "{}/OUTPUT".format(folder_path),
            # Stripping these markers presumably un-comments a debug section
            # of ib_reduce.sql -- confirm against that file.
            "/*ugc_debug": "",
            "ugc_debug*/": "",
        }
    else:
        ugc_kwargs = {
            "@[channel_data_table]": "//home/video-hosting/ugc_replica/ugc_channel",
            "@[meta_data_table]": "//home/video-hosting/ugc_replica/ugc_video_meta",
            "@[file_data_table]": "//home/video-hosting/ugc_replica/ugc_video_file",
            "@[prod_data_table]": "//home/video-hosting/ugc_replica/ugc_video_prod",
        }

    query = apply_replacements(
        read_file("ib_reduce.sql"),
        {
            "@root": job_root,
            # Unix timestamp in whole seconds. time.time() is used instead of
            # strftime("%s"), which is an undocumented, platform-specific
            # directive and is ambiguous for naive local times around DST
            # changes.
            "@[current_ts]": str(int(time.time())),
        },
    )
    query = apply_replacements(query, ugc_kwargs)
    yr.run(
        query,
        attachments=[
            {
                "path": "analytics/videolog/strm-stats/strm_cube_2/ugc_stats_regular/get_ugc_meta.sql"
            },
            {
                "path": "analytics/videolog/strm-stats/strm_cube_2/ugc_stats_regular/ugc_stats.py"
            },
        ],
        title="Iron Branch Final Step | YQL",
    )

    end_time = datetime.datetime.now()

    print(
        "total time: {} minutes".format(
            (end_time - start_time).total_seconds() / 60
        )
    )


# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
