from datetime import datetime, timedelta

from sandbox import sdk2
from sandbox.sandboxsdk.environments import PipEnvironment


# YQL query that finds cards whose stored `has_duplicate` flag is out of date.
#
# Parameters (both declared as String in the query):
#   $cards  - path of the input cards table
#   $output - path of the table the result is written to
#
# Steps:
#   * $md5_table: takes the md5 found at document path
#     /content/0/content/avatars_meta/md5 for cards that have a non-null owner
#     and board and are not private, collapses them to distinct (md5, board)
#     pairs, and keeps the md5 values that occur on more than 3 boards.
#   * $current_has_duplicate: the flag currently stored in the document at
#     /has_duplicate, treated as False when it is missing.
#   * $actual_has_duplicate: every card LEFT JOINed to $md5_table by md5;
#     cards without a match get False.
#   * The final INSERT ... WITH TRUNCATE overwrites $output with (id,
#     has_duplicate) for the cards whose recomputed flag differs from the
#     stored one.
YQL_TEMPLATE = """
PRAGMA yt.InferSchema;
PRAGMA yt.Pool = "collections_production";

DECLARE $cards as String;
DECLARE $output as String;

$md5_table = (
    SELECT md5,
           True as has_duplicate
    FROM (
        SELECT md5,
               CAST(COUNT(*) as Int32) as cnt
        FROM (
            SELECT *
            FROM (
                SELECT Yson::ConvertToString(Yson::YPath(document, '/content/0/content/avatars_meta/md5')) as md5,
                       board
                FROM $cards
                WHERE owner IS NOT NULL and board IS NOT NULL and NOT Yson::ConvertToBool(Yson::YPath(document, '/is_private'))
            )
            GROUP BY md5, board
        )
        GROUP BY md5
    )
    WHERE cnt > 3
);

$current_has_duplicate = (
    SELECT id,
           Yson::ConvertToString(Yson::YPath(document, '/content/0/content/avatars_meta/md5')) as md5,
           Yson::ConvertToBool(Yson::YPath(document, '/has_duplicate')) ?? False as has_duplicate
    FROM $cards
);

$actual_has_duplicate = (
    SELECT card.md5 as md5, card.id as id, md5_table.has_duplicate ?? False as has_duplicate
    FROM (
        SELECT Yson::ConvertToString(Yson::YPath(document, '/content/0/content/avatars_meta/md5')) as md5, id
        FROM $cards
    ) as card
    LEFT JOIN $md5_table as md5_table
    ON card.md5 == md5_table.md5
);

INSERT INTO $output
WITH TRUNCATE
SELECT actual.id as id, actual.has_duplicate as has_duplicate
FROM $actual_has_duplicate as actual
JOIN $current_has_duplicate as current
ON actual.id == current.id
WHERE actual.has_duplicate != current.has_duplicate
"""


class CollectionsUpdateHasDuplicate(sdk2.Task):
    """Sandbox task that recomputes the ``has_duplicate`` flag of cards.

    The task creates a new, timestamp-named table under ``result_dir``, runs
    ``YQL_TEMPLATE`` against the ``card`` table of ``dump_dir`` and lets the
    query fill the new table with the cards whose flag has to change.
    """

    class Requirements(sdk2.Task.Requirements):
        # Packages installed into the task environment before execution; the
        # yt / yql modules are therefore imported lazily inside on_execute.
        environments = (
            PipEnvironment('yql'),
            PipEnvironment('yandex-yt'),
            PipEnvironment('yandex-yt-yson-bindings-skynet'),
        )

    class Parameters(sdk2.Task.Parameters):
        # Input directory; the query reads its `card` child table.
        dump_dir = sdk2.parameters.String('YT directory with PDB Dump')
        # Directory in which the timestamp-named output table is created.
        result_dir = sdk2.parameters.String('YT directory for result table')

        # Names of the Vault secrets (looked up for the task owner).
        yql_token_secret = sdk2.parameters.String('YQL token secret')
        yt_token_secret = sdk2.parameters.String('YT token secret')
        # Used both as the YT proxy and as the YQL `db`.
        yt_proxy = sdk2.parameters.String('YT proxy (cluster)')
        # Not read by on_execute in this file.
        admin_url = sdk2.parameters.String('Admin tasks endpoint')

        # Lifetime of the output table, in days, counted from task start.
        expiration_time = sdk2.parameters.Integer('Expiration time of output table in days', default=7)

    def on_execute(self):
        """Create the output table and run the duplicate-detection query.

        Raises:
            RuntimeError: if the YQL operation reports any errors; the
                exception argument is the list of error messages.
        """
        from yt.wrapper import YtClient, ypath_join
        yt_token = sdk2.Vault.data(self.owner, self.Parameters.yt_token_secret)
        yt_client = YtClient(proxy=self.Parameters.yt_proxy, token=yt_token)

        now = datetime.utcnow()
        output_table = ypath_join(self.Parameters.result_dir, now.isoformat())
        # Honour the `expiration_time` parameter: without this attribute the
        # result tables are never cleaned up and pile up in result_dir.
        expires_at = now + timedelta(days=self.Parameters.expiration_time)
        yt_client.create_table(
            output_table,
            attributes={'expiration_time': expires_at.isoformat()}
        )

        from yql.api.v1.client import YqlClient
        from yql.client.parameter_value_builder import YqlParameterValueBuilder as ValueBuilder
        yql_client = YqlClient(
            db=self.Parameters.yt_proxy,
            token=sdk2.Vault.data(self.owner, self.Parameters.yql_token_secret)
        )

        parameters = {
            '$cards': ValueBuilder.make_string(ypath_join(self.Parameters.dump_dir, 'card')),
            '$output': ValueBuilder.make_string(output_table),
        }
        request = yql_client.query(YQL_TEMPLATE, syntax_version=1).run(parameters=ValueBuilder.build_json_map(parameters))
        # Block until the operation finishes, then fail the task on any error.
        errors = request.get_results(wait=True).errors
        if errors:
            raise RuntimeError([error.message for error in errors])
