from sandbox import sdk2
from sandbox.projects.cloud.billing.common.task import SmallTask
from sandbox.sandboxsdk import environments
from sandbox.projects.common import binary_task


class GroupRowsToTablesByField(binary_task.LastBinaryTaskRelease, SmallTask):
    """Split rows from YT sources into one table per distinct value of a field.

    Two YQL queries are run in sequence:

    1. The distinct values of ``field_name`` found in the sources are written
       to ``intermediate_table`` (when ``limit`` is set, only the first
       ``limit`` values in descending order are kept).
    2. For every value stored in the intermediate table, the matching source
       rows are written to ``<dst_dir>/<value>``; any "/" inside the value is
       replaced with "#" when building the table name.
    """

    class Parameters(SmallTask.Parameters):
        yt_cluster = sdk2.parameters.String(
            'Destination YT cluster',
            default='hahn',
            required=True
        )
        # Name of the Vault secret; the secret value is used both as the YQL
        # token and as the YT token (see on_execute).
        yql_token_name = sdk2.parameters.String(
            'YQL Token secret name',
            required=True
        )
        # Comma-separated list; whitespace around entries and empty entries
        # are ignored.
        src_paths = sdk2.parameters.String(
            'YT Path to sources, separated by comma',
            required=True
        )
        # True: every source is a directory whose content is listed via FOLDER().
        # False: every source is used as a table path directly.
        are_srcs_dirs = sdk2.parameters.Bool(
            "True if sources are directories",
            required=True,
            default=True
        )
        intermediate_table = sdk2.parameters.String(
            'YT Path to intermediate table',
            required=True
        )
        dst_dir = sdk2.parameters.String(
            'YT Path to destination directory',
            required=True
        )
        field_name = sdk2.parameters.String(
            'Group by this field',
            required=True
        )
        # Optional cap on the number of distinct values (and therefore of
        # destination tables); unset or 0 means no cap.
        limit = sdk2.parameters.Integer(
            'how much tables (max by field) to create',
            required=False
        )
        ext_params = binary_task.binary_release_parameters(stable=True)

    class Requirements(SmallTask.Requirements):
        environments = (
            environments.PipEnvironment('yandex-yt'),
            environments.PipEnvironment('yandex-yt-yson-bindings-skynet'),
            environments.PipEnvironment('yql')
        )

    @staticmethod
    def _split_src_paths(raw_paths):
        """Parse a comma-separated path list into clean path strings.

        Surrounding whitespace is stripped from every entry and empty entries
        (e.g. produced by a trailing comma) are dropped, so that inputs such
        as ``"//a, //b,"`` do not end up as malformed paths in the query.

        :param raw_paths: comma-separated paths as entered in the task form.
        :return: list of non-empty path strings, in their original order.
        :raises ValueError: if no path remains after cleaning.
        """
        stripped = (entry.strip() for entry in raw_paths.split(','))
        src_paths = [entry for entry in stripped if entry]
        if not src_paths:
            raise ValueError('src_paths does not contain any path')
        return src_paths

    @staticmethod
    def _build_paths_definition(src_paths, are_dirs):
        """Return the YQL statement that defines the ``$paths`` list.

        :param src_paths: list of source paths.
        :param are_dirs: if true, ``$paths`` is computed by listing every
            directory with ``FOLDER``; otherwise it is a literal list of the
            given paths.
        :return: YQL text ending with ";" (no trailing newline).
        """
        if are_dirs:
            folder_selects = [
                'SELECT Path FROM FOLDER("{dir}")\n'.format(dir=src_dir)
                for src_dir in src_paths
            ]
            listing = 'UNION ALL\n'.join(folder_selects)
            return "$paths = (SELECT AGGREGATE_LIST(Path) FROM (\n {paths}));".format(paths=listing)

        quoted = ', '.join("'" + path + "'" for path in src_paths)
        return "$paths = [{}];".format(quoted)

    def on_execute(self):
        """Build both YQL queries from the task parameters and run them in order."""
        super(GroupRowsToTablesByField, self).on_execute()

        # NOTE(review): imported inside the method, presumably because these
        # packages are only available in the task's runtime environment - confirm.
        from cloud.billing.utils.scripts.run_query import run_query
        from yql.api.v1.client import YqlClient
        import yt.wrapper as ytw

        yql_token = sdk2.Vault.data(self.owner, self.Parameters.yql_token_name)
        yt_proxy = self.Parameters.yt_cluster

        yql_client = YqlClient(db=yt_proxy, token=yql_token)

        # The same token is used for direct YT access.
        ytw.config['token'] = yql_token
        ytw.config['proxy']['url'] = yt_proxy

        paths = self._build_paths_definition(
            self._split_src_paths(self.Parameters.src_paths),
            self.Parameters.are_srcs_dirs
        )

        # Stage 1: collect distinct field values into the intermediate table.
        query1 = '$field = "{field_name}";\n' \
                 '$intermediate_table = "{intermediate_table}";\n' \
                 '{paths}\n' \
                 'INSERT INTO $intermediate_table WITH TRUNCATE\n' \
                 'SELECT DISTINCT TableRow().$field as field FROM EACH($paths)\n'
        query1 = query1.format(
            field_name=self.Parameters.field_name,
            intermediate_table=self.Parameters.intermediate_table,
            paths=paths
        )
        if self.Parameters.limit:
            # Keep only the first `limit` values in descending order.
            query1 += 'ORDER by field DESC LIMIT {limit}\n'.format(limit=self.Parameters.limit)

        # Stage 2: for each collected value write the matching rows into its
        # own table under dst_dir ("/" in the value becomes "#" in the name).
        query2 = '$field = "{field_name}";\n' \
                 '$intermediate_table = "{intermediate_table}";\n' \
                 '$dst_dir = "{dst_dir}";\n' \
                 '{paths}\n' \
                 '$distinct_values = (SELECT AGGREGATE_LIST_DISTINCT(field) FROM $intermediate_table);\n' \
                 'DEFINE ACTION $insert_action($value) AS\n' \
                 '  $path = $dst_dir || "/" || String::ReplaceAll($value, "/", "#");\n' \
                 '  INSERT INTO $path WITH TRUNCATE SELECT * FROM EACH($paths) WHERE TableRow().$field = $value;\n' \
                 'END DEFINE;\n' \
                 'EVALUATE FOR $value in $distinct_values DO $insert_action($value);\n'
        query2 = query2.format(
            field_name=self.Parameters.field_name,
            intermediate_table=self.Parameters.intermediate_table,
            dst_dir=self.Parameters.dst_dir,
            paths=paths
        )

        # The second query reads the table produced by the first one, so the
        # order of execution matters.
        run_query(
            query=query1,
            yql_client=yql_client,
            yt_wrapper=ytw
        )
        run_query(
            query=query2,
            yql_client=yql_client,
            yt_wrapper=ytw
        )
