import datetime
import os
import time

import sandbox.common.errors as errors
import sandbox.common.types.task as ctt

from sandbox import sdk2

from sandbox.projects.common import task_env
from sandbox.projects.common.nanny import nanny
from sandbox.projects.geosearch.CleanupYtFolder import clean_folder
from sandbox.projects.ydo import ydo_releasers
from sandbox.projects.ydo.backup.LinkTables import YdoBackupLinkTables
from sandbox.projects.yql.RunYQL2 import RunYQL2

from sandbox.sandboxsdk import environments


class YdoYtJsonDump(sdk2.Resource):
    """
        Contents of a YT table exported as JSON lines.

        Serves as the base type for the script-specific dump resources declared in this module.
    """
    releasable = True
    executable = False
    releasers = ydo_releasers


class YdoNotActiveWorkersDump(YdoYtJsonDump):
    """
        JSON dump created for the 'not_active_workers.yql' script (workers with low activity).
    """


class YdoPuidsThatDontUseSmsDump(YdoYtJsonDump):
    """
        JSON dump created for the 'puids_dont_use_sms.yql' script (puids that don't use sms).
    """


class YdoServicePricesDump(YdoYtJsonDump):
    """
        JSON dump created for the 'service_prices.yql' script (average prices data for wizard).
    """


class YdoClicksByRubricDump(YdoYtJsonDump):
    """
        JSON dump created for the 'clicks_by_rubric.yql' script (clicks factors by rubric).
    """


class YdoRunYQL2(nanny.ReleaseToNannyTask2, sdk2.Task):
    class Parameters(sdk2.Parameters):
        # Vault item name whose value is handed to the RunYQL2 task as the YQL token name.
        yql_vault_token = sdk2.parameters.String("Your yql token name in vault", default="YQL_TOKEN", required=True)

        # Cluster name: substituted into %CLUSTER% and used to build the YT proxy URL.
        yt_cluster = sdk2.parameters.String('YT cluster', required=True, default='hahn')
        yt_vault_token = sdk2.parameters.String("Your yt token name in vault", default="yt-token")

        # Keys are script file names; the chosen file is read from the directory of this module.
        with sdk2.parameters.RadioGroup('YQL script file') as yql_script:
            yql_script.values['aggregated_orders.yql'] = yql_script.Value('Calculate aggregated orders info by worker in service model')
            yql_script.values['antifraud_dublicates.yql'] = yql_script.Value('Preprocess antifraud dublicates')
            yql_script.values['average_position.yql'] = yql_script.Value('Calculate average position of worker')
            yql_script.values['callable_workers.yql'] = yql_script.Value('Collect ids of workers with last app version')
            yql_script.values['chats_no_answers.yql'] = yql_script.Value('Find workers that do not answer in chats')
            yql_script.values['clicks_by_rubric.yql'] = yql_script.Value('Calculate clicks factors by rubric')
            yql_script.values['count_ctr.yql'] = yql_script.Value('Calculate ctr of worker')
            yql_script.values['customer_orders.yql'] = yql_script.Value('Prepare customer orders in service model')
            yql_script.values['feedback_stats.yql'] = yql_script.Value('Calculate rating and review stats by rubric and region')
            yql_script.values['get_trial_promotions.yql'] = yql_script.Value('Prepare table with workers on promotion trial')
            yql_script.values['get_youtube_worker_videos.yql'] = yql_script.Value('Mining youtube videos of our workers')
            yql_script.values['not_active_workers.yql'] = yql_script.Value('Collect ids of workers with low activity on service')
            yql_script.values['promo_squeeze.yql'] = yql_script.Value('Parse yabs promotion logs')
            yql_script.values['regular_coords.yql'] = yql_script.Value('Prepare workers\' regular coordinates')
            yql_script.values['service_prices.yql'] = yql_script.Value('Calculate average prices by rubric and region')
            yql_script.values['sprav_to_workers.yql'] = yql_script.Value(value='Prepare sprav workers', default=True)
            yql_script.values['sprav_to_workers_direct.yql'] = yql_script.Value(value='Prepare sprav workers for direct replacement')
            yql_script.values['toponyms_count.yql'] = yql_script.Value('Calculate number of toponyms, cities, regions on worker cards')
            yql_script.values['workers_to_sprav_companies.yql'] = yql_script.Value('Get all companies needed for workers with org_permalink or chain_permalink')
            yql_script.values['puids_dont_use_sms.yql'] = yql_script.Value('Puids that don\'t use sms')
            yql_script.values['merge_yabs_banners.yql'] = yql_script.Value('Merge yabs banners')
            yql_script.values['yabs_prepare_cpc.yql'] = yql_script.Value('Prepare CPC (cost per click) yabs data')
            yql_script.values['worker_behavior.yql'] = yql_script.Value('Add worker\'s behaviour factors')
            yql_script.values['worker_user_interaction.yql'] = yql_script.Value('Add worker - interaction factors')
            yql_script.values['workers_activations.yql'] = yql_script.Value('Update worker activations timestamps after indexing')
            yql_script.values['reaction_statistic_by_card.yql'] = yql_script.Value('Calculate notifications reactions factors')
            yql_script.values['reaction_statistic_by_spec_reg.yql'] = yql_script.Value('Calculate probability of 1 or 2 reactions in 1 hour in certain region and spec')
            yql_script.values['prepare_reactions_annotations.yql'] = yql_script.Value('Prepare birzha reactions annotations')
            yql_script.values['count_orders_for_prediction.yql'] = yql_script.Value('Count orders for each specialization for prediction')
            yql_script.values['calc_timings_from_http_adapter.yql'] = yql_script.Value('Calculate timings from http adapter access logs')

        use_aux_file = sdk2.parameters.Bool(
            "Use extra file (as imported YQL library)",
            default=False
        )
        # The library file is read from the same directory as the script itself.
        with use_aux_file.value[True]:
            with sdk2.parameters.RadioGroup('YQL script extra file') as yql_library:
                yql_library.values['_geoid.sql'] = yql_library.Value(value='_geoid.sql', default=True)
                yql_library.values['_sprav_to_workers_parts.sql'] = yql_library.Value(value='_sprav_to_workers_parts.sql')

        # Script name -> (resource class, resource description, resource file name).
        # Looked up by prepare_resource(); scripts missing here fall back to a generic YdoYtJsonDump.
        resource_settings = {
            'not_active_workers.yql': (YdoNotActiveWorkersDump, 'Ydo workers with low activity', 'not_active_workers.json'),
            'puids_dont_use_sms.yql': (YdoPuidsThatDontUseSmsDump, 'Puids that don\'t use sms', 'puids_dont_use_sms.json'),
            'service_prices.yql': (YdoServicePricesDump, 'Average prices data for wizard', 'service_prices.json'),
            'clicks_by_rubric.yql': (YdoClicksByRubricDump, 'Clicks factors by rubric', 'clicks_by_rubric.json'),
        }

        # Merged into the built-in placeholder dict last, so entries here override built-in ones.
        custom_placeholders = sdk2.parameters.Dict("Custom placeholders (keys like %KEY%)")

        res_dir = sdk2.parameters.String('Resulting table directory', required=True)

        with sdk2.parameters.RadioGroup('Resulting table naming') as res_table_naming:
            res_table_naming.values['timestamp'] = res_table_naming.Value('timestamp (like 1626106000)', default=True)
            res_table_naming.values['date'] = res_table_naming.Value('date (like 2021-05-01)')
            res_table_naming.values['custom'] = res_table_naming.Value('custom')

        with res_table_naming.value['custom']:
            res_table_name = sdk2.parameters.String('Resulting table name (without full path)', required=True)

        need_create_table_link = sdk2.parameters.Bool('Create link for table', default=True)
        with need_create_table_link.value[True]:
            res_table_link = sdk2.parameters.String('Resulting table link name (default: "current")')

        merge_result = sdk2.parameters.Bool('Merge resulting table to reduce number of chunks', default=False)
        need_clean_folder = sdk2.parameters.Bool('Delete older tables from output directory', default=True)
        with need_clean_folder.value[True]:
            yt_folder_size = sdk2.parameters.Integer('How many previous tables to leave', default=5)

        # 0 (the default) disables the row count check.
        min_rows_in_result = sdk2.parameters.Integer('Check that result has at least this many rows', default=0)

        # Values are built through the group's own handle, like every other radio group above.
        with sdk2.parameters.RadioGroup('Release resulting table') as release_mode:
            release_mode.values['none'] = release_mode.Value('Do not release', default=True)
            release_mode.values['testing'] = release_mode.Value('Release to testing')
            release_mode.values['stable'] = release_mode.Value('Release to stable')

        # Either a literal value or the name of a built-in placeholder such as %TODAY% / %YESTERDAY%.
        start_date = sdk2.parameters.String('Insert value in %START_DATE% placeholder. Can be other placeholder', default="%YESTERDAY%", required=True)
        end_date = sdk2.parameters.String('Insert value in %END_DATE% placeholder. Can be other placeholder', default="%YESTERDAY%", required=True)

    class Requirements(task_env.TinyRequirements):
        # on_execute() imports yt.wrapper, which comes from the 'yandex-yt' pip package
        environments = [environments.PipEnvironment('yandex-yt')]

    def get_output_table_name(self):
        """
            Return the short name (without directory) for the resulting table.

            The scheme is chosen by the ``res_table_naming`` parameter:
            ``custom`` gives ``res_table_name`` as is, ``date`` gives the current local date
            formatted as YYYY-MM-DD, any other value gives the current Unix time in whole seconds.
        """
        naming = self.Parameters.res_table_naming
        if naming == "custom":
            return self.Parameters.res_table_name
        if naming == "date":
            return time.strftime('%Y-%m-%d')
        # 'timestamp' and anything unrecognised
        seconds_since_epoch = int(time.time())
        return str(seconds_since_epoch)

    def run_script(self):
        """
            Enqueue a RunYQL2 task for the selected script and wait for it.

            The script (and, when ``use_aux_file`` is set, the library file) is read from the
            directory of this module. The full path of the output table is stored in
            ``Context.out_table`` and the id of the enqueued task in ``Context.run_script_task_id``.

            Raises:
                sdk2.WaitTask: always, once the RunYQL2 task has been enqueued.
        """
        def read_file(local_path):
            module_dir = os.path.dirname(os.path.abspath(__file__))
            with open(os.path.join(module_dir, local_path)) as local_file:
                return local_file.read()

        params = self.Parameters
        query = read_file(params.yql_script)

        self.Context.out_table = os.path.join(params.res_dir, self.get_output_table_name())

        # put params to placeholders
        today = datetime.date.today()
        placeholders = {
            '%CLUSTER%': params.yt_cluster,
            '%RES_PATH%': self.Context.out_table,
            '%RES_DIR%': params.res_dir,
            '%TODAY%': today.isoformat(),
            '%YESTERDAY%': (today - datetime.timedelta(days=1)).isoformat(),
        }
        # start/end date may name one of the placeholders defined above; otherwise the value is used verbatim
        placeholders['%START_DATE%'] = placeholders.get(params.start_date, params.start_date)
        placeholders['%END_DATE%'] = placeholders.get(params.end_date, params.end_date)
        # custom entries go last so they can override the built-in ones;
        # `or {}` keeps dict.update() from raising TypeError should the optional parameter be None
        placeholders.update(params.custom_placeholders or {})

        aux_file = None
        if params.use_aux_file:
            aux_file = {params.yql_library: "raw:" + read_file(params.yql_library)}

        task = RunYQL2(
            self,
            description='Run ' + params.yql_script + ' script',
            notifications=params.notifications,
            create_sub_task=False,
            query=query,
            custom_placeholders=placeholders,
            trace_query=True,
            yql_token_vault_name=params.yql_vault_token,
            use_v1_syntax=True,
            publish_query=True,
            add_files=aux_file,
        )
        task.enqueue()

        self.Context.run_script_task_id = task.id

        raise sdk2.WaitTask([task.id], ctt.Status.Group.SUCCEED + ctt.Status.Group.SCHEDULER_FAILURE, wait_all=True)

    def check_result(self, yt):
        """
            Verify that the resulting table has at least ``min_rows_in_result`` rows.

            Nothing is checked (and YT is not queried) unless the threshold is greater than zero.

            Args:
                yt: object providing ``row_count(path)``; on_execute() passes the ``yt.wrapper`` module.

            Raises:
                errors.TaskError: the table has fewer rows than required.
        """
        expected_count = self.Parameters.min_rows_in_result
        if not expected_count > 0:
            return

        actual_count = yt.row_count(self.Context.out_table)
        if actual_count < expected_count:
            message = "Too few resulting rows ({} < {})".format(actual_count, expected_count)
            raise errors.TaskError(message)

    def link(self):
        """
            Enqueue a YdoBackupLinkTables task for the resulting table and wait for it.

            The task receives a ``{Context.out_table: link path}`` mapping. The link path is
            ``res_dir`` joined with the ``res_table_link`` parameter, or with 'current' when the
            parameter is empty.

            Raises:
                sdk2.WaitTask: always, once the link task has been enqueued.
        """
        link_name = self.Parameters.res_table_link or 'current'
        # The parameter is documented as a link *name*, so it is resolved inside res_dir.
        # os.path.join() returns a component starting with "/" unchanged, so a full "//..." path
        # entered in this field still works as before.
        link_target = os.path.join(self.Parameters.res_dir, link_name)
        tables_to_link = {self.Context.out_table: link_target}
        link_task = YdoBackupLinkTables(
            self,
            description='Link tables for task {}'.format(self.id),
            notifications=self.Parameters.notifications,
            create_sub_task=False,
            yt_host=self.Parameters.yt_cluster,
            yt_vault_token=self.Parameters.yt_vault_token,
            yt_tables=tables_to_link
        )
        link_task.enqueue()

        raise sdk2.WaitTask([link_task.id], ctt.Status.Group.SUCCEED + ctt.Status.Group.SCHEDULER_FAILURE, wait_all=True)

    def prepare_resource(self, yt):
        """
            Write the resulting table into a resource file in JSON format and mark the resource ready.

            Resource class, description and file name are taken from ``Parameters.resource_settings``
            by script name; scripts without an entry get a generic YdoYtJsonDump.

            Args:
                yt: object providing ``read_table``, ``TablePath`` and ``JsonFormat``;
                    on_execute() passes the ``yt.wrapper`` module.
        """
        fallback_settings = (YdoYtJsonDump, 'Ydo YT dump', 'ydo_data.json')
        settings = self.Parameters.resource_settings.get(self.Parameters.yql_script, fallback_settings)
        resource_type, resource_descr, resource_fname = settings

        resource = resource_type(self, resource_descr, resource_fname)
        resource_data = sdk2.ResourceData(resource)
        with open(str(resource_data.path), "w") as fout:
            table_stream = yt.read_table(yt.TablePath(self.Context.out_table), format=yt.JsonFormat(), raw=True)
            for chunk in table_stream:
                fout.write(chunk)
        resource_data.ready()

    def on_execute(self):
        """
            Run the YQL script and post-process the resulting table.

            Steps: run the script through a RunYQL2 task, fail if that task did not succeed,
            check the row count, optionally merge the table, optionally clean the output
            directory, optionally link the table, and optionally dump the table into a
            resource and release it.

            run_script() and link() always raise sdk2.WaitTask, so the code after them is only
            reached when this method runs again; one-off steps are therefore wrapped in memoize
            stages.

            Raises:
                errors.TaskFailure: the RunYQL2 task finished in a status other than SUCCESS.
                errors.TaskError: the resulting table has too few rows (see check_result()).
        """
        with self.memoize_stage.run_script:
            self.run_script()

        if sdk2.Task[self.Context.run_script_task_id].status != ctt.Status.SUCCESS:
            raise errors.TaskFailure('Child task {} failed'.format(self.Context.run_script_task_id))

        # imported here because the package is provided by the PipEnvironment requirement
        import yt.wrapper as yt
        yt.config['token'] = sdk2.Vault.data(self.owner, self.Parameters.yt_vault_token)
        yt.config['proxy']['url'] = '{}.yt.yandex.net'.format(self.Parameters.yt_cluster)

        self.check_result(yt)

        if self.Parameters.merge_result:
            # memoized so the table is not merged again when the method is re-entered
            # after link() has put the task into waiting
            with self.memoize_stage.merge_result:
                yt.run_merge(self.Context.out_table, self.Context.out_table, spec={"combine_chunks": True})

        if self.Parameters.need_clean_folder:
            with self.memoize_stage.clean_folder:
                clean_folder(
                    self,
                    self.Parameters.res_dir,
                    yt_host=self.Parameters.yt_cluster,
                    yt_vault_token=self.Parameters.yt_vault_token,
                    history_size=self.Parameters.yt_folder_size,
                )

        if self.Parameters.need_create_table_link:
            with self.memoize_stage.link_to_current:
                self.link()

        with self.memoize_stage.release:
            mode = self.Parameters.release_mode
            if mode in ("stable", "testing"):
                status = ctt.ReleaseStatus.STABLE if mode == "stable" else ctt.ReleaseStatus.TESTING
                self.prepare_resource(yt)
                nanny.ReleaseToNannyTask2.on_release(self, dict(
                    releaser=self.author,
                    release_status=status,
                ))
