# -*- coding: utf-8 -*-
from sandbox import sdk2
import sandbox.common.types.task as ctt
from sandbox.sandboxsdk import process
from sandbox.projects.common import task_env

# Host passed to ``clickhouse-client -h`` when dumping data.
CH_URL = 'bsclickhouse.yabs.yandex.ru'

# Sandbox Vault coordinates (owner / secret name) of the token that is used
# both for the YT client and for the YQL client.
YQL_TOKEN_OWNER = "STATKEY"
YQL_TOKEN_NAME = "YQL_TOKEN"


class BSClickHouseDumperToYT(sdk2.Task):
    """Dump aggregated ``MdbMasterReportPIStat`` rows from ClickHouse into YT.

    Steps performed by ``on_execute``:

    1. Run one ClickHouse query per entry of ``_DUMP_WINDOWS`` and store each
       result as a local TSKV file ``ch_<part>``.
    2. Upload every file to ``<ch_tables_path>/ch_<part>``.
    3. Build ``<ch_tables_path>/<dump_date>_90_days`` with YQL (typed columns,
       prices divided by 1,000,000).
    4. Merge that table with the rows older than 90 days taken from the table
       of two days ago and write the result into the table named after
       yesterday's date.
    5. Remove the temporary tables and repoint the ``last`` link.
    """

    # (max_offset, min_offset) pairs that are appended to the dump date inside
    # the ClickHouse WHERE clause. The index of a pair is the part number used
    # in the ``ch_<part>`` file and table names.
    _DUMP_WINDOWS = (("", "-30"), ("-30", "-60"), ("-60", "-90"))

    # Name of the link (inside ``ch_tables_path``) that points to the newest table.
    _LAST_LINK_NAME = "last"

    class Requirements(task_env.TinyRequirements):
        disk_space = 10000
        ram = 4 * 1024

    class Parameters(sdk2.Parameters):
        _container = sdk2.parameters.Container(
            "Environment container resource",
            default_value=2214828183,  # LXC container with ch-client
            required=True
        )

        dump_date = sdk2.parameters.String('Dump date', required=True)
        ch_tables_path = sdk2.parameters.String('CH YT path', required=True)
        login = sdk2.parameters.String('Login for CH DB', required=True)
        password_vault_name = sdk2.parameters.String('Name of secret with password', required=True)
        password_vault_owner = sdk2.parameters.String('Owner of secret', required=True)
        yt_cluster = sdk2.parameters.String('Yt cluster', default='hahn')

    # ------------------------------------------------------------------ helpers

    def _part_numbers(self):
        """Return the part indexes (0..N-1) that correspond to ``_DUMP_WINDOWS``."""
        return range(len(self._DUMP_WINDOWS))

    def _configured_yt(self):
        """Import ``yt.wrapper`` lazily, set its token and proxy, and return the module."""
        import yt.wrapper as yt

        yt.config.config['token'] = sdk2.Vault.data(YQL_TOKEN_OWNER, YQL_TOKEN_NAME)
        yt.config.set_proxy(self.Parameters.yt_cluster)
        return yt

    def _yql_cluster(self):
        """Return the cluster name for YQL, derived from the ``yt_cluster`` parameter.

        Only the first dot-separated label is used so that a value such as
        ``hahn.yt.yandex.net`` still maps to ``hahn``; an empty parameter falls
        back to ``hahn``, the value that used to be hard-coded.
        """
        cluster = (self.Parameters.yt_cluster or '').split('.')[0]
        return cluster or 'hahn'

    def _run_yql(self, query):
        """Run ``query`` through YQL and return the ``table`` of its results.

        Raises:
            RuntimeError: when the request does not succeed; the message is the
                newline-joined list of errors reported by YQL.
        """
        from yql.api.v1.client import YqlClient

        token = sdk2.Vault.data(YQL_TOKEN_OWNER, YQL_TOKEN_NAME)
        # YQL has to run on the same cluster the YT client writes to.
        yql_client = YqlClient(db=self._yql_cluster(), token=token)
        request = yql_client.query(query, syntax_version=1)
        request.encoding = 'utf-8'
        request.run()

        results = request.get_results()
        if not results.is_success:
            raise RuntimeError('\n'.join(str(err) for err in results.errors))
        return results.table

    # --------------------------------------------------------------- ClickHouse

    def GetCHPassword(self):
        """Return the ClickHouse password stored in the Vault secret named by the task parameters."""
        return sdk2.Vault.data(
            self.Parameters.password_vault_owner,
            self.Parameters.password_vault_name,
        )

    def prepare_clickhouse_query(self, max_date, min_date):
        """Build the aggregation query for one date window.

        Args:
            max_date: text appended to the dump date for the exclusive upper
                bound (for example ``""`` or ``"-30"``).
            min_date: text appended to the dump date for the inclusive lower
                bound (for example ``"-30"``).

        Returns:
            The ClickHouse SQL text.
        """
        today = "toDate(toDateTime('{} 00:00:00'))".format(self.Parameters.dump_date)
        ch_query = '''
            SELECT
                EventDate,
                PageID,
                DSPID,
                ImpID,
                sum(DirectPartnerPrice) as partner_price_direct,
                sum(BillingPartnerPrice) as partner_price_non_direct,
                sum(PartnerPrice) as tech_partner_price_dsp

            from MdbMasterReportPIStat
            where EventDate > toDate(toDateTime('2021-05-31 00:00:00')) and EventDate < {today} {max_date} and EventDate >= {today} {min_date}
            GROUP BY
                EventDate,
                PageID,
                DSPID,
                ImpID;
        '''

        return ch_query.format(today=today, max_date=max_date, min_date=min_date)

    def run_query_and_put_to_file(self, query, login, password, part):
        """Run ``query`` with ``clickhouse-client`` and save the TSKV output to ``ch_<part>``.

        Every value interpolated into the shell command is shell-quoted, so a
        password, login or query containing shell metacharacters cannot break
        or alter the command.

        Returns:
            The return code of the finished process.
        """
        try:
            from shlex import quote as shell_quote  # Python 3
        except ImportError:
            from pipes import quote as shell_quote  # Python 2

        # NOTE(review): the password is still part of the command line, so it
        # may be visible in process listings / task logs - consider a safer
        # channel if clickhouse-client in the container supports one.
        command = (
            'clickhouse-client -m -h {host} -u {user} --password {password} '
            '--query {query} --format TSKV > {output}'
        ).format(
            host=shell_quote(CH_URL),
            user=shell_quote(login),
            password=shell_quote(password),
            query=shell_quote(query),
            output=shell_quote('ch_{}'.format(part)),
        )
        proc_download = process.run_process(['bash', '-c', command])
        return proc_download.returncode

    def dump_from_ch_to_file(self):
        """Dump every window of ``_DUMP_WINDOWS`` into its own local file."""
        login = self.Parameters.login
        password = self.GetCHPassword()
        for part, (max_date, min_date) in enumerate(self._DUMP_WINDOWS):
            query = self.prepare_clickhouse_query(max_date, min_date)
            self.run_query_and_put_to_file(query, login, password, part)

    # ----------------------------------------------------------------------- YT

    def save_to_yt(self):
        """Upload every dumped part to YT."""
        for part in self._part_numbers():
            self.save_file_to_yt(part)

    def save_file_to_yt(self, part):
        """Replace ``<ch_tables_path>/ch_<part>`` with the content of the local file ``ch_<part>``."""
        yt = self._configured_yt()

        table = "{}/ch_{}".format(self.Parameters.ch_tables_path, part)

        if yt.exists(table):
            yt.remove(table)
        # Binary mode: the data is uploaded raw, and the file is closed even
        # when the upload fails.
        with open("ch_{}".format(part), "rb") as dump_file:
            yt.write_table(table, dump_file, format='dsv', raw=True)

    def add_schema_and_prepare_price(self, date, path):
        """Concatenate the uploaded parts into ``<path>/<date>_90_days`` with typed columns.

        Price columns are divided by 1,000,000 (presumably micro-units to
        currency units - confirm with the data owner).

        Args:
            date: date string used as the prefix of the target table name.
            path: YT directory that holds the ``ch_<part>`` tables.

        Returns:
            The ``table`` attribute of the YQL results.

        Raises:
            RuntimeError: when the YQL request fails.
        """
        get_in_rub = '''
            $get_in_rub = ($price) -> {
                $t = CAST($price as Double);
                $a = 1. * $t / 1000000;
                RETURN  CAST($a as Double);
            };
        '''

        get_partner_price_billing = '''
            $get_partner_price_billing = ($partner_price_direct, $partner_price_non_direct) -> {
                $ppd = CAST($partner_price_direct as Double) * 1. / 1000000;
                $ppnd = CAST($partner_price_non_direct as Double) * 1. / 1000000;
                $result = $ppd + $ppnd;
                RETURN $result;
            };
        '''

        # One backquoted table path per dumped part, in part order.
        part_tables = ',\n                '.join(
            '`{}/ch_{}`'.format(path, part) for part in self._part_numbers()
        )

        prepare_query = '''
            PRAGMA yt.InferSchema = '1';

            {get_in_rub}

            {get_partner_price_billing}

            INSERT INTO `{path}/{date}_90_days` WITH TRUNCATE
            SELECT
                CAST(DSPID as Int64) as dsp_id,
                EventDate as report_date,
                CAST(ImpID as UInt64) as block_id,
                CAST(PageID as UInt64) as page_id,
                $get_in_rub(partner_price_direct) as partner_price_direct,
                $get_in_rub(partner_price_non_direct) as partner_price_non_direct,
                $get_in_rub(tech_partner_price_dsp) as tech_partner_price_dsp,
                $get_partner_price_billing(partner_price_direct, partner_price_non_direct) as partner_price_billing
            from CONCAT(
                {part_tables}
            )
            order by
                report_date,
                page_id,
                block_id,
                dsp_id;
        '''.format(
            get_partner_price_billing=get_partner_price_billing,
            date=date,
            path=path,
            get_in_rub=get_in_rub,
            part_tables=part_tables,
        )

        return self._run_yql(prepare_query)

    def merge_tmp_and_yesterday_table(self):
        """Write the table named after yesterday's date.

        The result is the union of the fresh ``<dump_date>_90_days`` table and
        the rows older than 90 days from the table named after the date two
        days before ``dump_date``. The query sets an expiration deadline 31
        days after ``dump_date``.

        Returns:
            The ``table`` attribute of the YQL results.

        Raises:
            RuntimeError: when the YQL request fails.
        """
        query = '''
            $ttl = cast(Date("{today_date}") + 31 * Interval("P1D") as string);

            PRAGMA SimpleColumns;
            PRAGMA yt.NightlyCompress;
            PRAGMA yt.ExpirationDeadline = $ttl;

            $yesterday = cast(Date("{today_date}") - 1 * Interval("P1D") as string);
            $minus_2_days = cast(Date("{today_date}") - 2 * Interval("P1D") as string);
            $minus_90_days = cast(Date("{today_date}") - 90 * Interval("P1D") as string);
            $path = "{path}";

            $minus_2_days_table = $path || "/" || $minus_2_days;
            $yesterday_table = $path || "/" || $yesterday;

            $old = (select * from $minus_2_days_table where report_date < $minus_90_days);

            $new = (
                select *
                from `{path}/{today_date}_90_days`
            );

            INSERT INTO $yesterday_table with truncate
            select * from (select * from $old union all select * from $new)
            order by report_date, page_id, block_id, dsp_id;
        '''.format(
            today_date=self.Parameters.dump_date,
            path=self.Parameters.ch_tables_path,
        )

        return self._run_yql(query)

    def clear_after_work(self):
        """Remove the temporary ``ch_<part>`` tables and the ``<dump_date>_90_days`` table if present."""
        yt = self._configured_yt()
        base_path = self.Parameters.ch_tables_path

        temporary_tables = ["{}/ch_{}".format(base_path, part) for part in self._part_numbers()]
        temporary_tables.append(
            "{path}/{date}_90_days".format(path=base_path, date=self.Parameters.dump_date)
        )

        for table in temporary_tables:
            if yt.exists(table):
                yt.remove(table)

    def create_link_for_last_table(self):
        """Point ``<ch_tables_path>/last`` at the newest date-named table.

        The target is chosen before the link is touched and the link is then
        replaced in a single ``force`` call, so a failure while listing the
        directory does not leave the directory without a ``last`` link.

        Raises:
            RuntimeError: when the directory has no ``YYYY-MM-DD`` node.
        """
        import re

        yt = self._configured_yt()
        base_path = self.Parameters.ch_tables_path
        link_path = base_path + "/" + self._LAST_LINK_NAME

        # Result tables are named by date (YYYY-MM-DD); ignore everything else
        # (the link itself, leftovers of a failed run) so that max() cannot
        # pick a non-date node.
        date_name = re.compile(r'^\d{4}-\d{2}-\d{2}$')
        dated_tables = [name for name in yt.list(base_path) if date_name.match(name)]
        if not dated_tables:
            raise RuntimeError("No date-named tables found in {}".format(base_path))

        last_date_path = base_path + "/" + max(dated_tables)
        yt.link(last_date_path, link_path, force=True)

    # ---------------------------------------------------------------- lifecycle

    def on_create(self):
        """Select the latest stable tasks binary resource for this task type."""
        self.Requirements.tasks_resource = sdk2.service_resources.SandboxTasksBinary.find(
            attrs={"Name": "BSClickHouseDumperToYT", "release": ctt.ReleaseStatus.STABLE},
        ).first()

    def on_execute(self):
        """Run the whole dump pipeline; any failing step aborts the task."""
        self.dump_from_ch_to_file()
        self.save_to_yt()
        self.add_schema_and_prepare_price(self.Parameters.dump_date, self.Parameters.ch_tables_path)
        self.merge_tmp_and_yesterday_table()
        self.clear_after_work()
        self.create_link_for_last_table()
