# -*- coding: utf-8 -*-
from datetime import datetime, date, timedelta

from sandbox import sdk2
import logging
import os
import inspect

from sandbox.sandboxsdk import environments
from sandbox import common
import sandbox.common.types.task as ctt

from sandbox.common.types.task import Status

from sandbox.projects.adfox.yt2ch_transfuser import AdfoxYt2ChTransfuser
from sandbox.projects.yql.RunYQL2 import RunYQL2


class AdfoxReportAudienceInventory(sdk2.Task):
    """Prepare data for the Yandex.Audience inventory report.

    The work is split into stages wrapped in ``memoize_stage``: preparing the
    daily YT directory, extracting events, extracting profiles, building the
    two segment reports and (optionally) transferring them to ClickHouse.
    Stages that launch subtasks raise ``sdk2.WaitTask``; ``on_execute`` is
    then entered again and first checks the subtasks it was waiting for.
    """
    # https://wiki.yandex-team.ru/adfox/develop/projects/adfox-audience/inventoryreport

    reportExpireDays = 60   # days until the daily report directory expires in YT
    tmpExpireDays = 6       # not referenced anywhere in this class
    __ytClient = None       # lazily created YT client, see __GetYtClient

    class Context(sdk2.Task.Context):
        waiting_subtasks = None       # Ids of the subtasks the task is waiting for

        strDateReport = None        # Date, for which to build report
        reportPath = None           # Path, where to store report

    class Requirements(sdk2.Task.Requirements):
        cores = 1
        environments = (
            environments.PipEnvironment('yandex-yt', '0.8.17-0'),
        )

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Task.Parameters):
        # 30000 seconds is about 8h20m.
        # NOTE(review): the previous comment said "5h", which would be 18000 - confirm the intended limit.
        kill_timeout = 30000
        max_restarts = 2

        bl_explicit_date = sdk2.parameters.Bool("Set explicit date to prepare report", default=False, required=True)
        with bl_explicit_date.value[True]:
            # NOTE(review): this default is computed once, when the class body is executed,
            # not when a task is created - confirm that a possibly stale default is acceptable.
            dt = datetime.today() - timedelta(days=1)
            date_report = sdk2.parameters.String('Date to build report', default=dt.strftime("%Y-%m-%d"), required=False)

        yt_work_directory = sdk2.parameters.String('YT report root', default='//home/adfox-reports/audience_report', required=True)

        with sdk2.parameters.Group("YT Parameters") as yt_block:
            yl_token_vault_name = sdk2.parameters.String('Vault name to extract YT token', default='ADFOX_ROBOT_YT_TOKEN', required=True)
            yl_token_vault_owner = sdk2.parameters.String('Vault owner to extract YT token', default='ADFOX', required=True)
            yt_database = sdk2.parameters.String('YT database name', default="HAHN", required=True)

            yql_token_vault_name = sdk2.parameters.String('Vault name to extract YQL token', default='ADFOX_ROBOT_YQL_TOKEN', required=True)
            yql_token_vault_owner = sdk2.parameters.String('Vault owner to extract YQL token', default='ADFOX', required=True)
            yql_server = sdk2.parameters.String('YQL server name', default="yql.yandex.net", required=True)

        with sdk2.parameters.Group("ClickHouse Parameters") as ch_block:
            ch_host_name = sdk2.parameters.String('host name', default='ch.adfox.net', required=True)
            ch_user_name = sdk2.parameters.String('user name')
            ch_password_vault_name = sdk2.parameters.String('Vault name to extract ClickHouse password', default='ADFOX_CH_REPORT_PASSWORD', required=True)
            ch_password_vault_owner = sdk2.parameters.String('Vault owner to extract ClickHouse password', default='ADFOX', required=True)
            ch_table_name = sdk2.parameters.String('table', required=True)

        with sdk2.parameters.Group("Debug option") as dbg_block:
            cleanup_on_start = sdk2.parameters.Bool("Remove previous data", default=False)
            is_test_launch = sdk2.parameters.Bool("Test launch", default=False)
            transfuse_to_clickhouse = sdk2.parameters.Bool("Transfer data to clichouse", default=True, required=True)

    def __GetYtToken(self):
        """Return the YT token stored in the vault configured by the yl_token_* parameters."""
        return sdk2.Vault.data(self.Parameters.yl_token_vault_owner, self.Parameters.yl_token_vault_name)

    def __GetYqlToken(self):
        """Return the YQL token stored in the vault configured by the yql_token_* parameters."""
        return sdk2.Vault.data(self.Parameters.yql_token_vault_owner, self.Parameters.yql_token_vault_name)

    def on_execute(self):
        """Run the report pipeline; entered again after every subtask wait."""
        logging.info('Running inventory report builder')

        os.environ['YQL_TOKEN'] = self.__GetYqlToken()
        os.environ['YT_TOKEN'] = self.__GetYtToken()

        if self.Parameters.is_test_launch:
            logging.info('*** This is test launch ***')

        # Pin the report date on the first run only. This method is entered again
        # after each wait, and recomputing "yesterday" then would switch the report
        # to another day (and another YT path) if the task crosses midnight.
        if not self.Context.strDateReport:
            if self.Parameters.bl_explicit_date:
                dateReport = datetime.strptime(self.Parameters.date_report, '%Y-%m-%d').date()
            else:
                dateReport = date.today() - timedelta(days=1)
            self.Context.strDateReport = self.__DateReportToStr(dateReport)
        self.Context.reportPath = self.__GetDailyReportPath(self.Context.strDateReport)

        if self.Context.waiting_subtasks:
            logging.info('There were launched subtasks - {}. Checking them'.format(str(self.Context.waiting_subtasks)))
            tasks = list(sdk2.Task.find(id=self.Context.waiting_subtasks, children=True).limit(len(self.Context.waiting_subtasks)))
            failed = [task for task in tasks if task.status != ctt.Status.SUCCESS]
            if failed:
                # Explicit raise instead of `assert`: asserts are stripped under -O
                # and do not tell which subtask has failed.
                raise common.errors.TaskFailure(
                    "One of subtasks has failed: {}".format(
                        ", ".join("#{} ({})".format(task.id, task.status) for task in failed)
                    )
                )
            logging.info('All subtasks finished successfully')
        self.Context.waiting_subtasks = []

        with self.memoize_stage["report_paths"]:
            self.__PrepareReportPaths()
        with self.memoize_stage["events"]:
            self.__GetDailyEvents()
        with self.memoize_stage["profiles"]:
            self.__GetDailyProfiles()
        with self.memoize_stage["reports"]:
            self.__GetReports()
        with self.memoize_stage["transfuse"]:
            if self.Parameters.transfuse_to_clickhouse:
                self.__UploadReportData()

        logging.info('Exiting task')

    def __GetYtClient(self):
        """Return a YT client, creating it on first use."""
        # Imported here because the package comes from the PipEnvironment
        # declared in Requirements.
        from yt.wrapper import YtClient
        if self.__ytClient is None:
            self.__ytClient = YtClient(proxy=self.Parameters.yt_database, token=self.__GetYtToken())
        return self.__ytClient

    def __DateReportToStr(self, dateReport):
        """Format a date/datetime as YYYY-MM-DD."""
        return dateReport.strftime("%Y-%m-%d")

    def __GetDailyReportPath(self, strDateReport):
        """Return the YT directory of the report for the given date string."""
        return self.Parameters.yt_work_directory + "/" + strDateReport

    def __GetExpirationDate(self, daysToExpire):
        """Return today's date shifted forward by ``daysToExpire`` days."""
        return date.today() + timedelta(days=daysToExpire)

    def __ReadTextFile(self, strFileName):
        """Return the contents of a file located next to this module."""
        fullPath = os.path.join(os.path.dirname(__file__), strFileName)
        with open(fullPath, "r") as input_file:
            return input_file.read()

    def __RunYqlSubtask(self, query, description):
        """Create and enqueue a RunYQL2 subtask for ``query``; return the subtask."""
        logging.info('Launching another YQL subtask: {}'.format(query))

        subtask = RunYQL2(
            self,
            description="{} (subtask for {})".format(description, self.id),
            owner=self.owner,  # required fields cannot be empty
        )

        # NOTE(review): this sets an attribute on the task object, not on
        # subtask.Parameters - confirm that RunYQL2 actually reads it.
        subtask.fail_on_any_error = True

        subtask.Parameters.query = query
        # NOTE(review): the vault name is hard-coded here although this task has
        # yql_token_vault_name / yql_token_vault_owner parameters - confirm intent.
        subtask.Parameters.yql_token_vault_name = "YQL_TOKEN"
        subtask.Parameters.trace_query = True
        subtask.Parameters.retry_period = 60 * 5
        subtask.Parameters.publish_query = True

        logging.info("Saving and queueing subtask")
        subtask.save()
        subtask.enqueue()
        return subtask

    def __PrepareReportPaths(self):
        """Create the daily report directory in YT and set its attributes."""
        yt = self.__GetYtClient()
        logging.info("Prepearing path for daily report: " + self.Context.reportPath)
        if self.Parameters.cleanup_on_start:
            logging.info("Eliminating destination path")
            yt.remove(self.Context.reportPath, recursive=True, force=True)
        if not yt.exists(self.Context.reportPath):
            yt.create("map_node", path=self.Context.reportPath, recursive=True, ignore_existing=True, attributes=None)

        dtReportExpire = self.__GetExpirationDate(self.reportExpireDays)
        logging.info("Setting up report expiration date:" + str(dtReportExpire))
        yt.set(self.Context.reportPath + "/@expiration_time", str(dtReportExpire))

        logging.info("Setting up custom attributes...")
        yt.set(self.Context.reportPath + "/@adfox_report_date", self.Context.strDateReport)
        yt.set(self.Context.reportPath + "/@adfox_processor", "Sandbox executor")

        # inspect.stack()[0][3] is the name of the currently running function.
        logging.info("Finishing stage '{}'".format(inspect.stack()[0][3]))

    def __GetEventsTablePath(self):
        """Return the YT path of the daily events table."""
        return self.Context.reportPath + '/' + 'events'

    def __GetDailyEvents(self):
        """Launch the events extraction query unless its target table already exists."""
        yt = self.__GetYtClient()
        subs = []

        if not yt.exists(self.__GetEventsTablePath()):
            qryGetEvents = self.__ReadTextFile('Aud.GetEventsRefined.sql').format(
                TARGET_TABLE=self.__GetEventsTablePath(),
                REPORT_DATE=self.Context.strDateReport,
                QUERY_POSTFIX='LIMIT 1000' if self.Parameters.is_test_launch else '',
            )
            subs.append(self.__RunYqlSubtask(query=qryGetEvents, description="Extracting daily amacs events"))

        self.__WaitForSubTasks(subs)

    def __GetProfilesTablePath(self):
        """Return the YT path of the daily profiles table."""
        return self.Context.reportPath + '/' + 'profiles'

    def __GetDailyProfiles(self):
        """Launch the profiles extraction query unless its target table already exists."""
        yt = self.__GetYtClient()
        subs = []

        if not yt.exists(self.__GetProfilesTablePath()):
            qryGetProfiles = self.__ReadTextFile('Aud.GetDailyProfilesRefined.sql').format(
                TARGET_TABLE=self.__GetProfilesTablePath(),
                REPORT_DATE=self.Context.strDateReport,
                EVENTS_TABLE=self.__GetEventsTablePath(),
                QUERY_POSTFIX='LIMIT 1000' if self.Parameters.is_test_launch else '',
            )
            subs.append(self.__RunYqlSubtask(query=qryGetProfiles, description="Extracting daily profiles"))

        self.__WaitForSubTasks(subs)

    def __WaitForSubTasks(self, subs_array):
        """Remember the given subtasks in the context and wait for all of them.

        Does nothing except logging when ``subs_array`` is empty; otherwise
        raises ``sdk2.WaitTask`` and therefore does not return.
        """
        if not subs_array:
            logging.info("Subtasks array is empty. Nothing to wait for.")
            return

        self.Context.waiting_subtasks = [task.id for task in subs_array]
        logging.info("Waiting for subtasks [{}]".format(",".join([str(task_id) for task_id in self.Context.waiting_subtasks])))

        waited_statuses = set(common.utils.chain(Status.Group.FINISH, Status.Group.BREAK))
        raise sdk2.WaitTask(self.Context.waiting_subtasks, waited_statuses, wait_all=True)

    def __GetAudienceReportPath(self):
        """Return the YT path of the Ya.Audience segments report table."""
        return self.Context.reportPath + '/' + 'auditories_report'

    def __GetDmpReportPath(self):
        """Return the YT path of the DMP segments report table."""
        return self.Context.reportPath + '/' + 'dmp_report'

    def __GetReports(self):
        """Launch the DMP and Ya.Audience report queries whose target tables are missing."""
        yt = self.__GetYtClient()
        subs = []

        if not yt.exists(self.__GetDmpReportPath()):
            qryDmpReport = self.__ReadTextFile('Aud.BuildSegmentsDMP.sql').format(
                TARGET_TABLE=self.__GetDmpReportPath(),
                REPORT_DATE=self.Context.strDateReport,
                PROFILE_SOURCE_TABLE=self.__GetProfilesTablePath(),
                EVENTS_SOURCE_TABLE=self.__GetEventsTablePath()
            )
            subs.append(self.__RunYqlSubtask(query=qryDmpReport, description="Building DMP segments report"))

        if not yt.exists(self.__GetAudienceReportPath()):
            qryYaAudReport = self.__ReadTextFile('Aud.BuildSegmentsYaAud.sql').format(
                TARGET_TABLE=self.__GetAudienceReportPath(),
                REPORT_DATE=self.Context.strDateReport,
                PROFILE_SOURCE_TABLE=self.__GetProfilesTablePath(),
                EVENTS_SOURCE_TABLE=self.__GetEventsTablePath()
            )
            subs.append(self.__RunYqlSubtask(query=qryYaAudReport, description="Building Ya.Audience segments report"))

        self.__WaitForSubTasks(subs)

    def __RunTranfuser(self, strSrcTable, strTransfuseFormat, strCheckQuery):
        """Create and enqueue an AdfoxYt2ChTransfuser subtask; return the subtask.

        ``strSrcTable`` is the source YT table, ``strTransfuseFormat`` and
        ``strCheckQuery`` are passed to the subtask as ``tr_format`` and
        ``tr_check_query``. The destination is the ``ch_table_name`` parameter.
        """
        logging.info("Starting another transfuse task [{}]->[{}]".format(strSrcTable, self.Parameters.ch_table_name))

        logging.info("Creating subtask")
        subtask = AdfoxYt2ChTransfuser(
            self,
            description="Robotizied data transfuser (subtask for {})".format(self.id),
            owner=self.owner,  # required fields cannot be empty
        )

        # NOTE(review): this sets an attribute on the task object, not on
        # subtask.Parameters - confirm that the transfuser actually reads it.
        subtask.fail_on_any_error = True

        # NOTE(review): in this task the yl_token_* parameters hold the YT token vault,
        # yet the YQL token vault is passed to the subtask's yl_token_* here.
        # Left as is because the transfuser is not visible - confirm which one it expects.
        subtask.Parameters.yl_token_vault_name = self.Parameters.yql_token_vault_name
        subtask.Parameters.yl_token_vault_owner = self.Parameters.yql_token_vault_owner
        subtask.Parameters.yt_database = self.Parameters.yt_database
        subtask.Parameters.yt_table_name = strSrcTable

        subtask.Parameters.ch_host_name = self.Parameters.ch_host_name
        subtask.Parameters.ch_user_name = self.Parameters.ch_user_name
        subtask.Parameters.ch_password_vault_name = self.Parameters.ch_password_vault_name
        subtask.Parameters.ch_password_vault_owner = self.Parameters.ch_password_vault_owner
        subtask.Parameters.ch_table_name = self.Parameters.ch_table_name

        subtask.Parameters.tr_format = strTransfuseFormat
        subtask.Parameters.tr_check_query = strCheckQuery
        subtask.Parameters.tr_block_size = 1000000
        logging.info("Saving and queueing subtask")
        subtask.save()
        subtask.enqueue()
        return subtask

    def __UploadReportData(self):
        """Launch transfer of both report tables to ClickHouse and wait for it.

        Always raises ``sdk2.WaitTask`` (through ``__WaitForSubTasks``), so it
        never returns normally.
        """
        subs = []

        # Transfusing Ya.Audience report
        subs.append(self.__RunTranfuser(
            strSrcTable=self.__GetAudienceReportPath(),
            strTransfuseFormat="owner_id:Uint64;site_id:Uint64;section_id:Uint64;place_id:Uint64;dmp_id=0;segment_id:Uint64;date:String;requests_count:Uint64",
            strCheckQuery="select count(*) as count from {} where date='{}' and dmp_id=0".format(self.Parameters.ch_table_name, self.Context.strDateReport)
        ))

        # Transfusing DMP report
        subs.append(self.__RunTranfuser(
            strSrcTable=self.__GetDmpReportPath(),
            strTransfuseFormat="owner_id:Uint64;site_id:Uint64;section_id:Uint64;place_id:Uint64;dmp_id:Uint64;segment_id:Uint64;date:String;requests_count:Uint64",
            strCheckQuery="select count(*) as count from {} where date='{}' and dmp_id<>0".format(self.Parameters.ch_table_name, self.Context.strDateReport)
        ))

        # Same bookkeeping and wait as in the other stages; `subs` is never empty here.
        self.__WaitForSubTasks(subs)
