# -*- coding: utf-8 -*-

import datetime
import logging
import requests
import time
import urllib
from collections import Sequence

from sandbox import sdk2, common
import sandbox.common.types.resource as ctr

# Default number of attempts made for one YQL HTTP request before giving up
# (default of the ``retries_count`` argument of the request helpers below).
RETRIES_COUNT = 5
# Default pause between attempts; the value is handed to time.sleep(), i.e. seconds.
RETRIES_INTERVAL = 5


class RunYQL2(sdk2.Task):
    """
        Executes YQL query (SDK2 task)
    """
    # Base URL every YQL HTTP API request and download link in this class is built on.
    YQL_API_BASE_URL = "https://yql.yandex.net/api/v2"
    # Alternative API host. NOTE(review): not referenced by any method of this class
    # in this file -- confirm whether subclasses or other modules rely on it.
    YQL_API_PROXIED_BASE_URL = "https://yql.yandex-team.ru/api/v2"
    # Base URL of the YQL web UI; used to build the operation links shown in task info.
    YQL_WEBUI_BASE_URL = "https://yql.yandex-team.ru"
    # Operation statuses treated as a successful completion.
    SUCCESS_YQL_STATUSES = [
        "COMPLETED",
    ]
    # Operation statuses that make the task fail with TaskFailure.
    FAIL_YQL_STATUSES = [
        "ABORTED",
        "ERROR"
    ]
    # Any status outside of this list means the operation is still in progress
    # and the task keeps waiting for it.
    FINAL_YQL_STATUSES = SUCCESS_YQL_STATUSES + FAIL_YQL_STATUSES

    class Requirements(sdk2.Task.Requirements):
        # Use multislot hosts
        # https://wiki.yandex-team.ru/sandbox/cookbook/#cores1multislot
        # The task only talks to the YQL HTTP API, so a single core is requested.
        cores = 1
        # NOTE(review): presumably megabytes (i.e. 8 GiB) -- confirm against sdk2 docs.
        ram = 8192

        # Empty caches declaration.
        # NOTE(review): presumably part of the multislot recipe linked above
        # (no shared caches required) -- confirm against the cookbook.
        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Parameters):
        # Input and output parameters of the task.
        # NOTE(review): the ``with <param>.value[...]`` blocks presumably make the nested
        # parameters visible only for that value of the parent parameter -- confirm
        # against the sdk2 parameters documentation.

        # NOTE(review): presumably seconds (i.e. 25 minutes) -- confirm against sdk2 docs.
        kill_timeout = 25 * 60
        do_not_restart = True

        # Inline query text; used by _form_query() when the query is not taken
        # from a resource.
        query = sdk2.parameters.String(
            "YQL Query",
            multiline=True,
            required=False
        )
        # Extra placeholder -> value pairs. _form_query() merges them over the built-in
        # placeholders, so a custom key equal to a built-in one overrides it.
        custom_placeholders = sdk2.parameters.Dict(
            "Custom placeholders (keys like %KEY%)"
        )
        # Vault coordinates of the OAuth token; _get_session() falls back to the task
        # owner and to "YQL_TOKEN" when the respective value is empty.
        yql_token_vault_owner = sdk2.parameters.String(
            "Token owner (fill in if token owner is not the task owner)",
            default="",
            required=False
        )
        yql_token_vault_name = sdk2.parameters.String(
            "Token secret name",
            default="YQL_TOKEN",
            required=True
        )

        # When enabled, on_execute() polls the operation status until it is final;
        # otherwise the task finishes right after the query is submitted.
        trace_query = sdk2.parameters.Bool(
            "Trace query",
            default=False
        )

        # Selects the request "type": "SQLv1" when enabled, "SQL" otherwise.
        use_v1_syntax = sdk2.parameters.Bool(
            "Use SQLv1 syntax",
            default=True
        )

        # When enabled, the request "type" is "CLICKHOUSE" regardless of use_v1_syntax.
        use_clickhouse_syntax = sdk2.parameters.Bool(
            "Use ClickHouse syntax; Overrides use_v1_syntax parameter",
            default=False
        )

        with trace_query.value[True]:
            # Passed to sdk2.WaitTime between two status checks.
            retry_period = sdk2.parameters.Integer(
                "Time period to check request status (in seconds)",
                default=60 * 5
            )

        # When enabled, on_execute() adds "cluster" and "clusterType" to the request.
        single_cluster_mode = sdk2.parameters.Bool(
            "Use single cluster mode (YDB only)",
            default=False
        )

        with single_cluster_mode.value[True]:
            cluster_name = sdk2.parameters.String(
                "Cluster name",
                required=True
            )

        with sdk2.parameters.Group("Result parameters"):
            # Adds a "Public link" (built from the operation share id) to task info.
            publish_query = sdk2.parameters.Bool(
                "Publish executed query and show link in task results",
                default=False
            )
            with trace_query.value[True]:
                # Download links are only published from the status-tracking branch
                # of on_execute(), hence the dependency on trace_query.
                publish_download_link = sdk2.parameters.Bool(
                    "Publish download link",
                    default=False,
                )
                with publish_download_link.value[True]:
                    # The chosen value is substituted verbatim into the ``format=``
                    # query argument of the download URLs.
                    with sdk2.parameters.String("Download format file", multiline=True) as download_format:
                        download_format.values["CSV"] = "CSV"
                        download_format.values["JSON"] = "JSON"
                        download_format.values["TSV"] = "TSV"
                        download_format.values["TSKV"] = "TSKV"
                        download_format.values["YSON_TEXT"] = "YSON_TEXT"
                        download_format.values["YSON_BINARY"] = "YSON_BINARY"
                        download_format.values["XLSX"] = "XLSX"
                # When enabled, download links are built from the operation share id
                # instead of the operation id.
                public_download_link = sdk2.parameters.Bool(
                    "Make download link public",
                    default=False,
                )

        with sdk2.parameters.Group("Additional options") as options_block:
            # When enabled, _form_query() reads the query text from query_resource
            # instead of the ``query`` parameter.
            obtain_query_from_resource = sdk2.parameters.Bool(
                "Obtain query from specified resource",
                default=False
            )
            with obtain_query_from_resource.value[True]:
                # When enabled, on_save() fills query_resource with the first READY
                # resource of query_resource_type that has attribute released=stable.
                auto_search = sdk2.parameters.Bool(
                    "Search for resource automatically",
                    default=False
                )
                with auto_search.value[False]:
                    query_resource = sdk2.parameters.Resource(
                        "Resource with query"
                    )
                with auto_search.value[True]:
                    query_resource_type = sdk2.parameters.String(
                        "Resource type",
                        default="YQL_QUERY_RESOURCE"
                    )

        with sdk2.parameters.Group("Additional files") as files_block:
            # file name -> "url:<link>" or "raw:<content>"; any other value makes the
            # task fail in _get_aux_file_content().
            add_files = sdk2.parameters.Dict("Attach raw content or url: url:<link>/raw:<content>")

        with sdk2.parameters.Output:
            # Id returned by the YQL API for the submitted operation.
            result_operation_id = sdk2.parameters.String(
                "Operation id",
            )

            with publish_download_link.value[True]:
                # Filled in by the _publish_*_download_link() helpers.
                results_sample_download_url = sdk2.parameters.Url("Sample download URL")
                full_results_download_url = sdk2.parameters.Url("Full results download URL")

    def on_execute(self):
        """
            Submits the query to YQL and, when tracing is enabled, waits for its completion.

            Submission happens inside the ``send_request`` memoize stage
            (NOTE(review): presumably executed only once even though the task is
            re-entered after every wait -- confirm against sdk2 docs). The resulting
            operation id is stored in the ``result_operation_id`` output parameter.

            With ``trace_query`` enabled the operation status is requested on every
            entry: a non-final status suspends the task via ``_sleep()``, a failed
            status raises ``TaskFailure``, and a successful one optionally publishes
            the download links.
        """
        with self.memoize_stage.send_request:
            query = self._form_query()

            # ClickHouse syntax takes precedence over the SQLv1 switch.
            if self.Parameters.use_clickhouse_syntax:
                api_type = "CLICKHOUSE"
            elif self.Parameters.use_v1_syntax:
                api_type = "SQLv1"
            else:
                api_type = "SQL"

            attributes = {
                'user_agent': "YQL Sandbox ({name} task)".format(name=self.__class__.__name__)
            }
            request = {
                "content": query,
                "action": "RUN",
                "type": api_type,
                'attributes': attributes,
            }

            # ``or {}`` keeps the loop safe when the Dict parameter is unset.
            # The "files" key is only added when there is at least one attachment.
            for file_name, value in (self.Parameters.add_files or {}).items():
                # Named ``file_type`` to avoid shadowing the ``type`` builtin.
                file_type, data = self._get_aux_file_content(value)
                request.setdefault("files", []).append({
                    "name": file_name,
                    "type": file_type,
                    "data": data,
                })

            if self.Parameters.single_cluster_mode:
                request['cluster'] = self.Parameters.cluster_name
                request['clusterType'] = 'KIKIMR_MVP'  # TODO: fix API YQL-12098

            response = self._access_api(
                method="POST",
                url="{yql_api_base_url}/operations".format(yql_api_base_url=self.YQL_API_BASE_URL),
                json=request
            )

            operation_id = response["id"]
            self._add_link("Operation", operation_id)
            self.Parameters.result_operation_id = operation_id

            if self.Parameters.publish_query:
                self._publish_query(operation_id)

        if self.Parameters.trace_query:
            response = self._access_api(
                method="GET",
                url="{yql_api_base_url}/operations/{operation_id}/meta".format(
                    yql_api_base_url=self.YQL_API_BASE_URL,
                    operation_id=self.Parameters.result_operation_id
                ),
            )
            operation_status = response["status"]

            if operation_status not in self.FINAL_YQL_STATUSES:
                # Raises sdk2.WaitTime, so nothing below runs for a pending operation.
                self._sleep()
            elif operation_status in self.FAIL_YQL_STATUSES:
                raise common.errors.TaskFailure("YQL query evaluation failed")

            # Only reachable for a successfully completed operation.
            if self.Parameters.publish_download_link:
                self._publish_download_links(self.Parameters.result_operation_id)

    def on_save(self):
        """
            Resolves the query resource when automatic search is requested.

            With ``auto_search`` enabled, ``query_resource`` is overwritten with the
            first READY resource of type ``query_resource_type`` whose ``released``
            attribute equals ``stable``. Otherwise nothing is changed.
        """
        if not self.Parameters.auto_search:
            return

        resource_type = self.Parameters.query_resource_type
        candidates = sdk2.Resource[resource_type].find(
            state=ctr.State.READY,
            attrs={'released': 'stable'}
        )
        self.Parameters.query_resource = candidates.first()

    def _get_session(self):
        """
            Builds a ``requests`` session authorized for the YQL API.

            The OAuth token is read from Vault; an empty owner parameter falls back
            to the task owner and an empty secret name falls back to "YQL_TOKEN".

            :return: ``requests.Session`` with User-Agent, Authorization and
                     Content-Type headers preset
        """
        params = self.Parameters
        vault_owner = params.yql_token_vault_owner or self.owner
        vault_name = params.yql_token_vault_name or "YQL_TOKEN"
        oauth_token = sdk2.Vault.data(vault_owner, vault_name)

        default_headers = {
            "User-Agent": "YQL Sandbox ({name} task)".format(name=self.__class__.__name__),
            "Authorization": "OAuth {token}".format(token=oauth_token),
            "Content-Type": "application/json",
        }

        http_session = requests.Session()
        http_session.headers.update(default_headers)
        return http_session

    def _access_api(self, method, url, raise_on_error=True, retries_count=RETRIES_COUNT,
            retries_interval=RETRIES_INTERVAL, **kwargs):
        """
            Performs a YQL API request and returns the decoded JSON response.

            Responses with an HTTP error status are retried up to ``retries_count``
            times with ``retries_interval`` seconds between attempts. Any other
            error (connection problems, invalid JSON, ...) is not retried.

            :param method: HTTP method name
            :param url: full request URL
            :param raise_on_error: when False, failures are logged and None is
                                   returned instead of raising
            :param retries_count: maximum number of attempts; zero, negative or
                                  empty values mean that no request is made
            :param retries_interval: pause between attempts, in seconds
            :param kwargs: passed through to ``requests.Session.request``
            :return: decoded JSON body, or None on failure when ``raise_on_error``
                     is False
            :raises common.errors.TaskFailure: all attempts ended with an HTTP error
                                               and ``raise_on_error`` is True
        """
        session = self._get_session()
        # Guards against a negative counter, which would otherwise never reach zero.
        attempts_left = max(retries_count or 0, 0)
        try:
            while attempts_left > 0:
                try:
                    response = session.request(
                        method=method,
                        url=url,
                        **kwargs
                    )
                    response.raise_for_status()
                    return response.json()
                except requests.HTTPError as e:
                    attempts_left -= 1
                    logging.warning(
                        "Api request failed: method -- %s, url -- %s, error -- %s, attempts left -- %s",
                        method, url, e, attempts_left
                    )
                    # No point in waiting when there is nothing left to retry.
                    if attempts_left > 0:
                        time.sleep(retries_interval)
                except Exception:
                    if not raise_on_error:
                        logging.exception("Failed to access api")
                        return None
                    # Bare ``raise`` keeps the original traceback (``raise e`` drops it
                    # on Python 2).
                    raise
        finally:
            # Release pooled connections; a new session is created for every call.
            session.close()

        if not raise_on_error:
            logging.warning("Failed to access api")
            return None
        raise common.errors.TaskFailure(
            "Access api failed: method -- {method}, url -- {url}".format(
                method=method,
                url=url
            )
        )

    def _sleep(self):
        """
            Suspends the task by raising ``sdk2.WaitTime`` with the ``retry_period``
            parameter value; never returns normally.
        """
        wait_period = self.Parameters.retry_period
        raise sdk2.WaitTime(wait_period)

    def _form_query(self):
        """
            Returns the query text with all placeholders substituted.

            The text comes from ``query_resource`` when ``obtain_query_from_resource``
            is enabled and from the ``query`` parameter otherwise. Built-in
            placeholders are derived from the current local and UTC time and from
            the task owner; ``custom_placeholders`` are merged on top of them and
            override built-in keys with the same name.

            :return: query text ready to be sent to YQL
            :raises common.errors.TaskFailure: the query has to be read from a
                                               resource, but no resource is set
        """
        if self.Parameters.obtain_query_from_resource:
            query_resource = self.Parameters.query_resource
            if query_resource is None:
                # Fail with a readable message instead of crashing inside ResourceData.
                raise common.errors.TaskFailure("Resource with query is not specified or was not found")
            query_resource_path = sdk2.ResourceData(query_resource).path
            logging.info("Query resource path: %s", str(query_resource_path))
            with query_resource_path.open() as f:
                query = f.read()
        else:
            query = self.Parameters.query

        now = datetime.datetime.now()
        utc_now = datetime.datetime.utcnow()
        yesterday = now - datetime.timedelta(days=1)
        two_days_ago = now - datetime.timedelta(days=2)
        week_ago = now - datetime.timedelta(days=7)
        two_weeks_ago = now - datetime.timedelta(days=14)
        yql_query_placeholders = {
            "%YEAR%": now.year,
            "%MONTH%": "%02d" % now.month,
            "%DAY%": "%02d" % now.day,
            "%HOUR%": "%02d" % now.hour,
            "%MINUTE%": "%02d" % now.minute,
            "%SECOND%": "%02d" % now.second,
            "%UTC_YEAR%": utc_now.year,
            "%UTC_MONTH%": "%02d" % utc_now.month,
            "%UTC_DAY%": "%02d" % utc_now.day,
            "%UTC_HOUR%": "%02d" % utc_now.hour,
            "%UTC_MINUTE%": "%02d" % utc_now.minute,
            "%UTC_SECOND%": "%02d" % utc_now.second,
            "%YESTERDAY_YEAR%": yesterday.year,
            "%YESTERDAY_MONTH%": "%02d" % yesterday.month,
            "%YESTERDAY_DAY%": "%02d" % yesterday.day,
            "%TWO_DAYS_AGO_YEAR%": two_days_ago.year,
            "%TWO_DAYS_AGO_MONTH%": "%02d" % two_days_ago.month,
            "%TWO_DAYS_AGO_DAY%": "%02d" % two_days_ago.day,
            "%WEEK_AGO_YEAR%": week_ago.year,
            "%WEEK_AGO_MONTH%": "%02d" % week_ago.month,
            "%WEEK_AGO_DAY%": "%02d" % week_ago.day,
            "%TWO_WEEKS_AGO_YEAR%": two_weeks_ago.year,
            "%TWO_WEEKS_AGO_MONTH%": "%02d" % two_weeks_ago.month,
            "%TWO_WEEKS_AGO_DAY%": "%02d" % two_weeks_ago.day,
            # Unix timestamp of the local "now".
            "%TIMESTAMP%": int(time.mktime(now.timetuple())),
            "%USER%": self.owner,
            "%OWNER%": self.owner
        }

        custom_placeholders = self.Parameters.custom_placeholders
        if custom_placeholders:
            yql_query_placeholders.update(custom_placeholders)

        try:
            text_types = basestring  # Python 2: covers both str and unicode
        except NameError:
            text_types = str

        for key, value in yql_query_placeholders.items():
            # Only non-text values are converted: on Python 2 ``str()`` of a unicode
            # value with non-ASCII characters raises UnicodeEncodeError.
            if not isinstance(value, text_types):
                value = str(value)
            query = query.replace(key, value)

        return query

    def _publish_download_links(self, operation_id):
        """
            Publishes the sample and the full results download links of an operation.

            With ``public_download_link`` enabled the links are built from the
            operation share id. If the share id cannot be obtained, a note is added
            to task info and the links are built from the operation id itself.

            :param operation_id: id of the completed YQL operation
        """
        link_id = operation_id
        if self.Parameters.public_download_link:
            share_id = self._get_share_id(operation_id)
            if share_id is None:
                # _get_share_id() reports failures as None; using it as an id would
                # produce ".../operations/None/..." links and requests.
                self.set_info(
                    "Error getting operation public link, download links are not public"
                )
            else:
                link_id = share_id

        self._publish_sample_download_link(link_id)
        self._publish_full_results_download_link(link_id)

    def _publish_sample_download_link(self, operation_id):
        """
            Adds the link to the first result (``write_index=0``) of the operation to
            task info and stores it in the ``results_sample_download_url`` output.

            :param operation_id: operation id or share id to build the link from
        """
        url_template = (
            "{yql_api_base_url}/operations/{operation_id}/results_data?format={format}&write_index=0"
        )
        sample_url = url_template.format(
            yql_api_base_url=self.YQL_API_BASE_URL,
            operation_id=operation_id,
            format=self.Parameters.download_format,
        )

        # The message is HTML, hence escaping is switched off.
        info_html = "Sample download link: <a href=\"{download_url}\">{download_url}</a>".format(
            download_url=sample_url,
        )
        self.set_info(info_html, do_escape=False)

        self.Parameters.results_sample_download_url = sample_url

    def _publish_full_results_download_link(self, operation_id):
        """
            Publishes the link to the table holding the full results of the operation.

            The table is located through ``_get_data_yt_reference()`` and the
            ``table_read`` URL is resolved to its final form with
            ``_get_download_final_url()``. Every outcome is reported in task info;
            on success the URL is also stored in ``full_results_download_url``.

            :param operation_id: operation id or share id to look the results up by
        """
        yt_cluster, yt_path, refs_total = self._get_data_yt_reference(operation_id)
        if not refs_total:
            self.set_info("There is no full results")
            return

        if refs_total > 1:
            self.set_info("There is more than one full result. We take the last")

        url_args = {
            "yql_api_base_url": self.YQL_API_BASE_URL,
            "cluster": urllib.quote(yt_cluster),
            "path": urllib.quote(yt_path),
            "format": self.Parameters.download_format,
        }
        table_read_url = (
            "{yql_api_base_url}/table_read?cluster={cluster}&path={path}&format={format}&limit=0"
        ).format(**url_args)

        resolved_url = self._get_download_final_url(table_read_url)
        if resolved_url:
            # The message is HTML, hence escaping is switched off.
            info_html = "Full result download link: <a href=\"{final_url}\">{final_url}</a>".format(
                final_url=resolved_url,
            )
            self.set_info(info_html, do_escape=False)
            self.Parameters.full_results_download_url = resolved_url
        else:
            self.set_info("Results table is not downloadable")

    def _get_data_yt_reference(self, operation_id):
        """
            Locates the YT table referenced by the last result of the operation.

            The response of ``/operations/<id>/results?filters=DATA`` is walked as
            ``data[-1] -> Write[-1] -> Ref[-1] -> Reference``, where ``Reference`` is
            expected to look like ``["yt", <cluster>, <path>, ...]``. Any missing,
            empty or unexpectedly shaped level yields "no reference" instead of an
            exception.

            :param operation_id: operation id or share id
            :return: ``(cluster, path, count)`` where ``count`` is the largest number
                     of alternatives seen on any of the three levels (more than one
                     means the last one was picked), or ``(None, None, 0)`` when
                     there is no YT reference
        """
        result = self._access_api(
            method="GET",
            url="{yql_api_base_url}/operations/{operation_id}/results?filters=DATA".format(
                yql_api_base_url=self.YQL_API_BASE_URL,
                operation_id=operation_id,
            )
        )

        no_reference = (None, None, 0)

        def last_mapping(items):
            # Last element of a non-empty JSON array if it is an object, else None.
            # Strings are deliberately not treated as arrays.
            if isinstance(items, (list, tuple)) and items and isinstance(items[-1], dict):
                return items[-1]
            return None

        data_refs = result.get("data") if isinstance(result, dict) else None
        last_data = last_mapping(data_refs)
        if last_data is None:
            return no_reference

        write_refs = last_data.get("Write")
        last_write = last_mapping(write_refs)
        if last_write is None:
            return no_reference

        last_write_ref = last_write.get("Ref")
        last_ref = last_mapping(last_write_ref)
        if last_ref is None:
            return no_reference

        ref_content = last_ref.get("Reference")
        # Type, cluster and path are all required.
        if not isinstance(ref_content, (list, tuple)) or len(ref_content) < 3:
            return no_reference

        ref_type, cluster, path = ref_content[:3]
        if ref_type != "yt":
            return no_reference

        # Every level the last element was taken from takes part in the count
        # (the "Write" level used to be left out by mistake).
        total_ref_count = max(len(data_refs), len(write_refs), len(last_write_ref))

        return cluster, path, total_ref_count

    def _get_download_final_url(
            self, download_url,
            retries_count=RETRIES_COUNT, retries_interval=RETRIES_INTERVAL
    ):
        """
            Resolves a download URL to its final form with a HEAD request.

            HTTP error statuses other than 400 are retried up to ``retries_count``
            times with ``retries_interval`` seconds between attempts. Status 400
            and any non-HTTP error end the lookup immediately.

            :param download_url: URL to resolve
            :param retries_count: maximum number of attempts; zero, negative or
                                  empty values mean that no request is made
            :param retries_interval: pause between attempts, in seconds
            :return: ``response.url`` of the successful request, or None when the
                     URL could not be resolved
        """
        logging.info("Trying to map URL to final form: %s", download_url)

        session = self._get_session()
        # Guards against a negative counter, which would otherwise never reach zero.
        attempts_left = max(retries_count or 0, 0)
        try:
            while attempts_left > 0:
                try:
                    response = session.request(
                        method="HEAD",
                        url=download_url,
                    )
                    response.raise_for_status()

                    final_url = response.url
                    logging.info(
                        "Get download final URL status: %s (%s -> %s)",
                        response.status_code, download_url, final_url,
                    )
                    return final_url
                except requests.HTTPError as e:
                    status_code = e.response.status_code if e.response is not None else None
                    logging.warning("Get download final URL status: %s (%s)", status_code, download_url)
                    if status_code == 400:
                        # See https://nda.ya.ru/3VmYmP
                        return None

                    attempts_left -= 1
                    # No point in waiting when there is nothing left to retry.
                    if attempts_left > 0:
                        time.sleep(retries_interval)
                except Exception as e:
                    logging.warning("Check table request error: %s (%s)", e, download_url)
                    return None
        finally:
            # Release pooled connections; a new session is created for every call.
            session.close()

        logging.warning("Get download final URL failed, no attempts left (%s)", download_url)
        return None

    def _get_share_id(self, operation_id):
        """
            Requests the share id of an operation.

            :param operation_id: YQL operation id
            :return: decoded API response, or None when the request failed
                     (errors are not raised)
        """
        share_id_url = "{yql_api_base_url}/operations/{operation_id}/share_id".format(
            yql_api_base_url=self.YQL_API_BASE_URL,
            operation_id=operation_id
        )
        return self._access_api(method="GET", url=share_id_url, raise_on_error=False)

    def _publish_query(self, operation_id):
        """
            Adds a public link to the operation to task info, or an error note when
            the share id could not be obtained.

            :param operation_id: YQL operation id
        """
        share_id = self._get_share_id(operation_id)
        if share_id is not None:
            self._add_link("Public link", share_id)
            return

        self.set_info(
            "Error getting operation public link"
        )

    def _add_link(self, title, operation_id):
        """
            Adds a titled HTML link to the operation page of the YQL web UI to task info.

            :param title: text put before the link
            :param operation_id: operation id or share id the link points to
        """
        link_target = "{yql_webui_base_url}/Operations/{operation_id}".format(
            yql_webui_base_url=self.YQL_WEBUI_BASE_URL,
            operation_id=operation_id
        )
        # The message is HTML, hence escaping is switched off.
        info_html = "{title}: <a href=\"{operation_url}\">{operation_url}</a>".format(
            title=title,
            operation_url=link_target
        )
        self.set_info(info_html, do_escape=False)

    @staticmethod
    def _get_aux_file_content(content_raw):
        """
            Splits an "Additional files" value into the attachment type and its data.

            :param content_raw: value starting with ``url:`` or ``raw:``
            :return: ``("URL", <link>)`` or ``("CONTENT", <content>)`` with the
                     prefix stripped
            :raises common.errors.TaskFailure: the value has neither prefix
        """
        PREFIX_URL = "url:"
        PREFIX_RAW = "raw:"
        # Checked in this order; the first matching prefix wins.
        known_prefixes = (
            (PREFIX_URL, "URL"),
            (PREFIX_RAW, "CONTENT"),
        )
        for prefix, file_type in known_prefixes:
            if content_raw.startswith(prefix):
                return file_type, content_raw[len(prefix):]

        raise common.errors.TaskFailure(
            "Unexpected additional file format, should start with {} or {}".format(PREFIX_URL, PREFIX_RAW))
