"""
Simple yql queries pipeline
"""
from sandbox.projects.common.decorators import retries
import requests
import logging
import json

# Argument handed to the `retries` decorator that wraps each HTTP-calling
# method of YQL (_post_query, get_status, get_share_id, _get_results).
RETRIES = 10
# Module-level logger named after this module; used for debug traces of API responses.
_LOG = logging.getLogger(__name__)


class YQL(object):
    """
    YQL run query interface

    IMPORTANT Requires yandex-yt (pip) environment

    Interface methods
    * run(query) -> task_id
    * get_status(task_id) -> dict
    * get_share_id(task_id) -> parsed JSON response
    * results_iterator(task_id) -> iterator
    """
    POST_QUERY_URL = "https://yql.yandex.net/api/v2/operations"
    GET_TASK_STATUS_URL = "https://yql.yandex.net/api/v2/operations/{task_id}"
    GET_TASK_RESULTS_TABLE_URL = "https://yql.yandex.net/api/v2/operations/{task_id}/results"
    GET_TASK_SHARE_ID = "https://yql.yandex.net/api/v2/operations/{task_id}/share_id"

    # (connect, read) timeout in seconds applied to every HTTP call, so that a
    # stalled connection raises instead of blocking forever. Override on a
    # subclass or instance if needed; None disables the timeout entirely.
    REQUEST_TIMEOUT = (10, 300)

    def __init__(self, yql_token, yt_token):
        """
        Construct pipeline kit

        :param yql_token: OAuth token sent in the Authorization header of
            every YQL API request
        :param yt_token: token written into the yt.wrapper config when the
            results table is read (see results_iterator)
        """

        self.yql_headers = {
            "Authorization": "OAuth {}".format(yql_token),
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        self.yt_token = yt_token

    @retries(RETRIES)
    def _post_query(self, query):
        """
        POST an already serialized query to the YQL operations endpoint

        :param query: request body (run() passes a JSON string)
        :return: parsed JSON response (task digest)
        :raises requests.HTTPError: on a 4xx/5xx response
        """
        # NOTE(review): TLS certificate verification is disabled here
        # (verify=False) - confirm this is still required.
        r = requests.post(
            self.POST_QUERY_URL,
            data=query,
            headers=self.yql_headers,
            verify=False,
            timeout=self.REQUEST_TIMEOUT
        )
        # Log the raw body before raise_for_status() so that error responses
        # are visible in the debug log as well.
        _LOG.debug("Task digest: %s", r.content)
        r.raise_for_status()
        return r.json()

    # Example task digest, presumably as returned by _post_query:
    # {
    #   u'createdAt': u'2017-10-17T17:35:00.305Z',
    #   u'execMode': u'RUN',
    #   u'id': u'59e63f442447c1df248aaf58',
    #   u'queryData': {
    #     u'content': u'....',
    #     u'files': [],
    #     u'type': u'SQL'
    #   },
    #   u'status': u'PENDING',
    #   u'updatedAt': u'2017-10-17T17:35:00.339Z',
    #   u'username': u'realtim',
    #   u'version': 0
    # }

    def run(self, query):
        """
        Send query to YQL

        :param query: query text, sent as the "content" field with
            action "RUN" and type "SQLv1"
        :return: YQL task id
        :raises Exception: if the API response contains no "id"
        """

        api_query = json.dumps({
            "content": query,
            "action": "RUN",
            "type": "SQLv1"
        })

        digest = self._post_query(api_query)

        if "id" not in digest:
            raise Exception("No 'id' in API response\n{}".format(digest))

        return digest["id"]

    # Example task status digest, presumably as returned by get_status:
    # {
    #   u'createdAt': u'2017-10-17T17:35:00.305Z',
    #   u'execMode': u'RUN',
    #   u'id': u'59e63f442447c1df248aaf58',
    #   u'queryData': {
    #     u'content': u'<<YQL SCRIPT>>',
    #     u'files': [],
    #     u'type': u'SQL'
    #   },
    #   u'status': u'COMPLETED', # u'RUNNING'
    #   u'updatedAt': u'2017-10-17T17:37:45.499Z',
    #   u'username': u'realtim',
    #   u'version': 1000
    # }

    @retries(RETRIES)
    def get_status(self, task_id):
        """
        Ask YQL for task status

        :param task_id: str with YQL task id (run() result)
        :return: dict with sort of digest
        :raises requests.HTTPError: on a 4xx/5xx response
        """

        # NOTE(review): TLS certificate verification is disabled (verify=False).
        r = requests.get(
            self.GET_TASK_STATUS_URL.format(task_id=task_id),
            headers=self.yql_headers,
            verify=False,
            timeout=self.REQUEST_TIMEOUT
        )
        _LOG.debug("Task status: %s", r.content)
        r.raise_for_status()
        return r.json()

    @retries(RETRIES)
    def get_share_id(self, task_id):
        """
        Ask YQL for public id

        :param task_id: str with YQL task id (run() result)
        :return: parsed JSON response of the share_id endpoint
        :raises requests.HTTPError: on a 4xx/5xx response
        """

        # NOTE(review): TLS certificate verification is disabled (verify=False).
        r = requests.get(
            self.GET_TASK_SHARE_ID.format(task_id=task_id),
            headers=self.yql_headers,
            verify=False,
            timeout=self.REQUEST_TIMEOUT
        )
        _LOG.debug("Task share id: %s", r.content)
        r.raise_for_status()
        return r.json()

    @retries(RETRIES)
    def _get_results(self, task_id):
        """
        Download task results

        :param task_id: str with YQL task id (run() result)
        :return: Dict with results
        :raises requests.HTTPError: on a 4xx/5xx response
        """
        # NOTE(review): the key is spelled 'wait_competition' - verify against
        # the API whether 'wait_completion' was intended before changing it.
        params = {
            'filters': 'DATA',
            'version': 0,
            'wait_competition': 0,
            'columns_preset': False
        }
        # Unlike the other calls, this one keeps the default certificate
        # verification (no verify=False).
        r = requests.get(
            self.GET_TASK_RESULTS_TABLE_URL.format(task_id=task_id),
            params=params,
            headers=self.yql_headers,
            timeout=self.REQUEST_TIMEOUT
        )
        # Only the size is logged: the payload itself can be large.
        _LOG.debug("Recieved %d bytes of task results", len(r.content))
        r.raise_for_status()
        return r.json()

    def _get_results_table_path(self, task_id):
        """
        Get results table location info

        :param task_id: str with YQL task id (run() result)
        :return: base(yt), cluster(hahn), table_path(tmp/...)
        :raises Exception: if the response has no "status", the task is not
            COMPLETED, or the response carries no table reference
        """
        task_results = self._get_results(task_id)

        if "status" not in task_results:
            raise Exception("Bad response from YQL api for task {}: no \"status\".".format(task_id))
        if task_results["status"] != "COMPLETED":
            # Fall back to the requested task_id so that a response without
            # "id" cannot mask the real problem with a KeyError.
            raise Exception("YQL task {} is in bad state \"{}\"".format(
                task_results.get("id", task_id),
                task_results["status"])
            )

        # A response with missing/empty "data" or "Write" means there is no
        # results table; report it the same way as a missing "Ref".
        try:
            ref = task_results["data"][0]["Write"][0].get("Ref")
        except (KeyError, IndexError, TypeError):
            ref = None
        if not ref:
            raise Exception("Empty results of YQL task")

        return ref[0]['Reference']

    def results_iterator(self, task_id):
        """
        YQL query results table iterator

        :param task_id: str with YQL task id (run() result)
        :return: result of yt.read_table for the task results table,
            requested in "json" format
        """
        _base, cluster, task_table_path = self._get_results_table_path(task_id)

        # Imported lazily so that the module can be loaded without yandex-yt;
        # only this method needs it.
        import yt.wrapper as yt
        # NOTE: this changes the module-level yt.wrapper configuration, so it
        # affects every other user of yt.wrapper in the same process.
        yt.config["proxy"]["url"] = cluster
        yt.config["token"] = self.yt_token

        return yt.read_table("//" + task_table_path, format="json")
