# -*- coding: utf-8 -*-

import logging
import random
import requests

import sandbox.sdk2 as sdk2

import sandbox.common.types.task as ctt
from sandbox.common.types.misc import NotExists
from sandbox.common.errors import TaskFailure

from sandbox.projects import resource_types
import sandbox.projects.images.resource_types as images_resource_type
from sandbox.projects.yql.RunYQL2 import RunYQL2

from sandbox.sandboxsdk.environments import PipEnvironment


class ImagesFetchThumbRequests(sdk2.Task):
    """
        Extracts thumbnail request ids from balancer access logs exported to MR (YT).

        Workflow:
          1. Unless an existing YQL operation id is supplied through the
             ``yql_operation_id`` parameter, a RunYQL2 subtask is started with
             the query built by ``_form_query`` and this task waits for it.
          2. The result table of the YQL operation is resolved through the
             YQL HTTP API and read with the YT client.
          3. Every valid row is written, one per line, into the file of the
             IMAGES_RAW_REQUESTS resource registered in ``on_enqueue``.
    """

    # Name of the file backing the produced IMAGES_RAW_REQUESTS resource.
    PLAN_FILE_NAME = "thumb.requests"
    # Name of the regexp capture group / result column holding the request.
    YT_TABLE_NAME = "request"
    # Upper bound (seconds) for the YQL API HTTP call, so that a stalled
    # connection cannot block the task until kill_timeout.
    YQL_API_TIMEOUT = 60

    class Requirements(sdk2.Requirements):
        cores = 1

        environments = [
            PipEnvironment('yandex-yt')
        ]

        # NOTE(review): empty Caches subclass - presumably declares that the
        # task does not need shared caches; confirm against Sandbox docs.
        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Task.Parameters):
        kill_timeout = 3600

        mr_folder = sdk2.parameters.String('Path to folder in YT with logs',
                                           default="logs/balancer-production-improxy-log/30min")
        collection = sdk2.parameters.String('Collection type (all by default)', default="")
        requests_amount = sdk2.parameters.Integer('Requests number upper limit', default=100000)

        with sdk2.parameters.RadioGroup('Scheme (do not work with empty domains)') as scheme:
            scheme.values['http'] = scheme.Value(value='http', default=True)
            scheme.values['https'] = scheme.Value(value='https')
        domains = sdk2.parameters.List('List of domains for plan. Use random for each request. Empty for \
                                        location only plan (e.g. for Yandex.Tank ammo)')

        yql_operation_id = sdk2.parameters.String('Specify YQL operation id (run new one if empty)', default="")

    def _run_yql_task(self, query):
        """
            Create and enqueue a RunYQL2 subtask executing ``query``.

            Parameter values that are resources are passed by id and
            ``NotExists`` values are dropped before the subtask is created.

            :param query: text of the YQL query to run
            :return: id of the enqueued subtask
        """
        input_parameters = {
            RunYQL2.Parameters.query.name: query,
            RunYQL2.Parameters.publish_query.name: True,
            RunYQL2.Parameters.trace_query.name: True,
            RunYQL2.Parameters.retry_period.name: 60
        }
        # items() instead of iteritems(): same result for this small dict and
        # does not tie the code to Python 2.
        subtask_parameters = {
            key: value.id if isinstance(value, resource_types.AbstractResource) else value
            for key, value in input_parameters.items() if value is not NotExists
        }
        yql_task = RunYQL2(
            self,
            description="Get thumbnail ids from access logs in YT",
            notifications=self.Parameters.notifications,
            priority=ctt.Priority(ctt.Priority.Class.SERVICE, ctt.Priority.Subclass.HIGH),
            create_sub_task=True,
            **subtask_parameters
        )
        yql_task.enqueue()
        return yql_task.id

    def _form_query(self):
        """
            Build the YQL query that selects thumbnail requests from the logs.

            The query reads the tables under ``mr_folder``, keeps rows whose
            ``processing_tree`` contains ``"regexp " + collection``, extracts
            the request with the ``$get_request`` regexp, groups by it and
            limits the output to ``requests_amount`` rows.

            :return: query text
        """
        # An empty collection formats to an empty string, so no special case
        # is required.
        collection = '{}'.format(self.Parameters.collection)
        return '''
use hahn;

$collection = "{collection}";
$get_request = Re2::Capture("^.*GET (?P<{table}>\/i\\\?id=.*) HTTP\/1\.1.*$");

PRAGMA yt.InferSchema;
SELECT {table}
FROM RANGE(
    [{mr_folder}]
)
WHERE String::Contains(processing_tree, "regexp " || $collection)
GROUP BY $get_request(url).{table} as {table}
HAVING {table} IS NOT NULL
LIMIT {limit};
'''.format(collection=collection, mr_folder=self.Parameters.mr_folder, limit=self.Parameters.requests_amount,
           table=self.YT_TABLE_NAME)

    def _rand_hostname(self):
        """
            Pick a random domain from the ``domains`` parameter.

            :return: one of the configured domains, or an empty string when
                     no domains are configured
        """
        domains = self.Parameters.domains
        return random.choice(domains) if domains else ''

    def _get_yt_result_table(self, operation_id):
        """
            Ask the YQL API where the result of ``operation_id`` is stored.

            :param operation_id: id of the YQL operation
            :return: the ``Reference`` value of the first ``Ref`` entry of the
                     first result; ``_parse_yql_result`` unpacks it into three
                     items (ignored item, cluster name, table path)
            :raises TaskFailure: if the response has no status, the operation
                     is not COMPLETED, or the result reference is missing
        """
        yql_token = sdk2.Vault.data(self.owner, "YQL_TOKEN")

        url = "{url}/operations/{id}/results".format(url=RunYQL2.YQL_API_BASE_URL, id=operation_id)
        cgi = {
            'filters': 'DATA',
            'version': 0,
            'wait_competition': 0,
            'columns_preset': False
        }
        headers = {
            'Authorization': 'OAuth {}'.format(yql_token),
            'Accept': 'application/json'
        }
        # Without a timeout a stalled connection would hang until kill_timeout.
        http_response = requests.get(url, params=cgi, headers=headers, timeout=self.YQL_API_TIMEOUT)
        http_response.raise_for_status()
        response = http_response.json()

        if 'status' not in response:
            raise TaskFailure('Bad response from YQL api for task {}'.format(operation_id))
        if response['status'] != 'COMPLETED':
            raise TaskFailure('YT task {} in bad state {}'.format(
                # Fall back to the requested id when the response lacks one.
                response.get('id', operation_id),
                response['status'])
            )

        # Report an unexpected payload shape as a task failure instead of
        # letting a bare KeyError/IndexError escape.
        try:
            ref = response['data'][0]['Write'][0].get('Ref', [])
        except (KeyError, IndexError, TypeError):
            raise TaskFailure('Malformed result from YQL api for task {}'.format(operation_id))
        if not ref:
            raise TaskFailure('Empty result from yql request')
        return ref[0]['Reference']

    def _get_table_content(self, cluster_name, cluster_folder):
        """
            Yield the ``YT_TABLE_NAME`` column of every row of a YT table.

            :param cluster_name: YT proxy (cluster) to connect to
            :param cluster_folder: table path without the leading ``//``
        """
        # Imported lazily: the package is provided by the 'yandex-yt'
        # PipEnvironment declared in Requirements.
        from yt.wrapper import YtClient

        yt_token = sdk2.Vault.data(self.owner, "YT_TOKEN")
        yt_client = YtClient(
            proxy=cluster_name,
            token=yt_token,
        )

        for payload in yt_client.read_table('//{}'.format(cluster_folder), format="json"):
            yield payload[self.YT_TABLE_NAME]

    def _parse_yql_result(self, operation_id):
        """
            Yield plan lines built from the result table of a YQL operation.

            Each line is ``<scheme>://<random domain><request>`` when domains
            are configured and just ``<request>`` otherwise. Empty and
            non-ASCII requests are skipped.

            :param operation_id: id of the completed YQL operation
        """
        _, cluster_name, cluster_folder = self._get_yt_result_table(operation_id)

        # The scheme is meaningful only together with a host name.
        scheme = '{}://'.format(self.Parameters.scheme) if self.Parameters.domains else ''

        for docid in self._get_table_content(cluster_name, cluster_folder):
            if not docid or not all(ord(char) < 128 for char in docid):
                # Lazy %-formatting: str.format() would raise
                # UnicodeEncodeError on Python 2 for a non-ASCII unicode id,
                # which is exactly the case reported here.
                logging.debug("Malformed id: %s. Skipped", docid)
                continue
            yield "{scheme}{host}{id}".format(
                scheme=scheme,
                host=self._rand_hostname(),
                id=docid
            )

    def on_enqueue(self):
        """
            Store a user supplied YQL operation id in the context and
            register the output IMAGES_RAW_REQUESTS resource.
        """
        if self.Parameters.yql_operation_id:
            self.Context.yql_operation_id = self.Parameters.yql_operation_id

        images_resource_type.IMAGES_RAW_REQUESTS(self, self.Parameters.description, self.PLAN_FILE_NAME,
                                                 collection=self.Parameters.collection)

    def on_execute(self):
        """
            Obtain a YQL operation id (running a RunYQL2 subtask if none is
            known yet) and write the parsed requests into the resource file.

            :raises TaskFailure: if the subtask did not finish with SUCCESS
        """
        if not self.Context.yql_operation_id:
            with self.memoize_stage.run_yql_task:
                query = self._form_query()

                logging.info('Run task with yql query: %s', query)
                yql_task_id = self._run_yql_task(query)
                self.Context.yql_task_id = yql_task_id

                # Arguments follow the message order: awaited output
                # parameter first, then the subtask id.
                logging.info(
                    'Wait output %s from %s task id',
                    RunYQL2.Parameters.result_operation_id.name,
                    yql_task_id
                )
                raise sdk2.WaitTask(
                    [yql_task_id],
                    ctt.Status.Group.FINISH + ctt.Status.Group.BREAK,
                    wait_all=True
                )

            # Reached on the run after the wait: accept the subtask only if
            # it finished successfully.
            yql_task_id = self.Context.yql_task_id
            yql_task = sdk2.Task.find(
                id=yql_task_id,
                children=True,
                status=ctt.Status.SUCCESS
            ).order(-sdk2.Task.id).first()
            if not yql_task:
                raise TaskFailure('Child task in bad state: {}'.format(yql_task_id))
            self.Context.yql_operation_id = yql_task.Parameters.result_operation_id

        with self.memoize_stage.parse_yql_result:
            result_file = images_resource_type.IMAGES_RAW_REQUESTS.find(task_id=self.id).first()
            result_file.path.write_bytes(
                "\n".join(self._parse_yql_result(self.Context.yql_operation_id))
            )
