# coding: utf-8

"""
    Проверка элемента на серпе по схеме данных и на качество
"""

import logging
import json
import yaml
import urllib
import urllib2

import sandbox.common.types.misc as ctm
import sandbox.common.types.client as ctc

from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk.svn import Arcadia
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.errors import SandboxTaskFailureError, SandboxTaskUnknownError
from sandbox.sandboxsdk.channel import channel

from sandbox.projects.common import apihelpers
from sandbox.projects.common import link_builder as lb


# Sandbox resource type that get_last_resource() searches (by the 'wiztype'
# attribute) to obtain the list of queries.
QUERIES_RESOURCE = 'USERS_QUERIES'


class CheckerException(Exception):
    """Error raised by the checker helpers (bad config, bad answer, bad query source).

    on_execute() converts it into a SandboxTaskFailureError when it escapes
    from make_shootings().
    """


class ConfigPath(parameters.SandboxArcadiaUrlParameter):
    """Arcadia URL of the config file; its content is fetched with Arcadia.cat in get_config()."""
    name = 'config_path'
    description = 'Path to config file'
    default_value = 'arcadia:/arc/trunk/arcadia/extsearch/wizards/schema_tests/config.json'
    required = True


class FilterType(parameters.SandboxSelectParameter):
    """Kind of filter used to pick the config element and the matching part of the answer.

    The task code compares ctx['filter_type'] with 'type_subtype' and
    'counter_prefix'; 'other' has no dedicated handling in this module.
    """
    name = 'filter_type'
    description = 'How to filter result'
    # Each pair is presumably (label shown to the user, value stored in ctx) - TODO confirm.
    choices = [
        ('type/subtype', 'type_subtype'),
        ('counter_prefix', 'counter_prefix'),
        ('other', 'other'),
    ]
    required = True


class FilterValue(parameters.SandboxStringParameter):
    """Filter value: 'type/subtype' or a counter prefix, depending on filter_type.

    Although not marked as required, prepare_params() rejects an empty value
    for the 'type_subtype' and 'counter_prefix' filter types.
    """
    name = 'filter_value'
    description = 'Value of filter (type/subtype - with /)'
    required = False


class ServerString(parameters.SandboxStringParameter):
    """Host and path (without scheme) to send requests to.

    Used by make_query_link() when the config element has no non-empty
    "server" entry; the URL is built as "https://" + this value + "?" + query.
    """
    name = 'servername'
    description = 'Serverstring (host + path) for shooting'
    default_value = 'hamster.yandex.ru/search/'
    required = True


class Metahost(parameters.SandboxStringParameter):
    """Value for the "metahost2" request parameter when the config element has no "metahost" key."""
    name = 'metahost'
    description = 'Metahost'
    required = False


class YtTokenName(parameters.SandboxStringParameter):
    """Vault item name of the YT token; read in get_queries() when the queries source type is "yt"."""
    name = 'yt_token'
    description = 'Name of YT token in the vault'
    required = False


class WebResultSchemaCheckerBase(SandboxTask):
    """Base task for checking one configured element of the search result.

    on_execute() loads the config, selects the element matching the
    filter_type/filter_value parameters, calls make_shootings() and then
    analyze_shootings_result(). Both of those raise "not implemented" here and
    are expected to be provided by subclasses.
    """
    type = 'WEB_RESULT_SCHEMA_CHECKER_BASE'
    dns = ctm.DnsType.DNS64
    # NOTE(review): presumably restricts execution to Linux hosts - confirm against Sandbox docs.
    client_tags = ctc.Tag.Group.LINUX

    input_parameters = [ConfigPath, FilterType, FilterValue, ServerString, Metahost, YtTokenName]

    # yt.wrapper is imported lazily in yt_read_table(); this installs the package providing it.
    environment = (
        environments.PipEnvironment('yandex-yt'),
    )

    def make_email(self):
        """Return the (subject, body) pair for the failure notification.

        on_failure() sends the body with the 'text/html' content type.
        """
        task_ref = lb.task_link(self.id, 0)
        return (
            "WebResultSchemaCheckerBase failure",
            "Abstract text about failure details. Task {}".format(task_ref),
        )

    def on_failure(self):
        """Run the base failure handler, then e-mail the configured recipients.

        Recipients are taken from ctx['failure_emails'], which may be either a
        list of logins or a single comma-separated string of logins. The
        "@yandex-team.ru" suffix is appended to every non-blank login. Any
        other value type is ignored and no e-mail is sent.
        """
        SandboxTask.on_failure(self)
        emails = self.ctx.get('failure_emails')
        if not emails:
            return

        logging.info('Try to send email...')
        if isinstance(emails, (str, unicode)):
            # The split result is already the list of logins; wrapping it in
            # another list would make the .strip() calls below fail.
            emails = emails.split(',')
        elif not isinstance(emails, list):
            return

        recipients = [x.strip() + "@yandex-team.ru" for x in emails if x.strip()]
        if not recipients:
            # Only blank entries were given - nobody to notify.
            return

        subj, text = self.make_email()
        channel.sandbox.send_email(
            recipients,
            None,
            subj,
            text,
            'text/html',
            'utf-8'
        )

    def on_execute(self):
        """Task entry point: pick the config element, shoot, analyse.

        Raises SandboxTaskFailureError when the config has no element for the
        requested filter, or when make_shootings() raises CheckerException.
        """
        self.prepare_params()
        element = self.get_one_element(self.get_config())
        if element is None:
            raise SandboxTaskFailureError(
                "No such element in config for filter {}: {}".format(
                    self.ctx['filter_type'],
                    self.ctx.get('filter_value') or '-'
                )
            )

        self.ctx['element_name'] = element.get('name')

        try:
            shootings = self.make_shootings(element)
        except CheckerException as err:
            raise SandboxTaskFailureError("Checker exception: {}".format(err))

        self.analyze_shootings_result(shootings, element)

    def make_shootings(self, cfg):
        """Placeholder for the shooting step; always fails in this base class.

        on_execute() passes the selected config element as cfg and hands the
        returned value to analyze_shootings_result().
        """
        message = "make_shootings() is not implemented!"
        raise SandboxTaskFailureError(message)

    def analyze_shootings_result(self, res, cfg):
        """Placeholder for the analysis step; always fails in this base class.

        on_execute() calls it with the value returned by make_shootings() and
        the selected config element.
        """
        message = "analyze_shootings_result() is not implemented!"
        raise SandboxTaskFailureError(message)

    def get_config(self):
        """Fetch the config file from Arcadia and return it parsed by yaml."""
        config_url = self.ctx[ConfigPath.name]
        raw_config = Arcadia.cat(config_url)
        # NOTE(review): yaml.load can construct arbitrary objects; consider
        # yaml.safe_load if the config path may ever point to untrusted content.
        return yaml.load(raw_config)

    def prepare_params(self):
        """Validate filter_value and derive helper entries in the task context.

        For 'type_subtype' stores ctx['snippet_type'] and, when present,
        ctx['snippet_subtype']. For 'counter_prefix' stores
        ctx['res_counter_prefix'], always ending with '/'. Other filter types
        are left untouched.

        Raises SandboxTaskFailureError when filter_value is empty for either
        of the two handled filter types.
        """
        filter_value = self.ctx.get('filter_value')
        filter_type = self.ctx['filter_type']

        if filter_type == 'type_subtype':
            if not filter_value:
                raise SandboxTaskFailureError('Empty filter_value for filter by type/subtype')
            pieces = filter_value.split('/', 2)
            self.ctx['snippet_type'] = pieces[0]
            if len(pieces) > 1:
                self.ctx['snippet_subtype'] = pieces[1]
            return

        if filter_type == 'counter_prefix':
            if not filter_value:
                raise SandboxTaskFailureError('Empty filter_value for filter by counter_prefix')
            if filter_value.endswith('/'):
                self.ctx['res_counter_prefix'] = filter_value
            else:
                self.ctx['res_counter_prefix'] = filter_value + '/'

    def get_resource_by_id(self, res_id):
        """Sync the resource and return its local path.

        Raises SandboxTaskUnknownError when sync_resource() yields None or an
        empty string.
        """
        local_path = self.sync_resource(res_id)
        if local_path is not None and local_path != '':
            return local_path
        raise SandboxTaskUnknownError('Cannot sync resource {}'.format(res_id))

    def get_last_resource(self, filter_value):
        """Find the latest QUERIES_RESOURCE with wiztype == filter_value and sync it.

        Returns the local path of the synced resource.
        Raises SandboxTaskFailureError when no such resource is found.
        """
        logging.info(
            'Trying to get last queries list: resource of type {} with attribute wiztype={}'.format(
                QUERIES_RESOURCE, filter_value
            )
        )
        found = apihelpers.get_last_resource_with_attribute(
            resource_type=QUERIES_RESOURCE,
            attribute_name='wiztype',
            attribute_value=filter_value
        )
        if found:
            return self.get_resource_by_id(found.id)
        raise SandboxTaskFailureError('Cannot get queries list with wiztype {}'.format(filter_value))

    def yt_read_table(self, token, table, filter_type, filter_value):
        """Read queries from a YT table, keeping only rows matching the filter.

        table has the form "<first part>:<second part>": the first part is
        passed to YtClient together with the token, the second one to
        read_table(). Returns a list of [query, region, domain] lists; for a
        filter type other than 'type_subtype'/'counter_prefix' the list is
        empty.

        Raises CheckerException when table contains no ':' separator.
        """
        pieces = table.split(':')
        if len(pieces) < 2:
            raise CheckerException("Incorrect YT path: {}".format(table))

        # Imported lazily: the package comes from the task's pip environment.
        from yt.wrapper import YtClient
        yt_client = YtClient(pieces[0], token)

        logging.info("Start reading table")
        rows = yt_client.read_table(pieces[1], format='dsv', raw=False)

        if filter_type == 'type_subtype':
            wanted_type = self.ctx['snippet_type']
            wanted_subtype = self.ctx.get('snippet_subtype')
            # A missing subtype means "any subtype of the given type".
            selected = [
                [row["query"], row["region"], row["domain"]]
                for row in rows
                if row["path_type"] == wanted_type
                and (not wanted_subtype or row["path_subtype"] == wanted_subtype)
            ]
        elif filter_type == 'counter_prefix':
            # Table paths are compared without a single trailing slash.
            wanted_path = filter_value[:-1] if filter_value.endswith('/') else filter_value
            selected = [
                [row["query"], row["region"], row["domain"]]
                for row in rows
                if row["path"] == wanted_path
            ]
        else:
            selected = []

        logging.info("Stop reading table")
        return selected

    def get_queries(self, cfg):
        """Load the list of queries described by cfg["queries"].

        Supported cfg["queries"]["type"] values:
          * "resource" - a tab-separated file taken from a Sandbox resource;
            "value" is either a resource id or "LRR"/"last" for the latest
            resource whose wiztype equals the filter value;
          * "yt" - a YT table, "value" being the path handed to yt_read_table().

        Returns a list of lists of query fields.
        Raises CheckerException for an unknown source type.
        """
        filter_type = self.ctx['filter_type']
        filter_value = self.ctx.get('filter_value')

        queries = cfg["queries"]
        source_type = queries.get("type")

        if source_type == "resource":
            val = queries["value"]
            if val == "LRR" or val == "last":
                queryfile = self.get_last_resource(filter_value)
            else:
                queryfile = self.get_resource_by_id(val)

            querieslist = []
            with open(queryfile) as qf:
                for line in qf:
                    # Drop the line terminator: otherwise it stays glued to the
                    # last field and ends up in the request (e.g. tld "tr\n").
                    line = line.rstrip('\r\n')
                    if not line:
                        # Blank lines carry no query.
                        continue
                    querieslist.append(line.split('\t'))
            return querieslist

        if source_type == "yt":
            yt_token = channel.task.get_vault_data(self.ctx[YtTokenName.name])
            return self.yt_read_table(yt_token, queries["value"], filter_type, filter_value)

        raise CheckerException("Error while getting list of queries: unknown type of queries")

    def get_one_element(self, objs):
        """Return the first entry of objs["objects"] matching the task filter.

        An entry matches when both its 'filter_type' and 'filter_value' equal
        the corresponding task context values. Returns None when there is no
        "objects" key or nothing matches.
        """
        if "objects" not in objs:
            return None

        wanted_type = self.ctx['filter_type']
        wanted_value = self.ctx.get('filter_value')
        matching = (
            candidate for candidate in objs["objects"]
            if candidate.get('filter_type') == wanted_type
            and candidate.get('filter_value') == wanted_value
        )
        return next(matching, None)

    def make_query_link(self, cfg, queryparams):
        """Build the request URL for one query.

        queryparams holds up to four positional fields: text, lr, tld, l10n.
        Missing fields are sent as empty strings, extra ones are ignored; the
        passed list itself is not modified. The "tr" tld is replaced with
        "com.tr". "metahost2" is taken from cfg["metahost"] when that key
        exists, otherwise from the Metahost task parameter if it is non-empty.
        The host/path comes from cfg["server"] or, when that is absent or
        empty, from the ServerString task parameter.

        Returns the URL as a string.
        Raises CheckerException when cfg has no non-empty "json_dump".
        """
        dump = cfg.get("json_dump")
        if dump is None or dump == "":
            raise CheckerException("Dump parameter is absent or empty")

        # Normalise to exactly four fields on a copy, so the caller's list
        # keeps its original content.
        fields = (list(queryparams) + [''] * 4)[:4]
        params = dict(zip(["text", "lr", "tld", "l10n"], [convert(s) for s in fields]))

        if params["tld"] == "tr":
            params["tld"] = "com.tr"

        if "metahost" in cfg:
            params["metahost2"] = cfg["metahost"]
        else:
            mhost = self.ctx.get(Metahost.name)
            # An unset string parameter may be '' as well as None; neither
            # should produce an empty "metahost2=" in the URL.
            if mhost:
                params["metahost2"] = mhost

        serverstring = cfg.get("server") or self.ctx[ServerString.name]

        # convert() encodes unicode only, so a byte string is passed through as is.
        params["json_dump"] = convert(dump)

        url = "https://{}?{}".format(serverstring, urllib.urlencode(params))
        logging.info("QUERYSTRING: {}".format(url))
        return url

    def get_answer(self, cfg, q):
        """Request the URL built for query q and return the parsed part of the answer.

        The request uses a 5 second timeout. Network errors raised by urllib2
        are not intercepted here; parse_answer() may raise CheckerException.
        """
        url = self.make_query_link(cfg, q)

        res = urllib2.urlopen(url, timeout=5)
        try:
            answer = res.read()
        finally:
            # Release the connection even if reading fails.
            res.close()

        return self.parse_answer(answer, cfg)

    def parse_answer(self, content, cfg):
        """Decode the JSON answer and return the value stored under the configured key.

        The key is cfg["path"], falling back to cfg["json_dump"]. When the full
        key is not present in the decoded data, the key without its last
        dot-separated component is tried. Returns None (and logs an error)
        when neither key is present.

        Raises CheckerException when content is None, when both config fields
        are absent or empty, or when a ValueError occurs while decoding.
        """
        if content is None:
            raise CheckerException('No json content')

        key = cfg.get("path")
        if key in (None, ""):
            key = cfg.get("json_dump")
        if key in (None, ""):
            raise CheckerException("Field key and json_dump are both absent or empty")

        try:
            data = json.loads(content)
            if key in data:
                return data[key]

            # Try the 'short' key: searchdata.docs.*.snippets.full -> searchdata.docs.*.snippets
            key_parts = key.rsplit('.', 1)
            logging.info("Make short key: {}".format(key_parts))
            short_key = key_parts[0]
            if short_key in data:
                return data[short_key]

            logging.error("Answer has no path keys: {}".format(data))
            return None
        except ValueError as err:  # TODO: Add logging about wrong encoding here
            raise CheckerException('Invalid json content: {}'.format(err))

    def filter_answer(self, jsdata):
        """Pick the part of jsdata matching the task filter.

        Dispatches on ctx['filter_type'] to the type/subtype or counter-prefix
        search. Returns None for empty jsdata and for any other filter type.
        """
        if not jsdata:
            return None

        mode = self.ctx['filter_type']
        if mode == 'type_subtype':
            return self.filter_answer_by_type(
                jsdata,
                self.ctx.get("snippet_type"),
                self.ctx.get("snippet_subtype")
            )
        if mode == 'counter_prefix':
            return self.filter_answer_by_counter_prefix(
                jsdata,
                self.ctx['res_counter_prefix'],
                self.ctx['filter_value']
            )
        return None

    def filter_answer_by_type(self, jsdata, stype, subtype):
        """Find the first dict in (possibly nested) jsdata with the given type/subtype.

        jsdata may be a single item or a list; nested lists are searched
        recursively. A dict matches when its "type" equals stype and either no
        subtype is requested (subtype is None/empty) or its "subtype" equals
        subtype. Returns the matching dict or None.
        """
        if not isinstance(jsdata, list):
            jsdata = [jsdata]

        for item in jsdata:
            if isinstance(item, list):
                found = self.filter_answer_by_type(item, stype, subtype)
                if found:
                    return found
            elif isinstance(item, dict) and item.get("type") == stype:
                # An empty subtype means "any subtype": the filter value may be
                # given as a bare type, and yt_read_table() treats it the same way.
                if not subtype or item.get("subtype") == subtype:
                    return item

        return None

    def filter_answer_by_counter_prefix(self, jsdata, counter_prefix, counter_prefix_no_slash):
        """Find the first dict in (possibly nested) jsdata with a matching counter prefix.

        A dict matches when its "counter_prefix" equals either of the two given
        prefixes; if "counter_prefix" is absent or empty, the "path" of its
        "counter" dict is compared instead. Nested lists are searched
        recursively. Returns the matching dict or None.
        """
        items = jsdata if isinstance(jsdata, list) else [jsdata]
        accepted = (counter_prefix, counter_prefix_no_slash)

        for item in items:
            if isinstance(item, list):
                nested = self.filter_answer_by_counter_prefix(item, counter_prefix, counter_prefix_no_slash)
                if nested:
                    return nested
                continue

            if not isinstance(item, dict):
                continue

            own_prefix = item.get("counter_prefix")
            if own_prefix:
                if own_prefix in accepted:
                    return item
            else:
                counter = item.get("counter")
                if counter and isinstance(counter, dict) and counter.get("path") in accepted:
                    return item

        return None


def get_schema(path):
    """Fetch the file at the given Arcadia path and return it parsed by yaml."""
    raw_schema = Arcadia.cat(path)
    # NOTE(review): yaml.load can construct arbitrary objects; consider
    # yaml.safe_load if the path may ever point to untrusted content.
    return yaml.load(raw_schema)


def convert(s):
    """Return s encoded as UTF-8 bytes if it is a unicode object, otherwise s unchanged."""
    return s.encode('utf8') if isinstance(s, unicode) else s


def convert_unicode(s):
    """Return s as a unicode object, decoding it from UTF-8 when it is not one already."""
    if not isinstance(s, unicode):
        s = unicode(s.decode("utf8"))
    return s


# Module-level alias of the task class.
# NOTE(review): presumably the name the Sandbox loader looks up to find the task - TODO confirm.
__Task__ = WebResultSchemaCheckerBase
