# -*- coding: utf-8 -*-
from collections import defaultdict
from datetime import datetime, timedelta
import json
import itertools
import logging
import os
import re
from xml.sax import saxutils

from sandbox.common.rest import Client

from sandbox.sandboxsdk import parameters
from sandbox.sandboxsdk import paths
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.channel import channel

from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import utils
from sandbox.projects.common.wizard.printwizard import postprocess as normalize_response
from sandbox.projects.websearch.begemot import parameters_sdk1 as bp
from sandbox.projects.websearch.begemot import resources as br
from sandbox.projects.websearch.begemot.tasks.BegemotCreateResponsesDiff import jsondiff


class FirstResponsesParameter(parameters.ResourceSelector):
    # Mandatory input: the first of the two BEGEMOT_RESPONSES_RESULT resources being compared.
    required = True
    resource_type = br.BEGEMOT_RESPONSES_RESULT
    # The context key and the displayed description share the same text.
    name = 'begemot_responses_1'
    description = 'begemot_responses_1'


class SecondResponsesParameter(parameters.ResourceSelector):
    # Mandatory input: the second of the two BEGEMOT_RESPONSES_RESULT resources being compared.
    required = True
    resource_type = br.BEGEMOT_RESPONSES_RESULT
    # The context key and the displayed description share the same text.
    name = 'begemot_responses_2'
    description = 'begemot_responses_2'


class OriginalRequests(bp.PlanParameter):
    # Same parameter as bp.PlanParameter, but optional for this task: when it is not set,
    # request names are taken from the responses themselves (see on_execute).
    required = False


class FailRateParameter(parameters.SandboxBoolParameter):
    # Optional switch, off by default; when enabled the task fails once the
    # share of changed responses exceeds one half.
    required = False
    default_value = False
    name = 'fail_rate'
    description = 'Fail task if diff value is larger than 50%'


def _factors(x):
    xs = {}
    if not isinstance(x, list):
        x = x.split(';')
    for factor in x:
        k, _, v = factor.partition('=')
        xs.setdefault(k, []).append(v)
    return xs


# Placeholder page written in place of the HTML diff when the text diff is too long.
# The literal ``$TEST`` is replaced with the test name by write_aggregated_diff, so the
# link points at the text version of the same diff.
_NO_HTML = '''
<!doctype html>
<html style="font: normal 14px/1.5 sans-serif">
    <head>
        <meta charset="utf-8" />
        <title>non-JSON non-diff non-view</title>
    </head>
    <body>
        <h1><span style="color: #900">error:</span> diff too long to show in html</h1>
        <p>To prevent your browser from freezing up, an HTML5 version of this diff
           was not generated. <a href="$TEST.diff">View the text version &raquo;</a></p>
    </body>
</html>
'''

# Values that are masked in a response before diffing.
# Layout: response 'type' -> response '__oname' -> field name -> [regexp pattern, regexp substitution].
# Every match of the pattern in the field's value is replaced with the substitution.
_MASK_FIELDS = {
    'source_setup': {
        'MARKET_SETUP': {
            'stat-block-id': ['[0-9]+', '###']
        }
    }
}


def write_aggregated_diff(test, x, y):
    timeout = datetime.now() + timedelta(minutes=90)
    obj_diff = jsondiff.diff(x, y, timeout)
    if obj_diff is None:
        return None
    a, b = itertools.tee(jsondiff.line_diff(obj_diff))
    lines = 0
    with open('diffs/{}.diff'.format(test), 'w') as out:
        for chunk in jsondiff.render_text([(test, a)]):
            out.write(chunk.encode('utf-8'))
            lines += chunk.count(u'\n')
    with open('diffs/{}.html'.format(test), 'w') as out:
        if lines < 32000:
            for chunk in jsondiff.render_html([(test, b)]):
                out.write(chunk.encode('utf-8'))
        else:
            out.write(_NO_HTML.replace('$TEST', test))
    try:
        grouped = jsondiff.group(
            (u'<x-pre>{}</x-pre>'.format(saxutils.escape(k)), v) for k, v in obj_diff.iteritems() if v is not None
        )
        with open('diffs/{}.short.html'.format(test), 'w') as out:
            for chunk in jsondiff.render_html((vs[0][0] + u' x{}'.format(len(vs)), vs[0][1]) for vs in grouped):
                out.write(chunk.encode('utf-8'))
    except Exception as e:
        logging.debug("HTML rendering failed with exception: {}".format(e))
    return obj_diff


def _to_common_type(r1, r2):
    if not isinstance(r1, dict) or not isinstance(r2, dict):
        return
    for v in set(r1) & set(r2):
        if isinstance(r1[v], list) and not isinstance(r2[v], list):
            r2[v] = [r2[v]]
        elif not isinstance(r1[v], list) and isinstance(r2[v], list):
            r1[v] = [r1[v]]


def generate_aggregated_diffs(test, x, y, status):
    # these fields are ommited from the response when empty, creating an ugly diff
    for responses in itertools.chain(x.itervalues(), y.itervalues()):
        for key in responses:
            for field in ('pron', 'relev', 'rearr', 'req_for_sources', 'snip'):
                try:
                    responses[key].setdefault(field, [] if field == 'pron' else {})
                except Exception:
                    logging.debug('=== setdefault failed for: ===')
                    logging.debug('%s' % responses[key])

            responses[key].pop('binary', None)
    # TODO unpack and compare protobuf binary responses if there is no JSON in response

    # singleton lists are converted into singular values; if there's a list on one side,
    # and a value on the other, the value can be converted into a singleton for better readability.
    for k in x:
        if k not in y:
            y[k] = defaultdict(dict)

        for resp_type in x[k]:
            if resp_type not in y[k]:
                y[k][resp_type] = defaultdict(dict)
                y[k][resp_type]['rules']['ERROR'] = 'NO SUCH REQUEST'

            for r in set(x[k][resp_type].get('rules', {})) & set(y[k][resp_type].get('rules', {})):
                _to_common_type(x[k][resp_type]['rules'][r], y[k][resp_type]['rules'][r])
            for r in ('relev', 'rearr', 'req_for_sources', 'snip'):
                _to_common_type(x[k][resp_type][r], y[k][resp_type][r])
    # https://st.yandex-team.ru/REQWIZARD-913
    for field in ('pron', 'relev', 'rearr'):
        part_x = defaultdict(dict)
        part_y = defaultdict(dict)
        for req in x:
            part_x[req] = {k: v.get(field, None) for k, v in x[req].iteritems()}
            part_y[req] = {k: v.get(field, None) for k, v in y[req].iteritems()}
        status[test + '_' + field] = write_aggregated_diff(test + '_' + field, part_x, part_y) is not None
    full_diff = write_aggregated_diff(test, x, y)
    status[test] = full_diff is not None
    return full_diff


def gen_diff_footer(task):
    """Build the task footer: an HTML table with one row per compared test.

    Reads ``task.ctx['has_diff']`` (test name -> ``True`` / ``False`` / any other value,
    which is shown as is) and, when present, ``task.ctx['diffs']`` (id of the diffs
    resource the report links point to).

    Returns ``None`` when ``has_diff`` is not in the context, otherwise a one-element
    list ``[{'content': <html>}]``.
    """
    if 'has_diff' not in task.ctx:
        return
    # The diffs resource can be absent even though a diff was found, e.g. when the task
    # failed on the diff-rate check before the resource was created. In that case the
    # row is still rendered, just without links.
    if 'diffs' in task.ctx:
        diffs = channel.sandbox.get_resource(task.ctx['diffs'])
    else:
        diffs = None

    def render():
        yield '<table class="data_custom_fields t t_max t_cross">'
        for test, has_diff in sorted(task.ctx['has_diff'].items()):
            yield '<tr><td>{}</td>'.format(test)
            if has_diff is False:
                yield '<td style="color: #777">no diff</td>'
            elif has_diff is True:
                yield '<td><span style="color:#960;font-weight:bold">changed</span>'
                if diffs is not None:
                    yield ' / <a href="{}/{}.html">html</a>'.format(diffs.proxy_url, test)
                    yield ' / <a href="{}/{}.diff">text</a>'.format(diffs.proxy_url, test)
                    yield ' / <a href="{}/{}.short.html">short</a>'.format(diffs.proxy_url, test)
                yield '</td>'
            else:
                yield '<td style="color:#900;font-weight:bold">{}</td>'.format(has_diff)
            yield '</tr>'
        yield '</table>'

    return [{'content': ''.join(render())}]


class BegemotCreateResponsesDiff(SandboxTask):
    """Compare two begemot response dumps and publish text/HTML diff reports as a resource."""

    type = 'BEGEMOT_CREATE_RESPONSES_DIFF'
    cores = 1
    ram = 8192
    execution_space = 6789
    input_parameters = (
        FirstResponsesParameter,
        SecondResponsesParameter,
        OriginalRequests,
        FailRateParameter,
    )
    diff_resource_path = 'diffs'

    def _parse_resource(self, id):
        """Yield one normalized response map per non-empty line of the synced resource ``id``.

        Every line is parsed as JSON. A line that fails to parse is not fatal: it is replaced
        by a single ``{"__diff_error": <message>}`` entry, so the problem shows up in the diff.
        """
        with open(self.sync_resource(id)) as data:
            for line in data:
                line = line.strip()
                if not line:
                    continue
                try:
                    line = json.loads(line)
                except ValueError as err:
                    line = [{"__diff_error": str(err)}]
                yield self._normal_form(line)

    def _normal_form(self, responses):
        """Normalize the responses of one request and index them by a descriptive key.

        ``responses`` is an iterable of response dicts; entries that are not dicts and the
        ``_STATS`` entry are skipped. Each remaining dict is modified in place (volatile
        fields removed or masked, factors parsed) and stored under a key built from its
        identifying fields, or ``unknown_type_<n>`` when it has none of them.

        Returns a dict ``key -> response``; a later response with the same key replaces
        an earlier one.
        """
        responses_map = {}
        unknown_types = 0

        for r in responses:
            if not isinstance(r, dict):
                logging.warning('_normal_form: skipping invalid entry (not a dict): %s', r)
                continue
            if r.get('name') == "_STATS":
                continue

            r.pop('begemot_nodes', None)
            if "meta" in r:
                # host and port differ between runs and would only add noise to the diff
                r["meta"]["host"] = "<sandbox>"
                r["meta"]["port"] = "0"
            for field in ("relev", "rearr"):
                if field in r:
                    r[field] = _factors(r[field])

            for rules in [r.get("rules", {})] + [res.get("rules", {}) for res in r.get("results", [])]:
                rules.pop('PrintVersion', None)  # ignore hostname and other garbage
                rules.pop('.version', None)

            for field in ("relev", "rearr", "snip"):
                if field in r and isinstance(r[field], list):
                    r[field] = {x: "" for x in r[field]}
            logging.debug("CALLING normalize_response from printwizard.py")
            normalize_response(r, unpackrichtree=None, unpackreqbundle=None)
            key = ""
            for k in ["type", "__oname", "name", "source", "sub_source", "response_source", "apphost_response_type", "product"]:
                if k in r:
                    key += "[" + k + "=" + str(r[k]) + "]"
            if not key:
                unknown_types += 1
                key = "unknown_type_%s" % unknown_types

            masks = _MASK_FIELDS.get(r.get('type'), {}).get(r.get('__oname'), {})
            for field, (pattern, replacement) in masks.iteritems():
                value = r.get(field)
                if value:
                    # re.sub takes the pattern as stored in _MASK_FIELDS (string or compiled),
                    # so the shared table does not have to be recompiled and mutated on
                    # every call; the re module caches compiled patterns itself.
                    r[field] = re.sub(pattern, replacement, value)

            responses_map[key] = r
        return responses_map

    def on_enqueue(self):
        """Tag the task with the 'Shard' attribute of the second responses resource, if it has one."""
        shard = channel.sandbox.get_resource_attribute(self.ctx[SecondResponsesParameter.name], 'Shard')
        if shard:
            Client().task[self.id].tags([shard])

    def on_execute(self):
        """Load both response sets, diff them and publish the reports.

        Context keys written: ``HasDiff``, ``has_diff`` (per-diff status used by the footer),
        ``diff_rate`` and, when any report file was produced, ``diffs`` (id of the created
        resource). Fails the task when the fail-rate parameter is on and more than half of
        the diff entries are changed.
        """
        self.ctx['HasDiff'] = False
        responses_1 = list(self._parse_resource(self.ctx[FirstResponsesParameter.name]))
        if self.ctx.get(bp.PlanParameter.name):
            # Request names come from the plan: its non-empty lines that do not start with '@'.
            with open(self.sync_resource(self.ctx[bp.PlanParameter.name])) as fd:
                requests = [x.strip().decode('utf-8') for x in fd if x.strip() and not x.startswith('@')]
        else:
            # No plan: take the first 'original_request' found among the responses of each
            # line, falling back to a name based on the line number.
            requests = []
            for i, r in enumerate(responses_1, 1):
                for ret_type in r:
                    if len(requests) < i and 'original_request' in r[ret_type]:
                        requests.append(r[ret_type]['original_request'])
                if len(requests) < i:
                    requests.append('line {} (empty request)'.format(i))

        # NOTE: requests are used as dict keys, so the responses of a repeated request
        # replace the earlier ones; izip stops at the shorter of the two sequences.
        responses_1 = {req: resp for req, resp in itertools.izip(requests, responses_1)}
        responses_2 = {
            req: resp for req, resp in itertools.izip(
                requests, self._parse_resource(self.ctx[SecondResponsesParameter.name])
            )
        }

        paths.make_folder('diffs', delete_content=True)

        test = 'response'
        self.ctx['has_diff'] = status = {}
        obj_diff = generate_aggregated_diffs(test, responses_1, responses_2, status)

        if obj_diff:
            # Share of diff entries that actually carry a change (non-None value).
            diff_rate = float(sum(v is not None for v in obj_diff.itervalues())) / len(obj_diff)
            self.ctx['HasDiff'] = True
            # (BEGEMOT-260) Fail task if Diff value is greater than 0.5
            self.set_info("Responses changed: {:.2f}%.".format(diff_rate * 100))
            self.ctx['diff_rate'] = diff_rate
            if utils.get_or_default(self.ctx, FailRateParameter) and diff_rate > 0.5:
                eh.check_failed("Diff value is greater than 50%.")
        else:
            self.ctx['diff_rate'] = 0.0

        if os.listdir('diffs'):
            self.ctx['diffs'] = self.create_resource(
                self.description + ' diffs',
                'diffs',
                br.BEGEMOT_RESPONSES_DIFF,
                'any',
                attributes={
                    'backup_task': True,
                    'Shard': channel.sandbox.get_resource_attribute(self.ctx[SecondResponsesParameter.name], 'Shard'),
                    'is_cgi': channel.sandbox.get_resource_attribute(self.ctx[SecondResponsesParameter.name], 'is_cgi')
                }
            ).id

    @property
    def footer(self):
        """Footer content for the task page, see gen_diff_footer."""
        return gen_diff_footer(self)


# NOTE(review): presumably the module-level name through which Sandbox locates the
# task class defined in this file -- confirm against the SDK before renaming or removing.
__Task__ = BegemotCreateResponsesDiff
