# -*- coding: utf-8 -*-

import os
import json
import difflib
from multiprocessing import Pool
import re

from sandbox.sandboxsdk import parameters as sp
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.paths import make_folder
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.util import system_info

from sandbox.projects import resource_types
from sandbox.projects.common import file_utils as fu
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import utils


# A line starting with this marker (100 dashes) terminates one response
# record in the responses files; see CompareNewsdResponses.load_next.
_RESPONSE_DELIMITER = "-" * 100

# Matches `can not resolve :<digits>"` fragments. prettify() removes them from
# responses that could not be re-serialized as JSON.
# NOTE(review): presumably the digits vary between runs (e.g. a port number)
# and would otherwise produce spurious diffs - confirm.
regex = re.compile("can not resolve :(\d+)\"")


class QueriesResourceParameter(sp.LastReleasedResource):
    # Task input: id of the PLAIN_TEXT_QUERIES resource holding the queries,
    # one per line (read by CompareNewsdResponses.load_queries).
    # NOTE(review): the description says "optional", but on_execute always
    # passes this id to load_queries - confirm whether it may really be empty.
    name = 'queries_resource_id'
    description = 'Queries (optional)'
    resource_type = resource_types.PLAIN_TEXT_QUERIES


class NewsdResponses1Parameter(sp.LastReleasedResource):
    # Task input: id of the first BASESEARCH_HR_RESPONSES resource; it is the
    # "from" side of every generated diff.
    name = 'newsd_responses1_resource_id'
    description = 'Newsd responses #1'
    resource_type = resource_types.BASESEARCH_HR_RESPONSES


class NewsdResponses2Parameter(sp.LastReleasedResource):
    # Task input: id of the second BASESEARCH_HR_RESPONSES resource; it is the
    # "to" side of every generated diff.
    name = 'newsd_responses2_resource_id'
    description = 'Newsd responses #2'
    resource_type = resource_types.BASESEARCH_HR_RESPONSES


class SaveHtmlDiffParameter(sp.SandboxBoolParameter):
    # Task input: when true, compare() also writes a side-by-side HTML diff
    # (query_<N>.html) next to each textual diff. Off by default.
    name = 'save_html_diff'
    description = 'Save html diff'
    default_value = False


def filter_debug_info(newsd_answer):
    """Strip diagnostic fields from a parsed newsd answer, in place.

    Iterates over the top-level items of ``newsd_answer``:

    * a dict item loses its ``"debug_info"`` and ``"error"`` keys;
    * a list item has ``"debug_info"`` removed from each dict it contains.

    Items of any other type are left untouched. Note that iterating a
    top-level dict yields its keys, so a dict answer passes through unchanged.

    :param newsd_answer: parsed JSON value (normally a list of modules).
    :return: the same ``newsd_answer`` object, mutated.
    """
    for module in newsd_answer:
        if isinstance(module, dict):
            module.pop("debug_info", None)
            module.pop("error", None)
        elif isinstance(module, list):
            for block in module:
                # A module list may mix dict blocks with scalars; only dicts
                # can carry "debug_info", and calling .pop("debug_info", None)
                # on anything else would raise.
                if isinstance(block, dict):
                    block.pop("debug_info", None)
    return newsd_answer


def prettify(line, filter_debug=False):
    """Turn one raw response line into a diff-friendly multi-line string.

    The ``"CategoryName: "`` prefix is removed, backslash escapes are decoded
    and surrounding double quotes are stripped. If the remainder parses as
    JSON it is re-serialized with sorted keys and 4-space indentation so that
    equal answers produce equal text. Otherwise the text is kept as is, with
    ``can not resolve :<digits>"`` fragments removed.

    :param line: raw response line (a Python 2 byte string).
    :param filter_debug: when true, run the parsed JSON through
        ``filter_debug_info`` before re-serializing.
    :return: the prettified text; an empty string stays empty.
    """
    line = line.replace("CategoryName: ", "")
    # 'string-escape' is a Python 2-only codec: it turns literal "\n", "\x41"
    # and similar escapes back into the characters they denote.
    line = line.decode('string-escape')
    line = line.strip('"')
    if line:
        try:
            parsed = json.loads(line)
            if filter_debug:
                parsed = filter_debug_info(parsed)
            line = json.dumps(parsed, indent=4, sort_keys=True, ensure_ascii=False)
        except Exception:
            # Best effort: any failure to parse, filter or re-serialize falls
            # back to the textual form. `Exception` (rather than a bare
            # except) lets KeyboardInterrupt/SystemExit propagate so pool
            # workers can still be interrupted.
            line = regex.sub("", line)
    return line


def _encode_utf8(text):
    """Return ``text`` as UTF-8 bytes; byte strings are passed through as is."""
    # Under Python 2, calling .encode() on a byte string first decodes it as
    # ASCII and fails on any non-ASCII byte, so only text strings are encoded.
    # NOTE(review): byte strings are assumed to be UTF-8 already - confirm.
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8')


def compare(diff_path, query_number, query, l1, l2, save_html=False):
    """Diff two raw responses to one query and store the diff on disk.

    Both responses are prettified with debug info filtered out. If they
    differ, ``query_<query_number>.txt`` is written into ``diff_path`` with
    the query number, the query text and a unified diff. With ``save_html``,
    ``query_<query_number>.html`` is written too, holding a side-by-side
    HTML diff.

    This is a module-level function because it is submitted to a
    ``multiprocessing.Pool`` by ``CompareNewsdResponses.compare_responses``.

    :param diff_path: existing directory that receives the diff files.
    :param query_number: index of the query, used in file names and header.
    :param query: query text, written verbatim into the ``.txt`` file.
    :param l1: raw response line from the first responses file.
    :param l2: raw response line from the second responses file.
    :param save_html: also produce the HTML diff when true.
    :return: ``0`` if the responses are equal after prettifying, else ``1``.
    """
    lines1 = prettify(l1, filter_debug=True).split('\n')
    lines2 = prettify(l2, filter_debug=True).split('\n')

    # The input lines carry no trailing newlines, so lineterm='' keeps the
    # "---"/"+++"/"@@" control lines newline-free as well; otherwise joining
    # with '\n' below would leave a blank line after each of them.
    diff = list(difflib.unified_diff(lines1, lines2, lineterm=''))
    if not diff:
        return 0

    base_name = os.path.join(diff_path, "query_" + str(query_number))

    with open(base_name + ".txt", 'w') as fd:
        fd.write("query #" + str(query_number) + "\n")
        fd.write(query + "\n")
        fd.write(_encode_utf8('\n'.join(diff)))

    if save_html:
        html_diff = difflib.HtmlDiff()
        with open(base_name + ".html", 'w') as fd:
            fd.write(_encode_utf8(html_diff.make_file(lines1, lines2)))
    return 1


class CompareNewsdResponses(SandboxTask):
    """
        Compares slave_newsd shooting results obtained in human-readable (hr=da) format.
        Uses the results of the GET_NEWSD_RESPONSES task.
    """
    type = 'COMPARE_NEWSD_RESPONSES'

    input_parameters = (
        QueriesResourceParameter,
        NewsdResponses1Parameter,
        NewsdResponses2Parameter,
        SaveHtmlDiffParameter,
    )

    # NOTE(review): presumably megabytes, i.e. 40 GiB - confirm against the
    # Sandbox SDK.
    required_ram = 40 * 1024

    def on_enqueue(self):
        """Create the output resource up front and remember its id in ctx."""
        SandboxTask.on_enqueue(self)
        resource = self.create_resource(
            self.descr, 'compare_result',
            resource_types.SLAVE_NEWSD_RESPONSES_COMPARE_RESULT,
            attributes={'backup_task': True},
        )
        self.ctx['out_resource_id'] = resource.id

    def on_execute(self):
        """Compare the two response resources and fill the output resource.

        Sets ``ctx['compare_result']`` to ``True`` when no difference was
        found. If ``utils.check_resources_by_md5`` already reports the
        resources as identical, the per-query comparison is skipped.
        """
        path_to_diffs = self._get_diff_path()
        make_folder(path_to_diffs)

        resp1_resource_id = self.ctx[NewsdResponses1Parameter.name]
        resp2_resource_id = self.ctx[NewsdResponses2Parameter.name]

        resp1 = channel.sandbox.get_resource(resp1_resource_id)
        resp2 = channel.sandbox.get_resource(resp2_resource_id)
        # Reuse the already resolved output path instead of looking the
        # resource up a second time.
        if utils.check_resources_by_md5(resp1, resp2, path_to_diffs):
            self.ctx['compare_result'] = True
            return

        queries = self.load_queries(self.ctx[QueriesResourceParameter.name])

        # True - no diff
        self.ctx['compare_result'] = self.compare_responses(queries, resp1_resource_id, resp2_resource_id)
        if self.ctx['compare_result']:
            fu.write_file(os.path.join(path_to_diffs, 'no_diff_detected.txt'), 'no_diff_detected')

        self.mark_resource_ready(self.ctx['out_resource_id'])

    def load_next(self, fd):
        """Read the next response record from an open responses file.

        Consumes lines up to and including the next delimiter line. Of the
        lines read, the last one starting with ``"CategoryName"`` is kept as
        the response.

        :param fd: iterable of lines (an open file positioned at a record).
        :return: ``(response_str, complete)``. ``response_str`` is ``""`` if
            no ``CategoryName`` line was seen. ``complete`` is ``False`` if
            the input ended before a delimiter line was found.
        """
        response_str = ""
        complete = False
        for line in fd:
            line = line.strip()
            if line.startswith(_RESPONSE_DELIMITER):
                complete = True
                break
            if line.startswith("CategoryName"):
                response_str = line
        return (response_str, complete)

    def load_queries(self, resource_id):
        """Sync the queries resource and return its lines, stripped.

        Also stores the number of queries in ``ctx['num_of_queries']``.

        :param resource_id: id of the queries resource.
        :return: list of query strings, one per line of the resource file.
        """
        self.sync_resource(resource_id)
        queries_resource = channel.sandbox.get_resource(resource_id)
        with open(queries_resource.path) as f:
            queries = [line.strip() for line in f]
        self.ctx['num_of_queries'] = len(queries)
        return queries

    def compare_responses(self, queries, resp1_resourse_id, resp2_resourse_id):
        """Diff the two responses files query by query, in parallel.

        For every query the next record is read from each responses file and
        the pair is handed to ``compare`` in a worker pool. The number of
        differing queries is stored in ``ctx['num_of_diffs']``.

        :param queries: list of query strings, in the order of the responses.
        :param resp1_resourse_id: id of the first responses resource.
        :param resp2_resourse_id: id of the second responses resource.
        :return: ``True`` if no query produced a diff, ``False`` otherwise.
        :raises: whatever ``eh.ensure`` raises when either responses file
            runs out of records before the queries do.
        """
        diff_path = self._get_diff_path()
        save_html = self.ctx[SaveHtmlDiffParameter.name]

        self.sync_resource(resp1_resourse_id)
        self.sync_resource(resp2_resourse_id)
        resp1_resource = channel.sandbox.get_resource(resp1_resourse_id)
        resp2_resource = channel.sandbox.get_resource(resp2_resourse_id)

        results = []
        pool = Pool(processes=system_info()['ncpu'])
        submitted_all = False
        try:
            with open(resp1_resource.path) as f1, open(resp2_resource.path) as f2:
                for query_number, query in enumerate(queries):
                    (r1, complete1) = self.load_next(f1)
                    (r2, complete2) = self.load_next(f2)

                    eh.ensure(complete1 and complete2, "Sizes of responses1, responses2 and queries are different")

                    results.append(pool.apply_async(compare, (diff_path, query_number, query, r1, r2, save_html)))
            pool.close()
            submitted_all = True
        finally:
            # On failure stop the workers right away, so that no worker
            # processes are left behind and queued comparisons are dropped.
            if not submitted_all:
                pool.terminate()
            pool.join()

        num_of_diffs = sum(result.get() for result in results)
        self.ctx['num_of_diffs'] = num_of_diffs

        return not num_of_diffs

    def _get_diff_path(self):
        """Return the filesystem path of the output (diff) resource."""
        return channel.sandbox.get_resource(self.ctx['out_resource_id']).path


# Module-level alias for the task class.
# NOTE(review): presumably the Sandbox task loader discovers the task through
# this name - confirm.
__Task__ = CompareNewsdResponses
