# -*- coding: utf-8 -*-

import os
import re
import difflib
import codecs

import jinja2

import itertools

from sandbox.sandboxsdk.paths import make_folder
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk import parameters


from sandbox.projects import resource_types

from sandbox.projects.common.geosearch.reqans_logs import load_reqans_log


class QueriesResourceParameter(parameters.LastReleasedResource):
    """Input parameter selecting the plain-text queries resource."""

    resource_type = resource_types.PLAIN_TEXT_QUERIES
    name = 'queries_resource_id'
    description = 'Queries'


class ReqAnsLog1ResourceParameter(parameters.LastReleasedResource):
    """Input parameter selecting the first reqans log resource ('Log 1')."""

    resource_type = resource_types.GEOMETASEARCH_REQANS_LOG
    name = 'log1_resource_id'
    description = 'Log 1'


class ReqAnsLog2ResourceParameter(parameters.LastReleasedResource):
    """Input parameter selecting the second reqans log resource ('Log 2')."""

    resource_type = resource_types.GEOMETASEARCH_REQANS_LOG
    name = 'log2_resource_id'
    description = 'Log 2'


class CompareGeosearchReqAnsLogs(SandboxTask):
    """Task that compares two reqans logs and writes the diff as HTML pages.

    Reads three input resources (queries, log 1, log 2), creates an output
    resource directory and fills it with the rendered pages.  The id of the
    output resource is stored in ``ctx['out_resource_id']`` and the number of
    differing records in ``ctx['diff_count']``.
    """

    type = 'COMPARE_GEOSEARCH_REQANS_LOGS'

    input_parameters = (
        QueriesResourceParameter,
        ReqAnsLog1ResourceParameter,
        ReqAnsLog2ResourceParameter,
    )

    def on_execute(self):
        """Sync the inputs, run the comparison and render the report."""
        # Fetch the inputs one by one, in the order: queries, log 1, log 2.
        fetched = []
        for param in (QueriesResourceParameter,
                      ReqAnsLog1ResourceParameter,
                      ReqAnsLog2ResourceParameter):
            resource_id = self.ctx[param.name]
            self.sync_resource(resource_id)
            fetched.append(channel.sandbox.get_resource(resource_id))
        queries_resource, log1_resource, log2_resource = fetched

        # Register the output resource and create its directory.
        result_resource = self.create_resource('compare result', 'compare_result', 'OTHER_RESOURCE')
        self.ctx['out_resource_id'] = result_resource.id
        make_folder(result_resource.path)

        # Compare the logs and publish the number of differing records.
        loader = ReqAnsLogLoader(
            queries_resource.path,
            log1_resource.path,
            log2_resource.path,
        )
        self.ctx['diff_count'] = loader.total_diff_count()

        fill_tempates(loader, result_resource.path)


class ReqAnsLogLoader(object):
    """Pair up the records of two reqans logs by ``parent_reqid``.

    On construction the queries file and both logs are loaded, validated and
    then reduced to the records that differ between the logs.  A record that
    is present in only one log is kept and reported as a difference against
    an empty record.

    Attributes:
        queries: list of ``(parent_reqid, query_line)`` tuples, in file order.
        logs1, logs2: whatever ``load_reqans_log`` returns; this class uses
            them as dicts keyed by ``parent_reqid``.
            NOTE(review): records are assumed to be text (callers call
            ``splitlines()`` on them) -- confirm against ``load_reqans_log``.
    """

    # Stand-in for the record of a log that has no entry for a parent_reqid.
    _MISSING_LOG = ''

    def __init__(self, queries_file, logs1_file, logs2_file):
        """Load the inputs, validate them and drop the identical records.

        :param queries_file: path to a UTF-8 file with one query per line;
            every line must contain ``parent_reqid=<id>``.
        :param logs1_file: path to the first reqans log.
        :param logs2_file: path to the second reqans log.
        :raises Exception: see ``_check_logs`` and ``_load_queries``.
        """
        self.queries = list(self._load_queries(queries_file))
        self.logs1 = load_reqans_log(logs1_file)
        self.logs2 = load_reqans_log(logs2_file)

        self._check_logs()
        self._filter_equal_log_recs()

    def total_diff_count(self):
        """Return the number of records ``iter_logs`` will yield."""
        # Union, not len(logs1): records that only exist in logs2 count too.
        return len(set(self.logs1).union(self.logs2))

    def iter_logs(self):
        """Yield ``(query, log1, log2)`` for every remaining record.

        Records come in the order of the queries file.  A side that has no
        record for the query is reported as an empty string.
        """
        for _, q, log1, log2 in self._iter_logs_internal():
            yield q, log1, log2

    def _iter_logs_internal(self):
        """Yield ``(parent_reqid, query, log1, log2)`` in query order."""
        for parent_reqid, q in self.queries:
            if parent_reqid not in self.logs1 and parent_reqid not in self.logs2:
                continue

            # .get() instead of indexing: the two logs are not guaranteed to
            # contain the same set of parent_reqids.
            log1 = self.logs1.get(parent_reqid, self._MISSING_LOG)
            log2 = self.logs2.get(parent_reqid, self._MISSING_LOG)

            yield parent_reqid, q, log1, log2

    def _filter_equal_log_recs(self):
        """Remove from both logs the records that compare equal."""
        # Deleting while iterating is safe here: the generator walks
        # self.queries (a list) and only does lookups in the dicts.
        for parent_reqid, q, log1, log2 in self._iter_logs_internal():
            if log1 == log2:
                # pop() with a default: one side may be missing when the
                # other side's record is itself empty.
                self.logs1.pop(parent_reqid, None)
                self.logs2.pop(parent_reqid, None)

    def _check_logs(self):
        """Validate the queries against the logs.

        :raises Exception: if two queries share a parent_reqid, or if a log
            contains a parent_reqid that no query has.
        """
        logs_reqids = set(self.logs1.keys()).union(self.logs2.keys())
        queries_reqids = set([r for r, _ in self.queries])

        if len(queries_reqids) != len(self.queries):
            raise Exception('Not unique queries_reqids')

        diff_set = logs_reqids.difference(queries_reqids)
        if diff_set:
            raise Exception("parent_reqid's in logs not in queries: %s" % list(diff_set))

    def _load_queries(self, file_name):
        """Yield ``(parent_reqid, query_line)`` for each line of the file.

        :param file_name: path to a UTF-8 encoded file, one query per line.
        :raises Exception: if a line has no ``parent_reqid=<id>`` part.
        """
        # Read everything first so the file is closed before parsing starts.
        with codecs.open(file_name, 'r', 'utf-8') as f:
            queries = [line.strip() for line in f]

        r = re.compile(r'parent_reqid=(\w+)')
        for q in queries:
            m = r.search(q)
            if not m:
                raise Exception('Not found parent_reqid in query %s.' % q)
            yield m.group(1), q


try:
    _zip_longest = itertools.izip_longest  # Python 2 name
except AttributeError:
    _zip_longest = itertools.zip_longest  # Python 3 name


def _iter_side_parts(opcodes, line, skip_tag, start_idx, end_idx):
    """Yield ``(tag, text)`` for the opcodes that cover one side of a diff.

    ``skip_tag`` is the opcode tag that has no text on this side ('insert'
    for the left line, 'delete' for the right one); ``start_idx`` and
    ``end_idx`` select this side's slice bounds inside an opcode tuple.
    """
    for op in opcodes:
        if op[0] == skip_tag:
            continue
        yield op[0], line[op[start_idx]:op[end_idx]]


def _iter_line_pairs(lines1, lines2):
    """Yield ``(left_parts, right_parts)`` for each pair of lines.

    Lines are paired by position; the shorter list is padded with ''.
    """
    for line1, line2 in _zip_longest(lines1, lines2, fillvalue=''):
        matcher = difflib.SequenceMatcher(a=line1, b=line2)
        # Opcodes are (tag, i1, i2, j1, j2): i* index line1, j* index line2.
        opcodes = list(matcher.get_opcodes())
        yield (_iter_side_parts(opcodes, line1, 'insert', 1, 2),
               _iter_side_parts(opcodes, line2, 'delete', 3, 4))


def iter_template_params(logloader):
    """Yield ``(query, line_pairs)`` for every record of ``logloader``.

    :param logloader: object whose ``iter_logs()`` yields
        ``(query, log1, log2)`` with both logs being text.
    :return: generator of ``(query, line_pairs)``.  ``line_pairs`` is itself
        a generator of ``(left_parts, right_parts)``, one item per line, and
        each ``*_parts`` is a generator of ``(tag, text)`` where ``tag`` is a
        ``difflib`` opcode tag ('equal', 'replace', 'delete' or 'insert').
        All the nested generators can be consumed only once.
    """
    for q, log1, log2 in logloader.iter_logs():
        yield q, _iter_line_pairs(log1.splitlines(), log2.splitlines())


def get_template(template_filename):
    """Load a Jinja2 template located in the same directory as this module.

    :param template_filename: name of the template file inside the
        directory that contains this source file.
    :return: the object returned by ``jinja2.Environment.get_template``.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    loader = jinja2.FileSystemLoader(module_dir)
    return jinja2.Environment(loader=loader).get_template(template_filename)


def fill_tempates(logloader, out_dir):
    """Render the diff records into paginated HTML files inside ``out_dir``.

    The first page is written to ``index.html`` and page N (N > 1) to
    ``reqans_diff_N.html``.  Every page holds at most ``PER_PAGE`` records.
    At least one page is always written, even when there are no records.

    :param logloader: object providing ``total_diff_count()`` and the
        interface expected by ``iter_template_params``.
    :param out_dir: existing directory the pages are written to.
    """
    PER_PAGE = 100

    template = get_template('reqans_diff.jinja2')

    # Ceiling division: an exact multiple of PER_PAGE must not produce a
    # trailing empty page.  max() keeps index.html when there are no diffs.
    total = logloader.total_diff_count()
    page_count = max(1, (total + PER_PAGE - 1) // PER_PAGE)

    # One shared iterator: each page consumes the next PER_PAGE records.
    itmpl_params = iter_template_params(logloader)
    for page_num in range(1, page_count + 1):
        if page_num > 1:
            file_name = 'reqans_diff_%s.html' % page_num
        else:
            file_name = 'index.html'

        file_name = os.path.join(out_dir, file_name)

        text = template.render(
            page_num=page_num,
            max_page=page_count,
            diff_records=itertools.islice(itmpl_params, PER_PAGE),
        )

        # Binary mode: the payload is already encoded to UTF-8 bytes.
        with open(file_name, 'wb') as f:
            f.write(text.encode('utf-8'))


# Module-level alias for the task class defined in this file.
# NOTE(review): presumably the name the Sandbox task loader looks up -- confirm.
__Task__ = CompareGeosearchReqAnsLogs
