# -*- coding: utf-8 -*-

import os
import filecmp
import collections

from sandbox.projects import resource_types
from sandbox.sandboxsdk import parameters as sp
from sandbox.sandboxsdk.paths import make_folder
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.projects.common import error_handlers as eh


class QlossOutput1Parameter(sp.ResourceSelector):
    """
        First qloss output to compare (resource selector).
    """
    # Only resources of this type may be selected.
    resource_type = resource_types.QLOSS_OUTPUT
    required = True

    # Key under which the selected resource id is read from the task context.
    name = 'qloss_output1_resource_id'
    description = 'qloss output 1'


class QlossOutput2Parameter(sp.ResourceSelector):
    """
        Second qloss output to compare (resource selector).
    """
    # Only resources of this type may be selected.
    resource_type = resource_types.QLOSS_OUTPUT
    required = True

    # Key under which the selected resource id is read from the task context.
    name = 'qloss_output2_resource_id'
    description = 'qloss output 2'


class CompareQlossOutputs(SandboxTask):
    """
        Compares two qloss outputs.

        Both outputs are read as tab-separated text; only the first four
        columns of every line take part in the comparison.  The result is
        written into a QLOSS_COMPARE_RESULT resource directory as either
        ``equal.txt`` or ``diff.txt`` and mirrored into the task context
        (``output_diff``, ``changed_qurls``).
    """
    type = 'COMPARE_QLOSS_OUTPUTS'

    input_parameters = [
        QlossOutput1Parameter,
        QlossOutput2Parameter,
    ]

    # Layout of a qloss output line, as used by this task.
    # NOTE(review): the first two columns are treated as the "qurl" key and
    # the fourth as its status; presumably (query, url, ..., status) - confirm.
    _COMPARED_COLUMNS = 4
    _QURL_COLUMNS = 2
    _STATUS_COLUMN = 3
    # Placeholder status for a qurl that differs on one side only.
    _NO_STATUS = 'NONE'

    def on_enqueue(self):
        """Create the output resource up front and remember its id in the context."""
        SandboxTask.on_enqueue(self)
        resource = self._create_resource(self.descr, 'compare_result', resource_types.QLOSS_COMPARE_RESULT)
        self.ctx['out_resource_id'] = resource.id

    def _get_data(self, task):
        """
        Return the input archive resource recorded by ``task``.

        :param task: task object whose context may hold 'qloss_archive_resource_id'
        :return: the resource read via ``_read_resource``, or None when the
                 task context has no such key
        """
        archive_key = 'qloss_archive_resource_id'
        if archive_key not in task.ctx:
            return None
        return self._read_resource(task.ctx[archive_key])

    @classmethod
    def _load_records(cls, path):
        """
        Read a qloss output file.

        :param path: path of a tab-separated text file
        :return: set of tuples holding the first four columns of every line
        """
        with open(path, 'r') as f:
            return set(
                tuple(line.rstrip('\n').split('\t')[:cls._COMPARED_COLUMNS])
                for line in f
            )

    @classmethod
    def _group_statuses(cls, records):
        """
        Group statuses of the given records by their qurl key.

        :param records: iterable of column tuples as produced by ``_load_records``
        :return: dict mapping the first two columns (as a tuple) to the list
                 of statuses found for them
        """
        grouped = collections.defaultdict(list)
        # Sorted iteration keeps the status order independent of set hashing.
        for flds in sorted(records):
            # Lines with fewer than four columns (e.g. a stray blank line)
            # have no status column; report an empty status instead of
            # failing with IndexError.
            status = flds[cls._STATUS_COLUMN] if len(flds) > cls._STATUS_COLUMN else ''
            grouped[tuple(flds[:cls._QURL_COLUMNS])].append(status)
        return grouped

    @classmethod
    def _build_changed_qurls(cls, only_in_first, only_in_second):
        """
        Build the per-qurl difference rows.

        :param only_in_first: records present only in the first output
        :param only_in_second: records present only in the second output
        :return: list of rows ``qurl columns + [statuses in 1, statuses in 2]``,
                 sorted by qurl; a side without the qurl is reported as 'NONE'
        """
        statuses1 = cls._group_statuses(only_in_first)
        statuses2 = cls._group_statuses(only_in_second)
        missing = [cls._NO_STATUS]

        rows = []
        for qurl in sorted(set(statuses1) | set(statuses2)):
            rows.append(list(qurl) + [
                ','.join(statuses1.get(qurl, missing)),
                ','.join(statuses2.get(qurl, missing)),
            ])
        return rows

    def on_execute(self):
        """
        Compare the two selected qloss outputs and publish the result.

        Fails the task (via ``eh.verify`` / ``eh.check_failed``) when an input
        resource has the wrong type or when the producing tasks recorded
        different input archives.
        """
        out1_res = self._read_resource(self.ctx['qloss_output1_resource_id'])
        out2_res = self._read_resource(self.ctx['qloss_output2_resource_id'])

        for output in [out1_res, out2_res]:
            eh.verify(output.type.name == 'QLOSS_OUTPUT', 'Invalid input resource type: ' + output.type.name)

        task1 = channel.sandbox.get_task(out1_res.task_id)
        data1_resource = self._get_data(task1)
        task2 = channel.sandbox.get_task(out2_res.task_id)
        data2_resource = self._get_data(task2)

        # shallow=False forces a byte-by-byte comparison; the default shallow
        # mode treats files with equal size and mtime as identical without
        # reading them.
        if data1_resource and data2_resource and \
           not filecmp.cmp(data1_resource.abs_path(), data2_resource.abs_path(), shallow=False):
            self.ctx['not_compatible_data'] = True
            eh.check_failed("qloss outputs cannot be compared because they are based on different input data")

        output1 = self._load_records(out1_res.abs_path())
        output2 = self._load_records(out2_res.abs_path())

        equal_lines = output1 & output2
        # Size of the symmetric difference of the two record sets.
        diff_lines_number = len(output1) + len(output2) - 2 * len(equal_lines)

        resource = self._read_resource(self.ctx['out_resource_id'], sync=False)
        make_folder(resource.abs_path())

        changed_qurls = []
        if diff_lines_number == 0:
            with open(os.path.join(resource.abs_path(), 'equal.txt'), 'w') as equal_file:
                equal_file.write('qloss results are equal\n')
        else:
            changed_qurls = self._build_changed_qurls(output1 - equal_lines, output2 - equal_lines)
            with open(os.path.join(resource.abs_path(), 'diff.txt'), 'w') as diff_file:
                diff_file.writelines('\t'.join(row) + '\n' for row in changed_qurls)

        # this is needed for the Test Environment tests
        self.ctx['changed_qurls'] = changed_qurls
        self.ctx['output_diff'] = diff_lines_number

        resource.mark_ready()

    def get_short_task_result(self):
        """
        Return a one-word summary of the comparison.

        :return: "diff" or "equal" for a completed task, None when the task is
                 not completed or no comparison result was stored
        """
        if not self.is_completed():
            return None
        # 'output_diff' is only stored at the end of on_execute; avoid a
        # KeyError if the task ended before reaching that point.
        diff_lines_number = self.ctx.get('output_diff')
        if diff_lines_number is None:
            return None
        return "diff" if diff_lines_number > 0 else "equal"


# Module-level alias for the task class defined in this file.
# NOTE(review): presumably the name the Sandbox task loader looks up - confirm.
__Task__ = CompareQlossOutputs
