# -*- coding: utf-8 -*-

import fnmatch
import logging
import os
import subprocess as sp
import shutil
from os.path import join as pj
from collections import namedtuple, defaultdict

from sandbox import sdk2
import sandbox.common.types.misc as ctm
from sandbox.projects.common.BaseCompareYaMakeOutputsTask import BaseCompareYaMakeOutputsTask
try:
    import futures
except:
    from concurrent import futures

# HTML page returned by JupiterCompareIntegrationTestOutput.compare().
# Placeholders: {link} - optional line with a link to the detailed diff
# resource, {diff_description} - plain-text summary of the differences.
# The stylesheet link carries rel="stylesheet" (without it browsers ignore
# the <link>), and the wrapper uses the real Bootstrap class "container".
TEMPLATE = '''<html>
<head>
    <link rel="stylesheet" type="text/css" href="//yandex.st/bootstrap/3.3.6/css/bootstrap.min.css">
</head>
<body>
    <div class="container">
        <div class="col-md-4">
            {link}
            <pre>{diff_description}</pre>
        </div>
    </div>
</body>
</html>
'''


class JupiterDiff(sdk2.Resource):
    '''
        Set of files with the integration test diff
    '''
    auto_backup = True

class JupiterCompareIntegrationTestOutput(BaseCompareYaMakeOutputsTask):
    '''
    Compare two outputs of the Jupiter integration test
    '''
    # Listing of one side of a shard directory: `files` is the set of relative
    # paths of non-hidden files, `dumps` is the set of names inside the
    # DUMPS_DIRNAME subdirectory (see get_shard_inwards).
    ShardInwards = namedtuple('ShardInwards', ['files', 'dumps'])
    # A left/right pair of things to compare plus a display name.
    PairToCompare = namedtuple('PairToCompare', ['left', 'right', 'name'])

    # Name of the per-shard subdirectory that holds dump files.
    DUMPS_DIRNAME = '.dumps'
    # Suffix appended to generated diff file names.
    DIFF_POSTFIX = '.diff'
    # Directories (relative to testing_out_stuff_dir) inspected by
    # compare_shards; 'index' is treated there as a single shard.
    TIER_DIRS = ('Platinum', 'WebFreshTier', 'shards', 'index')
    # Directory (relative to the current working directory) where diff files
    # are written.
    JUPITER_DIFF_DIR = 'jupiter_diff'
    # these files can contain different data, so their content is not compared
    FILES_DIFF_CHECK_WHITELIST = (
        'indexcounts.info',
        'indexpanther.info',
        'indexpanther.offroad.info',
        'stamp.TAG',

        # SaaS indexer files depend on build path or current timestamp
        'files_info',
        'indexddk.rty',
        'indexfullarc.base.fat',
        'indexfullarc.base.part.0',
        'indexfullarc.full.fat',
        'indexfullarc.full.part.0',
        'indexfullarc.merge.fat',
        'indexfullarc.merge.part.0',
        'shard.conf',
        'timestamp',
        '*.doc_url_index_p.core.dmp',
        '*.idx_print.core.dmp',
    )
    # these files can appear and disappear, so they are ignored by diff_on_sets
    TEMP_FILES_CHECK_WHITELIST = (
        '*.doc_url_index_p.core.dmp',
        '*.idx_print.core.dmp',
    )
    # Path of the table diff tool relative to testing_out_stuff_dir
    # (compare_table_dumps takes it from the right build output).
    DIFF_TOOL_PATH = 'diff_tools/difftool'
    # Directory with table dumps, relative to testing_out_stuff_dir.
    TABLE_DUMPS_DIR = 'table_dumps'
    # fnmatch masks of files in testing_out_stuff_dir compared by
    # compare_custom_files.
    CUSTOM_FILES_DIFF_CHECK = (
        'file_sink.*',
    )
    # Size of the thread pool used to run `diff` processes concurrently.
    DIFF_WORKER_COUNT = 32

    # Set by compare() before any comparison runs.
    left_build_output = None
    right_build_output = None
    testing_out_stuff_dir = None
    diff_tool_path = None
    # Maps a report section title to the list of items in that section.
    # NOTE(review): this is a class-level mutable object, so it is shared by
    # all instances unless an instance rebinds the attribute.
    diff_description = defaultdict(list)

    def get_pair_of_path(self, path):
        return [
            pj(side, path) if os.path.exists(pj(side, path)) else None
            for side in (self.left_build_output, self.right_build_output)
        ]

    @classmethod
    def run_diff_with_options(cls, pair_to_compare, output_path=None, options=('-u',)):
        options = list(options)
        if output_path:
            output_file = open(output_path, 'w')
            diff_kwargs = {'stdout': output_file, 'stderr': output_file}
        else:
            options.append('-q')
            diff_kwargs = {}

        diff_cmd_list = ['diff', pair_to_compare.left, pair_to_compare.right] + options
        logging.debug('Running\n:%s', ' '.join(diff_cmd_list))
        returncode = sp.call(
            diff_cmd_list,
            **diff_kwargs
        )

        if output_path:
            output_file.close()

            if returncode == 0:
                os.remove(output_path)

        return returncode

    @classmethod
    def diff_on_sets(cls, left_set, right_set):
        if left_set == right_set:
            return []

        output = set()

        for item in sorted(left_set - right_set):
            if cls.match_by_masks(item, cls.TEMP_FILES_CHECK_WHITELIST):
                continue
            output.add('-{}'.format(item))

        for item in sorted(right_set - left_set):
            if cls.match_by_masks(item, cls.TEMP_FILES_CHECK_WHITELIST):
                continue
            output.add('+{}'.format(item))

        return sorted(list(output))

    def get_shard_inwards(self, shard_dir_path):
        shard_inwards_list = []
        for shard_dir in self.get_pair_of_path(shard_dir_path):
            shard_files = []
            for root, dirs, files in os.walk(shard_dir):
                for filename in files:
                    file = os.path.relpath(pj(root, filename), shard_dir)
                    assert '..' not in file
                    if ('/' + file).find('/.') == -1:
                        shard_files.append(file)
            shard_inwards = self.ShardInwards(
                files=set(shard_files),
                dumps=set(os.listdir(pj(shard_dir, self.DUMPS_DIRNAME)))
            )
            shard_inwards_list.append(shard_inwards)

        return self.PairToCompare(*shard_inwards_list, name=os.path.basename(shard_dir_path))

    @staticmethod
    def match_by_masks(filename, masks):
        for mask in masks:
            if fnmatch.fnmatch(filename, mask):
                return True
        return False

    def compare_index_files(self, shard_dir_path, shards_inwards):
        self.diff_description['Index files set changes'] = self.diff_on_sets(
            shards_inwards.left.files,
            shards_inwards.right.files
        )

        with futures.ThreadPoolExecutor(max_workers=self.DIFF_WORKER_COUNT) as pool:
            diff_futures = {}  # future -> (shard_name, filename)
            for filename in shards_inwards.left.files & shards_inwards.right.files:
                if self.match_by_masks(filename, self.FILES_DIFF_CHECK_WHITELIST):
                    continue

                pair_of_files = self.PairToCompare(*self.get_pair_of_path(pj(shard_dir_path, filename)), name=filename)
                diff_futures[pool.submit(self.run_diff_with_options, pair_of_files)] = (shards_inwards.name, filename)

            for future in futures.as_completed(diff_futures):
                if future.result():
                    shard_name, filename = diff_futures[future]
                    self.diff_description['{} index files differ'.format(shard_name)].append(filename)

    def compare_dump_files(self, shard_dir_path, shards_inwards):
        self.diff_description['Index dumps set changes'] = self.diff_on_sets(
            shards_inwards.left.dumps,
            shards_inwards.right.dumps
        )

        with futures.ThreadPoolExecutor(max_workers=self.DIFF_WORKER_COUNT) as pool:
            diff_futures = {}  # future -> (shard_name, filename)
            for filename in shards_inwards.left.dumps & shards_inwards.right.dumps:
                pair_of_dumps = self.PairToCompare(
                    *self.get_pair_of_path(pj(shard_dir_path, self.DUMPS_DIRNAME, filename)),
                    name=filename
                )
                output_path = pj(self.JUPITER_DIFF_DIR, shards_inwards.name + '_' + filename + self.DIFF_POSTFIX)
                diff_futures[pool.submit(self.run_diff_with_options, pair_of_dumps, output_path)] = (shards_inwards.name, filename)

            for future in futures.as_completed(diff_futures):
                if future.result():
                    shard_name, filename = diff_futures[future]
                    self.diff_description['{} index dumps differ'.format(shard_name)].append(filename)

    def compare_shards(self):
        left_tiers = set()
        right_tiers = set()
        for tier in self.TIER_DIRS:
            tier_dir_path = pj(self.testing_out_stuff_dir, tier)
            pair_of_tier_dirs = self.get_pair_of_path(tier_dir_path)
            for tier_dir, tier_set in zip(pair_of_tier_dirs, (left_tiers, right_tiers)):
                if tier_dir:
                    tier_set.add(os.path.basename(tier_dir))

        self.diff_description['Tier set changes'] = self.diff_on_sets(left_tiers, right_tiers)

        for tier in sorted(left_tiers & right_tiers):
            tier_dir_path = pj(self.testing_out_stuff_dir, tier)
            single_shard = tier == 'index'
            if single_shard:
                left_shards = right_shards = set([tier])
            else:
                left_shards, right_shards = (
                    set(os.listdir(tier_dir))
                    for tier_dir in self.get_pair_of_path(tier_dir_path)
                )

            self.diff_description['Shard set changes'] = self.diff_on_sets(left_shards, right_shards)

            for shard in sorted(left_shards & right_shards):
                shard_dir_path = tier_dir_path if single_shard else pj(tier_dir_path, shard)
                shard_inwards_pair = self.get_shard_inwards(shard_dir_path)

                self.compare_index_files(shard_dir_path, shard_inwards_pair)
                self.compare_dump_files(shard_dir_path, shard_inwards_pair)

    @staticmethod
    def get_files_relative_paths(directory):
        if directory is None:
            return
        directory_absolute_path = os.path.abspath(directory)
        for dir_path, _, filenames in os.walk(directory_absolute_path):
            for filename in filenames:
                yield pj(dir_path[len(directory_absolute_path):].strip('/'), filename)

    def compare_table_dumps(self):
        """
        We expect that in self.TABLE_DUMPS_DIR table dumps are stored
        with paths relative to mr_prefix and tables data format is yson.
        Diff tool needs table id argument which consists of component name
        and table name. For example local path can be:
        selectionrank/20170703-072325/0/url_to_shard
        Component name would be "selectionrank", and table name would be "url_to_shard".
        """
        tables_dir = pj(self.testing_out_stuff_dir, self.TABLE_DUMPS_DIR)

        left_tables, right_tables = [
            set(self.get_files_relative_paths(table_dumps_dir))
            for table_dumps_dir in self.get_pair_of_path(tables_dir)]

        self.diff_description['Tables set changes'] = self.diff_on_sets(left_tables, right_tables)

        for file_relative_path in sorted(left_tables & right_tables):
            if os.path.dirname(file_relative_path) == '':
                continue

            filename = file_relative_path.replace('/', '__')
            filename_parts = file_relative_path.split(os.sep)
            output_path = pj(self.JUPITER_DIFF_DIR, filename + self.DIFF_POSTFIX + '.html')
            with open(output_path, 'w') as output_file:
                diff_cmd_list = (
                    [
                        pj(self.right_build_output, self.testing_out_stuff_dir, self.DIFF_TOOL_PATH),
                        'FilesDiff',
                        '--table-id', '{}:{}'.format(filename_parts[0], filename_parts[-1])
                    ] +
                    self.get_pair_of_path(pj(tables_dir, file_relative_path)))
                logging.debug('Running:\n%s', ' '.join(diff_cmd_list))
                returncode = sp.call(
                    diff_cmd_list,
                    stdout=output_file,
                    stderr=output_file
                )

            if returncode == 0:
                os.remove(output_path)
            elif returncode == 1:
                self.diff_description['Tables differ'].append(file_relative_path)
            else:
                raise sp.CalledProcessError(
                    returncode=returncode,
                    cmd=' '.join(diff_cmd_list),
                    output=open(output_path).read())

    def get_custom_files_set(self, dir_path):
        result = set()
        for filename in os.listdir(dir_path):
            if self.match_by_masks(filename, self.CUSTOM_FILES_DIFF_CHECK):
                result.add(filename)
        return result

    def compare_custom_files(self):
        left_files, right_files = [
            self.get_custom_files_set(dir_path)
            for dir_path in self.get_pair_of_path(self.testing_out_stuff_dir)
        ]

        self.diff_description['Custom files set changes'] = self.diff_on_sets(left_files, right_files)

        for filename in sorted(left_files & right_files):
            pair_of_files = self.PairToCompare(
                *self.get_pair_of_path(pj(self.testing_out_stuff_dir, filename)),
                name=filename
            )
            if self.run_diff_with_options(
                    pair_of_files,
                    output_path=pj(self.JUPITER_DIFF_DIR, filename + self.DIFF_POSTFIX)
            ):
                self.diff_description['Custom files differ'].append(filename)

    def compare(self, build_output1, build_output2, testing_out_stuff_dir):
        self.left_build_output = build_output1
        self.right_build_output = build_output2
        self.testing_out_stuff_dir = testing_out_stuff_dir
        self.diff_tool_path = pj(testing_out_stuff_dir, 'diff_tools', 'difftool')

        if os.path.exists(self.JUPITER_DIFF_DIR):
            shutil.rmtree(self.JUPITER_DIFF_DIR)

        os.mkdir(self.JUPITER_DIFF_DIR)

        self.compare_shards()
        self.compare_table_dumps()
        self.compare_custom_files()

        diff_description_text = ''
        link = ''

        if os.listdir(self.JUPITER_DIFF_DIR):
            diff_resource = JupiterDiff(
                self,
                'Jupiter integration test diff',
                self.JUPITER_DIFF_DIR,
                ttl=14
            )
            diff_data = sdk2.ResourceData(diff_resource)
            diff_data.path.mkdir(0o755, parents=True, exist_ok=True)
            diff_data.ready()
            link = 'If you strive to see detailed diff follow this link <a href="{0}">{0}</a>\n'.format(
                self.server.resource[diff_resource.id][:]["http"]["proxy"])

        for diff_section, item_list in sorted(self.diff_description.iteritems()):
            if not item_list:
                continue

            diff_description_text += '{}:\n'.format(diff_section)
            for item in sorted(item_list):
                diff_description_text += '\t{}\n'.format(item)
            diff_description_text += '\n'
        if diff_description_text:
            self.Context.diff_description = self.diff_description
            return TEMPLATE.format(link=link, diff_description=diff_description_text)
        else:
            return None

    @property
    def footer(self):
        """Render the stored diff description as HTML tables.

        Reads `self.Context.diff_description` (stored by compare()) and
        produces one table per non-empty section, with sections and items in
        sorted order.

        :return: HTML string, or None when no description is stored or every
            section is empty
        """
        diff_description = self.Context.diff_description
        output_html = ''
        if diff_description != ctm.NotExists:
            tables = []
            # items() works on both Python 2 and Python 3; sorting keeps the
            # section order stable and consistent with the text report.
            for diff_section, item_list in sorted(diff_description.items()):
                if not item_list:
                    continue

                rows = ''.join('<tr><td>{}</td></tr>'.format(item) for item in sorted(item_list))
                # The header row is a properly closed <tr> with the title in a <th>.
                tables.append(
                    '<table><thead><tr><th><h4>{}:</h4></th></tr></thead><tbody>{}</tbody></table><br>'.format(
                        diff_section, rows
                    )
                )
            output_html = ''.join(tables)

        return output_html or None
