# -*- coding: utf-8 -*-

import logging
import os
import sandbox.sandboxsdk.task as sdk_task
import sandbox.sandboxsdk.parameters as sdk_parameters
import sandbox.sandboxsdk.paths as sdk_paths

import sandbox.projects.resource_types as resource_types
import sandbox.projects.common.mapreduce_stored_tables as mrst
import sandbox.sandboxsdk.errors as sdk_errors
from sandbox.sandboxsdk.process import run_process


class ProgramParameter(sdk_parameters.ResourceSelector):
    """
        Resource selector for the program used by the comparison task.

        The task syncs the selected resource and exposes its path to the
        shell scripts as ``$bike``.
    """
    resource_type = resource_types.IMAGES_MR_INDEX_METADOC
    description = 'program resource'
    name = 'program_resid'


class InputATablesParameter(sdk_parameters.ResourceSelector):
    """
        Resource selector for the first ("A") archive of tables to compare.
    """
    resource_type = resource_types.USERDATA_TABLES_ARCHIVE
    description = 'input tables A'
    name = 'input_tablesA'


class InputBTablesParameter(sdk_parameters.ResourceSelector):
    """
        Resource selector for the second ("B") archive of tables to compare.
    """
    resource_type = resource_types.USERDATA_TABLES_ARCHIVE
    description = 'input tables B'
    name = 'input_tablesB'


class ResultResourceDescriptionParameter(sdk_parameters.SandboxStringParameter):
    """
        Free-form string parameter labelled 'Resource name for diff'.

        NOTE(review): the task code visible in this file lists the parameter
        but never reads its value -- confirm whether it is still needed.
    """
    description = 'Resource name for diff'
    name = 'resource_description'


class ImagesTestMrindexCompareTables(sdk_task.SandboxTask):
    """
        Task that compares the results of IMAGES_TEST_MRINDEX tasks.

        Two table archives ("A" and "B") are dumped to text table by table
        with the program resource and then diffed. Dumps are stored in
        ``dump_dir``, diffs in ``diff_dir``; both directories are published
        as resources. ``ctx['has_diff']`` is set to True when any table
        differs or exists in only one of the archives.
    """
    type = 'IMAGES_TEST_MRINDEX_COMPARE_TABLES'
    input_parameters = [
        ProgramParameter,
        InputATablesParameter,
        InputBTablesParameter,
        ResultResourceDescriptionParameter,
    ]
    server_runner = None

    # Dumper used when no suffix from _MODES_FOR_SUFFIXES matches. It carries
    # the ``$bike`` prefix like every other dumper entry: without it the shell
    # tries to run a non-existent command, both dumps come out empty and the
    # table is silently reported as having no diff.
    _DEFAULT_MODE = '$bike DumpNIndex_TMetaDocPB'

    # (table name suffix, shell command that turns the table into text).
    # ``$bike`` is expanded by the shell to the synced program resource.
    # NOTE(review): the 'unknown' rows look like a catalogue of dumpers that
    # have no table suffix assigned yet -- confirm; only the first of them can
    # ever be selected.
    _MODES_FOR_SUFFIXES = [
        ('metadoc',                     '$bike DumpNIndex_TMetaDocPB'),
        ('erf',                         '$bike DumpNIndex_TErfPB'),
        ('url',                         '$bike DumpNImageDB_TImageUrlPB'),
        ('unknown',                     '$bike DumpNIndex_TAnnPB'),
        ('unknown',                     '$bike DumpNIndex_TErfPB'),
        ('unknown',                     '$bike DumpNIndex_TGroupsPB'),
        ('unknown',                     '$bike DumpNIndex_TImgDlErfPB'),
        ('inputdoc.links',              '$bike DumpNIndex_TInputDocPB'),
        ('inputdoc.images.url',         '$bike DumpNIndex_TInputDocPB'),
        ('inputdoc.images',             '$bike DumpNIndex_TInputDocPB'),
        ('unknown',                     '$bike DumpNIndex_TLinkPB'),
        ('unknown',                     '$bike DumpNIndex_TPornoPB'),
        ('unknown',                     '$bike DumpNIndex_TRegErfPB'),
        ('unknown',                     '$bike DumpNIndex_TSemidupSignaturesPB'),
        ('unknown',                     '$bike DumpNIndex_TThumbIdentitiesPB'),
        ('unknown',                     '$bike DumpNIndex_TThumbSubgroupInfoPB'),
        ('unknown',                     '$bike DumpNIndex_TUrlSeqPB'),
        ('portion',                     '$bike DumpNIndex_TPortionPB'),
        ('selectionrank',               '$bike DumpNIndex_TSRDocumentPB'),
        ('input:images.rankingentry',   '$bike DumpNPlanner_TRankingEntryListPB'),
        ('ranked.image.page',           '$bike DumpNPlanner_TRankingEntryPB'),
        ('unknown',                     '$bike DumpNPlanner_TRankingFactorsPB'),
        ('unknown',                     '$bike DumpNPlanner_TRankingIdentityPB'),
        ('images.url.key',              '$bike DumpHex'),
        ('images.key',                  '$bike DumpHex'),
        ('images.documents.key',        '$bike DumpHex'),
        ('imageurl2document',           '$bike DumpHex'),
        ('selecteddocuments',           '$bike DumpHex'),
        ('statistics.definedocument',   '$bike DumpHex'),
        ('result:userdoc',              '$bike DumpHex'),
        ('result:queryinfo',            '$bike DumpHex'),
        ('factors:showsclicks',         '$bike DumpHex'),
        ('parse.mapped',                '$bike DumpNKiwiWorm_TRecord'),
        ('parse.export',                '$bike DumpNKiwiWorm_TRecord -K'),
        ('links.0',                     '$bike DumpNLinkDB_TLinkBasePB'),
        ('links.1',                     '$bike DumpNLinkDB_TLinkBasePB'),
        ('links.2',                     '$bike DumpNLinkDB_TLinkBasePB'),
        ('links.3',                     '$bike DumpNLinkDB_TLinkBasePB'),
        ('0:rankingentry',              '$bike DumpNLinkDB_TRankingEntryPB'),
        ('full_table_list',             'sort'),
        ('cbir:shard',                  '$bike DumpNIndex_TCbirShardDataPB'),
        ('cbir:shard.pics',             '$bike DumpHex'),
        ('cbir:shard.pics.ranged',      '$bike DumpNIndex_TCbirShardDataPB'),
        ('url.0',                       '$bike DumpNImageDB_TImageUrlPB'),
        ('url.1',                       '$bike DumpNImageDB_TImageUrlPB'),
        ('url.2',                       '$bike DumpNImageDB_TImageUrlPB'),
        ('url.delta',                   '$bike DumpNImageDB_TImageUrlPB'),
        ('url.deleted',                 '$bike DumpNImageDB_TImageUrlPB'),
        ('url.redirects',               '$bike DumpNImageDB_TImageUrlPB'),
        ('url.sources',                 '$bike DumpHex'),
        ('url.delta',                   '$bike DumpNImageDB_TImageUrlPB'),
    ]

    def on_execute(self):
        """Task entry point; delegates to :meth:`run`."""
        self.run()

    @staticmethod
    def tabledesc2dict(desc):
        """
            Index table descriptions by table name.

            :param desc: iterable of mappings, each having a 'tablename' key
            :return: dict mapping 'tablename' value -> description; when a
                     name occurs more than once the last description wins
        """
        return dict((t['tablename'], t) for t in desc)

    @classmethod
    def _select_dump_mode(cls, name):
        """
            Pick the dump command for a table name.

            A table matches a suffix when its name ends with the suffix
            itself, with ``<suffix>.lenval.gz`` or with
            ``<suffix>.sorted.lenval.gz``. Among all matching entries the
            longest suffix wins (the earlier entry on ties), so a specific
            entry such as 'inputdoc.images.url' is not shadowed by a generic
            one such as 'url'.

            :param name: table name
            :return: (matched suffix or None, dump command)
        """
        best_suffix = None
        best_mode = cls._DEFAULT_MODE
        for suffix, mode in cls._MODES_FOR_SUFFIXES:
            candidates = (
                suffix + '.sorted.lenval.gz',
                suffix + '.lenval.gz',
                suffix,
            )
            if not name.endswith(candidates):
                continue
            if best_suffix is None or len(suffix) > len(best_suffix):
                best_suffix, best_mode = suffix, mode
        return best_suffix, best_mode

    def run(self):
        """
            Compare every table of archives A and B and publish the results.

            Tables present in only one archive are recorded in
            ``diff_dir/changed_files`` and count as a difference. The overall
            verdict is stored in ``ctx['has_diff']``.
        """
        self.program = self.sync_resource(self.ctx[ProgramParameter.name])
        self.dump_dir = sdk_paths.make_folder('dump_dir')
        self.diff_dir = sdk_paths.make_folder('diff_dir')

        # Presumably keeps both directories non-empty so they can always be
        # registered as resources -- TODO confirm.
        for placeholder in ('diff_dir/.placeholder', 'dump_dir/.placeholder'):
            with open(placeholder, 'w') as f:
                f.write("placeholder\n")

        mr_syncer = mrst.MapreduceStoredTables(None, None)
        descs_a = self.tabledesc2dict(mr_syncer.resource2tabledescs(self, self.ctx[InputATablesParameter.name]))
        descs_b = self.tabledesc2dict(mr_syncer.resource2tabledescs(self, self.ctx[InputBTablesParameter.name]))

        has_diff = False
        for t in sorted(set(descs_a) | set(descs_b)):
            missing_from = None
            if t not in descs_a:
                missing_from = 'A'
            elif t not in descs_b:
                missing_from = 'B'

            if missing_from is not None:
                message = "table %s not in set %s" % (t, missing_from)
                logging.info(message)
                with open('diff_dir/changed_files', 'a') as f:
                    f.write(message + "\n")
                has_diff = True
                continue

            logging.info("comparing %s", t)
            if self.compare_tables(t, descs_a[t], descs_b[t]):
                has_diff = True

        self.ctx['has_diff'] = has_diff

        dump_resource = self.create_resource("Dir with dumps",
                                             self.dump_dir,
                                             resource_types.OTHER_RESOURCE)
        self.mark_resource_ready(dump_resource.id)

        diff_resource = self.create_resource("Dir with diffs",
                                             "diff_dir",
                                             resource_types.IMAGES_MR_INDEX_DIFF)
        self.mark_resource_ready(diff_resource.id)

    def compare_tables(self, name, desc_a, desc_b):
        """
            Dump one table from both archives and diff the dumps.

            Writes ``dump_dir/{A,B}-<name>.txt``, ``diff_dir/<name>.diff``
            (unified), ``diff_dir/<name>.sbs.diff`` (side by side) and, on a
            best-effort basis, ``diff_dir/<name>.diff.html``. In file names
            every '/' of the table name is replaced with ':'.

            :param name: table name, used to choose the dumper and file names
            :param desc_a: description of the table in archive A; its
                           'fullpath' entry is fed to ``zcat``
            :param desc_b: same for archive B
            :return: True when the unified diff is non-empty
        """
        logging.info("Selecting dumper for table %s", name)
        suffix, mode = self._select_dump_mode(name)
        if suffix is not None:
            logging.info("name %s matched suffix %s", name, suffix)
        logging.info("mode %s", mode)
        diff_name = name.replace('/', ':')

        # NOTE(review): paths and table names are interpolated into the shell
        # script unquoted; this assumes they contain no shell metacharacters.
        # The trailing `true` hides the non-zero exit status that `diff`
        # returns when the inputs differ.
        run_process('''
            PS4='+\\t> '
            set -x
            bike={bike}
            zcat {A} | {mode} > dump_dir/A-{name}.txt
            zcat {B} | {mode} > dump_dir/B-{name}.txt

            diff -U3 dump_dir/A-{name}.txt dump_dir/B-{name}.txt > diff_dir/{name}.diff
            diff --width=300 -y --suppress-common-lines dump_dir/A-{name}.txt dump_dir/B-{name}.txt > diff_dir/{name}.sbs.diff
            true
            '''.format(bike=self.program,
                       mode=mode,
                       A=desc_a['fullpath'],
                       B=desc_b['fullpath'],
                       name=diff_name),
                    shell=True,
                    log_prefix='diffs')

        # The HTML diff runs in a separate process with a timeout, so a slow
        # difflib run on large dumps cannot stall the task. It is best-effort:
        # the verdict below depends only on the unified diff.
        try:
            run_process('''
            PS4='+\\t> '
            python -c '
import difflib
hd = difflib.HtmlDiff()
fa = open("dump_dir/A-{name}.txt")
fb = open("dump_dir/B-{name}.txt")
fo = open("diff_dir/{name}.diff.html", "w")
fo.write(hd.make_file(list(fa), list(fb), context=True))
fo.close()
                '
                '''.format(name=diff_name),
                        shell=True,
                        timeout=60,
                        log_prefix='diffs')
        except sdk_errors.SandboxSubprocessTimeoutError:
            logging.warning("HTML diff for table %s timed out, skipping it", name)

        has_diff = (os.path.getsize('diff_dir/{name}.diff'.format(name=diff_name)) > 0)
        if has_diff:
            logging.info("has diff")
        else:
            logging.info("no diff")
        return has_diff


# Module-level alias for the task class.
# NOTE(review): presumably the hook by which Sandbox discovers the task
# implemented in this module -- confirm against the SDK.
__Task__ = ImagesTestMrindexCompareTables
