import datetime
import gzip
import json
import logging
import os
import posixpath
import re
import sys
import textwrap

from sandbox import common
from sandbox import sdk2
from sandbox.projects.common.yabs.server.requestlog import iterate
from sandbox.projects.common.yabs.server.util.general import try_get_from_vault
from sandbox.projects.yabs.nanpu import BS_NANPU_DIFF_RESULT, BS_NANPU_DIFF_REPORT
from sandbox.projects.yabs.qa.resource_types import YABS_REPORT_RESOURCE, YABS_SERVER_REQUEST_LOG_GZ
from sandbox.projects.yabs.qa.tasks.BsNanpuSimpleShootTask import BsNanpuResponseDump
from sandbox.projects.yabs.qa.tasks.YabsServerB2BFuncShootCmp import YabsServerB2BFuncShootCmp
from sandbox.projects.yabs.qa.tasks.YabsServerB2BFuncShootCmp.report import CmpReport
from sandbox.projects.yabs.qa.utils.resource import sync_resource
from sandbox.sandboxsdk import environments
from sandbox.sdk2.helpers import subprocess as sp


from processing import process_results
from default_painting import (
    DEFAULT_BASE64_PREFIXES,
    DEFAULT_BODY_SUBSTITUTES,
    DEFAULT_HEADERS_PAINTING,
    DEFAULT_LOGS_LIST,
    DEFAULT_JSON_PARSING_FIELDS_RE,
    DEFAULT_JSON_KEYS_TO_DELETE,
    DEFAULT_ENTITY_SUBSTITUTES,
    DEFAULT_BAD_REQUEST_IDS,
    DEFAULT_LOG_FIELDS_TO_IGNORE,
)


REPORT_DIR = 'report_dir'


class BsNanpuDiffTask(sdk2.Task):
    '''Diff task for Nanpu shoot results'''

    # Aggregates that create_report exposes as attributes while building the
    # report: the per-request entries plus the distinct values used for the
    # report's 'search' section.
    # NOTE: these are mutable containers defined on the class itself, so the
    # very same objects are shared by every instance unless an instance
    # rebinds the attribute.
    diff_results = []
    pre_codes = set()
    test_codes = set()
    handlers = set()
    tags = set()

    class Requirements(sdk2.Task.Requirements):
        # Pip packages declared for the task: the YT client and its YSON
        # bindings. on_execute imports `yt.wrapper` lazily inside the method,
        # presumably because these packages only become importable once the
        # environments have been prepared -- TODO confirm.
        environments = (
            environments.PipEnvironment('yandex-yt'),
            environments.PipEnvironment('yandex-yt-yson-bindings'),
            environments.PipEnvironment('yandex-yt-yson-bindings-skynet'),
        )

    class Context(sdk2.Task.Context):
        # Defaults to True; create_report replaces it with
        # bool(num_of_requests_with_diff), and on_execute sets it to True when
        # the comparison subprocess fails.
        has_diff = True
        # Number of processed requests whose result has 'has_diff' set.
        num_of_requests_with_diff = 0
        # Total number of results returned by process_results.
        num_of_requests = 0

    class Parameters(sdk2.Task.Parameters):
        description = 'CMP task for results of Nanpu shoots.'

        # Shoot result dumps to compare. on_execute reads the `yt_table_path`
        # attribute of each resource to locate the corresponding YT table.
        with sdk2.parameters.Group('Response data') as response_data:
            preshoot_result = sdk2.parameters.Resource('Preshoot result dump', resource_type=BsNanpuResponseDump)
            patched_shoot_result = sdk2.parameters.Resource(
                'Patched shoot result dump', resource_type=BsNanpuResponseDump
            )

        # template_resource is synced and handed to CmpReport.prepare in
        # on_execute; requestlog_resource is unpacked by unpack_request_log.
        with sdk2.parameters.Group('Other resources') as resources:
            template_resource = sdk2.parameters.Resource('Template resource', resource_type=YABS_REPORT_RESOURCE)
            requestlog_resource = sdk2.parameters.Resource(
                'Requestlog resource', resource_type=YABS_SERVER_REQUEST_LOG_GZ
            )

        # Comparison settings. Except for bad_requests_ids (passed to
        # process_results in create_report), each value is JSON-encoded and
        # passed as a command-line argument to main.py by on_execute.
        # NOTE(review): the two multi-part labels below are adjacent string
        # literals joined without a separating space ("comparison.You",
        # "json.If") -- confirm whether that is intended.
        with sdk2.parameters.Group('Diff settings') as diff_settings:
            headers_to_replace = sdk2.parameters.JSON('Headers painting', default=DEFAULT_HEADERS_PAINTING)
            body_substitutes = sdk2.parameters.JSON(
                'Substitutes applied to entity, logs, exts before json parsing', default=DEFAULT_BODY_SUBSTITUTES
            )
            base64_prefixes = sdk2.parameters.JSON(
                'Prefixes followed with base64 string in commas to decode in entity', default=DEFAULT_BASE64_PREFIXES
            )
            entity_substitutes = sdk2.parameters.JSON(
                'Substitutes applied to entity after all other processing right before comparison.'
                'You should use them, if you want to paint diff in base64 decoded parts, etc.',
                default=DEFAULT_ENTITY_SUBSTITUTES,
            )
            logs_to_compare = sdk2.parameters.List('List of logs to be compared', default=DEFAULT_LOGS_LIST)
            log_fields_to_ignore = sdk2.parameters.JSON('Log fields to ignore in comparison',
                                                        default=DEFAULT_LOG_FIELDS_TO_IGNORE)
            json_keys_to_delete = sdk2.parameters.JSON(
                'List of keys to be deleted from json entity during comparison', default=DEFAULT_JSON_KEYS_TO_DELETE
            )
            json_parsing_fields_re = sdk2.parameters.JSON(
                'List of additional entity regex to be parsed as json. Use lookbehind and lookahead to match only json.'
                'If json is string field in another json regex must catch " symbol',
                default=DEFAULT_JSON_PARSING_FIELDS_RE
            )
            bad_requests_ids = sdk2.parameters.List('Request IDs to ignore in comparison',
                                                    default=DEFAULT_BAD_REQUEST_IDS)

        # YT access and reduce-operation tuning. yt_ttl is used in on_execute
        # as a number of days when setting the diff table's expiration time.
        with sdk2.parameters.Group('YT settings') as yt_settings:
            yt_token = sdk2.parameters.String('YT token name in Sandbox vault', default='nanpu_tests_yt_token')
            yt_ttl = sdk2.parameters.Integer('TTL for YT table', default=7)
            yt_job_count = sdk2.parameters.Integer('YT job count for reduce operation', default=12)
            yt_data_size_per_job = sdk2.parameters.Integer('YT data size per job', default=32 * 1024 * 1024)

        # ttl is passed as `ttl` to the report resources created in
        # create_report; n_jobs is forwarded to process_results.
        with sdk2.parameters.Group('Common settings') as common_settings:
            ttl = sdk2.parameters.Integer('TTL for diff results', default=7)
            n_jobs = sdk2.parameters.Integer('Number of threads for report processing', default=8)

        # Output parameters: diff_table_path is filled in on_execute and
        # report in create_report.
        with sdk2.parameters.Output:
            diff_table_path = sdk2.parameters.String('YT path to table with diff')
            report = sdk2.parameters.Resource('Report resource', resource_type=BS_NANPU_DIFF_RESULT)

    @staticmethod
    def unpack_request_log(resource):
        '''Read a gzipped request log and index its requests by Nanpu request id.

        :param resource: request log resource; its data path is opened as a
            gzip file.
        :return: dict mapping the digits captured from the
            ``X-Yabs-Nanpu-Req-Id`` header (as a string) to the request text.
            A request that cannot be serialized with ``json.dumps`` is
            replaced by a placeholder message so the report can still be
            written as JSON. If several requests carry the same id, the last
            one wins.

        Requests without the ``X-Yabs-Nanpu-Req-Id`` header cannot be keyed
        and are skipped with a warning instead of aborting the whole report.
        '''
        path = str(sdk2.ResourceData(resource).path)
        with gzip.GzipFile(filename=path, mode='rb', mtime=0) as gz:
            requests = [req for req, _ in iterate(gz, '\n')]

        request_id_re = re.compile(r'X-Yabs-Nanpu-Req-Id: (\d*)')
        requests_by_id = {}
        skipped = 0
        for request in requests:
            match = request_id_re.search(request)
            if match is None:
                skipped += 1
                continue

            try:
                # Only a serializability probe: the encoded value is discarded.
                json.dumps(request)
            except (TypeError, ValueError):
                # UnicodeDecodeError is a ValueError subclass, so undecodable
                # byte strings end up here as well.
                request = 'Failed to parse request. Look for it in request-data-log resource.'

            requests_by_id[match.group(1)] = request

        if skipped:
            logging.warning('Skipped %d requests without X-Yabs-Nanpu-Req-Id header.', skipped)

        return requests_by_id

    def read_yt_table(self, table_path, key_column, columns=None):
        columns_to_read = [key_column] + columns if columns is not None else None
        result = {}
        for row in self.yt.read_table(self.yt.TablePath(table_path, columns=columns_to_read)):
            key_value = row[key_column]
            columns_value = {col: row[col] for col in columns} if columns is not None else row
            result[key_value] = columns_value
        return result

    def create_report(self, preshoot_result_path, patched_shoot_result_path, diff_table_path):
        '''Build the diff viewer report and publish it as task resources.

        Reads HTTP codes and URLs from both shoot tables and the whole diff
        table, combines them with the unpacked request log via
        ``process_results``, writes ``report.json`` into ``REPORT_DIR`` and
        ``report.html`` into the working directory, and registers both as
        resources. Also updates ``Context.num_of_requests``,
        ``Context.num_of_requests_with_diff`` and ``Context.has_diff``, sets
        the task info text and fills ``Parameters.report``.

        :param preshoot_result_path: YT path of the preshoot result table.
        :param patched_shoot_result_path: YT path of the patched shoot result table.
        :param diff_table_path: YT path of the table with computed diffs.
        '''
        logging.info('Reading pre shoot results for report.')
        pre_data = self.read_yt_table(preshoot_result_path, 'RequestID', ['HttpCode', 'Url'])

        logging.info('Reading test shoot results for report.')
        test_data = self.read_yt_table(patched_shoot_result_path, 'RequestID', ['HttpCode', 'Url'])

        logging.info('Reading diff table for report.')
        diff_data = self.read_yt_table(diff_table_path, 'RequestID')

        logging.info('Unpaccking request log.')
        requests = self.unpack_request_log(self.Parameters.requestlog_resource)
        bad_requests_ids = self.Parameters.bad_requests_ids
        n_jobs = self.Parameters.n_jobs

        logging.info('Processing results.')
        results = process_results(requests, pre_data, test_data, diff_data, bad_requests_ids, n_jobs, REPORT_DIR)

        # Aggregate into fresh local containers rather than mutating the
        # class-level ones in place: those are shared between instances, so
        # in-place updates would leak state from one report into another.
        diff_results = []
        pre_codes = set()
        test_codes = set()
        handlers = set()
        tags = set()
        failures = 0

        for result in results:
            if result['has_diff']:
                failures += 1

            pre_codes.add(result['pre_code'])
            test_codes.add(result['test_code'])
            handlers.add(result['handler'])
            tags.update(result['tags'])

            diff_results.append(
                {
                    'status': ('failed' if result['has_diff'] else 'passed'),
                    'search': {
                        'pre.code': result['pre_code'],
                        'test.code': result['test_code'],
                        'handler': result['handler'],
                        'tags': list(result['tags']),
                    },
                    'name': str(int(result['test_id'])),
                    'id': int(result['test_id']),
                    'diffLinesCount': 20,
                }
            )

        # Keep the aggregates reachable through the same attribute names, but
        # bound to this instance only.
        self.diff_results = diff_results
        self.pre_codes = pre_codes
        self.test_codes = test_codes
        self.handlers = handlers
        self.tags = tags

        # Assign the counters instead of incrementing them, so a value already
        # stored in the context is never added to.
        self.Context.num_of_requests = len(results)
        self.Context.num_of_requests_with_diff = failures

        metadata = {
            "Tests": len(diff_results),
            "Failures": failures,
            "Default header paint": json.dumps(DEFAULT_HEADERS_PAINTING),
            "Current header paint": json.dumps(self.Parameters.headers_to_replace),
            "Default user substitutes": json.dumps(DEFAULT_BODY_SUBSTITUTES),
            "Current user substitutes": json.dumps(self.Parameters.body_substitutes),
            "Default logs list": json.dumps(DEFAULT_LOGS_LIST),
            "Current logs list": json.dumps(self.Parameters.logs_to_compare),
            "Default log fields to ignore": json.dumps(DEFAULT_LOG_FIELDS_TO_IGNORE),
            "Current log fields to ignore": json.dumps(self.Parameters.log_fields_to_ignore),
            "Default json keys to delete": json.dumps(DEFAULT_JSON_KEYS_TO_DELETE),
            "Current json keys to delete": json.dumps(self.Parameters.json_keys_to_delete),
            "Default additional entity regex to parse": json.dumps(DEFAULT_JSON_PARSING_FIELDS_RE),
            "Current additional entity regex to parse": json.dumps(self.Parameters.json_parsing_fields_re),
        }

        report = {
            'search': {
                'pre.code': list(pre_codes),
                'test.code': list(test_codes),
                'handler': list(handlers),
                'tags': list(tags),
            },
            # dict.items() is available on both Python 2 and Python 3,
            # unlike iteritems().
            'meta': [{'title': title, 'value': value} for title, value in metadata.items()],
            'results': diff_results,
        }

        report_file_path = os.path.join(REPORT_DIR, 'report.json')
        with open(report_file_path, 'w') as report_file:
            json.dump(report, report_file)

        logging.info(
            'Finished. {} out of {} requests has diff.'.format(
                self.Context.num_of_requests_with_diff, self.Context.num_of_requests
            )
        )

        self.Context.has_diff = bool(self.Context.num_of_requests_with_diff)

        report_resource = BS_NANPU_DIFF_REPORT(self, 'Report resource', REPORT_DIR, ttl=self.Parameters.ttl)
        sdk2.ResourceData(report_resource).ready()

        report_url = YabsServerB2BFuncShootCmp.get_resource_url(self.id, REPORT_DIR, report_resource.id)
        diff_url = posixpath.join(report_url, 'index.html')

        execution_report = textwrap.dedent(
            '''\
            {failures} out of {total_tests} tests failed.
            <a href="{diff_url}" target="_blank">Diff viewer</a>
            <a href='{diff_table_url}' target='_blank'>Diff table</a>
        '''
        ).format(
            failures=self.Context.num_of_requests_with_diff,
            total_tests=self.Context.num_of_requests,
            diff_url=diff_url,
            diff_table_url='https://yt.yandex-team.ru/hahn/navigation?path={}'.format(diff_table_path),
        )

        self.set_info(execution_report, do_escape=False)

        # The same text is published as a standalone HTML resource, with
        # newlines turned into <br> so the lines stay separate in a browser.
        with open('report.html', 'w') as html_file:
            html_file.write(execution_report.replace('\n', '<br>'))

        self.Parameters.report = BS_NANPU_DIFF_RESULT(self, 'Report resource', 'report.html', ttl=self.Parameters.ttl)
        sdk2.ResourceData(self.Parameters.report).ready()
        logging.info('Created resource with YT path.')

    def on_execute(self):
        '''Run the comparison of two shoot result tables and publish the report.

        Steps:
          1. Configure the YT client (proxy ``hahn``, token from the vault)
             and make sure the results directory exists.
          2. Prepare ``REPORT_DIR`` from the report template resource.
          3. Sort both shoot tables by ``RequestID`` into temporary tables and
             run ``main.py`` (located next to this file) in a subprocess to
             write the diff table.
          4. Set the diff table's expiration time, store its path in the
             ``diff_table_path`` output parameter and call ``create_report``.

        :raises common.errors.TaskError: if the comparison subprocess exits
            with a non-zero code.
        '''
        import yt.wrapper as yt

        yt.config['proxy']['url'] = 'hahn'
        # Resolve the token once and reuse it for both the in-process client
        # and the subprocess environment.
        yt_token = try_get_from_vault(self, self.Parameters.yt_token)
        yt.config['token'] = yt_token
        self.yt = yt

        results_directory = '//home/yabs-nanpu-sandbox-tests/diff_tasks_results'
        if not yt.exists(results_directory):
            logging.info('Creating directory for results in YT: {}'.format(results_directory))
            yt.mkdir(results_directory, recursive=True)

        preshoot_result_path = self.Parameters.preshoot_result.yt_table_path
        logging.info('Preshoot results path: {}'.format(preshoot_result_path))

        patched_shoot_result_path = self.Parameters.patched_shoot_result.yt_table_path
        logging.info('Patched shoot results path: {}'.format(patched_shoot_result_path))

        diff_path = yt.ypath_join(results_directory, '{task_id}_shoot_diff'.format(task_id=self.id))
        logging.info('Diff path: {}'.format(diff_path))

        if not os.path.exists(REPORT_DIR):
            os.mkdir(REPORT_DIR)
        template_path = sync_resource(resource=self.Parameters.template_resource, resource_type=YABS_REPORT_RESOURCE)
        CmpReport(REPORT_DIR).prepare(template_path)

        with yt.TempTable() as preshoot_result_path_sorted, yt.TempTable() as patched_shoot_result_path_sorted:
            logging.info('Sorting preshoot results.')
            yt.run_sort(
                source_table=preshoot_result_path, destination_table=preshoot_result_path_sorted, sort_by=['RequestID']
            )
            logging.info('Finished sorting.')

            logging.info('Sorting patched shoot results.')
            yt.run_sort(
                source_table=patched_shoot_result_path,
                destination_table=patched_shoot_result_path_sorted,
                sort_by=['RequestID'],
            )
            logging.info('Finished sorting.')

            logging.info('Comparing results.')
            file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'main.py')
            with sdk2.helpers.ProcessLog(self, logger='subprocess') as pl:
                pl.logger.propagate = 1

                # Positional arguments for main.py; the order must stay in
                # sync with what that script expects.
                json_settings = [
                    json.dumps(value)
                    for value in (
                        self.Parameters.headers_to_replace,
                        self.Parameters.body_substitutes,
                        self.Parameters.base64_prefixes,
                        self.Parameters.entity_substitutes,
                        self.Parameters.logs_to_compare,
                        self.Parameters.log_fields_to_ignore,
                        self.Parameters.json_keys_to_delete,
                        self.Parameters.json_parsing_fields_re,
                    )
                ]
                command = [
                    sys.executable,
                    file_path,
                    # The two input tables are passed as the str() of a list.
                    str([preshoot_result_path_sorted, patched_shoot_result_path_sorted]),
                    diff_path,
                    str(self.Parameters.yt_job_count),
                    str(self.Parameters.yt_data_size_per_job),
                ] + json_settings

                return_code = sp.Popen(
                    command,
                    stdout=pl.stdout,
                    stderr=pl.stderr,
                    env=dict(os.environ, YT_TOKEN=yt_token),
                ).wait()
                if return_code != 0:
                    self.Context.has_diff = True
                    raise common.errors.TaskError('Failed subprocess with diff reduce task.')

            logging.info('Finished comparison.')

        logging.info('Setting expiration time for result table.')
        expiration_time = datetime.datetime.utcnow() + datetime.timedelta(days=self.Parameters.yt_ttl)
        # The timestamp is built from utcnow(), hence the explicit 'Z' suffix.
        expiration_value = expiration_time.isoformat() + 'Z'
        self.yt.set(diff_path + '/@expiration_time', expiration_value)
        # Log exactly the value that was written to the attribute.
        logging.info('Set expiration time for result table to {}.'.format(expiration_value))
        self.Parameters.diff_table_path = diff_path

        self.create_report(preshoot_result_path, patched_shoot_result_path, diff_path)
