import logging
import time
from collections import defaultdict, OrderedDict
import json
import re
import posixpath
import os
import textwrap

import requests
from requests.exceptions import RequestException
from sandbox import sdk2
from sandbox.sandboxsdk import environments
from sandbox.projects.yabs.qa.utils.general import makedirs_exist


# Module-level logger named after this module.
logger = logging.getLogger(__name__)


# Passed as ``input_row_limit`` to ``select_rows`` in ``check_on_cluster``.
YT_INPUT_ROW_LIMIT = 10 ** 8
# Used as the YT proxy ``heavy_request_timeout`` in ``check_on_cluster``;
# presumably milliseconds (i.e. 10 minutes) -- TODO confirm against YT client docs.
YT_HEAVY_REQUEST_TIMEOUT = 10 * 60 * 1000
# Multiplier converting the hour-based task threshold to seconds.
SECONDS_IN_HOUR = 3600
# Default for the ``yt_token_vault_name`` task parameter.
DEFAULT_YT_TOKEN_VAULT_NAME = 'yabs-cs-sb-yt-token'
# Default for the ``yt_table_path`` task parameter.
DEFAULT_YT_TABLE_PATH = '//home/yabs-cs/monitoring/BinBasesStructureHash'
# Endpoint that ``send_raw_event_to_juggler`` POSTs events to.
JUGGLER_PUSH_API_URL = 'http://juggler-push.search.yandex.net/events'
# Directory (relative path) where ``on_execute`` asks ``make_report`` to put report files.
REPORT_DIR = 'report_dir'

# Query template filled in by ``get_select_rows_query`` with ``table_path``,
# ``lower_timestamp`` and ``other_conditions``. For every (BaseName, Path)
# pair it yields the number of distinct hashes and the number of rows seen
# since ``lower_timestamp``.
# NOTE(review): the text starts at the column list without a SELECT keyword;
# presumably this is what YT ``select_rows`` expects -- confirm.
SELECT_ROWS_QUERY = """
    BaseName,
    Path,
    CARDINALITY(Hash) AS UniqueHashCount,
    SUM(1) AS Count
FROM [{table_path}]
WHERE
    Timestamp >= {lower_timestamp} {other_conditions}
GROUP BY
    BaseName, Path
"""


def seq_to_csv(seq):
    """
    Render the items of ``seq`` as a comma-separated list of double-quoted values.

    Each item is formatted with ``str.format`` and wrapped in double quotes;
    no escaping is applied. An empty sequence gives an empty string.

    :param seq: Iterable of values to render
    :return: String such as ``"a","b"``
    :rtype: str
    """
    quoted_items = ['"{}"'.format(item) for item in seq]
    return ','.join(quoted_items)


def get_select_rows_query(table_path, lower_timestamp, deploy_ids=None):
    """
    Fill in ``SELECT_ROWS_QUERY`` for the given table and time window.

    :param str table_path: Path of the table to query
    :param int lower_timestamp: Only rows with ``Timestamp >= lower_timestamp`` are selected
    :param deploy_ids: Optional sequence of deploy ids; when non-empty an
        ``AND DeployID IN (...)`` condition is appended to the WHERE clause
    :return: Query text
    :rtype: str
    """
    if deploy_ids:
        other_conditions = 'AND DeployID IN ({})'.format(seq_to_csv(deploy_ids))
    else:
        # No extra filter: the placeholder is replaced with an empty string.
        other_conditions = ''
    return SELECT_ROWS_QUERY.format(
        table_path=table_path,
        lower_timestamp=lower_timestamp,
        other_conditions=other_conditions
    )


def send_raw_event_to_juggler(raw_event, timeout=30):
    """
    Send raw event to juggler
    https://wiki.yandex-team.ru/sm/juggler/objects/#raw-event

    The event is wrapped into a ``{'source': 'sandbox', 'events': [...]}``
    payload and POSTed as JSON to ``JUGGLER_PUSH_API_URL``. Any request
    failure (including a timeout or a non-2xx status) or a non-JSON response
    body is logged with its traceback and reported as ``None``; the function
    never raises for these cases.

    :param dict raw_event: Raw event
        {
            "host": host,
            "service": service,
            "status": status,
            "description": description
        }
    :param timeout: Timeout in seconds passed to ``requests.post``; without
        it an unresponsive endpoint could block the task indefinitely
    :return: JSON response as dict or None
    :rtype: dict or NoneType
    """
    payload = {
        'source': 'sandbox',
        'events': [raw_event]
    }
    try:
        resp = requests.post(JUGGLER_PUSH_API_URL, json=payload, timeout=timeout)
        resp.raise_for_status()
        # ``resp.json()`` raises ValueError on a body that is not valid JSON.
        return resp.json()
    except (RequestException, ValueError):
        logger.exception('Failed to send raw event to Juggler')
        return None


class YabsBasesStructuresHashesCheckReport(sdk2.Resource):
    """
    Resource with a report on unmodified base structures.

    Created by ``YabsBasesStructuresHashesCheck.make_report`` over the report
    directory, into which that method writes ``index.html`` (aggregated HTML
    report) and ``unmodified_bases_structures.json`` (full per-base dump).
    """


class YabsBasesStructuresHashesCheck(sdk2.Task):
    """
    Check Yabs bases structures hashes.
    """
    class Requirements(sdk2.Requirements):
        # Resources requested for the task.
        # NOTE(review): ``ram`` is presumably in MiB (8 GiB) -- confirm against Sandbox docs.
        cores = 1
        ram = 8192
        # Pip packages made available to the task: ``yandex-yt`` (imported as
        # ``yt.wrapper`` in ``check_on_cluster``), its YSON bindings package,
        # and ``jinja2`` (imported in ``get_report_html``).
        environments = (
            environments.PipEnvironment('yandex-yt', use_wheel=True),
            environments.PipEnvironment('yandex-yt-yson-bindings-skynet', use_wheel=True),
            environments.PipEnvironment('jinja2'),
        )

        # Empty override of the base ``Caches`` declaration.
        # NOTE(review): presumably states that the task needs no shared
        # caches -- confirm against Sandbox docs.
        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Task.Parameters):
        # Length of the look-back window in hours; ``on_execute`` converts it
        # to seconds to compute the lower ``Timestamp`` bound of the query.
        hash_unmodified_time_threshold = sdk2.parameters.Integer(
            'Hash unmodified time threshold in hours', default=10, required=True)
        # Path prefixes that ``aggregate_rows`` drops from the aggregated
        # report and from the unmodified path count.
        bases_path_blacklist = sdk2.parameters.List('Blacklist of base path prefixes')
        # Values for the ``DeployID IN (...)`` filter of the query.
        cs_deploy_ids = sdk2.parameters.List(
            'CS deploy id list', default=['yabscs_1', 'yabscs_2'], required=True)
        # ``host`` / ``service`` / ``tags`` fields of the Juggler event built
        # in ``send_status_to_juggler``.
        juggler_host = sdk2.parameters.String('Juggler host', required=True)
        juggler_service = sdk2.parameters.String('Juggler service', required=True)
        juggler_tags = sdk2.parameters.List('List of juggler tags')
        # Table substituted into the query's FROM clause.
        yt_table_path = sdk2.parameters.String(
            'Table path', default=DEFAULT_YT_TABLE_PATH, required=True)
        # Each entry is used as the ``proxy`` of a YT client; the query is run
        # once per cluster.
        yt_clusters = sdk2.parameters.List(
            'YT clusters list', default=['hahn', 'arnold'], required=True)
        # Name of the Vault item that the ``yt_token`` property reads.
        yt_token_vault_name = sdk2.parameters.String(
            'Vault name for YT token',
            default=DEFAULT_YT_TOKEN_VAULT_NAME)

    def on_execute(self):
        """
        Task entry point.

        Builds the query for the configured look-back window, runs it on every
        configured YT cluster, aggregates the unmodified paths, writes the
        report resource and pushes the resulting status to Juggler. A failed
        Juggler push is only logged as an error.
        """
        window_seconds = \
            self.Parameters.hash_unmodified_time_threshold * SECONDS_IN_HOUR
        since_timestamp = int(time.time()) - window_seconds
        query = get_select_rows_query(
            table_path=self.Parameters.yt_table_path,
            lower_timestamp=since_timestamp,
            deploy_ids=self.Parameters.cs_deploy_ids
        )

        # Filled in by check_on_cluster, one cluster at a time. Shape:
        # {
        #    'db1': {'path1': ['hahn']},            -> unmodified on hahn
        #    'db2': {'path2': ['hahn', 'arnold']},  -> unmodified on hahn and arnold
        #    'db3': {}                              -> OK
        # }
        unmodified_paths_by_base = {}
        for cluster in self.Parameters.yt_clusters:
            self.check_on_cluster(cluster, query, unmodified_paths_by_base)

        dumped_paths = json.dumps(unmodified_paths_by_base, indent=4, sort_keys=True)
        logger.debug('Unmodified paths by base: %s', dumped_paths)

        path_count, aggregated_rows = self.aggregate_rows(unmodified_paths_by_base)
        report_url = self.make_report(REPORT_DIR, aggregated_rows, unmodified_paths_by_base)

        if self.send_status_to_juggler(report_url, path_count) is None:
            logger.error('Failed to send Juggler event')

    def aggregate_rows(self, unmodified_paths_by_base):
        aggregated = defaultdict(set)
        path_regexp = re.compile(r'\[\d+\]')
        base_tag_regexp = re.compile(r'\_?\d+$')
        for base, paths_dict in unmodified_paths_by_base.iteritems():
            paths = set()
            # path, cluster_list in loop
            for path, _ in paths_dict.iteritems():
                # bases.linear_model_index.factor[1].raw_hash_compact.Data ->
                # bases.linear_model_index.factor[100500].raw_hash_compact.Data ->
                # -> bases.linear_model_index.factor.raw_hash_compact.Data
                paths.add(path_regexp.sub('', path))
            # st004 -> st
            # dblm_04 -> dblm
            base_tag = base_tag_regexp.sub('', base)
            aggregated[base_tag] |= paths

        aggregated_ordered = OrderedDict(
            sorted(aggregated.iteritems(), key=lambda t: t[0])
        )
        path_count = 0
        bases_path_blacklist = tuple(self.Parameters.bases_path_blacklist)
        for base_tag, paths_set in aggregated_ordered.iteritems():
            paths = [p for p in paths_set if not p.startswith(bases_path_blacklist)]
            path_count += len(paths)
            aggregated_ordered[base_tag] = sorted(paths)
        return path_count, aggregated_ordered

    def check_on_cluster(self, cluster, query, unmodified_paths_by_base):
        """
        Run ``query`` on one YT cluster and record unmodified paths.

        Every base returned by the query gets an entry in
        ``unmodified_paths_by_base`` (mutated in place). ``cluster`` is
        appended to the list of a path only when that path had exactly one
        distinct hash over more than one row.

        :param str cluster: YT proxy to query
        :param str query: Query text for ``select_rows``
        :param dict unmodified_paths_by_base: ``{base: {path: [cluster, ...]}}`` accumulator
        """
        # Imported here rather than at module level; the package is listed in
        # the task's pip environments.
        import yt.wrapper as yt

        client_config = {
            'proxy': {
                'heavy_request_timeout': YT_HEAVY_REQUEST_TIMEOUT,
            },
        }
        yt_client = yt.YtClient(proxy=cluster, token=self.yt_token, config=client_config)
        rows = yt_client.select_rows(query, input_row_limit=YT_INPUT_ROW_LIMIT)
        for row in rows:
            base = row['BaseName']
            path = row['Path']
            # Register the base even if none of its paths turns out unmodified.
            if base not in unmodified_paths_by_base:
                unmodified_paths_by_base[base] = defaultdict(list)

            hash_never_changed = row['UniqueHashCount'] == 1 and row['Count'] > 1
            if hash_never_changed:
                unmodified_paths_by_base[base][path].append(cluster)

    def send_status_to_juggler(self, report_url, unmodified_path_count):
        """
        Push the check result to Juggler.

        The event is ``WARN`` with a description linking to the report when at
        least one unmodified path was found, otherwise ``OK``.

        :param report_url: HTML report url used in the warning description
        :param int unmodified_path_count: Total count of unmodified paths
        :return: Result of ``send_raw_event_to_juggler`` (JSON response or None)
        """
        if unmodified_path_count > 0:
            status = 'WARN'
            description = self.get_warning_description(
                report_url, unmodified_path_count
            )
        else:
            status = 'OK'
            description = 'All structures up-to-date'

        juggler_event = {
            'host': self.Parameters.juggler_host,
            'service': self.Parameters.juggler_service,
            'tags': self.Parameters.juggler_tags,
            'status': status,
            'description': description
        }

        dumped_event = json.dumps(juggler_event, indent=4, sort_keys=True)
        logger.debug('Juggler event: %s', dumped_event)
        return send_raw_event_to_juggler(juggler_event)

    @property
    def yt_token(self):
        """
        YT token read from Vault under the name given by the
        ``yt_token_vault_name`` parameter (looked up on every access).
        """
        vault_name = self.Parameters.yt_token_vault_name
        return sdk2.Vault.data(vault_name)

    def get_warning_description(self, report_url, unmodified_paths_count):
        """
        :param report_url str: HTML report url
        :param unmodified_paths_count int: Total count of unmodified paths
        :return: Juggler warning description
        :rtype: str or unicode
        """
        template = '{unmodified_paths_count} structures unmodified ' \
                   'for more than {time_threshold} hours. Report url: {report_url}'
        return template.format(
            unmodified_paths_count=unmodified_paths_count,
            time_threshold=self.Parameters.hash_unmodified_time_threshold,
            report_url=report_url
        )

    def make_report(self, report_dir, aggregated_rows, unmodified_paths_by_base):
        """
        Write the HTML and JSON reports and publish them as a resource.

        Creates ``report_dir`` and a ``YabsBasesStructuresHashesCheckReport``
        resource over it, then writes ``index.html`` (aggregated report) and
        ``unmodified_bases_structures.json`` (full dump) into the directory.
        The HTML url is also stored in the task context as
        ``short_report_text`` / ``short_report_link``.

        :param str report_dir: Directory for report files
        :param aggregated_rows: Mapping of base tag to unmodified paths for the HTML report
        :param dict unmodified_paths_by_base: Full per-base data dumped as JSON
        :return: Url of the HTML report built from the resource's ``http_proxy``
        """
        html_name = 'index.html'
        json_name = 'unmodified_bases_structures.json'

        makedirs_exist(report_dir)
        resource = YabsBasesStructuresHashesCheckReport(self, 'Report resource', report_dir)
        report_url = posixpath.join(resource.http_proxy, html_name)

        with open(os.path.join(report_dir, html_name), 'w') as html_file:
            # The HTML links to the JSON dump by its bare file name, since
            # both files live in the same directory.
            html_file.write(self.get_report_html(aggregated_rows, json_name))

        with open(os.path.join(report_dir, json_name), 'w') as json_file:
            json.dump(unmodified_paths_by_base, json_file, indent=4, sort_keys=True)

        self.Context.short_report_text = 'Link: {}'.format(report_url)
        self.Context.short_report_link = report_url
        return report_url

    def get_report_html(self, aggregated_rows, json_report_filename):
        """
        Render the aggregated HTML report.

        The page contains one table row per unmodified path (class ``fail``)
        and one initially hidden row per base tag with no unmodified paths
        (class ``ok``); a checkbox toggles the visibility of the ``ok`` rows.

        Values substituted into the page are HTML-escaped (autoescaping is
        enabled), so base names or paths containing markup characters cannot
        break the page.

        :param aggregated_rows: Mapping of base tag to a list of unmodified
            paths; iterated with ``.items()`` in its own order
        :param str json_report_filename: Link target for the full JSON report
        :return: Rendered HTML text
        """
        # Imported here rather than at module level; the package is listed in
        # the task's pip environments.
        import jinja2

        jinja_environment = jinja2.Environment(
            autoescape=True,
            lstrip_blocks=True,
            trim_blocks=True
        )

        template_text = textwrap.dedent('''\
            <html>
            <head>
            <style>
                #container {
                    width: 1060px;
                    margin: 20px auto;
                }
                .flat-table {
                    display: block;
                    font-family: sans-serif;
                    -webkit-font-smoothing: antialiased;
                    font-size: 105%;
                    overflow: auto;
                    width: auto;
                }
                th {
                    background-color: #576e73;
                    color: white;
                    font-weight: normal;
                    padding: 10px 20px;
                    text-align: center;
                }
                td {
                    font-family: monospace;
                    font-weight: 800;
                    color: #3a3a3a;
                    padding: 8px 15px;
                }
                .hidden {
                    display: none;
                }
                .ok {
                    background: #e1fdef;
                }
                .fail {
                    background: #ffe5d9;
                }
            </style>
            </head>
            <body>
                <div id="container">
                    <h2>YABS bases unmodified structure hashes aggregated report</h2>
                    <p>To see full info follow <a href="{{ json_report_filename }}" target="_blank">this link</a></p>
                    <p>
                        <input type="checkbox" checked id="hide-checkbox">
                        <label for="hide-checkbox">Hide up-to-date bases</label>
                    </p>
                    <table class="flat-table">
                        <tr>
                            <th>Base</th>
                            <th>Path</th>
                            <th>Delta, hours</th>
                        </tr>
                        {% for base, paths in aggregated_rows.items() %}
                            {% if not paths %}
                            <tr class="ok hidden">
                                <td>{{ base }}</td>
                                <td colspan="2">All structures up-to-date</td>
                            </tr>
                            {% else %}
                                {% for path in paths %}
                                    <tr class="fail">
                                        <td>{{ base }}</td>
                                        <td>{{ path }}</td>
                                        <td>&gt;{{ time_threshold }}</td>
                                    </tr>
                                {% endfor %}
                            {% endif %}
                        {% endfor %}
                    </table>
                </div>
                <script>
                    (function() {
                        const okRows = document.querySelectorAll('.ok');
                        const checkbox = document.getElementById('hide-checkbox');
                        checkbox.addEventListener('change', (e) => {
                            okRows.forEach((row) => {
                                row.classList.toggle('hidden', e.target.checked);
                            });
                        });
                    })();
                </script>
            </body>
            </html>
        ''')
        template = jinja_environment.from_string(template_text)
        return template.render(
            aggregated_rows=aggregated_rows,
            json_report_filename=json_report_filename,
            time_threshold=self.Parameters.hash_unmodified_time_threshold,
        )
