#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys
import json
import requests
from future.standard_library import install_aliases
install_aliases()
from urllib.parse import urlencode
from copy import deepcopy
import argparse
import hashlib
import copy
import datetime
import logging
import color
from saas.tools.devops.lib23.saas_ctype import SaaSCtype
from tvmauth import TvmClient, TvmApiClientSettings
from saas.library.python.token_store import TokenStore

# Searchproxy TVM ids keyed by ctype.  handle_backends() looks the destination
# TVM id up here when --dst-tvm-id is not given and falls back to 2011468 for
# ctypes that are not listed.
SAAS_SP_TVMS = {
    "prestable": 2010902,
    "stable": 2011468,
    "stable_kv": 2011468,
    "stable_ydo": 2021358,
    "stable_ymusic": 2023466,
    "stable_drive": 2026981,
    "stable_hamster_turbo": 2026977,
    "stable_lavka": 2026042,
    "stable_market_idx": 2023672,
    "stable_zen": 2024715,
    "stable_turbo": 2021261,
    "stable_hamster_ydo": 2021135
}

# OAuth application credentials passed to library.python.oauth.get_token() in
# handle_yql() when neither a YQL nor a YT token could be obtained.
# NOTE(review): the application secret is committed in plain text - confirm
# this is acceptable for this kind of OAuth application, otherwise move it out
# of the source tree.
OAUTH_APP_ID = 'af399115126043619012f286d5fb91ef'
OAUTH_APP_SECRET = '019b80c90f0d494b89a8419339d395df'

# Ctypes for which handle_backends() already knows the service kind:
# True - key-value, False - full-text.  For every other ctype the caller has
# to pass --kv or --full-text explicitly.
IS_KV_SERVICE = dict(
    stable_kv=True,
    stable_market=True,
    stable=False,
)

# Not referenced anywhere in the visible part of this file.
BALANCER_TIMEOUT_PARAM = 'balancertimeout=5000'

# Whether stdout is a terminal; evaluated once, at import time.
# Not referenced anywhere in the visible part of this file.
IS_TTY = os.isatty(sys.stdout.fileno())

# Skeleton of the structure returned by main(): main() deep-copies it and the
# handle_* functions fill the 'backends' and 'ferryman' sections in.
# Every Ferryman table group starts from the same empty layout, so the groups
# are generated instead of being spelled out one by one (each group still gets
# its own dict and its own 'Documents' list).
response_template = {
    'backends': {
        'Kps': 0,
        'RtyServer': {
            'Backends': [],
            'Other errors': [],
            'Shard range': (),
        },
        'Url': u'',
    },
    'ferryman': {
        'Ferryman': {
            'Shard index': u'',
            'Tables': dict(
                {'Table_map': []},
                **{
                    table_group: {
                        'Documents': [],
                        'common_prefix': u'',
                        'longest_table_path': 0,
                    }
                    for table_group in ('final_states', 'input_proto', 'prev_state', 'state', 'ytpull')
                }
            ),
        },
        'Ferryman hosts': [],
        'Ferryman urls': {},
        'Ferryman urls exception': {},
    },
}


def colored_header(str):
    """Return the given text rendered as a section header: bold and blue."""
    header_attrs = ['bold']
    return color.colored(str, 'blue', attrs=header_attrs)


def parse_args(string_to_parse=None, get_defaults=False):
    """Build the command line parser and either parse arguments or list defaults.

    :param string_to_parse: list of arguments to parse; ``None`` makes argparse
        read ``sys.argv``.
    :param get_defaults: when true nothing is parsed; a dict mapping every
        registered destination (including argparse's own ``help``) to its
        default value is returned instead.
    :return: ``argparse.Namespace`` with the parsed options, or the dict of
        defaults when ``get_defaults`` is set.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    add('-s', '--service', required=True)
    add('-c', '--ctype', required=True)
    add('-u', '--url', required=True)
    add('-k', '--kps', type=int, default=0)
    add('--use-tvm', action='store_true', default=False,
        help='Включение tvm-функциональности для хождения в бекенды')
    add('--src-tvm-id', type=int, default=2011468,
        help='Укажите tvm-id своего сервиса, если получаете ошибки, связанные с tvm')
    add('--dst-tvm-id', type=int, default=None,
        help='(advanced) Ручной выбор searchproxy tvm-id. Если опция не указана, тулза постарается угадать (и это должно работать)')
    add('--yql-day-count', type=int, default=0, metavar='DAYS',
        help='Временной период, за который будет осуществлён grep по логам rtyserver')
    add('--yql-df-dst-table', metavar='PATH',
        help='Таблица для записи строк из docfetcher-лога rtyserver')
    add('--yql-index-dst-table', metavar='PATH',
        help='Таблица для записи строк из index-лога rtyserver')
    add('--ferryman-host', metavar='HOST:PORT',
        help='Хост Ferryman для использования вместо автоматически определяемого')
    add('--ferryman-show-not-found', action='store_true', default=False,
        help='Выводить все таблицы Ferryman ytpull, даже те, в которых не был найден документ')
    add('--ignore-dc', action='append', default=[], metavar='DATA_CENTER',
        help='Исключать датацентры при обходе бэкендов rtyserver')

    # The service kind can only be chosen by hand for ctypes that are not
    # listed in IS_KV_SERVICE (prestable); the two flags exclude each other.
    service_kind = parser.add_mutually_exclusive_group()
    service_kind.add_argument('--kv', action='store_true', dest='is_kv',
                              help='Возможность указать, что сервис является kv-сервисом (для --ctype=prestable)')
    service_kind.add_argument('--full-text', action='store_true', dest='is_full_text',
                              help='Возможность указать, что сервис является full-text-сервисом (для --ctype=prestable)')

    if get_defaults:
        return {action.dest: action.default for action in parser._actions}
    return parser.parse_args(string_to_parse)


class HostReponse(object):
    """One per-host entry of a searchproxy ``/broadcast/`` answer.

    Attributes set by the constructor:
      is_kv         - the flag passed by the caller;
      hostname      - value of the ``Host`` field;
      shard_range   - tuple of ints parsed from the ``Shards`` field ("min-max");
      error         - value of the ``Error`` field, ``None`` when absent;
      document      - first document of the first group of the first grouping,
                      ``None`` when the response carries no ``Grouping``;
      document_hash - md5 hex digest of the normalized document, ``None`` when
                      there is no document.
    """

    def __init__(self, json_data, is_kv):
        self.is_kv = is_kv
        self.hostname = json_data['Host']
        self.shard_range = tuple(int(bound) for bound in json_data['Shards'].split('-'))
        self.document = None
        self.document_hash = None

        self.error = json_data.get('Error')

        resp = json_data.get('Response')
        # An entry is expected to carry either a response or an error.
        assert resp or self.error

        if not resp:
            return

        if 'Grouping' in resp:
            self.document = resp['Grouping'][0]['Group'][0]['Document'][0]
        if self.document:
            self.document_hash = self.__calc_document_hash(self.document)

    def __calc_document_hash(self, doc):
        """Return the md5 hex digest of the normalized document dumped as sorted JSON."""
        canonical = json.dumps(self.__normalize_fields_for_digest(doc), sort_keys=True)
        return hashlib.md5(canonical.encode()).hexdigest()

    def __normalize_fields_for_digest(self, src_doc):
        """Return a deep copy of the document without the fields excluded from the digest.

        ``DocId`` is always removed, ``Relevance`` and ``SRelevance`` only for
        non-kv services, and the value of the ``_MimeType`` attribute is
        replaced with a fixed placeholder.  The source document is not changed.
        """
        normalized = copy.deepcopy(src_doc)

        dropped_fields = ['DocId'] if self.is_kv else ['DocId', 'Relevance', 'SRelevance']
        for field in dropped_fields:
            normalized.pop(field)

        related_attributes = normalized.get('ArchiveInfo', {}).get('GtaRelatedAttribute', [])
        for attribute in related_attributes:
            if attribute['Key'] == '_MimeType':
                attribute['Value'] = 'XXX'
        return normalized


def _fetch_broadcast(proxies, dc, cgi_params, headers):
    """Get the ``/broadcast/`` answer of one data center.

    Proxies are tried in order and the first one that can be reached is used.
    The request is sent exactly once per reachable proxy and already carries
    the final headers, so no separate "probe" request is needed.

    :param proxies: iterable of proxy descriptions; ``proxy['id']`` is used as host.
    :param dc: data center name, used for logging only.
    :param cgi_params: query parameters of the broadcast request.
    :param headers: HTTP headers to send (may carry the TVM service ticket).
    :return: decoded JSON answer, or ``{}`` when no proxy could be reached or
        the answer of the reached proxy is not valid JSON.
    """
    query = urlencode(cgi_params)
    for proxy in proxies:
        url = 'http://' + proxy['id'] + '/broadcast/?' + query
        try:
            resp = requests.get(url, headers=headers)
        except Exception as e:
            # Best effort: an unreachable proxy is skipped, the next one is tried.
            logging.debug("Proxy '%s' is not reachable: %s", proxy['id'], e)
            continue
        try:
            return resp.json()
        except Exception as e:
            logging.exception("Can't get response for '%s': %s", url, e)
            return {}

    logging.error("Can't reach any searchproxy in '%s'", dc)
    return {}


def handle_backends(opts, result):
    """Ask every backend for the document and fill ``result['backends']``.

    The searchproxies of the 'sas' and 'vla' data centers (minus
    ``opts.ignore_dc``) are queried through ``/broadcast/``; the per-host
    answers are wrapped into HostReponse objects and grouped by the shard
    range in which the document was found.

    :param opts: parsed options; ``ctype``, ``is_kv``, ``is_full_text``, ``url``,
        ``kps``, ``service``, ``use_tvm``, ``src_tvm_id``, ``dst_tvm_id`` and
        ``ignore_dc`` are read.
    :param result: the whole result structure; only ``result['backends']`` is updated.
    :return: the updated ``result['backends']`` dict.  ``RtyServer`` always has
        ``Shard range`` (``(-1, -1)`` when the document was not found) and,
        depending on the outcome, ``Backends`` / ``Other errors`` or ``Errors``.
    :raises RuntimeError: when the service kind is neither known for the ctype
        nor chosen with --kv / --full-text.
    """
    results = result['backends']
    is_kv = IS_KV_SERVICE.get(opts.ctype)
    if is_kv is None:
        if opts.is_kv:
            is_kv = True
        elif opts.is_full_text:
            is_kv = False
        else:
            raise RuntimeError("Can't determine if service is Kv or FullText. Use --kv or --full-text")

    if is_kv:
        cgi_params = {
            'meta_search': 'first_found',
            'normal_kv_report': 'da',
            'sp_meta_search': 'proxy',
            'text': opts.url,
            'sgkps': opts.kps,
        }
    else:
        cgi_params = {
            'text': 'url:' + opts.url,
            'kps': opts.kps,
            'hr': 'da',
            'ms': 'proto',
        }

    cgi_params.update({
        'skip-wizard': 1,
        'timeout': 9999999,
        'service': opts.service,
    })

    saas_ctype = SaaSCtype(opts.ctype)

    headers = {}
    if opts.use_tvm:
        logging.debug('trying to get tvm ticket')
        dst_tvm_id = opts.dst_tvm_id
        if dst_tvm_id is None:
            dst_tvm_id = SAAS_SP_TVMS.get(opts.ctype, 2011468)
        tvm_token = TokenStore.get_token_from_store_or_env('tvm_searchproxy_stable')
        tvm_settings = TvmApiClientSettings(self_secret=tvm_token, self_tvm_id=opts.src_tvm_id,
                                            dsts={'service': dst_tvm_id})
        tvm_client = TvmClient(settings=tvm_settings)
        ticket = tvm_client.get_service_ticket_for('service')
        headers['X-Ya-Service-Ticket'] = ticket

    all_responses = []
    for dc in ['sas', 'vla']:
        if dc in opts.ignore_dc:
            continue
        all_responses.append(
            _fetch_broadcast(saas_ctype.get_all_proxies_from_dc(dc), dc, cgi_params, headers))

    response_by_host = {}

    shards_with_document = set()
    has_errors = False
    for dc_resp in all_responses:
        if not isinstance(dc_resp, list):
            # A broadcast answer is a list of per-host entries; anything else
            # (the {} placeholder of a failed data center, an error object)
            # has no hosts to report.
            if dc_resp:
                logging.error("Unexpected broadcast answer: %r", dc_resp)
            continue
        for host_resp in dc_resp:
            host_data = HostReponse(host_resp, is_kv=is_kv)
            if host_data.error:
                has_errors = True
            if '@' not in host_data.hostname:  # Avoid endpointsets
                response_by_host[host_data.hostname] = host_data
                if host_data.document:
                    shards_with_document.add(host_data.shard_range)

    assert len(shards_with_document) <= 1  # FIXME

    results['Url'] = opts.url
    results['Kps'] = opts.kps
    results["RtyServer"] = {}

    found_shard_range = list(shards_with_document)[0] if shards_with_document else None
    if found_shard_range:
        results["RtyServer"]["Shard range"] = found_shard_range
    else:
        results["RtyServer"]["Shard range"] = (-1, -1)
        logging.error('Document not found')

    hosts_reps = sorted(response_by_host.values(), key=lambda h: h.hostname)

    if found_shard_range:
        results["RtyServer"]["Backends"] = [
            host_data for host_data in hosts_reps
            if host_data.shard_range == found_shard_range
        ]
        # Errors of hosts serving other shards are reported separately and
        # only when there are any.
        other_errors = [
            host_data for host_data in hosts_reps
            if host_data.shard_range != found_shard_range and host_data.error
        ]
        if other_errors:
            results["RtyServer"]["Other errors"] = other_errors

    elif has_errors:
        results["RtyServer"]["Errors"] = [host_data for host_data in hosts_reps if host_data.error]
    return results


def detect_ferryman_instances(service, ctype):
    """Return the ``host:port`` list of the Ferryman instances of a service.

    The deploy manager is asked for the SLA description of the service; the
    first entry of its ``ferrymans`` list is used as a nanny service name and
    the current instances of that nanny service are returned.

    :return: list of ``'container_hostname:port'`` strings; an empty list when
        the SLA description names no Ferryman.
    """
    sla_query = urlencode({
        'action': 'get',
        'service': service,
        'ctype': ctype,
        'service_type': 'rtyserver'
    })
    dm_info = requests.get('http://saas-dm.yandex.net/process_sla_description?' + sla_query).json()

    ferrymans = dm_info.get('ferrymans')
    if not ferrymans:
        return []
    nanny_service_name = ferrymans[0]

    instances_url = 'http://nanny.yandex-team.ru/v2/services/%s/current_state/instances/' % nanny_service_name
    nanny_instances = requests.get(instances_url).json()

    endpoints = []
    for instance in nanny_instances['result']:
        endpoints.append('%s:%d' % (instance['container_hostname'], instance['port']))
    return endpoints


def format_time(ts):
    """Format a unix timestamp as local time, ``YYYY-MM-DDTHH:MM:SS``."""
    moment = datetime.datetime.fromtimestamp(ts)
    return '{:%Y-%m-%dT%H:%M:%S}'.format(moment)


def handle_ferryman(opts, result):
    """Look the document up in Ferryman and fill ``result['ferryman']``.

    The Ferryman hosts are taken from ``opts.ferryman_host`` when it is set,
    otherwise from detect_ferryman_instances().  The hosts are asked one by
    one (``/find_url``) until a request succeeds; the JSON answer is turned
    into per-table field dicts which output_ferryman_results() renders.

    :param opts: parsed options; ``ferryman_host``, ``service``, ``ctype``,
        ``url``, ``kps`` and ``ferryman_show_not_found`` are read.
    :param result: the whole result structure; only ``result['ferryman']`` is updated.
    :return: the updated ``result['ferryman']`` dict; it is returned untouched
        when no Ferryman host is known.
    :raises requests.exceptions.RequestException: when no host answered successfully.
    :raises RuntimeError: when a table path does not start with the expected prefix.
    :raises ValueError: when a table group of the answer is neither a dict nor a list.
    """
    results = result['ferryman']

    if opts.ferryman_host:
        ferryman_hosts = [opts.ferryman_host]
    else:
        ferryman_hosts = detect_ferryman_instances(service=opts.service, ctype=opts.ctype)

    if not ferryman_hosts:
        return results

    # TODO Insert slb first

    ferryman_response = None
    results["Ferryman hosts"] = ferryman_hosts
    results['Ferryman urls exception'] = {}
    results['Ferryman urls'] = {}

    for host in ferryman_hosts:
        results['Ferryman urls exception'][host] = False
        try:
            find_url_query = urlencode({
                'url': opts.url,
                'thread_count': 100,
                'kps': opts.kps,
                'show_not_found': int(opts.ferryman_show_not_found),
            })
            url = "http://" + host + "/find_url?" + find_url_query

            results["Ferryman urls"][host] = url

            ferryman_response = requests.get(url)
            ferryman_response.raise_for_status()
        except requests.exceptions.RequestException as ferryman_exception:
            results['Ferryman urls exception'][host] = True
            logging.error("Failed for ferryman on host %s: %s" % (host, ferryman_exception))
            continue
        # The first host that answered successfully is enough.
        break

    nothing_received = not ferryman_response
    if nothing_received and ferryman_hosts:
        raise requests.exceptions.RequestException("Can't connect to ferrymans")

    ferryman_answer = ferryman_response.json()

    ferryman_section = {}
    results["Ferryman"] = ferryman_section
    ferryman_section["Shard index"] = ferryman_answer['document']['shard_index']

    root_path = ferryman_answer['root_path']
    ytpull_path = ferryman_answer.get('ytpull_path') or ferryman_answer.get('yt_pull_path')

    color_by_message_type = {
        'MODIFY_DOCUMENT': 'green',
        'ADD_DOCUMENT': 'yellow',
        'DEPRECATED__UPDATE_DOCUMENT': 'blue',
        'DELETE_DOCUMENT': 'magenta',
    }

    def strip_prefix(text, prefix):
        """Return ``text`` without the leading ``prefix``; the prefix is mandatory."""
        if text.startswith(prefix):
            return text[len(prefix):]
        raise RuntimeError("%s and %s" % (text, prefix))

    def describe_table(table, longest_table_path=None, common_prefix=None):
        """Convert one table entry of the answer into the dict of fields to print."""
        if longest_table_path is None:
            path_padding = 0
        else:
            path_padding = longest_table_path - len(table['path'])
        path = strip_prefix(table['path'], common_prefix + '/')

        # TODO Better
        table_mtime = format_time(table['table_mtime']) if 'table_mtime' in table else 'error'
        doc = table.get('doc')

        fields = {
            'table_mtime': table_mtime,
            'table_path': path,
            'path_padding': ' ' * path_padding,
            'deadline': '-',
        }

        if not doc:
            fields['doc_mtime'] = '-'
            fields['action'] = '-'
            return fields

        assert 'kps' not in doc or opts.kps == doc['kps']
        message_type = doc['message_type']
        fields['doc_mtime'] = doc['timestamp']
        fields['action'] = message_type
        if path.startswith('reverse.'):
            fields['action_color'] = None
        else:
            fields['action_color'] = color_by_message_type[message_type]

        # The deadline is suffixed with a colored one-letter marker:
        # 'F' when it is derived from row_ttl, 'B' when it comes from doc_deadline.
        if 'row_ttl' in doc:
            fields['deadline'] = format_time(doc['timestamp'] + doc['row_ttl']) + color.colored('F', 'blue')
        elif 'doc_deadline' in doc:
            fields['deadline'] = format_time(doc['doc_deadline']) + color.colored('B', 'magenta')
        return fields

    tables_section = {}
    ferryman_section["Tables"] = tables_section

    # (key in the Ferryman answer, title printed for the group)
    ferryman_table_map = [
        ('input_proto', 'input_proto'),
        ('state', 'state'),
        ('prev_state', 'state_prev'),
        ('final_states', 'final'),
        ('ytpull', 'ytpull'),
    ]

    tables_section['Table_map'] = ferryman_table_map
    for table_group_name, table_group_title in ferryman_table_map:
        group = {}
        tables_section[table_group_name] = group

        data = ferryman_answer['tables'][table_group_name]

        if table_group_name == 'ytpull':
            common_prefix = ytpull_path
        else:
            common_prefix = root_path

        # TODO
        if table_group_name == 'final_states':
            data.sort(key=lambda t: t['path'], reverse=True)

        # A group is either a single table (dict) or a list of tables.  A
        # single table is rendered without padding, tables of a list are
        # padded up to the longest path of the list.
        if isinstance(data, dict):
            longest_table_path = len(data['path'])
            tables_to_describe = [(data, None)]
        elif isinstance(data, list):
            longest_table_path = max([0] + [len(table['path']) for table in data])
            tables_to_describe = [(table, longest_table_path) for table in data]
        else:
            raise ValueError()

        group['longest_table_path'] = longest_table_path
        group['common_prefix'] = common_prefix
        group['Documents'] = []

        for table, padding_base in tables_to_describe:
            group['Documents'].append(describe_table(table, padding_base, common_prefix))

    return results


def output_backend_results(results, console=True, colored=True):
    """Render the ``backends`` section produced by handle_backends() as text.

    :param results: the ``backends`` dict: ``Url``, ``Kps`` and ``RtyServer``
        with ``Shard range`` plus ``Backends`` / ``Other errors`` (document
        found) or ``Errors`` (document not found); the lists hold objects with
        ``hostname``, ``document``, ``document_hash`` and ``error`` attributes.
    :param console: print the rendered text to stdout as well.
    :param colored: decorate the text with terminal colors; when false the
        ``color`` module is not used at all.
    :return: the rendered text.
    """
    rty_server = results['RtyServer']
    shard_range = rty_server['Shard range']
    document_found = shard_range != (-1, -1)

    def header(text):
        return colored_header(text) if colored else text

    def paint(text, text_color):
        return color.colored(text, text_color) if colored else text

    color_by_document_hash = {}
    # Colors not handed out yet.  The list lives outside get_color() on
    # purpose: every new document hash must consume a color, otherwise all
    # hashes end up with the same one and differing documents can't be told
    # apart.  It is filled lazily so the palette is only touched when colors
    # are really needed.
    available_doc_colors = []

    def get_color(document_hash):
        doc_color = color_by_document_hash.get(document_hash)
        if not doc_color:
            if not available_doc_colors:
                # First use, or more distinct hashes than colors: start over.
                available_doc_colors.extend(color.COLORS.keys())
            doc_color = available_doc_colors.pop()
            color_by_document_hash[document_hash] = doc_color
        return doc_color

    def output_host(host_data):
        if host_data.document:
            status = host_data.document_hash
            if colored:
                status = color.colored(status, get_color(host_data.document_hash))
        elif host_data.error:
            status = paint("Error:{0}".format(host_data.error), "red")
        else:
            status = paint('Not Found', 'magenta')
        return '  {} '.format(host_data.hostname) + status + '\n'

    parts = [
        header("Url: ") + str(results['Url']) + "\n",
        header("Kps: ") + str(results['Kps']) + "\n\n",
        header("RtyServer") + '\n',
    ]
    if document_found:
        parts.append(' Shard range: ' + '-'.join(map(str, shard_range)) + '\n')
    else:
        parts.append(' Shard range: NO' + '\n')
    parts.append("\n")

    if document_found:
        parts.append(' Backends:\n')
        parts.extend(output_host(host_data) for host_data in rty_server['Backends'])

        if rty_server.get('Other errors') is not None:
            parts.append('\n')
            parts.append(' Other errors:\n')
            parts.extend(output_host(host_data) for host_data in rty_server['Other errors'])

    elif rty_server.get('Errors') is not None:
        parts.append(' Errors:\n')
        parts.extend(output_host(host_data) for host_data in rty_server['Errors'])

    string_to_return = ''.join(parts)
    if console:
        print(string_to_return)
    return string_to_return


def output_ferryman_results(results, console=True, colored=True):
    """Render the ``ferryman`` section produced by handle_ferryman() as text.

    The text starts with the first host whose request did not fail and the
    URL that was requested from it, followed by the shard index and one
    block per table group listed in ``Table_map``.

    :param results: the ``ferryman`` dict (``Ferryman hosts``, ``Ferryman urls``,
        ``Ferryman urls exception`` and ``Ferryman`` with ``Shard index`` and ``Tables``).
    :param console: print the rendered text to stdout as well.
    :param colored: decorate headers, group titles and actions with terminal colors.
    :return: the rendered text.
    """
    row_template = '   {table_mtime}  {table_path}{path_padding} {doc_mtime:11}  {colored_action:15}  {deadline}'
    chunks = []

    failed_hosts = results['Ferryman urls exception']
    for host in results["Ferryman hosts"]:
        if failed_hosts.get(host):
            continue
        chunks.append(host + '\n')
        chunks.append(results["Ferryman urls"][host] + '\n')
        break

    title = colored_header("Ferryman") if colored else "Ferryman"
    chunks.append(title + '\n')
    chunks.append(" Shard index:" + results["Ferryman"]["Shard index"] + '\n')
    chunks.append("\n Tables:\n")

    tables = results["Ferryman"]["Tables"]
    for table_group_name, table_group_title in tables['Table_map']:
        group_title = '  %s:' % table_group_title
        if colored:
            group_title = color.colored(group_title, 'yellow')
        chunks.append(group_title + '\n')

        group = tables[table_group_name]
        # Table paths are printed without the common prefix, hence the
        # narrower column.
        path_column_width = group['longest_table_path'] - len(group['common_prefix'])
        chunks.append(
            '   ' + 'table_mtime'.ljust(19) + '  ' + 'table_path'.ljust(path_column_width)
            + ' ' + 'doc_mtime   action           deadline' + '\n'
        )

        for fields in group['Documents']:
            action_text = fields['action']
            # '-' means "no document in this table"; such rows and rows
            # without an action color are never painted.
            if action_text != '-' and fields['action_color'] is not None and colored:
                action_text = color.colored(fields['action'], fields['action_color'])
            chunks.append(row_template.format(colored_action=action_text, **fields) + '\n')

    string_to_return = ''.join(chunks)
    if console:
        print(string_to_return)
    return string_to_return


def format_yql_query(logs_dir, service, key, day_count, dst_table=None):
    """Build the YQL query that greps the logs of a service for one key.

    Today's rows are always read from the ``30min`` tables of ``logs_dir``.
    When ``day_count`` is greater than one, the ``1d`` tables of the
    ``day_count - 1`` previous days are added with UNION ALL.

    :param logs_dir: YT directory with the ``30min`` and ``1d`` log tables.
    :param service: value the ``service`` column is compared with.
    :param key: value the ``key`` column is compared with.
    :param day_count: number of days to cover, today included.
    :param dst_table: when set, the selected rows are written into this table
        (INSERT ... WITH TRUNCATE).
    :return: the query text.
    """
    now = datetime.datetime.now()
    previous_days = [
        (now - datetime.timedelta(days=days_back)).strftime('%Y-%m-%d')
        for days_back in range(1, day_count)
    ]
    substitutions = {
        'key': key,
        'service': service,
        'logs_dir': logs_dir,
        'today': now.strftime('%Y-%m-%d'),
        'days': '|'.join(previous_days),
    }

    today_select = (
        "\n"
        "SELECT * FROM (\n"
        "    SELECT *\n"
        "    FROM LIKE(`{logs_dir}/30min`, '{today}T%')\n"
        "    WHERE key = '{key}' AND service = '{service}'\n"
    )
    previous_days_select = (
        "\n"
        "    UNION ALL\n"
        "\n"
        "    SELECT *\n"
        "    FROM REGEXP(`{logs_dir}/1d`, '^({days})$')\n"
        "    WHERE key = '{key}' AND service = '{service}'\n"
    )

    pieces = [
        "\n"
        "USE hahn;\n"
        "PRAGMA yt.ForceInferSchema = '1000';\n"
        "\n"
    ]
    if dst_table:
        pieces.append('INSERT INTO `%s` WITH TRUNCATE' % dst_table)
    pieces.append(today_select.format(**substitutions))
    if day_count > 1:
        pieces.append(previous_days_select.format(**substitutions))
    pieces.append("\n)\nORDER BY iso_eventtime\n")

    return ''.join(pieces)


def handle_yql(opts, result):
    """Start YQL queries that grep the docfetcher and index logs for the document.

    The token is looked up in the token store as 'YQL', then as 'YT'; when
    neither is available it is requested through OAuth with the application
    credentials of this tool.

    :param opts: parsed options; ``service``, ``url``, ``ctype``,
        ``yql_day_count``, ``yql_df_dst_table`` and ``yql_index_dst_table`` are read.
    :param result: the whole result structure.  It is accepted for symmetry
        with the other handlers and is not modified: returning it would make
        ``result['yql']`` refer to ``result`` itself in main().
    :return: dict with the web urls of the started queries under the
        ``docfetcher`` and ``index`` keys.
    """
    import library.python.oauth as lpo
    from yql.api.v1.client import YqlClient

    yql_oauth = ""
    token_sources = (
        ('YQL', "Can't get YQL token, falling back to YT"),
        ('YT', "Can't get YT token"),
    )
    for token_name, failure_message in token_sources:
        try:
            yql_oauth = TokenStore.get_token_from_store_or_env(token_name)
        except Exception:
            # A missing token is not fatal, the next source is tried.
            logging.info(failure_message)
        if yql_oauth:
            break

    if not yql_oauth:
        # Last resort.  The token has to end up in yql_oauth, otherwise the
        # client below is created without any token.
        yql_oauth = lpo.get_token(OAUTH_APP_ID, OAUTH_APP_SECRET)

    client = YqlClient(token=yql_oauth)

    def run_query(logs_dir, dst_table):
        query = format_yql_query(
            logs_dir=logs_dir,
            service=opts.service,
            key=opts.url,
            day_count=opts.yql_day_count,
            dst_table=dst_table,
        )
        logging.debug(query)

        request = client.query(query, syntax_version=1)
        request.run()
        return request.web_url

    results = {}
    results['docfetcher'] = run_query(
        logs_dir='//logs/saas/{ctype}/docfetcher-log'.format(ctype=opts.ctype),
        dst_table=opts.yql_df_dst_table,
    )
    results['index'] = run_query(
        logs_dir='//logs/saas/{ctype}/index-log'.format(ctype=opts.ctype),
        dst_table=opts.yql_index_dst_table,
    )

    return results


def output_handle_yql(result, console=True):
    """Return (and optionally print) the docfetcher and index query urls, one per line."""
    url_keys = ('docfetcher', 'index')
    if console:
        for url_key in url_keys:
            print(result[url_key])
    return '\n'.join(str(result[url_key]) for url_key in url_keys)


def main(namespace=None, console=False, return_string=False, colored=True, **opts):
    """Run the backend, Ferryman and (optionally) YQL checks for one document.

    :param namespace: parsed options; when ``None`` a namespace is built from
        the parser defaults overridden with ``**opts``.
    :param console: print every rendered section to stdout.
    :param return_string: also return the rendered text.
    :param colored: decorate the rendered text with terminal colors.
    :param opts: option values used when ``namespace`` is not given.
    :return: the result structure, or ``(result, text)`` when ``return_string`` is set.

    The process is terminated with exit code 1 when a YQL destination table
    is given without a non-zero ``yql_day_count``.
    """
    if namespace is None:
        option_values = parse_args(get_defaults=True)
        option_values.update(opts)
        namespace = argparse.Namespace(**option_values)

    result = deepcopy(response_template)
    rendered = []

    result['backends'] = handle_backends(namespace, result)
    rendered.append(output_backend_results(result['backends'], console, colored))

    result['ferryman'] = handle_ferryman(namespace, result)
    if result['ferryman']:
        rendered.append(output_ferryman_results(result['ferryman'], console, colored))

    if namespace.yql_day_count:
        result['yql'] = handle_yql(namespace, result)
        rendered.append(output_handle_yql(result['yql'], console))
    elif namespace.yql_df_dst_table or namespace.yql_index_dst_table:
        # Destination tables make no sense without a period to grep.
        logging.error("Please specify non-zero --yql-day-count=N")
        sys.exit(1)

    if not return_string:
        return result
    return result, ''.join(rendered)


if __name__ == '__main__':
    # Command line entry point: parse sys.argv and print every section.
    opts = parse_args()
    main(namespace=opts, console=True)
