# -*- coding: utf-8 -*-
"""
Install:
1. Set up your YT token: https://oauth.yt.yandex.net/ and export it as the DISK_STAT_YQLTOKEN environment variable
2. Install requirements: pip install -i https://pypi.yandex-team.ru/simple/ -r requirements.txt
3. Run script: python fast_idd.py 2019-12-08

Active IDD problems:
https://st.yandex-team.ru/CHEMODAN/order:updated:false/filter?tags=%D0%B2%D0%B5%D1%80%D1%82%D0%B8%D0%BA%D0%B0%D0%BB%D1%8C%D0%BD%D1%8B%D0%B9_%D0%BC%D0%BE%D0%BD%D0%B8%D1%82%D0%BE%D1%80%D0%B8%D0%BD%D0%B3&resolution=empty()
"""
import json
import os
import re
from argparse import ArgumentParser
from collections import defaultdict
from datetime import timedelta, datetime
from yt.wrapper import YtClient


class YTIDDClient(object):
    """Reads the per-day IDD tables from YT, with an on-disk JSON cache.

    All methods are classmethods; the YT client is created once, at class
    definition time, with the token from the DISK_STAT_YQLTOKEN environment
    variable.
    """
    # Local directory holding one JSON file per (date, table) pair.
    cache_dir = '/tmp/fast_idd'
    # YT directory template, filled with (year, month, day).
    base_dir = '//home/disk-stat/vertical/%i/%0.2i/%0.2i'
    yql_token = os.getenv("DISK_STAT_YQLTOKEN")
    client = YtClient(proxy='hahn', token=yql_token)

    @classmethod
    def get_long(cls, date):
        """Return the rows of the 'mpfs_requests' table for ``date``."""
        return cls.get(date, 'mpfs_requests')

    @classmethod
    def get_error(cls, date):
        """Return the rows of the 'mpfs_joined' table for ``date``."""
        return cls.get(date, 'mpfs_joined')

    @classmethod
    def get_all_nginx(cls, date):
        """Return the rows of the 'all_nginx' table for ``date``."""
        return cls.get(date, 'all_nginx')

    @classmethod
    def get_mpfs_envoy_requests(cls, date):
        """Return the rows of the 'mpfs_envoy_requests' table for ``date``."""
        return cls.get(date, 'mpfs_envoy_requests')

    @classmethod
    def get(cls, date, table_name):
        """Return all rows of ``table_name`` for ``date`` as a list of dicts.

        The rows are served from the JSON cache file when it exists;
        otherwise the table is read from YT and the cache file is created.

        :param date: object with ``year``/``month``/``day`` attributes; its
            ``str()`` form is part of the cache file name.
        :param table_name: name of the table inside the per-day directory.
        """
        if not os.path.exists(cls.cache_dir):
            os.mkdir(cls.cache_dir)
        cache_path = os.path.join(cls.cache_dir, "%s_%s" % (date, table_name))
        if os.path.exists(cache_path):
            with open(cache_path) as fh:
                return json.load(fh)

        yt_path = os.path.join(cls.base_dir % (date.year, date.month, date.day), table_name)
        result = list(cls.client.read_table(yt_path))
        for line in result:
            message = line.get('message')
            # Only byte strings need decoding; text is already JSON-serialisable.
            if message and isinstance(message, bytes):
                line['message'] = message.decode('utf8', 'replace')

        # Serialise before touching the file system and publish the file with
        # a rename, so that a failed dump or an interrupted write never leaves
        # an empty/truncated cache file that would break every later run.
        payload = json.dumps(result, indent=2)
        tmp_path = cache_path + '.tmp'
        with open(tmp_path, 'w') as fh:
            fh.write(payload)
        os.rename(tmp_path, cache_path)
        return result


class Grouper(object):
    """Collects dict-like records into buckets keyed by selected fields."""

    def __init__(self, group_fields=None):
        # Names of the record fields whose values form the bucket key.
        self.group_fields = group_fields
        self._data = defaultdict(list)

    def add(self, data):
        """Put ``data`` into the bucket identified by its group field values."""
        bucket_key = tuple([data[name] for name in self.group_fields])
        bucket = self._data[bucket_key]
        bucket.append(data)

    def get_groups(self):
        """Return the ``(key, records)`` pairs collected so far."""
        buckets = self._data
        return buckets.items()


def message_pp(message, max_len=300):
    """Shorten a requests-log message for display.

    Whitespace runs are collapsed, Mongo ``Binary('...', 2)`` payloads and a
    leading ``completed `` are stripped, and for PostgreSQL messages (those
    starting with ``dbname``) the connection parameters are removed and an
    over-long text keeps its head and tail around a ``<TRUNC>`` marker.

    :param message: raw log message.
    :param max_len: maximum length of the returned string.
    :return: the cleaned message, at most ``max_len`` characters long.
    """
    # common
    message = re.sub(r"\s+", ' ', message)
    # mongo
    message = re.sub(r"Binary\('.*', 2\)", 'BIN(...)', message)
    message = re.sub(r"^completed ", '', message)
    # PG
    if message.startswith('dbname'):
        message = message.replace("dbname=", "")
        message = re.sub(r'user=.* " /\* uid', '"/* uid', message)
        if len(message) > max_len:
            placeholder = '<TRUNC>'
            # Floor division keeps the slice bounds integral on Python 3 too.
            half = max_len // 2
            # Never let the bounds go negative/zero-from-the-end: that would
            # select almost the whole message instead of a short piece.
            head_len = max(half - len(placeholder), 0)
            tail = message[-half:] if half else ''
            message = message[:head_len] + placeholder + tail
    return message[:max_len]


def format_delta(d):
    """Format a timedelta as ``<whole seconds>.<milliseconds, 3 digits>``.

    Whole days are folded into the seconds part and negative durations are
    printed with a leading minus sign (``timedelta`` normalises them to
    negative days plus positive seconds, which would otherwise print a large
    bogus value).

    :param d: ``datetime.timedelta`` to format.
    :return: string such as ``'1.500'``.
    """
    sign = ''
    if d < timedelta():
        sign = '-'
        d = -d
    whole_seconds = d.days * 86400 + d.seconds
    return "%s%i.%0.3i" % (sign, whole_seconds, d.microseconds // 1000)


def profile_slow(request_lines):
    """Build a text profile of one slow request.

    ``request_lines`` are the requests-log rows of a single access-log
    request (all rows must carry the same 'request' value). The list is
    sorted in place, slowest sub-request first. Sub-requests slower than
    20ms are listed one per line, faster ones are only counted and summed,
    and the last line compares access-log time, summed requests-log time and
    their difference (the time spent "in code").

    :return: the profile as a single newline-joined string.
    """
    fast_queries_bound = 0.02  # 20ms - fast query
    request_lines.sort(key=lambda entry: entry['request_time'], reverse=True)
    slowest = request_lines[0]
    access_request_time = timedelta(seconds=slowest['a.request_time'])
    access_request = slowest['request']

    report = [access_request]
    logged_total = timedelta()
    fast_count = 0
    fast_total = timedelta()
    for entry in request_lines:
        assert entry['request'] == access_request
        spent = timedelta(seconds=entry['request_time'])
        logged_total += spent
        is_fast = entry['request_time'] <= fast_queries_bound
        if not is_fast:
            report.append("> %s %s" % (format_delta(spent), message_pp(entry['message'])))
            continue
        fast_count += 1
        fast_total += spent

    fast_summary = (fast_count, format_delta(fast_total), fast_queries_bound)
    report.append("> Other queries num: %i, sum time: %s. (faster then %s)." % fast_summary)

    in_code_time = access_request_time - logged_total
    # The requests-log sum can exceed the access-log time; show that as text.
    if in_code_time >= timedelta():
        fmt_in_code_time = format_delta(in_code_time)
    else:
        fmt_in_code_time = '(negative)'
    totals = (format_delta(access_request_time), format_delta(logged_total), fmt_in_code_time)
    report.append("%s/%s/%s (access log time/request log time/in code time)" % totals)
    return "\n".join(report)


def profile_error(request_lines):
    """Build a text report of the error messages of one request.

    The first line is the request itself; every following line is prefixed
    with the index of the log row it came from. Messages containing a Python
    traceback are cropped: everything up to and including the
    ``Traceback (most recent call last):`` line is kept, then ``  ...``, then
    the last ``  File "`` frame and whatever follows it. Empty lines of a
    cropped traceback are dropped.

    :param request_lines: log rows with 'request' and 'message' keys.
    :return: the report as a single newline-joined string.
    """
    report = [request_lines[0]['request']]
    # Literal "\n" sequences in the log are turned into real line breaks.
    texts = [row['message'].replace('\\n', '\n') for row in request_lines]
    for idx, text in enumerate(texts):
        if 'Traceback (most recent call last)' not in text:
            report.append("%i %s" % (idx, text))
            continue

        tb_lines = text.split("\n")

        # Head: everything through the traceback header (or all lines).
        head_end = len(tb_lines)
        for pos, tb_line in enumerate(tb_lines):
            if 'Traceback (most recent call last):' in tb_line:
                head_end = pos + 1
                break

        # Tail: from the last stack frame to the end (or all lines).
        last_frame = None
        for pos in range(len(tb_lines) - 1, -1, -1):
            if '  File "' in tb_lines[pos]:
                last_frame = pos
                break
        if last_frame is None:
            tail = list(tb_lines)
        else:
            tail = ['  ...'] + tb_lines[last_frame:]

        for tb_line in tb_lines[:head_end] + tail:
            if tb_line:
                report.append("%i %s" % (idx, tb_line))
    return "\n".join(report)


def contains_one_of(message, *substrings):
    """Return True when at least one of ``substrings`` occurs in ``message``."""
    return any(needle in message for needle in substrings)


def to_pretty_json(obj):
    """Serialise ``obj`` to JSON text indented by two spaces."""
    pretty = json.dumps(obj, indent=2)
    return pretty


def _find_related_access_log(services_nginx_log_lines, request_log):
    # для запроса из requests log пытаемся найти соответсвующую запись в access log сервиса, куда делали запрос
    for line in services_nginx_log_lines:
        if line['ycrid'] == request_log['ycrid'] and line['request'] in request_log['message']:
            return line


def resolve_slow(lines, nginx):
    """Classify one slow request by matching it against known problems.

    :param lines: requests-log rows of a single access-log request; the
        access-log fields ('a.request_time', 'request') are read from the
        first row, 'request_time' and 'message' from every row.
    :param nginx: access-log rows of the called services, used to look up
        the row that corresponds to the slowest sub-request.
    :return: a ``(component, ticket URL or hint)`` tuple for the first rule
        that matches, or None when no rule matches.

    The rules are checked in order and the first match wins, so their order
    matters (several substrings/prefixes overlap). Times are presumably in
    seconds - TODO confirm against the table schema.
    """
    access_request_time = lines[0]['a.request_time']
    access_request = lines[0]['request']
    # Total time accounted for by the logged sub-requests.
    requests_log_time = sum((l['request_time'] for l in lines))
    longest_req = max(lines, key=lambda x: x['request_time'])
    # print to_pretty_json(longest_req)
    longest_req_msg = longest_req['message']
    longest_req_time = longest_req['request_time']

    longest_req_related_access_log = _find_related_access_log(nginx, longest_req)
    # print to_pretty_json(longest_req_related_access_log)
    if longest_req_related_access_log:
        service_req_time = float(longest_req_related_access_log["request_time"])
        if longest_req_time - service_req_time > 0.5:
            # The service answered quickly, but the client took long to receive the response.
            return 'SLOW HTTP CLIENT', 'https://st.yandex-team.ru/CHEMODAN-58302'

    if longest_req_time > 1.2:
        # ONE LONG QUERY
        # Falls through to an implicit None when none of the rules below match.
        if 'http://ratelimiter.localhost:12701' in longest_req_msg:
            return ('Ratelimiter', 'https://st.yandex-team.ru/CHEMODAN-64414')
        # Search
        if contains_one_of(longest_req_msg,
                           'http://search-disk.localhost:12701',
                           'http://mpfs.localhost:12701/json/new_search',
                           'http://mpfs.localhost:12701/json/image_metadata_by_file_id',
                           ):
            return ('Search', 'https://st.yandex-team.ru/PS-3236')
        # MPFS
        if access_request.startswith('/json/timeline?'):
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-57754')
        if access_request.startswith('/json/list_public?'):
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-57750')
        if access_request.startswith('/json/deltas?'):
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-58689')
        # Java IDD
        if 'http://dataapi.localhost:12701' in longest_req_msg:
            return ('Data API', 'Check Java IDD')
        if 'http://loader.localhost:12701' in longest_req_msg:
            return ('Kladun', 'Check Java IDD')
        if 'http://smartcache.localhost:12701' in longest_req_msg:
            return ('Smartcache', 'Check Java IDD')
        if 'https://uaas.qloud.disk.yandex.net' in longest_req_msg:
            return ('UAAS', 'Check Java IDD')
        if contains_one_of(longest_req_msg,
                           'http://streaming.localhost:12701',
                           'http://mpfs.localhost:12701/json/video_streams',
                           ):
            return ('Video streaming', 'Check Java IDD')
        if 'http://notes.localhost:12701/' in longest_req_msg:
            return ('Notes', 'Check Java IDD')
        # common shard
        if contains_one_of(longest_req_msg,
                           'common.groups.groups.',
                           'common.support_blocked_hids.support_blocked_hids.',
                           'common.group_links.group_links.',
                           ):
            return ('Mongo common shard', 'https://st.yandex-team.ru/CHEMODAN-58164')
        # MPFS from REST
        if 'http://mpfs.localhost:12701/json/snapshot?' in longest_req_msg:
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-69263')
        # NOTE: this substring is a prefix of other handlers' URLs (e.g. list_public).
        if 'http://mpfs.localhost:12701/json/list' in longest_req_msg:
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-57751')
        if 'http://mpfs.localhost:12701/json/lenta_block_list' in longest_req_msg:
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-62359')
        if 'http://mpfs.localhost:12701/json/user_info' in longest_req_msg:
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-63943')
        if 'http://mpfs-stable.dst.yandex.net/json' in longest_req_msg:
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-65642')
        if access_request.startswith('/json/indexer_snapshot?') and 'SELECT * FROM (' in longest_req_msg:
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-58060')
        if 'http://mpfs.localhost:12701/json/deltas' in longest_req_msg:
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-58689')
        if access_request.startswith('/json/new_search?') and 'dbname=diskdb user=disk_mpfs' in longest_req_msg:
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-69213')
        if access_request == '/v1/disk':
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-69215')
        # DJFS
        if 'https://djfs-albums.qloud.disk.yandex.net/api/v1/albums/snapshot' in longest_req_msg:
            return ('DJFS-ALBUMS', 'https://st.yandex-team.ru/CHEMODAN-71030')
        if 'https://djfs-albums.qloud.disk.yandex.net/api/v1/albums/deltas' in longest_req_msg:
            return ('DJFS-ALBUMS', 'https://st.yandex-team.ru/CHEMODAN-71379')
        if 'http://djfs-api.qloud.disk.yandex.net/api/legacy' in longest_req_msg or \
                access_request.startswith('/api/legacy/json/bulk_info'):
            return ('DJFS', 'https://st.yandex-team.ru/CHEMODAN-69211')
        if access_request.startswith('/json/bulk_info_by_resource_ids?'):
            return ('DJFS', 'https://st.yandex-team.ru/CHEMODAN-69211')
        if access_request.startswith('/json/bulk_info?'):
            return ('DJFS', 'https://st.yandex-team.ru/CHEMODAN-69211')
        if 'http://mpfs.localhost:12701/json/bulk_info_by_resource_ids' in longest_req_msg:
            return ('DJFS', 'https://st.yandex-team.ru/CHEMODAN-69211')
        if 'http://mpfs.localhost:12701/json/bulk_info?' in longest_req_msg:
            return ('DJFS', 'https://st.yandex-team.ru/CHEMODAN-69211')
        # Blackbox
        if '://blackbox.yandex.net/blackbox' in longest_req_msg:
            return ('Blackbox', 'https://st.yandex-team.ru/CHEMODAN-58311')

    elif access_request_time - requests_log_time > 1.0:
        # SLOW CODE
        # No single slow sub-request, but much of the access-log time is not
        # covered by the requests log.
        if access_request.startswith('/json/list?'):
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-57751')
        if access_request.startswith('/service/kladun_callback?'):
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-67548')
        if access_request.startswith('/json/store?'):
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-67548')
        if access_request.startswith('/json/new_get_last_files?'):
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-69200')
        if access_request.startswith('/json/list_public?'):
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-57750')
        if access_request.startswith('/json/image_metadata_by_file_id?'):
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-69204')
        if access_request.startswith('/json/lenta_block_list?'):
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-62359')
        if access_request.startswith('/v1/'):
            return ('REST', 'https://st.yandex-team.ru/CHEMODAN-58302')
        if access_request.startswith('/json/albums_list?'):
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-58240')
        if access_request.startswith('/json/new_search?'):
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-70735')
        if access_request.startswith('/json/timeline?'):
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-57754')

        # Unlike the other branches, this one always yields a verdict.
        return ('Slow code', 'https://st.yandex-team.ru/CHEMODAN-70735')

    else:
        # Neither one long query nor slow code: only a few known cases apply.
        if access_request.startswith('/json/new_search?') and 'dbname=diskdb user=disk_mpfs' in longest_req_msg:
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-69213')
        if access_request == '/v1/disk':
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-69215')
        if access_request.startswith('/json/albums_list?'):
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-58240')
        if access_request.startswith('/json/store?'):
            return ('MPFS', 'https://st.yandex-team.ru/CHEMODAN-67548')


def resolve_error(lines):
    """Classify one failed request by matching it against known problems.

    :param lines: log rows of a single request, each with 'request' and
        'message' keys.
    :return: a ``(component, ticket URL or hint)`` tuple for the first rule
        that matches, or None when no rule matches.
    """
    access_request = lines[0]['request']
    # All messages are searched at once, so a rule may match any row.
    joined_error_messages = '\t'.join([row['message'] for row in lines])

    if access_request.startswith('/v1/case/lenta/report_block_visit'):
        return ('REST', 'https://st.yandex-team.ru/CHEMODAN-69220')

    # (substring of the joined messages, verdict), checked in this order.
    message_rules = (
        ("Timeout: HTTPSConnectionPool(host='uaas.qloud.disk.yandex.net",
         ('UAAS', 'Check Java IDD')),
        ("raise JSONDecodeError('unknown keyword or identifier'",
         ('DJFS', 'https://st.yandex-team.ru/CHEMODAN-69509')),
    )
    for needle, verdict in message_rules:
        if needle in joined_error_messages:
            return verdict
    return None


def format_st_table(title, rows):
    """Print ``rows`` to stdout as a wiki-markup table under a heading.

    :param title: heading text printed above the table.
    :param rows: sequence of 7-tuples ``(comment, ticket, ycrid, request,
        req_time, debug_info, profile_info)``; ``debug_info`` is not printed.

    Prints 'No problems' instead of a table when ``rows`` is empty. The
    markup (``#| ... |#`` table, ``<{ ... }>`` block, ``%%`` fences) is
    presumably the tracker's wiki syntax - TODO confirm.

    Every ``print`` call takes a single parenthesised argument, which prints
    the same text on Python 2 (as a print statement) and on Python 3.
    """

    def request_pp(req):
        # Put every query-string parameter on its own line, clipped to 80 chars.
        parts = req.split('?', 1)
        if len(parts) == 1:
            return req
        path, qs = parts
        for param in qs.split('&'):
            if len(param) > 80:
                param = "%s..." % param[:80]
            path += "\n  &%s" % param
        return path

    print("===== %s" % title)
    if not rows:
        print('No problems')
        return
    print("#|")
    for i, r in enumerate(rows):
        comment, ticket, ycrid, request, req_time, _debug_info, profile_info = r
        # '%%%%' renders as a literal '%%' fence around the profile text.
        print('|| %i | %s | %s | <{%s\n %%%% %s %%%% }> | %s | %s ||' % (
            i + 1, comment, ticket, ycrid, profile_info, req_time, request_pp(request)
        ))
    print("|#")


def make_safe_for_st(text):
    """Return ``text`` with every '%%' replaced by '</>'.

    format_st_table wraps the profile text in '%%' fences, so the text
    itself must not contain that sequence.
    """
    pieces = text.split('%%')
    return '</>'.join(pieces)


def process_slow(dt):
    """Build the report rows for the slow requests of day ``dt``.

    Rows of the 'mpfs_requests' table are grouped per request (ycrid,
    request_id, request, access-log request time); every group is classified
    with resolve_slow and profiled with profile_slow.

    :return: list of ``(comment, ticket, ycrid, request, request_time,
        debug_messages, profile_text)`` tuples; ``('?', '?')`` stands in for
        comment/ticket when the request could not be classified.
    """
    rows = YTIDDClient.get_long(dt)
    if not rows:
        return []

    grouper = Grouper(group_fields=('ycrid', 'request_id', 'request', 'a.request_time'))
    for row in rows:
        grouper.add(row)

    nginx = YTIDDClient.get_all_nginx(dt)

    report_rows = []
    for key, data in grouper.get_groups():
        # Classify and collect the debug messages before profile_slow,
        # which re-sorts ``data`` in place.
        verdict = resolve_slow(data, nginx)
        debug = [message_pp(entry['message']) for entry in data]
        profile_info = make_safe_for_st(profile_slow(data))
        if not verdict:
            verdict = ('?', '?')
        ycrid, request, request_time = key[0], key[2], key[3]
        report_rows.append(
            (verdict[0], verdict[1], ycrid, request, request_time, debug, profile_info)
        )
    return report_rows


def process_errors(dt):
    """Build the report rows for the failed requests of day ``dt``.

    Rows of the 'mpfs_joined' table are grouped per request (ycrid,
    request_id, request, access-log event time); every group is classified
    with resolve_error and profiled with profile_error.

    :return: list of ``(comment, ticket, ycrid, request, event_time,
        debug_messages, profile_text)`` tuples; ``('?', '?')`` stands in for
        comment/ticket when the request could not be classified.
    """
    rows = YTIDDClient.get_error(dt)
    if not rows:
        return []

    grouper = Grouper(group_fields=('ycrid', 'request_id', 'request', 'a.iso_eventtime'))
    for row in rows:
        grouper.add(row)

    report_rows = []
    for key, data in grouper.get_groups():
        debug = [message_pp(entry['message']) for entry in data]
        verdict = resolve_error(data)
        profile_info = make_safe_for_st(profile_error(data))
        if not verdict:
            verdict = ('?', '?')
        ycrid, request, event_time = key[0], key[2], key[3]
        report_rows.append(
            (verdict[0], verdict[1], ycrid, request, event_time, debug, profile_info)
        )
    return report_rows


if __name__ == '__main__':
    # Command line: a single positional date (YYYY-MM-DD) selecting the IDD day.
    parser = ArgumentParser()
    parser.add_argument('date', type=lambda s: datetime.strptime(s, '%Y-%m-%d'), help='Дата IDD в формате: 2019-11-29')
    namespace = parser.parse_args()

    dt = namespace.date
    # Sorting the row tuples groups equal comment/ticket pairs together.
    error = process_errors(dt)
    error.sort()
    slow = process_slow(dt)
    slow.sort()
    # Single-argument print(...) prints the same on Python 2 and Python 3.
    print('==== IDD Result')
    format_st_table('Error %s' % dt, error)
    format_st_table('Slow %s' % dt, slow)
