# -*- coding: utf-8 -*-

import argparse
import os
import sys
import time

import warnings
warnings.simplefilter("ignore", UserWarning)

import logging
logging.basicConfig(level=logging.INFO, format="[%(asctime)s] %(levelname)s %(name)s %(message)s")
logger = logging.getLogger(__name__)


import pandas as pd
# from pandas.plotting import register_matplotlib_converters
# register_matplotlib_converters()
#sys.path.append('.')
#from lib import conf, st


import re
from startrek_client import Startrek
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import numpy as np
from pathlib import Path

from yql.api.v1.client import YqlClient


def get_token(name):
    """Return the value of the environment variable *name*.

    :param name: environment variable holding the token
    :return: the non-empty token string
    :raises EnvironmentError: when the variable is unset or empty
    """
    value = os.environ.get(name)
    if value:
        return value
    raise EnvironmentError("Token %s is undefined" % name)

# Both tokens are read from the environment at import time; get_token raises
# EnvironmentError if either variable is missing or empty.
YQL_TOKEN = get_token("YQL_TOKEN")
ST_TOKEN = get_token("STARTREK_TOKEN")

# strftime pattern applied to the start/end timestamps before they are put into queries.
yql_time_format = '%Y-%m-%dT%H:%M:%S'
# Log-table granularity handed to the query helpers when --daily is requested.
day = '1d'


# File-name suffixes that ST.add_comment renders as inline images instead of plain links.
image_formats = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')

class ST():
    """Wrapper around one Startrek issue, used to publish collected files as a comment."""

    def __init__(self, token, issue):
        '''
        :param token: token passed to the Startrek client
        :param issue: issue key (QUEUE-123) or a link whose last path segment is the key
        '''
        self.client = Startrek(useragent='python', token=token)
        self.issue = self.client.issues[self._issue_key(issue)]

    @staticmethod
    def _issue_key(issue):
        '''
        Extract the issue key from a key or a link.

        Trailing slashes and surrounding whitespace are dropped first, so a link
        such as ".../QUEUE-123/" still resolves to "QUEUE-123" instead of "".

        :param issue: issue key or link
        :return: last non-empty path segment
        '''
        return issue.strip().rstrip('/').split('/')[-1]

    def add_comment(self, spoiler_name='Conference logs', attachments=(), conf_links=()):
        '''
        Create a comment, upload files to it and list them inside a spoiler.

        :param spoiler_name: title of the spoiler that wraps the attachment list
        :param attachments: iterable of paths to files to upload
        :param conf_links: iterable of (name, link) pairs appended below the spoiler
        :return: True
        '''
        self.comment = self.issue.comments.create()
        uploaded = set()
        for att in attachments:
            att_path = Path(att)
            try:
                self.comment.update(attachments=[att])
            except Exception:
                # Best effort: a failed upload is logged and the file is simply
                # left out of the comment text.
                logger.exception('Unable to upload attachment %s' % att_path)
            else:
                uploaded.add(att_path.name)

        # NOTE(review): issue attachments are matched by file name only, so an older
        # attachment with the same name would be listed as well - confirm this is acceptable.
        messages = [
            self._attachment_markup(att)
            for att in sorted(self.issue.attachments, key=lambda x: x.name)
            if att.name in uploaded
        ]

        message = '''<{{ {spoiler_name}
            {msg}
        }}>'''.format(spoiler_name=spoiler_name, msg='\n'.join(messages))

        if conf_links:
            links_msg = '\n'.join(['%s — %s' % (name, link) for name, link in conf_links])
            message += '\n' + links_msg

        # The templates are indented for readability; strip that indentation
        # from every line before sending the text.
        message = re.sub(r'^[ \t]+', '', message, flags=re.MULTILINE)
        self.comment.update(text=message)
        return True

    def _attachment_markup(self, att):
        '''
        Build the markup for one issue attachment.

        :param att: attachment object exposing ``name`` and ``self`` (its API link)
        :return: a spoiler with an inline image for picture files, a plain link otherwise
        '''
        link = self._api_to_human_link(att.self)
        # Compare the suffix case-insensitively so that "PLOT.PNG" is still shown as an image.
        if att.name.lower().endswith(image_formats):
            return '''<{{ {name_without_fmt}
                    (({link} 0x0:{link} ))
                    }}>'''.format(name_without_fmt=att.name, link=link)
        return '''
                    (({link} {name}))
                    '''.format(name=att.name, link=link)

    def _api_to_human_link(self, link):
        '''Rewrite an API link into the corresponding web-interface link.'''
        return link.replace('st-api.yandex-team.ru/v2/issues', 'st.yandex-team.ru')


class ConfID():
    """Conference identifier given as a join link, a numeric join code or a 32-character hex conf_id.

    A bare numeric join code is expanded to the full join link. After
    construction ``conf_id`` holds either the link or the hex id and
    ``id_type`` is ``URL`` or ``CONF_ID`` accordingly.
    """
    URL = 'url'
    CONF_ID = 'conf_id'

    def __init__(self, conf_id):
        ''' Example:
            $url = "https://telemost.yandex.ru/j/53108884213157";
            $conf_id = "2188c7494424492583fe14aadd1442ec";

        :param conf_id: join link, numeric join code or hex conf_id
        :raises ValueError: when the value matches none of the supported forms
        '''
        conf_id = conf_id.strip()

        # Whole-string matches for the hex id and the numeric code: trailing
        # garbage would only produce a query that can never match anything.
        if re.fullmatch(r'[\dabcdef]{32}', conf_id):
            self.conf_id = conf_id
            self.id_type = self.CONF_ID
        elif re.match(r'^https://telemost\.yandex\.ru/j/\d+', conf_id):
            self.conf_id = conf_id
            self.id_type = self.URL
        elif re.fullmatch(r'\d+', conf_id):
            self.conf_id = 'https://telemost.yandex.ru/j/%s' % conf_id
            self.id_type = self.URL
        else:
            # Fail here rather than with an AttributeError on the first is_url access.
            raise ValueError('Unrecognized conference link or conf_id: %r' % conf_id)

    @property
    def is_url(self):
        """True when the identifier is a join link (given as such or built from a numeric code)."""
        return self.id_type == self.URL

    @property
    def short_id(self):
        """Last path segment of the join link, or the hex conf_id itself."""
        if self.is_url:
            return self.conf_id.split('/')[-1]
        return self.conf_id


def parse_issues(issues):
    """Flatten a tree of issue objects into a list of their messages.

    Every node is expected to expose ``message`` and ``issues`` (its children).
    Messages are collected depth-first, a parent before its children; nodes
    with an empty message are skipped but their children are still visited.

    :param issues: iterable of top-level issue objects
    :return: list of non-empty messages
    """
    collected = []
    # Explicit stack; children are pushed reversed so that they pop in their original order.
    pending = list(issues)[::-1]
    while pending:
        node = pending.pop()
        if node.message:
            collected.append(node.message)
        children = node.issues
        if children:
            pending.extend(list(children)[::-1])
    return collected


def yql_request(request, refresh_interval=5, name=''):
    """Run a YQL query and block until it is no longer in progress.

    :param request: query text
    :param refresh_interval: seconds to sleep before the first and between later status checks
    :param name: optional label used in the operation title and in log lines
    :return: tuple ``(full result dataframe, share URL of the operation)``
    :raises RuntimeError: when the operation did not succeed; the message is the
        stringified list of issue messages reported for the operation
    """
    client = YqlClient(token=YQL_TOKEN)
    title = 'YQL: %s' % name if name else None
    req = client.query(request, syntax_version=1, title=title)
    req.run()
    logger.info('Starting request %s -- %s', name, req.share_url)
    time.sleep(refresh_interval)
    # NOTE(review): assumes the client reports 'PENDING' while an operation is still
    # queued - confirm. Waiting on it as well keeps a queued query from being
    # reported as failed.
    while req.status in (u'PENDING', u'RUNNING'):
        time.sleep(refresh_interval)
    if not req.is_success:
        logger.error('Request failed: %s', req.share_url)
        raise RuntimeError(str(parse_issues(req._request.get_issues())))
    return req.table.full_dataframe, req.share_url


def get_mpfs_conf_actions(conf_id:ConfID, date_from, date_to, mpfs_default='30min'):
    """Query conference API events from the mpfs default log.

    For a join link the rows are matched by the sha256 hex digest of the link
    found at the end of the log message; for a hex conf_id they are matched by
    the conf_id captured from the message.

    :param conf_id: ConfID instance describing the conference
    :param date_from: first table name of the RANGE
    :param date_to: last table name of the RANGE
    :param mpfs_default: sub-directory of ``//logs/ydisk-mpfs-default-log`` to read
    :return: whatever ``yql_request`` returns for the query
    """
    params = dict(date_from=date_from, date_to=date_to, mpfs_default=mpfs_default)

    if conf_id.is_url:
        params['url'] = conf_id.conf_id
        template = r'''use hahn;

        $from = "{date_from}";
        $to = "{date_to}";
        $url = "{url}";

        $capture_details = Re2::Capture(@@event_type: (?P<type>[^;]+); uid: (?P<uid>[^;]+); user-agent: (?P<user_agent>.*); conf_id: (?P<conf_id>[^;]+); hashed_url: sha256-(?P<hashed_url>.*)@@);

        $sha256_hex_digest = ($value) -> {{
            return String::ToLower(String::HexEncode(Digest::Sha256($value))) ?? "";
        }};

        SELECT
            iso_eventtime,
            $capture_details(message).uid AS uid,
            $capture_details(message).type AS action,
            $capture_details(message).user_agent AS user_agent,
            $capture_details(message).conf_id AS conf_id,
            ycrid,
            message
        FROM RANGE(`//logs/ydisk-mpfs-default-log/{mpfs_default}`, $from, $to)
        WHERE appname = 'platform' AND
              message LIKE "event_type: %; hashed_url: sha256-" || $sha256_hex_digest($url)
        ORDER BY iso_eventtime
        ;'''
    else:
        params['conf_id'] = conf_id.conf_id
        template = r'''use hahn;

        $from = "{date_from}";
        $to = "{date_to}";
        $conf_id = "{conf_id}";

        $capture_details = Re2::Capture(@@event_type: (?P<type>[^;]+); uid: (?P<uid>[^;]+); user-agent: (?P<user_agent>.*); conf_id: (?P<conf_id>[^;]+); hashed_url: sha256-(?P<hashed_url>.*)@@);

        $sha256_hex_digest = ($value) -> {{
            return String::ToLower(String::HexEncode(Digest::Sha256($value))) ?? "";
        }};

        SELECT
            iso_eventtime,
            $capture_details(message).uid AS uid,
            $capture_details(message).type AS action,
            $capture_details(message).user_agent AS user_agent,
            $capture_details(message).conf_id AS conf_id,
            ycrid,
            message
        FROM RANGE(`//logs/ydisk-mpfs-default-log/{mpfs_default}`, $from, $to)
        WHERE appname = 'platform'
              AND $capture_details(message).conf_id = $conf_id
        ORDER BY iso_eventtime
        ;'''

    query = template.format(**params)
    return yql_request(query, name="api actions %s" % conf_id.short_id)


def get_mpfs_client_stat(conf_id, uids, date_from, date_to, mpfs_default='30min', mpfs_access='1h'):
    """Query client-side statistics reported by conference participants.

    Joins the mpfs default log with the mpfs access log on ``ycrid`` and keeps
    ``stat/log`` requests of the given uids whose URI contains ``conf_id``.
    Packet counters are summed over the inbound RTP streams found in the JSON
    message; the two RTT fields are taken from the selected candidate pair.

    :param conf_id: conference id looked up in the request URI
    :param uids: iterable of uids, each rendered as a quoted literal in the IN list
    :param date_from: first table name of both RANGEs
    :param date_to: last table name of both RANGEs
    :param mpfs_default: sub-directory of ``//logs/ydisk-mpfs-default-log`` to read
    :param mpfs_access: sub-directory of ``//logs/ydisk-mpfs-access-log`` to read
    :return: whatever ``yql_request`` returns for the query
    """
    quoted_uids = ', '.join("'{}'".format(one_uid) for one_uid in uids)

    template = r'''
    use hahn;

    $from = "{date_from}";
    $to = "{date_to}";
    $conf_id = "{conf_id}";

    SELECT
        mpfs_access.iso_eventtime as iso_eventtime,
        mpfs_access.uid as uid,
        ListSum(Yson::ConvertToUint64List(JSON_QUERY(CAST(message AS JSON), '$.standard.inbound_rtp_audio_streams.packets_lost' WITH CONDITIONAL WRAPPER))) AS audio_packets_lost,
        ListSum(Yson::ConvertToUint64List(JSON_QUERY(CAST(message AS JSON), '$.standard.inbound_rtp_audio_streams.packets_received' WITH CONDITIONAL WRAPPER))) AS audio_packets_received,
        ListSum(Yson::ConvertToUint64List(JSON_QUERY(CAST(message AS JSON), '$.standard.inbound_rtp_video_streams.packets_lost' WITH CONDITIONAL WRAPPER))) AS video_packets_lost,
        ListSum(Yson::ConvertToUint64List(JSON_QUERY(CAST(message AS JSON), '$.standard.inbound_rtp_video_streams.packets_received' WITH CONDITIONAL WRAPPER))) AS video_packets_received,
        JSON_VALUE(CAST(message AS JSON), '$.standard.transport.selected_candidate_pair.current_round_trip_time') as current_rtt,
        JSON_VALUE(CAST(message AS JSON), '$.standard.transport.selected_candidate_pair.total_round_trip_time') as total_rtt
    FROM RANGE(`//logs/ydisk-mpfs-default-log/{mpfs_default}`, $from, $to) AS mpfs_default
    INNER JOIN RANGE(`//logs/ydisk-mpfs-access-log/{mpfs_access}`, $from, $to) AS mpfs_access
    ON mpfs_access.ycrid = mpfs_default.ycrid
    WHERE
        mpfs_access.appname = 'platform'
        AND mpfs_access.uid in ({uid})
        AND (
            uri like '/telemost_front/v1/telemost/stat/log%'
            OR uri like '/v1/telemost/stat/log%'
        )
        AND String::Contains(mpfs_access.uri, $conf_id)
    ORDER BY iso_eventtime
    ;'''

    query = template.format(
        conf_id=conf_id,
        uid=quoted_uids,
        date_from=date_from,
        date_to=date_to,
        mpfs_default=mpfs_default,
        mpfs_access=mpfs_access,
    )
    return yql_request(query, name='client stat %s' % conf_id)


def get_jvb_logs(conf_id, date_from, date_to, jvb_log='30min'):
    """Query JVB log rows of every endpoint seen together with the conference id.

    The inner select collects the distinct ``endpoint='...'`` values from
    messages that contain ``conf_id``; the outer select returns all rows whose
    message carries one of those endpoints.

    :param conf_id: conference id searched for in log messages
    :param date_from: first table name of the RANGE
    :param date_to: last table name of the RANGE
    :param jvb_log: sub-directory of ``//logs/telemost-jvb-log`` to read
    :return: whatever ``yql_request`` returns for the query
    """
    template = r'''
    use hahn;

    $from = "{date_from}";
    $to = "{date_to}";
    $conf_id='{conf_id}';

    $endpoint_re=Re2::Capture("endpoint='([a-z0-9]+)'");

    SELECT
        *
    FROM
        RANGE(`//logs/telemost-jvb-log/{jvb_log}`, $from, $to)
    WHERE
        $endpoint_re(message)._1 IN
        (
            SELECT DISTINCT
            $endpoint_re(message)._1 as endpoint
            FROM RANGE(`//logs/telemost-jvb-log/{jvb_log}`, $from, $to)
            where
                String::Contains(message, $conf_id)
                AND $endpoint_re(message)._1 is NOT NULL
        )
    ;'''
    query = template.format(
        date_from=date_from,
        date_to=date_to,
        conf_id=conf_id,
        jvb_log=jvb_log,
    )
    label = 'jvb logs %s' % conf_id
    return yql_request(query, name=label)


def get_jiconfo_logs(conf_id, date_from, date_to, jiconfo_log='30min'):
    """Query jicofo log rows whose message contains the conference id, ordered by time.

    :param conf_id: conference id searched for in log messages
    :param date_from: first table name of the RANGE
    :param date_to: last table name of the RANGE
    :param jiconfo_log: sub-directory of ``//logs/telemost-jicofo-log`` to read
    :return: whatever ``yql_request`` returns for the query
    """
    template = '''use hahn;

    $from = "{date_from}";
    $to = "{date_to}";
    $conf_id='{conf_id}';

    select *
    FROM RANGE(`//logs/telemost-jicofo-log/{jiconfo_log}`, $from, $to)
    where String::Contains(message, $conf_id)
    order by iso_eventtime
    ;'''
    query = template.format(
        date_from=date_from,
        date_to=date_to,
        conf_id=conf_id,
        jiconfo_log=jiconfo_log,
    )
    label = 'jiconfo logs %s' % conf_id
    return yql_request(query, name=label)


def get_time_range(dataframes, column, extend=pd.to_timedelta(0)):
    """Return the overall (earliest, latest) timestamp found in *column* across frames.

    :param dataframes: iterable of dataframes that all contain *column*
    :param column: name of the column parsed with ``pd.to_datetime``
    :param extend: margin subtracted from the earliest and added to the latest value
    :return: tuple ``(earliest - extend, latest + extend)``
    """
    # Start from the extreme bounds so that any real timestamp replaces them.
    earliest = pd.to_datetime(sys.maxsize)
    latest = pd.to_datetime(0)
    for frame in dataframes:
        stamps = pd.to_datetime(frame[column])
        frame_first = stamps.min()
        frame_last = stamps.max()
        # Comparisons against NaT are False, so a frame without timestamps leaves the bounds untouched.
        if frame_first < earliest:
            earliest = frame_first
        if frame_last > latest:
            latest = frame_last
    return earliest - extend, latest + extend


def get_filtered_names(df, t_delta):
    """Return uid labels with near-duplicate labels of the same uid blanked out.

    Rows with uid ``'-'`` are dropped and the rest are sorted by uid and time.
    A row keeps its uid as label unless the previous row belongs to the same
    uid and is less than *t_delta* earlier; such rows get an empty string.

    :param df: dataframe with ``uid`` and ``iso_eventtime`` columns
    :param t_delta: minimal time distance between two labels of one uid
    :return: Series named ``f_uid``, ordered by uid and time, indexed like the kept rows
    """
    # Work on an explicit copy so the caller's frame is never touched and no
    # assignment lands on a slice of it.
    sample = df.loc[df['uid'] != '-', ['uid', 'iso_eventtime']].copy()
    sample['iso_eventtime'] = pd.to_datetime(sample['iso_eventtime'])
    sample = sample.sort_values(by=['uid', 'iso_eventtime'])

    previous = sample.shift(1)
    same_uid = sample['uid'] == previous['uid']
    # The first row has no predecessor: NaT makes the comparison False, so it is kept.
    too_close = (sample['iso_eventtime'] - previous['iso_eventtime']) < t_delta
    skip = same_uid & too_close

    # Column-wise instead of a row-wise apply, so an empty frame also works.
    return sample['uid'].where(~skip, '').rename('f_uid')


def timeline_client_stat(client_stat, uid, min_time, max_time, save_path=None):
    """Plot packet counters, loss rates and round-trip times of one uid.

    Three stacked charts are drawn: received/lost packet counters, audio and
    video loss rates, and current RTT with total RTT on a twinned y axis.

    :param client_stat: dataframe with iso_eventtime, uid, the four packet counters
        and the two RTT columns
    :param uid: uid whose rows are plotted
    :param min_time: left limit of every x axis
    :param max_time: right limit of every x axis
    :param save_path: when given, the figure is saved to this path
    :return: the matplotlib figure (left open)
    """
    # Column names used throughout the function.
    iso_eventtime = 'iso_eventtime'
    audio_packets_lost = 'audio_packets_lost'
    audio_packets_received = 'audio_packets_received'
    video_packets_lost = 'video_packets_lost'
    video_packets_received = 'video_packets_received'
    current_rtt = 'current_rtt'
    total_rtt = 'total_rtt'

    # NOTE(review): the assignments below write into the result of a column
    # selection, which pandas may report as chained assignment - consider .copy().
    client_stat = client_stat[[iso_eventtime, 'uid',
                               audio_packets_lost, audio_packets_received,
                               video_packets_lost, video_packets_received,
                               current_rtt, total_rtt]]
    client_stat[iso_eventtime] = pd.to_datetime(client_stat[iso_eventtime])
    # Loss rate = lost / (lost + received + 10).
    # NOTE(review): the constant 10 presumably damps the rate while only a few
    # packets have been seen - confirm.
    client_stat['audio_loss_rate'] = client_stat.apply(
        lambda x: x.audio_packets_lost / (x.audio_packets_lost + x.audio_packets_received + 10)
        # if x.audio_packets_received > 2000
        # else 0
        , axis=1)
    client_stat['video_loss_rate'] = client_stat.apply(
        lambda x: x.video_packets_lost / (x.video_packets_lost + x.video_packets_received + 10)
        # if x.video_packets_received > 2000
        # else 0
        , axis=1)

    # RTT values are cast to float; missing values become 0.0.
    client_stat[current_rtt] = client_stat[current_rtt].astype(float).fillna(0.0)
    client_stat[total_rtt] = client_stat[total_rtt].astype(float).fillna(0.0)

    # Keep only the requested uid and index its rows by event time.
    stat = client_stat[client_stat['uid'] == uid]
    stat.set_index(iso_eventtime, inplace=True)

    fig, axes = plt.subplots(3, figsize=(12, 4 * 3), constrained_layout=True)
    fig.suptitle('uid %s' % uid, fontsize=16)

    # Chart 1: packet counters, chart 2: loss rates; missing values are drawn as 0.
    stat[[audio_packets_received, audio_packets_lost, video_packets_received, video_packets_lost]] = stat[
        [audio_packets_received, audio_packets_lost, video_packets_received, video_packets_lost]
    ].fillna(0)
    stat[[audio_packets_received, audio_packets_lost, video_packets_received, video_packets_lost]].plot(ax=axes[0])
    stat[['audio_loss_rate', 'video_loss_rate']] = stat[['audio_loss_rate', 'video_loss_rate']].fillna(0)
    stat[['audio_loss_rate', 'video_loss_rate']].plot(ax=axes[1])

    # Chart 3: current RTT on the left axis, total RTT on a twinned right axis.
    # NOTE(review): current_rtt is drawn here and once more through
    # DataFrame.plot below - confirm whether this first call is still needed.
    axes[2].plot(stat.index, stat.current_rtt)
    axes_t = axes[2].twinx()
    # axes_t.plot(stat.index, stat.total_rtt)
    stat[[total_rtt]].plot(ax=axes_t)
    stat[[current_rtt]].plot(ax=axes[2])

    # Same x range, tick locators, HH:MM:SS tick format and grid on every axis,
    # the twinned one included.
    for ax in np.append(axes, axes_t):
        ax.xaxis.label.set_visible(False)
        ax.legend(loc=0)
        ax.set_xlim([min_time, max_time])
        ax.set_xticklabels(ax.get_xticklabels(), rotation=0)
        ax.get_xaxis().set_major_locator(mdates.AutoDateLocator())
        ax.get_xaxis().set_minor_locator(ticker.AutoMinorLocator())
        ax.get_xaxis().set_major_formatter(mdates.DateFormatter("%H:%M:%S"))
        ax.get_yaxis().grid(True, linestyle=':')
        ax.get_xaxis().grid(True)
        ax.get_xaxis().grid(True, which='minor', linestyle=':')
        for spine in ["left", "top", "right"]:
            ax.spines[spine].set_visible(False)

    # Fix twinned chart legend
    # The twinned axis' own legend is hidden and its entries are merged into
    # the legend of the third chart.
    axes_t.get_yaxis().grid(False)
    axes_t.legend().set_visible(False)
    lines_1, labels_1 = axes[2].get_legend_handles_labels()
    lines_2, labels_2 = axes_t.get_legend_handles_labels()
    axes[2].legend(lines_1 + lines_2, labels_1 + labels_2, loc=0)

    # NOTE(review): tight_layout is requested although the figure was created
    # with constrained_layout=True - confirm which layout is intended.
    plt.tight_layout()
    #plt.show()
    if save_path:
        fig.savefig(save_path)
    # The figure is not closed here; releasing it is left to the caller.
    return fig


def timeline_conf_actions(events, min_time, max_time, save_path=None):
    """Draw a stem timeline of conference events, one level per uid.

    Rows with uid ``'-'`` are ignored. Every uid gets its own level on
    alternating sides of the baseline, and each event is annotated with its
    uid unless ``get_filtered_names`` blanked the label.

    :param events: dataframe with ``uid`` and ``iso_eventtime`` columns
    :param min_time: left limit of the x axis
    :param max_time: right limit of the x axis
    :param save_path: when given, the figure is saved to this path
    :return: the matplotlib figure (left open)
    """
    g_events = events[events.uid != '-'][['uid', 'iso_eventtime']].sort_values(['uid', 'iso_eventtime'])
    g_events['iso_eventtime'] = pd.to_datetime(g_events['iso_eventtime'])

    dates = g_events['iso_eventtime']

    u_names = sorted(g_events['uid'].unique())
    # Levels alternate around the baseline: -2, 2, -4, 4, ... in sorted uid order.
    level_map = dict((b, (((a + 1) * ((a % 2) * 2 - 1)) // 2) * 2) for a, b in enumerate(u_names))
    levels = g_events['uid'].apply(lambda x: level_map[x])

    # A label is dropped when the same uid already has one closer than 1/8 of the time axis.
    names = get_filtered_names(g_events, (max_time - min_time) / 8)

    # Create figure and plot a stem plot with the date
    fig, ax = plt.subplots(1, figsize=(12, 6), constrained_layout=True)
    ax.set(title="Users joins")
    stem_style = dict(linefmt="C3-", basefmt="k-")
    try:
        markerline, _stemlines, _baseline = ax.stem(dates, levels,
                                                    use_line_collection=True,
                                                    **stem_style)
    except TypeError:
        # Matplotlib 3.8 removed the use_line_collection keyword, so retry without it.
        markerline, _stemlines, _baseline = ax.stem(dates, levels, **stem_style)
    plt.setp(markerline, mec="k", mfc="w", zorder=3)

    # Shift the markers to the baseline by replacing the y-data by zeros.
    markerline.set_ydata(np.zeros(len(dates)))

    # annotate lines: text goes above positive levels and below negative ones
    vert = np.array(['top', 'bottom'])[(levels > 0).astype(int)]
    for d, l, r, va in zip(dates, levels, names, vert):
        ax.annotate(r, xy=(d, l), xytext=(-3, np.sign(l) * 3),
                    textcoords="offset points", va=va, ha="left")

    # format xaxis
    ax.get_xaxis().set_major_locator(mdates.AutoDateLocator())
    ax.get_xaxis().set_minor_locator(ticker.AutoMinorLocator())
    ax.get_xaxis().set_major_formatter(mdates.DateFormatter("%H:%M:%S"))
    plt.setp(ax.get_xticklabels(), rotation=30, ha="right")
    ax.set_xlim([min_time, max_time])

    # hide level ticks
    plt.setp(ax.get_yticklabels(), visible=False)
    ax.get_yaxis().set_major_locator(ticker.MultipleLocator(base=2))

    for spine in ["left", "top", "right"]:
        ax.spines[spine].set_visible(False)

    ax.margins(y=0.1)
    ax.get_yaxis().grid(True, linestyle=':')
    ax.get_xaxis().grid(True, which='minor', linestyle=':')
    ax.grid(True)
    plt.tight_layout()
    #plt.show()
    if save_path:
        fig.savefig(save_path)
    # The figure is not closed here; releasing it is left to the caller.
    return fig


def _resolve_time_range(start_time, end_time, daily):
    """Widen the requested interval so that it covers at least one log table.

    With daily tables the start is moved back to one day before the end when
    the interval is shorter than a day; otherwise the end is moved to 30
    minutes after the start when the interval is shorter than that.

    :param start_time: pandas Timestamp of the interval start
    :param end_time: pandas Timestamp of the interval end
    :param daily: True when daily log tables are queried
    :return: tuple ``(start_time, end_time)`` after the adjustment
    """
    # Compare whole durations: Timedelta.seconds is only the seconds component
    # (0..86399) and ignores full days.
    delta = end_time - start_time
    if daily:
        if delta < pd.Timedelta(days=1):
            start_time = end_time - pd.Timedelta(days=1)
            logger.info('Set start time to %s', start_time)
    elif delta < pd.Timedelta(minutes=30):
        end_time = start_time + pd.Timedelta(minutes=30)
        logger.info('Set end time to %s', end_time)
    return start_time, end_time


def main():
    """Command-line entry point: collect conference logs, plot them and optionally post to a ticket.

    Runs the four YQL queries, optionally stores their results as csv, renders
    the actions timeline and one statistics chart per uid into the output
    directory, and uploads the produced files to the Startrek ticket when
    ``--ticket`` is given.

    :raises RuntimeError: when no conference actions are found or the conf_id
        cannot be determined from them
    """
    parser = argparse.ArgumentParser(description='Run YQL queries to collect conference logs')
    parser.add_argument('url', type=str, help='Conference join link or conf_id')
    parser.add_argument('start_time', type=str, help='Conference start time in format "1969-12-31T23:59:59"')
    parser.add_argument('end_time', type=str, help='Conference end time')
    parser.add_argument('-d', '--daily', action='store_true', required=False, help='Grep over daily logs')
    parser.add_argument('-o', '--out', required=False, default='~', help='Output directory')
    parser.add_argument('-c', '--csv', action='store_true', required=False, help='Save result as csv')
    parser.add_argument('-t', '--ticket', required=False, help='Startrek ticket QUEUE-123')

    args = parser.parse_args()

    start_time = pd.to_datetime(args.start_time.replace('T', ' '))
    end_time = pd.to_datetime(args.end_time.replace('T', ' '))
    if end_time < start_time:
        # Reject a reversed interval before any query is submitted.
        parser.error('end_time %s is earlier than start_time %s' % (args.end_time, args.start_time))

    # extend time interval if time interval doesn't cover YQL range
    start_time, end_time = _resolve_time_range(start_time, end_time, args.daily)
    start_time, end_time = start_time.strftime(yql_time_format), end_time.strftime(yql_time_format)

    conf = ConfID(args.url)
    # expanduser handles the default '~' as well as paths such as '~/logs'.
    path = Path(args.out).expanduser() / ('j_%s' % conf.short_id)

    # Table granularity per log: daily tables on --daily, otherwise each log's finer tables.
    half_hour_tables = day if args.daily else '30min'
    hourly_tables = day if args.daily else '1h'

    actions, mpfs_actions_link = get_mpfs_conf_actions(conf, start_time, end_time, mpfs_default=half_hour_tables)
    if actions.empty:
        raise RuntimeError('No conference actions found for %s (%s)' % (conf.conf_id, mpfs_actions_link))
    # The most frequent captured conf_id is used for all other queries.
    conf_id_modes = actions.conf_id.mode()
    if conf_id_modes.empty:
        raise RuntimeError('Unable to determine conf_id from conference actions (%s)' % mpfs_actions_link)
    conf_id = conf_id_modes[0]
    uids = sorted({u for u in actions.uid if u != '-'})

    stat, client_stat_link = get_mpfs_client_stat(conf_id, uids, start_time, end_time,
                                                  mpfs_default=half_hour_tables, mpfs_access=hourly_tables)

    jvb, jvb_link = get_jvb_logs(conf_id, start_time, end_time, jvb_log=half_hour_tables)
    jiconfo, jicofo_link = get_jiconfo_logs(conf_id, start_time, end_time, jiconfo_log=half_hour_tables)

    min_time, max_time = get_time_range([actions, stat, jvb, jiconfo], 'iso_eventtime', pd.to_timedelta('5m'))

    links = [
        ['api conf actions', mpfs_actions_link],
        ['client stat', client_stat_link],
        ['jvb logs', jvb_link],
        ['jicofo logs', jicofo_link]
    ]

    path.mkdir(parents=True, exist_ok=True)
    logger.info('Working directory: %s', path)
    saved_files = []

    if args.csv:
        for df, name in [
            (actions, 'api_events'),
            (stat, 'clients_stat'),
            (jvb, 'jvb'),
            (jiconfo, 'jiconfo'),
        ]:
            fpath = path / ('%s.csv' % name)
            logger.info('Saving logs to %s', fpath)
            df.to_csv(fpath)
            saved_files.append(fpath)

    # Plotting is best effort: a failed chart is logged and the remaining work continues.
    try:
        logger.info('Saving actions plot')
        fpath = path / ('actions_%s.png' % conf.short_id)
        # Close the figure once it has been saved, so figures do not pile up in pyplot.
        plt.close(timeline_conf_actions(actions, min_time, max_time, fpath))
        saved_files.append(fpath)
        logger.info('Client actions saved to file %s', fpath)
    except Exception as e:
        logger.exception(e)

    for uid in [u for u in stat.uid.unique() if u != '-']:
        try:
            fpath = path / ('cs_uid_%s.png' % uid)
            plt.close(timeline_client_stat(stat, uid, min_time, max_time, fpath))
            saved_files.append(fpath)
            logger.info('Client stat %s saved to file %s', uid, fpath)
        except Exception as e:
            logger.exception(e)

    if args.ticket:
        startrek = ST(ST_TOKEN, args.ticket)
        startrek.add_comment("Conference %s" % conf.conf_id, attachments=[str(s) for s in saved_files], conf_links=links)
        logger.info('Files uploaded to ST %s', args.ticket)

    for name, link in links:
        logger.info('YQL request for %s: %s', name, link)


if __name__ == "__main__":
    main()
