import warnings

# Installed before the remaining imports, so UserWarning instances raised
# while those modules are being imported are ignored as well.
warnings.simplefilter("ignore", UserWarning)

import logging

# Root logging config: INFO and above, each record prefixed with the
# timestamp, level name and logger name.
logging.basicConfig(level=logging.INFO, format="[%(asctime)s] %(levelname)s %(name)s %(message)s")
logger = logging.getLogger(__name__)

import argparse
import json
import os
import pandas as pd
import time
import traceback

import statface_client
from yql.api.v1.client import YqlClient

# Credentials are read from the environment at import time; each assert aborts
# the import with AssertionError when the variable is unset or empty.
# NOTE(review): assert statements are stripped under `python -O`, so these
# checks do not run in optimized mode.
YQL_TOKEN = os.getenv("YQL_TOKEN")
assert YQL_TOKEN
STAT_TOKEN = os.getenv('STAT_TOKEN')
assert STAT_TOKEN


# ST_TOKEN = os.getenv("STARTREK_TOKEN")

def parse_issues(issues):
    """Flatten a tree of issue objects into a list of their messages.

    Each issue is expected to expose ``message`` and ``issues`` attributes.
    Messages are collected depth-first: an issue's own message (when truthy)
    comes before the messages of its nested issues.

    :param issues: iterable of issue objects
    :return: list of non-empty messages in pre-order
    """

    def _walk(nodes):
        # Pre-order traversal: the node itself first, then its children.
        for node in nodes:
            if node.message:
                yield node.message
            if node.issues:
                yield from _walk(node.issues)

    return list(_walk(issues))


def yql_request(request, refresh_interval=5, name=''):
    """Run a YQL query, wait while it is RUNNING and return its result.

    :param request: text of the YQL query
    :param refresh_interval: seconds to sleep between status checks
    :param name: optional label used in the query title and in log messages
    :return: ``req.table.full_dataframe`` of the finished query
    :raises Exception: when the query did not succeed; the message is the
        stringified list of issue messages collected by ``parse_issues``
    """
    client = YqlClient(token=YQL_TOKEN)
    title = 'YQL: %s' % name if name else None
    req = client.query(request, syntax_version=1, title=title)
    req.run()
    logger.info('Starting request %s -- %s' % (name, req.share_url))

    # Sleep first, then check: the status is never read before one full
    # refresh interval has passed.
    while True:
        time.sleep(refresh_interval)
        if req.status != u'RUNNING':
            break

    if req.is_success:
        return req.table.full_dataframe

    logger.info('Request failed: %s' % req.share_url)
    raise Exception(str(parse_issues(req._request.get_issues())))


def get_metrica_stat(date_from, date_to, limit=''):
    """Download ``EventValue`` rows of the ``(5433)`` event from AppMetrica logs.

    The query selects ``EventValue`` from the ``appmetrica-yandex-events/1d``
    tables in ``range(date_from, date_to)``, filtered by the API key stored in
    ``$disk_prod_api_key`` and by ``EventValue like '%(5433)%'``.

    :param date_from: first table name of the range, inserted into the query as is
    :param date_to: last table name of the range, inserted into the query as is
    :param limit: optional row limit; when truthy a ``limit <value>`` clause
        is appended, otherwise the query is unlimited
    :return: whatever ``yql_request`` returns for the query (the result dataframe)
    """
    # Only add the clause when a limit was actually requested.
    limit_clause = ('limit %s' % limit) if limit else ''
    query = '''
    USE hahn;
    $recent_appmetrica_folder = 'home/logfeller/logs/appmetrica-yandex-events/1d';
    $start_date_time = '{date_from}';
    $end_date_time = '{date_to}';

    $disk_prod_api_key = 2151871;
    --$disk_test_api_key = "2087491";
    select
        --AccountID,
        --OperatingSystem,
        --OSVersion,
        --AppBuildNumber,
        --AppVersionName,
        --EventName,
        EventValue
    from
        range($recent_appmetrica_folder, $start_date_time, $end_date_time)
    where
        (APIKey = $disk_prod_api_key)
        and EventValue like '%(5433)%'
    --order by iso_eventtime desc
    {limit}
    ;'''.format(date_from=date_from, date_to=date_to, limit=limit_clause)
    return yql_request(query, name="Metric download stat %s" % date_from)


def parse_row(row):
    '''
    Extract transfer-speed measurements from one ``EventValue`` entry.

    :param row: dict or its JSON string, example
        {"Скачивание/загрузка файлов(5433)":
            {"measurements: 59; DOWN last/median/average: 361813/380928/373615; UP last/median/average: 0/0/0": ""}}
    :return: dict with ``measures`` and, for each direction whose three numbers
        are not all zero, ``<direction>_median`` / ``<direction>_average``
        (direction is ``download`` or ``upload``); an empty dict when the
        measurement count is 0 or the row cannot be parsed
    '''
    if isinstance(row, str):
        try:
            row = json.loads(row)
        except json.decoder.JSONDecodeError:
            logger.error(traceback.format_exc())
            return {}

    try:
        payload = row["Скачивание/загрузка файлов(5433)"]
        # The measurements are encoded in the first key of the inner dict.
        parts = list(payload.keys())[0].split(';')

        meas = int(parts[0].split(':')[1])
        if meas == 0:
            return {}

        # Each direction carries "last/median/average" as three integers.
        down = [int(v) for v in parts[1].replace('DOWN ', '').split(': ')[1].split('/')]
        up = [int(v) for v in parts[2].replace('UP ', '').split(': ')[1].split('/')]

        data = {'measures': meas}
        # A direction with all-zero numbers is treated as "no data" and left out.
        if sum(down) != 0:
            data['download_median'] = down[1]
            data['download_average'] = down[2]
        if sum(up) != 0:
            data['upload_median'] = up[1]
            data['upload_average'] = up[2]
        return data

    except (KeyError, IndexError, ValueError, TypeError, AttributeError):
        # One malformed or missing value must not abort processing of the
        # whole day: report it and treat the row as empty.
        traceback.print_exc()
        return {}

def calc_average(df, measure_col, value_col, skip_null=True):
    """Return the average of ``value_col`` weighted by ``measure_col``.

    Rows whose value is NaN are always ignored.

    :param df: dataframe containing both columns
    :param measure_col: name of the column with the weights (measurement counts)
    :param value_col: name of the column with the values to average
    :param skip_null: when true, rows whose value equals 0 are ignored as well
    :return: weighted average; ``0.0`` when no rows are left after filtering
    """
    sdf = df[[measure_col, value_col]][df[value_col].notna()]
    if skip_null:
        sdf = sdf[sdf[value_col] != 0]
    meas_cnt = sdf[measure_col].sum()
    # Weight and value both come from the filtered frame, so the product is
    # element-wise and does not depend on index alignment with ``df``.
    weighted = (sdf[measure_col] / meas_cnt) * sdf[value_col]
    return weighted.sum()


def calc_median(df, measure_col, value_col, skip_null=True):
    """Return the median of ``value_col`` weighted by ``measure_col``.

    Rows are sorted by value and the result is the first value at which the
    cumulative weight reaches half of the total weight (lower weighted
    median). Rows whose value is NaN are always ignored.

    :param df: dataframe containing both columns
    :param measure_col: name of the column with the weights (measurement counts)
    :param value_col: name of the column with the values
    :param skip_null: when true, rows whose value equals 0 are ignored as well
    :return: the weighted median value
    :raises IndexError: when no rows are left after filtering
    """
    sdf = df[[measure_col, value_col]][df[value_col].notna()]
    if skip_null:
        sdf = sdf[sdf[value_col] != 0]
    sdf = sdf.sort_values(value_col)

    weights = sdf[measure_col]
    # Kept as a separate series instead of a new column, so nothing is
    # written onto a filtered slice of the caller's frame.
    cumulative = weights.cumsum()
    # The threshold is half of the total weight, not the median of the
    # cumulative sums: the latter ignores how large each weight is.
    half_total = weights.sum() / 2.0
    return sdf[value_col][cumulative >= half_total].iloc[0]


def normalize_df(df):
    """Append the fields parsed from ``EventValue`` as extra columns.

    Every ``EventValue`` cell is parsed with ``parse_row``; the resulting
    dicts become columns that are concatenated to ``df`` row by row.

    :param df: dataframe with an ``EventValue`` column
    :return: new dataframe with the columns of ``df`` followed by the parsed ones
    """
    # ``pd.io.json.json_normalize`` is deprecated since pandas 1.0 in favour
    # of the top-level function; fall back to it only on older pandas.
    json_normalize = getattr(pd, 'json_normalize', None)
    if json_normalize is None:
        json_normalize = pd.io.json.json_normalize

    parsed = df['EventValue'].apply(parse_row)
    # Convert list of jsons to columns
    norm = json_normalize(parsed.tolist())
    # One output row per input row: reuse the source index so the
    # column-wise concat pairs rows by position whatever index ``df`` has.
    norm.index = df.index
    return pd.concat([df, norm], axis=1)


def gen_data(date, df, dl_type):
    """Build the single-row payload for one transfer direction.

    :param date: anything ``pd.to_datetime`` accepts; formatted as ``%Y-%m-%d``
    :param df: dataframe with ``measures``, ``<dl_type>_average`` and
        ``<dl_type>_median`` columns
    :param dl_type: ``'upload'`` or ``'download'``
    :return: one-element list holding a dict with ``fielddate``,
        ``average_speed`` and ``median_speed``
    :raises AssertionError: when ``dl_type`` is neither of the two directions
    """
    assert dl_type in ('upload', 'download')
    fielddate = pd.to_datetime(date).strftime('%Y-%m-%d')
    average_speed = calc_average(df, 'measures', '%s_average' % dl_type)
    median_speed = calc_median(df, 'measures', '%s_median' % dl_type)
    return [{
        'fielddate': fielddate,
        'average_speed': average_speed,
        'median_speed': median_speed,
    }]


def process_day(date):
    """Download, aggregate and upload the speed statistics of one day.

    The day's rows are fetched with ``get_metrica_stat`` and normalized, then
    the download and the upload aggregates are sent to their Statface reports
    (download first).

    :param date: anything ``pd.to_datetime`` accepts
    """
    day = pd.to_datetime(date).strftime("%Y-%m-%d")
    logger.info('Processing %s' % day)
    frame = normalize_df(get_metrica_stat(day, day))

    client = statface_client.StatfaceClient(oauth_token=STAT_TOKEN, host=statface_client.STATFACE_PRODUCTION)
    # (report path, direction) pairs, processed in this order.
    targets = (
        ('Disk/Admin/Speed/DownloadMetricaSpeed', 'download'),
        ('Disk/Admin/Speed/UploadMetricaSpeed', 'upload'),
    )
    for report_path, direction in targets:
        report = client.get_report(report_path)
        # NOTE(review): 'd' is presumably the daily scale of the report -- confirm.
        report.upload_data('d', data=gen_data(day, frame, direction))


def main():
    """Parse the command line and process every requested day in order.

    ``--start`` is the first day (yesterday when omitted) and ``--day`` is
    the number of consecutive days to process starting from it.
    """
    yesterday = (pd.to_datetime('today') - pd.Timedelta(days=1)).strftime("%Y-%m-%d")

    parser = argparse.ArgumentParser(description='Process metric data and upload to stat')
    parser.add_argument('-s', '--start', type=str, default=yesterday, help='Start date Y-m-d; Yesterday by default')
    parser.add_argument('-d', '--day', type=int, help='Collect N days from start date', default=1)
    args = parser.parse_args()

    first_day = pd.to_datetime(args.start)
    for offset in range(args.day):
        process_day(first_day + pd.Timedelta(days=offset))


# Run the command-line entry point only when the file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
