from digger.digger import DataDigger
from pprint import pprint
import itertools
from common.util import ClickhouseClient
import requests
import json
from django.core.management.base import BaseCommand
from datetime import datetime
import time
import calendar
import re
import sys
import warnings

# Silence every warning in this process unless warning options were given explicitly
# (sys.warnoptions holds the -W options the interpreter was started with).
if not sys.warnoptions:
    warnings.simplefilter("ignore")


def escape_string(s):
    """
    Backslash-escape backslashes and single quotes in a string.

    Backslashes are doubled first, so the backslash that is then put in front
    of every single quote is not doubled again.

    :param s: text to escape
    :return: escaped copy of ``s``
    """
    escaped = s
    for raw, replacement in (('\\', '\\\\'), ("'", "\\'")):
        escaped = escaped.replace(raw, replacement)
    return escaped


# Column names per table, keyed by table name.
# Not referenced by any code in the visible part of this file.
# NOTE(review): presumably the column layout of the source lunapark ClickHouse buffer tables -- confirm.
CH_LOADDB_COLS = {
    'monitoring_verbose_data_buffer': ['time', 'target_host', 'metric_name', 'value'],
    'net_codes_buffer': ['time', 'tag', 'code', 'cnt'],
    'proto_codes_buffer': ['time', 'tag', 'code', 'cnt'],
    'rt_microsecond_details_buffer': [
        'time', 'tag', 'connect_time_sum', 'send_time_sum', 'latency_sum', 'receive_time_sum',
        'reqps', 'resps', 'threads', 'igress', 'egress', 'self_load'
    ],
    'rt_microsecond_histograms_buffer': ['time', 'tag', 'bin', 'cnt'],
    'rt_quantiles_buffer': ['time', 'tag', 'q50', 'q75', 'q80', 'q85', 'q90', 'q95', 'q98', 'q99', 'q100']
}


# Maximum number of rows the convert_* functions put into a single INSERT statement.
CHUNK_SIZE = 10000
# Base URL of the REST API that receives jobs ('jobs/'), job metadata ('job_meta/') and data entries ('data/').
volta_url = 'http://volta-back-testing.common-int.yandex-team.ru/api/v1/'  # 'http://localhost:8000/api/v1/'

# Shared client used for every insert below; it is constructed once, when this module is imported.
ch_client = ClickhouseClient()


def fetch_meta(dd):
    """
    Return the job meta of ``dd`` tagged with the id of the source lunapark job.

    The mapping held in ``dd.meta`` is modified in place (no copy is made): the
    key ``_original_job_id`` is set to ``dd.lunapark_job``.

    :param dd: object exposing ``meta`` (a mutable mapping) and ``lunapark_job``
    :return: the very object read from ``dd.meta``
    """
    meta = dd.meta
    source_job_id = dd.lunapark_job
    meta['_original_job_id'] = source_job_id
    return meta


def create_job_and_meta(job_meta):
    """
    Create a job through the API and store the remaining meta fields for it.

    The job is POSTed to ``jobs/`` with the lunapark status and a ``test_start``
    derived from ``job_meta['fd']``. Every other meta item, except the keys in
    the skip list below, is POSTed to ``job_meta/`` and the API reply is printed.

    :param job_meta: mapping with at least ``fd`` ('%Y-%m-%dT%H:%M:%S.%f') and ``status``
    :return: decoded JSON reply of the job creation request (its ``id`` is used for the meta items)
    :raises requests.HTTPError: when the job itself cannot be created
    """
    # Keys that are either sent as part of the job itself or not copied at all.
    skipped_keys = frozenset(('n', 'resource_uri', 'fd', 'finalized', 'td', 'status', 'test_start'))
    json_headers = {'Content-Type': 'application/json'}

    fd = datetime.strptime(job_meta['fd'], '%Y-%m-%dT%H:%M:%S.%f')
    job = {
        'status': job_meta['status'],
        # Microseconds since the epoch, sent as a string.
        # NOTE(review): time.mktime reads ``fd`` as local time, while the convert_* functions
        # use calendar.timegm (UTC) for the same field -- confirm which interpretation is intended.
        'test_start': str(int(time.mktime(fd.timetuple())*1000000 + fd.microsecond))
    }

    response = requests.post(volta_url + 'jobs/', data=json.dumps(job), headers=json_headers)
    # Without a job nothing else can be uploaded: fail with the HTTP error right away
    # instead of a KeyError on 'id' further down.
    response.raise_for_status()
    create_job = response.json()

    for k, v in job_meta.items():
        if k in skipped_keys:
            continue
        jobmetadata = {
            'job': create_job['id'],
            'key': k,
            'value': v,
        }
        create_meta = requests.post(volta_url + 'job_meta/', data=json.dumps(jobmetadata),
                                    headers=json_headers)
        # The reply is printed rather than checked, so one rejected key does not stop the migration.
        pprint(create_meta.json())

    return create_job


def grouper(n, iterable):
    """
    Split ``iterable`` into consecutive chunks of at most ``n`` items.

    The input is wrapped with ``iter()`` so that lists and other re-iterable
    containers are consumed progressively instead of yielding their first
    ``n`` items over and over. Generation stops once the input is exhausted;
    an empty chunk is never yielded.

    :param n: maximum number of items per chunk
    :param iterable: any iterable or iterator
    :return: generator of lists; the last one may hold fewer than ``n`` items
    """
    it = iter(iterable)
    while True:
        chunk = list(itertools.islice(it, n))
        if not chunk:
            return
        yield chunk


def convert_quantiles(dd, job):
    """
    Upload the per-case response time quantiles of a lunapark job.

    Each distinct tag (case) found in ``dd.get_rt_quantiles()`` is registered
    as a data entry through a POST to ``data/``; its rows are then inserted
    into the ``aggregates`` table, at most ``CHUNK_SIZE`` rows per statement.
    Only q50..q100 come from the source data; q0, q10, q25, average and stddev
    are written as 0.

    :param dd: data source providing ``meta['fd']`` ('%Y-%m-%dT%H:%M:%S.%f')
        and ``get_rt_quantiles()``; the result is used as a pandas-style frame
        indexed by '%Y-%m-%d %H:%M:%S' strings with ``tag`` and ``q*`` columns
    :param job: mapping holding the ``id`` of the already created job
    :return: None
    :raises requests.HTTPError: when the data entry cannot be created
    """
    def to_epoch_us(stamp):
        """Read a '%Y-%m-%d %H:%M:%S' string as UTC and return microseconds since the epoch."""
        return int(calendar.timegm(datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S').timetuple())) * 10**6

    started = datetime.strptime(dd.meta['fd'], '%Y-%m-%dT%H:%M:%S.%f')
    test_start = calendar.timegm(started.timetuple()) * 10**6 + started.microsecond
    # Identical for every row, so it is computed once instead of once per row.
    # NOTE(review): test_start is built with timegm (UTC) but converted back to a date in local
    # time, so key_date can differ from the date in ``fd`` on non-UTC hosts -- confirm the intent.
    key_date = str(datetime.fromtimestamp(test_start // 10**6).date())

    quantiles = dd.get_rt_quantiles()
    # Assign the filled column back: ``frame.tag.fillna(..., inplace=True)`` acts on an intermediate
    # object and does not reach the frame under pandas Copy-on-Write, which would silently drop
    # every untagged row in the loop below.
    quantiles['tag'] = quantiles['tag'].fillna('')

    for case in quantiles['tag'].unique():
        case_data = quantiles[quantiles['tag'] == case]
        if case_data.empty:
            continue
        first_time = to_epoch_us(case_data.index[0])

        data = {
            'job': job['id'],
            'type': 'metrics',
            'lunapark_entity': 'case_quantiles',
            'name': case,
            # distance between the first sample of the case and the test start, in microseconds
            'offset': first_time - test_start
        }
        response = requests.post(volta_url + 'data/', data=json.dumps(data),
                                 headers={'Content-Type': 'application/json'})
        # Fail with the HTTP error here instead of a KeyError on 'uniq_id' below.
        response.raise_for_status()
        uniq = response.json()['uniq_id']

        # Column order: key_date,tag,ts,q0,q10,q25,q50,q75,q80,q85,q90,q95,q98,q99,q100,average,stddev
        # .tolist() yields native Python numbers, so repr() below stays a plain SQL literal
        # (NumPy >= 2 renders its scalars as e.g. ``np.float64(1.0)``).
        values = zip(
            itertools.repeat(key_date),
            itertools.repeat(uniq),
            (to_epoch_us(stamp) - first_time for stamp in case_data.index),  # ts relative to the first sample
            itertools.repeat(0),  # q0 (min)
            itertools.repeat(0),  # q10
            itertools.repeat(0),  # q25
            case_data['q50'].tolist(),
            case_data['q75'].tolist(),
            case_data['q80'].tolist(),
            case_data['q85'].tolist(),
            case_data['q90'].tolist(),
            case_data['q95'].tolist(),
            case_data['q98'].tolist(),
            case_data['q99'].tolist(),
            case_data['q100'].tolist(),
            itertools.repeat(0),  # average
            itertools.repeat(0),  # stddev
        )

        for chunk in grouper(CHUNK_SIZE, values):
            chunk_str = ','.join(repr(v) for v in chunk)
            if not chunk_str:
                # an empty chunk means the rows are exhausted
                break
            sql = '''
                insert into aggregates values {}
            '''.format(chunk_str)
            sql = sql.encode('utf-8')

            ch_client.insert(sql)
            time.sleep(1)


def convert_proto_codes(dd, job):
    """
    Upload the per-case protocol code counts of a lunapark job.

    Each distinct tag (case) found in ``dd.get_proto_codes()`` is registered as
    a data entry through a POST to ``data/``; its rows are then inserted into
    the ``histograms`` table, at most ``CHUNK_SIZE`` rows per statement.

    :param dd: data source providing ``meta['fd']`` ('%Y-%m-%dT%H:%M:%S.%f')
        and ``get_proto_codes()``; the result is used as a pandas-style frame
        indexed by '%Y-%m-%d %H:%M:%S' strings with ``tag``, ``code`` and ``cnt`` columns
    :param job: mapping holding the ``id`` of the already created job
    :return: None
    :raises requests.HTTPError: when the data entry cannot be created
    """
    def to_epoch_us(stamp):
        """Read a '%Y-%m-%d %H:%M:%S' string as UTC and return microseconds since the epoch."""
        return int(calendar.timegm(datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S').timetuple())) * 10**6

    started = datetime.strptime(dd.meta['fd'], '%Y-%m-%dT%H:%M:%S.%f')
    test_start = calendar.timegm(started.timetuple()) * 10 ** 6 + started.microsecond
    # Identical for every row, so it is computed once instead of once per row.
    # NOTE(review): test_start is built with timegm (UTC) but converted back to a date in local
    # time, so key_date can differ from the date in ``fd`` on non-UTC hosts -- confirm the intent.
    key_date = str(datetime.fromtimestamp(test_start // 10 ** 6).date())

    proto_codes = dd.get_proto_codes()
    # Assign the filled column back: ``frame.tag.fillna(..., inplace=True)`` acts on an intermediate
    # object and does not reach the frame under pandas Copy-on-Write, which would silently drop
    # every untagged row in the loop below.
    proto_codes['tag'] = proto_codes['tag'].fillna('')

    for case in proto_codes['tag'].unique():
        case_data = proto_codes[proto_codes['tag'] == case]
        if case_data.empty:
            continue
        first_time = to_epoch_us(case_data.index[0])

        data = {
            'job': job['id'],
            'type': 'metrics',
            'lunapark_entity': 'case_proto_codes',
            'name': case,
            # distance between the first sample of the case and the test start, in microseconds
            'offset': first_time - test_start
        }
        response = requests.post(volta_url + 'data/', data=json.dumps(data),
                                 headers={'Content-Type': 'application/json'})
        # Fail with the HTTP error here instead of a KeyError on 'uniq_id' below.
        response.raise_for_status()
        uniq = response.json()['uniq_id']

        # .tolist() yields native Python numbers, so repr() below stays a plain SQL literal
        # (NumPy >= 2 renders its scalars as e.g. ``np.int64(1)``).
        values = zip(
            itertools.repeat(key_date),
            itertools.repeat(uniq),
            (to_epoch_us(stamp) - first_time for stamp in case_data.index),  # ts relative to the first sample
            (str(code) for code in case_data['code'].values),  # codes are written as strings
            case_data['cnt'].tolist(),
        )

        for chunk in grouper(CHUNK_SIZE, values):
            chunk_str = ','.join(repr(v) for v in chunk)
            if not chunk_str:
                # an empty chunk means the rows are exhausted
                break
            sql = '''
                insert into histograms values {}
            '''.format(chunk_str)
            sql = sql.encode('utf-8')

            ch_client.insert(sql)
            time.sleep(1)


def convert_net_codes(dd, job):
    """
    Upload the per-case network code counts of a lunapark job.

    Each distinct tag (case) found in ``dd.get_net_codes()`` is registered as a
    data entry through a POST to ``data/``; its rows are then inserted into the
    ``histograms`` table, at most ``CHUNK_SIZE`` rows per statement.

    :param dd: data source providing ``meta['fd']`` ('%Y-%m-%dT%H:%M:%S.%f')
        and ``get_net_codes()``; the result is used as a pandas-style frame
        indexed by '%Y-%m-%d %H:%M:%S' strings with ``tag``, ``code`` and ``cnt`` columns
    :param job: mapping holding the ``id`` of the already created job
    :return: None
    :raises requests.HTTPError: when the data entry cannot be created
    """
    def to_epoch_us(stamp):
        """Read a '%Y-%m-%d %H:%M:%S' string as UTC and return microseconds since the epoch."""
        return int(calendar.timegm(datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S').timetuple())) * 10**6

    started = datetime.strptime(dd.meta['fd'], '%Y-%m-%dT%H:%M:%S.%f')
    test_start = calendar.timegm(started.timetuple()) * 10 ** 6 + started.microsecond
    # Identical for every row, so it is computed once instead of once per row.
    # NOTE(review): test_start is built with timegm (UTC) but converted back to a date in local
    # time, so key_date can differ from the date in ``fd`` on non-UTC hosts -- confirm the intent.
    key_date = str(datetime.fromtimestamp(test_start // 10 ** 6).date())

    net_codes = dd.get_net_codes()
    # Assign the filled column back: ``frame.tag.fillna(..., inplace=True)`` acts on an intermediate
    # object and does not reach the frame under pandas Copy-on-Write, which would silently drop
    # every untagged row in the loop below.
    net_codes['tag'] = net_codes['tag'].fillna('')

    for case in net_codes['tag'].unique():
        case_data = net_codes[net_codes['tag'] == case]
        if case_data.empty:
            continue
        first_time = to_epoch_us(case_data.index[0])

        data = {
            'job': job['id'],
            'type': 'metrics',
            'lunapark_entity': 'case_net_codes',
            'name': case,
            # distance between the first sample of the case and the test start, in microseconds
            'offset': first_time - test_start
        }
        response = requests.post(volta_url + 'data/', data=json.dumps(data),
                                 headers={'Content-Type': 'application/json'})
        # Fail with the HTTP error here instead of a KeyError on 'uniq_id' below.
        response.raise_for_status()
        uniq = response.json()['uniq_id']

        # .tolist() yields native Python numbers, so repr() below stays a plain SQL literal
        # (NumPy >= 2 renders its scalars as e.g. ``np.int64(1)``).
        values = zip(
            itertools.repeat(key_date),
            itertools.repeat(uniq),
            (to_epoch_us(stamp) - first_time for stamp in case_data.index),  # ts relative to the first sample
            (str(code) for code in case_data['code'].values),  # codes are written as strings
            case_data['cnt'].tolist(),
        )

        for chunk in grouper(CHUNK_SIZE, values):
            chunk_str = ','.join(repr(v) for v in chunk)
            if not chunk_str:
                # an empty chunk means the rows are exhausted
                break
            sql = '''
                insert into histograms values {}
            '''.format(chunk_str)
            sql = sql.encode('utf-8')

            ch_client.insert(sql)
            time.sleep(1)


def convert_rt_histograms(dd, job):
    """
    Upload the per-case response time histograms of a lunapark job.

    Each distinct tag (case) found in ``dd.get_rt_ms_hist()`` is registered as
    a data entry through a POST to ``data/``; its rows are then inserted into
    the ``distributions`` table, at most ``CHUNK_SIZE`` rows per statement.
    The source has a single ``bin`` value per row, which is written as both the
    left and the right bin border.

    :param dd: data source providing ``meta['fd']`` ('%Y-%m-%dT%H:%M:%S.%f')
        and ``get_rt_ms_hist()``; the result is used as a pandas-style frame
        indexed by '%Y-%m-%d %H:%M:%S' strings with ``tag``, ``bin`` and ``cnt`` columns
    :param job: mapping holding the ``id`` of the already created job
    :return: None
    :raises requests.HTTPError: when the data entry cannot be created
    """
    def to_epoch_us(stamp):
        """Read a '%Y-%m-%d %H:%M:%S' string as UTC and return microseconds since the epoch."""
        return int(calendar.timegm(datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S').timetuple())) * 10**6

    started = datetime.strptime(dd.meta['fd'], '%Y-%m-%dT%H:%M:%S.%f')
    test_start = calendar.timegm(started.timetuple()) * 10 ** 6 + started.microsecond
    # Identical for every row, so it is computed once instead of once per row.
    # NOTE(review): test_start is built with timegm (UTC) but converted back to a date in local
    # time, so key_date can differ from the date in ``fd`` on non-UTC hosts -- confirm the intent.
    key_date = str(datetime.fromtimestamp(test_start // 10 ** 6).date())

    histograms = dd.get_rt_ms_hist()
    # Assign the filled column back: ``frame.tag.fillna(..., inplace=True)`` acts on an intermediate
    # object and does not reach the frame under pandas Copy-on-Write, which would silently drop
    # every untagged row in the loop below.
    histograms['tag'] = histograms['tag'].fillna('')

    for case in histograms['tag'].unique():
        case_data = histograms[histograms['tag'] == case]
        if case_data.empty:
            continue
        first_time = to_epoch_us(case_data.index[0])

        data = {
            'job': job['id'],
            'type': 'metrics',
            'lunapark_entity': 'case_rt_histograms',
            'name': case,
            # distance between the first sample of the case and the test start, in microseconds
            'offset': first_time - test_start
        }
        response = requests.post(volta_url + 'data/', data=json.dumps(data),
                                 headers={'Content-Type': 'application/json'})
        # Fail with the HTTP error here instead of a KeyError on 'uniq_id' below.
        response.raise_for_status()
        uniq = response.json()['uniq_id']

        # .tolist() yields native Python numbers, so repr() below stays a plain SQL literal
        # (NumPy >= 2 renders its scalars as e.g. ``np.int64(1)``).
        bins = case_data['bin'].tolist()
        values = zip(
            itertools.repeat(key_date),
            itertools.repeat(uniq),
            (to_epoch_us(stamp) - first_time for stamp in case_data.index),  # ts relative to the first sample
            bins,  # l - left bin border
            bins,  # r - right bin border
            case_data['cnt'].tolist(),
        )

        for chunk in grouper(CHUNK_SIZE, values):
            chunk_str = ','.join(repr(v) for v in chunk)
            if not chunk_str:
                # an empty chunk means the rows are exhausted
                break
            sql = '''
                insert into distributions values {}
            '''.format(chunk_str)
            sql = sql.encode('utf-8')

            ch_client.insert(sql)
            time.sleep(1)


def convert_monitoring(dd, job):
    """
    Upload the monitoring metrics of a lunapark job.

    ``dd.get_monitoring()`` is grouped by (``target_host``, ``metric_name``).
    Each group is registered as a data entry through a POST to ``data/`` and
    its rows are inserted into the ``metrics`` table, at most ``CHUNK_SIZE``
    rows per statement. The inserted value is the third column of each row.

    NOTE(review): the following numbers were listed here without any
    explanation (possibly ids used while testing -- confirm or remove):
    (5558, 5750, 7055, 7056, 7057, 7058, 13610, 13808, 13810, 13812, 13831, 13832, 13834, 16574, 16602, 16603, 16638,
    16649, 16672, 16679, 17256, 17754, 17755, 19578, 18524, 19078, 19577)

    :param dd: data source providing ``meta['fd']`` ('%Y-%m-%dT%H:%M:%S.%f')
        and ``get_monitoring()``; the result is used as a pandas-style frame
        indexed by '%Y-%m-%d %H:%M:%S' strings
    :param job: mapping holding the ``id`` of the already created job
    :return: None
    :raises requests.HTTPError: when the data entry cannot be created
    """
    def to_epoch_us(stamp):
        """Read a '%Y-%m-%d %H:%M:%S' string as UTC and return microseconds since the epoch."""
        return int(calendar.timegm(datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S').timetuple())) * 10**6

    started = datetime.strptime(dd.meta['fd'], '%Y-%m-%dT%H:%M:%S.%f')
    test_start = calendar.timegm(started.timetuple()) * 10**6 + started.microsecond
    # Identical for every row, so it is computed once instead of once per row.
    # NOTE(review): test_start is built with timegm (UTC) but converted back to a date in local
    # time, so key_date can differ from the date in ``fd`` on non-UTC hosts -- confirm the intent.
    key_date = str(datetime.fromtimestamp(test_start//10**6).date())

    monitoring = dd.get_monitoring()

    for (target_host, metric_name), metric_data in monitoring.groupby(['target_host', 'metric_name']):
        print('uploading {}@{}'.format(metric_name, target_host))

        if metric_data.empty:
            continue
        first_time = to_epoch_us(metric_data.index[0])

        # RE: remove "custom:" prefix if any
        short_name = re.sub('^custom:', '', metric_name)
        data = {
            'job': job['id'],
            'type': 'metrics',
            'name': short_name,
            'group': short_name.split('_', 1)[0],  # part of the name before the first underscore
            'host': target_host,
            # distance between the first sample of the metric and the test start, in microseconds
            'offset': first_time - test_start
        }
        response = requests.post(volta_url + 'data/', data=json.dumps(data),
                                 headers={'Content-Type': 'application/json'})
        # Fail with the HTTP error here instead of a KeyError on 'uniq_id' below.
        response.raise_for_status()
        uniq = response.json()['uniq_id']

        values = zip(
            itertools.repeat(key_date),
            itertools.repeat(uniq),
            (to_epoch_us(stamp) - first_time for stamp in metric_data.index),  # ts relative to the first sample
            # third column of every row, converted to a native Python value by tolist()
            (row.tolist()[2] for row in metric_data.values),
        )

        for chunk in grouper(CHUNK_SIZE, values):
            chunk_str = ','.join(repr(v) for v in chunk)
            if not chunk_str:
                # an empty chunk means the rows are exhausted
                break
            sql = '''
                insert into metrics values {}
            '''.format(chunk_str)
            sql = sql.encode('utf-8')

            ch_client.insert(sql)
            time.sleep(1)


class Command(BaseCommand):
    """Management command that copies the data of lunapark jobs into the luna storage."""

    help = 'migrates lunapark job data to luna db'

    def add_arguments(self, parser):
        """Register the positional argument: one or more integer lunapark test ids."""
        parser.add_argument('lunapark_test_id', type=int, nargs='+')

    def handle(self, *args, **options):
        """
        Migrate every lunapark test id given on the command line.

        For each id a new job with its metadata is created first; quantiles,
        net codes, proto codes, response time histograms and monitoring are
        then uploaded in that order. Currents and events are not migrated.

        Sample ids: massive test id = 1945334, smaller test id = 1959694

        :param args: unused
        :param options: parsed command options; ``lunapark_test_id`` is a list of ints
        :return: None
        """
        # Upload steps, executed in this exact order for every job.
        upload_steps = (
            convert_quantiles,
            convert_net_codes,
            convert_proto_codes,
            convert_rt_histograms,
            convert_monitoring,
        )
        for test_id in options['lunapark_test_id']:
            digger = DataDigger(test_id=test_id, mobile_test_id=1, backend='https://lunapark.yandex-team.ru/')
            meta = fetch_meta(digger)
            created_job = create_job_and_meta(meta)
            for upload in upload_steps:
                upload(digger, created_job)
