from digger.digger import DataDigger
from pprint import pprint
import itertools
from common.util import ClickhouseClient
from common.models import Job
import requests
import json
from django.core.management.base import BaseCommand
from datetime import datetime
import time
import calendar
import re

# ==============================================================

import pandas as pd
from io import StringIO

# ClickHouse database name used in the Lunapark (load test) queries below.
LOADDB_NAME = 'loaddb'

# Column layout shared by the two "*_codes_buffer" tables; copied per table
# so that every entry of the mapping owns an independent list.
_CODE_COUNT_COLS = ('time', 'tag', 'code', 'cnt')

# Table name -> ordered list of columns selected from that table.
# The same list is passed to pandas as the column names of the parsed TSV,
# and the first column becomes the DataFrame index (index_col=0).
CH_LOADDB_COLS = dict(
    monitoring_verbose_data_buffer=['time', 'target_host', 'metric_name', 'value'],
    net_codes_buffer=list(_CODE_COUNT_COLS),
    proto_codes_buffer=list(_CODE_COUNT_COLS),
    rt_microsecond_details_buffer=[
        'time', 'tag',
        'connect_time_sum', 'send_time_sum', 'latency_sum', 'receive_time_sum',
        'reqps', 'resps', 'threads', 'igress', 'egress', 'self_load',
    ],
    rt_microsecond_histograms_buffer=['time', 'tag', 'bin', 'cnt'],
    rt_quantiles_buffer=[
        'time', 'tag',
        'q50', 'q75', 'q80', 'q85', 'q90', 'q95', 'q98', 'q99', 'q100',
    ],
)

# ClickHouse database name of the Volta tables.
VOLTADB_NAME = 'volta'

# Column layout shared by the Volta tables that carry a text message.
_LOG_MESSAGE_COLS = ('sys_uts', 'log_uts', 'app', 'tag', 'message')

# Table name -> ordered list of columns selected from that Volta table.
CH_VOLTA_COLS = dict(
    currents=['uts', 'value'],
    current=['time', 'value'],
    syncs=list(_LOG_MESSAGE_COLS),
    events=list(_LOG_MESSAGE_COLS),
    metrics=['sys_uts', 'log_uts', 'app', 'tag', 'value'],
    fragments=list(_LOG_MESSAGE_COLS),
    logentries=['sys_uts', 'message'],
)


class Digger(DataDigger):
    """DataDigger subclass that downloads raw job tables as pandas DataFrames.

    Queries are sent as ClickHouse SQL (``FORMAT TSV``) to the backend's
    ``/api/dungeon/`` endpoint and the TSV response is parsed with pandas.

    Attributes such as ``lunapark_job``, ``lunapark_mobile_job``, ``meta``,
    ``mobile_meta``, ``job_id``, ``mobile_job_id``, ``PROTO``, ``backend`` and
    the ``prepare_payload`` method are not defined in this class; presumably
    they are provided by ``DataDigger`` -- confirm against the base class.
    """

    def detect_job_id(self):
        """Derive the missing job id (lunapark or mobile) from the other job's meta.

        :raises AssertionError: if neither id is set, or if the mobile job id
            cannot be found in the lunapark job meta
        """
        assert any((self.lunapark_mobile_job, self.lunapark_job)), \
            'At least one id required: test_id or mobile_test_id'
        if not self.lunapark_mobile_job and self.lunapark_job:
            self.lunapark_mobile_job = self.meta.get('mobile_job')
            assert self.lunapark_mobile_job, \
                'Failed to automatically detect mobile job id for job: {}'.format(self.lunapark_job)
        elif self.lunapark_mobile_job and not self.lunapark_job:
            self.lunapark_job = self.mobile_meta.get('job')

    def _post_tsv_query(self, payload, columns, job_label):
        """POST an SQL payload to ``/api/dungeon/`` and parse the TSV reply.

        :param payload: SQL text expected to end with ``FORMAT TSV``
        :param columns: column names for the resulting frame; the first one
            becomes the index
        :param job_label: job id printed when the response body is empty
        :return: pandas DataFrame, or None when the response contains no data
        """
        query = '{proto}://{host}{path}'.format(
            proto=self.PROTO,
            host=self.backend,
            path='/api/dungeon/'
        )
        # NOTE(review): TLS certificate verification is disabled here, as in
        # the original code -- confirm this is intended for the backend.
        r = requests.post(query, data=payload, verify=False)
        try:
            return pd.read_csv(StringIO(r.text), sep='\t', names=columns, index_col=0)
        except pd.errors.EmptyDataError:
            print('Empty data for job: {}'.format(job_label))
            return None

    def get_volta_data_by_type(self, type):
        """Fetch a Volta table for the mobile job as a DataFrame.

        :param type: requested Volta table name
        :return: pandas DataFrame indexed by the first column of the table,
            or None when the table is unknown or the response is empty
        """
        # NOTE(review): the requested ``type`` is always replaced by the
        # currents table matching the job's meta 'version', so the argument is
        # effectively ignored. Kept as-is because callers may rely on it.
        if self.mobile_meta.get('version') == '2':
            type = 'currents'
        else:
            type = 'current'
        if type not in CH_VOLTA_COLS:
            return None
        # Each offset independently falls back to 0 when missing or empty.
        # (Previously the log offset was reset based on the *sys* offset.)
        sys_uts_offset = self.mobile_meta.get('sys_uts_offset') or 0
        log_uts_offset = self.mobile_meta.get('log_uts_offset') or 0
        columns = CH_VOLTA_COLS[type]
        payload = """
            SELECT {cols}
            FROM {voltadb_name}.{type}
            WHERE test_id = '{id}'
            FORMAT TSV
        """.format(
            cols=", ".join(
                self.prepare_payload(col, sys_uts_offset, log_uts_offset) for col in columns
            ),
            id=self.mobile_job_id,
            voltadb_name=VOLTADB_NAME,
            type=type,
        )
        return self._post_tsv_query(payload, columns, self.lunapark_mobile_job)

    def get_lunapark_data_by_type(self, type):
        """Fetch a Lunapark (loaddb) table for the job as a DataFrame.

        :param type: table name; must be a key of CH_LOADDB_COLS
        :return: pandas DataFrame indexed by the first column of the table,
            or None when there is no job id or the response is empty
        :raises KeyError: if ``type`` is not a key of CH_LOADDB_COLS
        """
        if not self.job_id:
            return None
        columns = CH_LOADDB_COLS[type]
        payload = """
            SELECT {cols}
            FROM {loaddb_name}.{type}
            WHERE job_id={id}
            FORMAT TSV
        """.format(
            cols=", ".join(columns),
            id=self.lunapark_job,
            loaddb_name=LOADDB_NAME,
            type=type,
        )
        # Report the id that was actually queried (the lunapark job).
        return self._post_tsv_query(payload, columns, self.lunapark_job)


# ==============================================================


# Maximum number of rows placed into a single ClickHouse INSERT statement
# by the convert_* functions below.
CHUNK_SIZE = 10000
# Base URL of the Volta backend REST API (v1); 'data/' and 'job_meta/' are
# appended to it. The trailing comment holds the local-development alternative.
volta_url = 'http://volta-back.common-int.yandex-team.ru/api/v1/'  # 'http://localhost:8000/api/v1/'

# Module-level ClickHouse client used for the metric inserts; note that it is
# instantiated when this module is imported.
ch_client = ClickhouseClient()


def fetch_meta(dd):
    """Merge the lunapark job meta into the mobile job meta and return it.

    For every key of ``dd.meta`` the mobile value is kept when it is truthy,
    otherwise the lunapark value is taken. ``dd.mobile_meta`` is updated in
    place and is the object that gets returned.

    :param dd: object exposing ``mobile_meta`` and ``meta`` mappings
    :return: the (mutated) ``dd.mobile_meta`` mapping
    """
    merged = dd.mobile_meta
    for key, lunapark_value in dd.meta.items():
        merged[key] = merged.get(key) or lunapark_value
    return merged


def create_job_meta(job_obj, job_meta):
    """POST every relevant meta entry of a job to the Volta ``job_meta/`` endpoint.

    A fixed set of service keys is skipped; each remaining key/value pair is
    sent as its own JSON request and the decoded response is pretty-printed.

    :param job_obj: job object; only its ``id`` is read
    :param job_meta: mapping of meta keys to values
    :return: None
    """
    skipped_keys = ('n', 'resource_uri', 'fd', 'finalized', 'td', 'status', 'test_start')
    for key, value in job_meta.items():
        if key in skipped_keys:
            continue
        body = json.dumps({
            'job': job_obj.id,
            'key': key,
            'value': value,
        })
        response = requests.post(
            volta_url + 'job_meta/',
            data=body,
            headers={'Content-Type': 'application/json'},
        )
        pprint(response.json())


def grouper(n, iterable):
    """Yield consecutive chunks of at most *n* items taken from *iterable*.

    Every chunk is a non-empty list; the last one may be shorter than *n*.
    The generator stops once the input is exhausted, so callers do not have
    to detect an empty chunk themselves (doing so remains harmless).

    :param n: maximum chunk size
    :param iterable: any iterable; it is consumed exactly once
    :return: generator of lists
    """
    # iter() makes re-iterable inputs (lists, ranges, ...) advance between
    # chunks; without it islice() would restart from the beginning each time.
    it = iter(iterable)
    while True:
        chunk = list(itertools.islice(it, n))
        if not chunk:
            return
        yield chunk


def convert_currents(dd, job_obj, job_meta):
    """Register the 'current' metric of a job and upload its samples to ClickHouse.

    A data entry is created through the Volta API (POST ``data/``) and the
    ``uniq_id`` from its response tags every uploaded row. Rows have the form
    ``(date, uniq_id, <index value>, <first column value>)`` and are inserted
    into the ``metrics`` table in chunks of CHUNK_SIZE rows.

    :param dd: digger object providing ``get_currents()``; the result is used
        as a pandas DataFrame (``.index`` / ``.values``)
    :param job_obj: job object; ``id`` and ``test_start`` are read
    :param job_meta: job meta mapping; 'sys_uts_offset' is sent as the offset
    :return: None
    """
    currents = dd.get_currents()
    if currents is None:
        # presumably get_currents() returns None when the backend has no data
        # (Digger's fetchers do) -- skip instead of failing on ``None.index``
        # and avoid creating a data entry that would stay empty.
        print('No currents data for job: {}'.format(job_obj.id))
        return

    data = {
        'job': job_obj.id,
        'type': 'metrics',
        'name': 'current',
        'group': 'current',
        # ``or 0`` also covers a key that is present but null/empty, which
        # would otherwise make int() raise.
        'offset': int(job_meta.get('sys_uts_offset') or 0),
    }

    create_data = requests.post(volta_url + 'data/', data=json.dumps(data),
                                headers={'Content-Type': 'application/json'})

    create_data = create_data.json()
    print(create_data)
    uniq = create_data['uniq_id']

    # The date is the same for every row, so compute it once.
    # NOTE(review): ``// 10 ** 6`` suggests test_start is in microseconds;
    # fromtimestamp() uses the local timezone -- confirm both are intended.
    test_date = str(datetime.fromtimestamp(job_obj.test_start // 10 ** 6).date())

    # zip() stops at the shortest input, i.e. at the end of the frame.
    values = zip(
        itertools.repeat(test_date),
        itertools.repeat(uniq),
        currents.index,
        (row.tolist()[0] for row in currents.values),
    )

    for chunk in grouper(CHUNK_SIZE, values):
        chunk_str = ','.join(repr(v) for v in chunk)
        if not chunk_str:
            # An empty chunk means the rows are exhausted.
            break
        sql = '''
            insert into metrics values {}
        '''.format(chunk_str)
        sql = sql.encode('utf-8')

        ch_client.insert(sql)
        # Pause between inserts (kept from the original implementation).
        time.sleep(0.3)


def _monitoring_time_to_us(value):
    """Convert a '%Y-%m-%d %H:%M:%S' string into a timestamp scaled by 10**6.

    The string is interpreted via ``calendar.timegm`` (i.e. as UTC).
    """
    parsed = datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
    return int(calendar.timegm(parsed.timetuple())) * 10 ** 6


def convert_monitoring(dd, job_obj):
    """Upload every monitoring metric of a job to ClickHouse.

    The monitoring frame is grouped by (target_host, metric_name). For each
    group a data entry is created through the Volta API (POST ``data/``) and
    the group's rows are inserted into the ``metrics`` table in chunks of
    CHUNK_SIZE rows. Row timestamps are relative to the first row of the
    group; the distance between that first row and ``job_obj.test_start`` is
    sent as the entry's ``offset``.

    :param dd: digger object providing ``get_monitoring()``
    :param job_obj: job object; ``id`` and ``test_start`` are read
    :return: None
    """
    test_start = job_obj.test_start

    monitoring = dd.get_monitoring()
    # ``not dataframe`` raises ValueError (ambiguous truth value), so the
    # "nothing to upload" case has to be tested explicitly.
    if monitoring is None or monitoring.empty:
        return

    # Same date for every row of every metric -- compute it once.
    test_date = str(datetime.fromtimestamp(test_start // 10 ** 6).date())

    for (target_host, metric_name), metric_data in monitoring.groupby(['target_host', 'metric_name']):
        print('uploading {}@{}'.format(metric_name, target_host))

        # Timestamp of the group's first row (index 0; the previous index 1
        # skipped a row and failed on single-row groups).
        first_time = _monitoring_time_to_us(metric_data.index[0])
        offset = first_time - test_start

        # Remove the "custom:" prefix if any.
        short_name = re.sub(r'^custom:', '', metric_name)

        data = {
            'job': job_obj.id,
            'type': 'metrics',
            'name': short_name,
            'group': short_name.split('_', 1)[0],
            'host': target_host,
            'offset': offset
        }

        create_data = requests.post(volta_url + 'data/', data=json.dumps(data),
                                    headers={'Content-Type': 'application/json'}).json()

        uniq = create_data['uniq_id']

        # zip() stops at the shortest input, i.e. at the end of the group.
        values = zip(
            itertools.repeat(test_date),
            itertools.repeat(uniq),
            (_monitoring_time_to_us(ts) - first_time for ts in metric_data.index),
            # third data column of the frame, as in the original positional access
            (row.tolist()[2] for row in metric_data.values),
        )

        for chunk in grouper(CHUNK_SIZE, values):
            chunk_str = ','.join(repr(v) for v in chunk)
            if not chunk_str:
                # An empty chunk means the rows are exhausted.
                break
            sql = '''
                insert into metrics values {}
            '''.format(chunk_str)
            sql = sql.encode('utf-8')

            ch_client.insert(sql)
            # Pause between inserts (kept from the original implementation).
            time.sleep(0.3)


class Command(BaseCommand):
    """Django management command: copy lunapark + volta job data into the luna db."""

    help = 'migrates lunapark+volta job data to luna db'

    def add_arguments(self, parser):
        """Declare the positional ``mobile_test_id`` argument (one or more ints)."""
        parser.add_argument('mobile_test_id', nargs='+', type=int)

    def handle(self, *args, **options):
        """Migrate every job id given on the command line, one after another.

        The original author's note here read "START WITH 12671 / ENDWITH 21458"
        (presumably the id range that was to be migrated -- confirm).

        :param args: unused
        :param options: parsed options; ``options['mobile_test_id']`` is read
        :return: None
        """
        for mobile_test_id in options['mobile_test_id']:
            self._migrate_job(mobile_test_id)
            # Print the current time after each job, as a progress marker.
            print(time.time())

    def _migrate_job(self, mobile_test_id):
        """Update one Job record, then upload its meta, currents and monitoring data."""
        digger = Digger(mobile_test_id=mobile_test_id, backend='https://lunapark.yandex-team.ru/')
        meta = fetch_meta(digger)

        job = Job.objects.get(id=mobile_test_id)
        job.test_start = int(meta.get('test_start', 0))
        job.status = 'uploaded'
        job.save()

        create_job_meta(job, meta)
        convert_currents(digger, job, meta)
        convert_monitoring(digger, job)
