#!/usr/bin/env python
# -*- coding: utf-8 -*-
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record,
    files as nfi
)

from qb2.api.v1 import extractors as se, filters as sf, resources as qr

import datetime
# import uatraits
import itertools
import json
import requests
import argparse
import copy
import os
import re
from pytils import tabulate, date_range
from collections import Counter, defaultdict


# YT cluster handle; assigned in main() and used by process_date().
cluster = None
# Default Statface report path (can be overridden with --report).
report = 'Video.All/offline_strm'


def parse_tasks(recs):
    """Turn raw js_tracer rows into flat assessor-task records.

    Every input record must expose a ``Data`` attribute holding a JSON
    document. A row is skipped when:

    * ``Data`` is not valid JSON, is empty, or is not a JSON object;
    * its ``service`` is neither ``YangStreamMonitor`` (desktop) nor
      ``YangMobileStreamMonitor`` (mobile);
    * its ``ts_client`` cannot be converted to a timestamp.

    Free-text Russian error descriptions listed in ``ru_errors`` are moved
    to ``error_other`` and the ``error`` code is replaced with ``'other'``.

    Yields:
        Record: one record per accepted row, carrying the parsed user
        agent, stream/assignment identifiers, the error fields and the
        original JSON object (``parsed_jstracer_data``).
    """
    import uatraits
    detector = uatraits.detector('/usr/share/uatraits/browser.xml')

    # Built once instead of once per record. Kept as a tuple (not a set) so
    # that unhashable ``error`` values still compare instead of raising.
    ru_errors = (
        'Вместо трансляции - все время заставка канала с музыкой '
        'и рекламные блоки',
        'Нет трансляции, висит заглушка телеканала', 'Ошибка загрузки',
        'висит заглушка канала', 'другой канал',
        'заглушка вместо трансляции',
        'нет звука', 'тормозила картинка'
    )

    for rec in recs:
        data = rec.Data
        try:
            obj = json.loads(data)
        except (TypeError, ValueError):
            # Not JSON at all (or Data is missing/None): nothing to parse.
            continue

        # Valid JSON that is not an object (list, number, ...) has no
        # fields to read, so it is skipped like an empty document.
        if not obj or not isinstance(obj, dict):
            continue

        if obj.get('service') == 'YangStreamMonitor':
            platform = 'desktop'
        elif obj.get('service') == 'YangMobileStreamMonitor':
            platform = 'mobile'
        else:
            continue

        tsc = obj.get('ts_client')
        try:
            date = datetime.datetime.fromtimestamp(
                float(tsc)).date().isoformat()
        except (TypeError, ValueError, OverflowError, OSError):
            # A single row without a usable client timestamp must not
            # abort the whole map operation.
            continue

        ts = obj.get('ts', '-')

        ip = obj.get('ip', '-')
        ua = obj.get('ua', '-')
        if isinstance(ua, unicode):
            # str() on a non-ASCII unicode value raises UnicodeEncodeError
            # on Python 2, so encode explicitly.
            ua = ua.encode('utf8')
        else:
            ua = str(ua)

        d = detector.detect(ua)
        BR = d.get('BrowserName', '-')
        OSF = d.get('OSFamily', '-')

        stream_id = obj.get('stream_id', '-')
        stream_title = obj.get('stream_title', '-')
        assignmentId = obj.get('assignmentId', '-')
        vsid = obj.get('vsid', '-')

        error = obj.get('error', 'no errors')
        error_other = obj.get('error_other', '-')

        # ru_errors holds UTF-8 byte strings, so compare in the same form.
        error_ = error
        if isinstance(error, unicode):
            error_ = error.encode('utf8')
        if error_ in ru_errors:
            error_other = error
            error = 'other'

        har = obj.get('har', '-')
        screenshot = obj.get('screenshot', '-')
        comment = obj.get('description', '-')

        yield Record(
            ip=ip,
            ua=ua,
            source='assessors',
            parsed_ua=d,
            browser=BR,
            OS=OSF,
            stream_id=stream_id,
            platform=platform,
            vsid=vsid,
            date=date,
            ts=ts,
            ts_client=tsc,
            error=error,
            har=har,
            screenshot=screenshot,
            # A JSON null description is passed through untouched instead
            # of crashing on the slice.
            comment=comment[:100] if comment else comment,
            parsed_jstracer_data=obj,
            stream_title=stream_title,
            assignment_id=assignmentId,
            raw_ips=[ip],
            error_other=error_other
        )


def generate_totals(lst):
    """Build every key obtained by totalling any subset of the fields.

    Each position of ``lst`` is either kept as is or replaced with
    ``'_total_'``; all ``2 ** len(lst)`` combinations are passed to
    ``tabulate`` and the resulting keys are returned as a list.

    Raises:
        Exception: if a key cannot be built; the message is ``repr(lst)``
        so the offending field values are visible in the job log.
    """
    try:
        choices = [(value, '_total_') for value in lst]
        return [
            tabulate(*combo) for combo in itertools.product(*choices)
        ]
    except Exception:
        # Only ordinary errors are wrapped; SystemExit / KeyboardInterrupt
        # are allowed to propagate unchanged.
        raise Exception(repr(lst))


def to_unicode(lst):
    """Decode, in place, every ``str`` item of ``lst`` from UTF-8.

    Undecodable bytes are replaced rather than raising. Items of any other
    type are left untouched. Returns ``None``.
    """
    for position, item in enumerate(lst):
        if not isinstance(item, str):
            continue
        lst[position] = item.decode('utf8', errors='replace')


def reduce_report(groups):
    """Aggregate assessor tasks into per-slice assignment and error counts.

    For every input record an 11-field key is built (moderated,
    stream_title, network, os, browser, error, comment, view_time,
    error_type, platform, source) and expanded by ``generate_totals`` into
    all combinations where any field is replaced with ``'_total_'``. For
    each such key the reducer collects the distinct assignment ids, the
    distinct assignment ids that reported an error and the number of error
    reports.

    Counters are accumulated across all groups and emitted only after the
    last group has been consumed.

    Yields:
        Record: one record per key. ``assignments`` is taken from the key
        in which moderated, browser, error and error_type are totalled,
        while ``reported_errors`` and ``assignments_with_errors`` come from
        the key itself.
    """
    # Channels that get the long viewing task.
    checkable = ['Матч!', 'НТВ', 'Пятый канал', 'РЕН ТВ', 'ТНТ']
    error_types = {
        'wrong_schedule': 'CMS', 'dark_screen': 'other', 'seek_fail': 'плеер',
        'wrong_start': 'плеер', 'playlist_slow': 'поток',
        'chunk_slow': 'поток',
        'desync': 'поток', 'chunk_missing': 'поток',
        'playlist_wrong': 'поток',
        'picture_fail': 'поток', 'ads_start': 'реклама',
        'ads_error': 'реклама', 'other': 'other',
        'no errors': 'no errors', 'playing_slow': 'other'
    }

    result = {}
    for key, recs in groups:
        for rec in recs:
            provider = rec.network
            os_family = rec.OS
            browser = rec.browser
            moderated = rec.moderated
            platform = rec.platform
            source = rec.source

            stream_title = rec.stream_title
            view_time = '20 mins' if stream_title in checkable else '7 mins'

            error_other = rec.error_other
            comment = rec.comment

            # A missing or empty error field means the task had no errors.
            error = getattr(rec, 'error', None) or 'no errors'

            # An error supplied by moderation overrides the reported one.
            error_from_mod = getattr(rec, 'error_from_mod', '')
            if error_from_mod and error_from_mod != error:
                error = error_from_mod

            error_type = error_types.get(error) or 'unknown'

            if not error_other:
                error_other = '-'

            if not comment:
                comment = error_other
            # Tabs would break the tab-separated keys split further below.
            comment = comment.replace('\t', ' ').strip()

            current_keys = [
                moderated, stream_title, provider,
                os_family, browser, error, comment, view_time, error_type,
                platform, source
            ]
            to_unicode(current_keys)

            assignment_id = rec.assignment_id
            has_error = error != 'no errors'

            for k in generate_totals(current_keys):
                if k not in result:
                    result[k] = defaultdict(set)
                    result[k]['reported_errors'] = 0
                result[k]['assignments'].add(assignment_id)
                if has_error:
                    result[k]['reported_errors'] += 1
                    result[k]['assignments_with_errors'].add(assignment_id)

    t = u'_total_'
    for k in result:
        kk = k.split(u'\t')
        moderated = kk[0]
        stream_title = kk[1]
        network = kk[2]
        os_family = kk[3]
        browser = kk[4]
        error = kk[5]
        comment = kk[6]
        view_time = kk[7]
        error_type = kk[8]
        platform = kk[9]
        source = kk[10]
        if error_type == u'':
            # Report the key string itself; the split list has no .encode.
            raise Exception(k.encode('utf8', errors='replace'))

        # The denominator ignores moderation status, browser and error:
        # it is the number of assignments in the enclosing slice.
        kk1 = list(kk)
        kk1[0] = t  # moderated
        kk1[4] = t  # browser
        kk1[5] = t  # error
        kk1[8] = t  # error_type
        k1 = tabulate(*kk1)

        assignments = len(result[k1]['assignments'])

        reported_errors = result[k]['reported_errors']
        assignments_with_errors = len(result[k]['assignments_with_errors'])

        yield Record(
            moderated=moderated, stream_title=stream_title,
            network=network, os=os_family, browser=browser,
            platform=platform,
            error=error, comment=comment[:100], source=source,
            channel_type=view_time, error_type=error_type,
            assignments=assignments, reported_errors=reported_errors,
            assignments_with_errors=assignments_with_errors
        )


def add_weights(groups):
    """Attach share-weighted error figures to the per-slice counters.

    Within each group every record is stored under its 11-field key. For a
    record whose network is not ``'_total_'`` the weighted number of
    assignments with errors (``assignments_with_errors * vsid_share``) and
    its ratio to ``assignments`` are stored on the record's own entry and
    also added to the entry of the same key with the network totalled.

    Yields:
        Record: one record per key collected in the group, with the
        counters, ``vsid_share`` and both weighted values.
    """
    for key, recs in groups:

        stats = {}

        for rec in recs:
            network = rec.network
            assignments = rec.assignments
            errored = rec.assignments_with_errors
            share = rec.vsid_share

            fields = [
                rec.moderated,
                rec.stream_title,
                network,
                rec.os,
                rec.browser,
                rec.error,
                rec.comment,
                rec.channel_type,
                rec.error_type,
                rec.platform,
                rec.source
            ]
            to_unicode(fields)
            row_key = tabulate(*fields)

            weighted_errors = round(float(errored) * share, 2)
            if assignments > 0:
                weighted_share = round(
                    weighted_errors / float(assignments), 2
                )
            else:
                weighted_share = 0

            entry = stats.setdefault(row_key, {})
            entry['assignments'] = assignments
            entry['reported_errors'] = rec.reported_errors
            entry['assignments_with_errors'] = errored
            entry['vsid_share'] = share

            if network == '_total_':
                # Network totals get their weighted values only from the
                # specific-network rows handled below.
                continue

            entry['weighted_assignments_with_errors'] = weighted_errors
            entry['weighted_total_error_share'] = weighted_share

            # Same key with the network field (index 2) totalled.
            total_fields = fields[:2] + [u'_total_'] + fields[3:]
            total = stats.setdefault(tabulate(*total_fields), {})

            # NOTE(review): 'vsid_share' of the total entry is also written
            # by the '_total_' row itself above, so its final value depends
            # on the order in which records arrive - confirm this is
            # intended.
            if 'weighted_assignments_with_errors' in total:
                total['weighted_assignments_with_errors'] += weighted_errors
                total['weighted_total_error_share'] += weighted_share
                total['vsid_share'] += share
            else:
                total['weighted_assignments_with_errors'] = weighted_errors
                total['weighted_total_error_share'] = weighted_share
                total['vsid_share'] = share

        for row_key in stats:
            parts = row_key.split(u'\t')
            moderated = parts[0]
            stream_title = parts[1]
            network = parts[2]
            os_family = parts[3]
            browser = parts[4]
            error = parts[5]
            comment = parts[6]
            channel_type = parts[7]
            error_type = parts[8]
            platform = parts[9]
            source = parts[10]
            if channel_type == "":
                raise Exception(row_key.encode('utf8', errors='replace'))

            entry = stats[row_key]

            yield Record(
                moderated=moderated, stream_title=stream_title,
                network=network, os=os_family, browser=browser,
                source=source,
                error=error, comment=comment[:100], channel_type=channel_type,
                error_type=error_type,
                platform=platform,
                assignments=entry['assignments'],
                reported_errors=entry['reported_errors'],
                assignments_with_errors=entry['assignments_with_errors'],
                vsid_share=entry['vsid_share'],
                weighted_assignments_with_errors=entry[
                    'weighted_assignments_with_errors'],
                weighted_total_error_share=entry[
                    'weighted_total_error_share']
            )


def get_as(ip, ip_origins):
    """Return the first element of ``ip_origins.region_by_ip(ip)``.

    The lookup is best effort: ``'-'`` is returned when ``ip`` is empty or
    when the lookup fails with an ordinary exception. Interpreter-level
    signals (``SystemExit``, ``KeyboardInterrupt``) are not swallowed.
    """
    if not ip:
        return '-'
    try:
        return ip_origins.region_by_ip(ip)[0]
    except Exception:
        return '-'


# YT directory with the daily js_tracer tables; process_date() reads the
# table named after the date in DD-MM-YYYY form, main() searches it for
# available dates.
js_tracer_root = '//home/js_tracer/day_by_day'


def get_stat_headers():
    """Build the Statface auth headers from the environment.

    Reads ``STAT_LOGIN`` and ``STAT_TOKEN``; a missing variable raises
    ``KeyError``.
    """
    env = os.environ
    headers = {}
    headers['StatRobotUser'] = env['STAT_LOGIN']
    headers['StatRobotPassword'] = env['STAT_TOKEN']
    return headers


def _load_networks(path='networks.txt'):
    """Read a tab-separated ``key<TAB>network name`` mapping from ``path``.

    Lines with fewer than two tab-separated fields (blank lines included)
    are ignored; extra fields are dropped.
    """
    networks = {}
    with open(path) as f:
        for line in f.read().split('\n'):
            fields = line.split('\t')
            if len(fields) < 2:
                continue
            networks[fields[0]] = fields[1]
    return networks


def process_date(date, report_):
    """Build the report for one day on YT and publish it to Statface.

    Steps:

    1. parse the day's js_tracer table with ``parse_tasks`` and attach the
       AS and network name of every IP;
    2. left-join the moderated bug tables to flag moderated tasks;
    3. reduce to per-slice counters (``reduce_report``);
    4. left-join per-network/OS shares (default ``1.0``) and compute the
       weighted figures (``add_weights``);
    5. run the job and upload the resulting table as a daily report.

    Args:
        date: ``datetime.date`` of the day to process.
        report_: Statface report path to publish to.
    """
    date_ = date.strftime('%Y-%m-%d')
    reversed_date = date.strftime('%d-%m-%Y')

    networks = _load_networks()

    job = cluster.job()

    jstracer = job.table(
        '{}/{}'.format(js_tracer_root, reversed_date)
    )

    parsed = jstracer.map(
        parse_tasks,
        intensity='large_data'
    ).project(
        ne.all(),
        AS=ne.custom(
            lambda x, y: get_as(x, y), 'ip', qr.resource('IpOrigins')
        ),
        network=ne.custom(
            lambda x: networks[x] if x in networks else 'other', 'AS'
        )
    ).put('$job_root/PARSED_TASKS/parsed_tasks_{}'.format(date))

    # The moderated bugs live in a different directory after 2018-05-02.
    bugs_dir = 'bugs' if date <= datetime.date(2018, 5, 2) else 'all_bugs'
    bugs_root = 'home/assessor-production/monitoring_tv/{}'.format(bugs_dir)

    moderated = job.concat(
        job.table('{}/desktop'.format(bugs_root)),
        job.table('{}/mobile'.format(bugs_root))
    ).filter(
        nf.custom(
            lambda x: datetime.datetime.fromtimestamp(x or 0).date() == date,
            'ts_client'
        )
    ).project(
        'vsid', 'error', 'assignment_id', 'stream_title',
        'ts_client', moderated=ne.const('moderated')
    ).put('$job_root/PARSED_TASKS/parsed_tasks_{}_moderated'.format(date))

    tasks = parsed.join(
        moderated, by=(
            'assignment_id', 'stream_title', 'ts_client'
        ),
        type='left'
    ).project(
        ne.all(),
        moderated=ne.custom(
            lambda x: x if x else 'not moderated', 'moderated'
        )
    ).put('$job_root/with_moderated_flag')

    # NOTE(review): this intermediate table is written to the same path as
    # the final report below - confirm the overwrite is intended.
    counts = tasks.groupby(
        'stream_title', 'os', 'browser', 'platform',
        'error', 'comment', 'channel_type', 'error_type', 'source'
    ).reduce(
        reduce_report,
        files=[nfi.LocalFile('pytils.py')],
        memory_limit=8000
    ).put(
        '$job_root/REPORTS/report_{}'.format(date)
    )

    weight = job.table(
        'home/bs/users/myshell/videohosting/shares_OSFamily'
    ).filter(
        sf.equals('Date', date_)
    ).project(
        vsid_share='VSID_share',
        network=ne.custom(
            lambda x: '_total_' if x == 'all' else x, 'Provider'
        ),
        os=ne.custom(
            lambda x: '_total_' if x == 'all' else x, 'OS'
        )
    ).put('$job_root/current_weights')

    weighted = counts.join(
        weight, by=('network', 'os'), type='left'
    )
    with_share = weighted.filter(sf.defined('vsid_share'))
    # Slices without a known share are given a weight of 1.0.
    without_share = weighted.filter(
        sf.not_(sf.defined('vsid_share'))).project(
        ne.all(), vsid_share=ne.const(1.0)
    )

    final_report = job.concat(
        with_share, without_share
    ).groupby(
        'moderated', 'stream_title', 'os', 'browser', 'platform',
        'error', 'comment', 'channel_type', 'error_type', 'source'
    ).reduce(
        add_weights, files=[nfi.LocalFile('pytils.py')],
        memory_limit=8000
    ).project(
        ne.all(),
        fielddate=ne.const(date_),
    ).put(
        '$job_root/REPORTS/report_{}'.format(date)
    )

    # Rows totalled over these dimensions, stored separately for checking.
    final_report.filter(
        sf.equals('moderated', '_total_'),
        sf.equals('stream_title', '_total_'),
        sf.equals('network', '_total_'),
        sf.equals('os', '_total_'),
        sf.equals('browser', '_total_'),
        sf.equals('error', '_total_'),
        sf.equals('comment', '_total_'),
        sf.equals('channel_type', '_total_'),
        sf.equals('error_type', '_total_')
    ).put('$job_root/check_report_final')

    job.run()

    client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        username=os.environ['STAT_LOGIN'],
        password=os.environ['STAT_TOKEN']
    )

    stat_report = ns.StatfaceReport().path(
        report_
    ).scale('daily')

    stat_report = stat_report.client(client)

    stat_report = stat_report.data(
        cluster.read('$job_root/REPORTS/report_{}'.format(date))
    )

    stat_report.publish()


def get_date(s):
    """Extract the first ``YYYY-MM-DD`` date found in ``s``.

    Returns a ``datetime.date``, or ``None`` when ``s`` is not a string,
    contains no such pattern, or the matched text is not a real date.
    """
    try:
        found = re.search(r'[0-9]{4}-[0-9]{2}-[0-9]{2}', s)
        parsed = datetime.datetime.strptime(found.group(0), '%Y-%m-%d')
    except (ValueError, TypeError, AttributeError):
        return None
    return parsed.date()


def jst_get_date(s):
    """Extract the first ``DD-MM-YYYY`` date found in ``s``.

    Returns a ``datetime.date``, or ``None`` when ``s`` is not a string,
    contains no such pattern, or the matched text is not a real date.
    """
    try:
        found = re.search(r'[0-9]{2}-[0-9]{2}-[0-9]{4}', s)
        parsed = datetime.datetime.strptime(found.group(0), '%d-%m-%Y')
    except (ValueError, TypeError, AttributeError):
        return None
    return parsed.date()


def main():
    """Command-line entry point.

    With both ``--from`` and ``--to`` the given date range is processed.
    Otherwise the last date already present in the Statface report is
    looked up and every later day that has a js_tracer table (today
    excluded) is processed; if the report has no data yet, all available
    days are processed.

    Options:
        --pool: YT pool (default ``videolog``).
        --report: Statface report path, used both for the last-date lookup
            and for publishing.
        --from / --to: explicit date range.
    """
    global cluster

    parser = argparse.ArgumentParser()
    parser.add_argument('--pool', default='videolog')
    parser.add_argument('--report', default=report)
    parser.add_argument('--from')
    parser.add_argument('--to')
    args = parser.parse_args()

    # 'from' is a keyword, so the attributes are read with getattr.
    from_ = getattr(args, 'from')
    to_ = getattr(args, 'to')

    cluster = clusters.yt.Hahn(
        token=os.environ['YT_TOKEN'],
        pool=args.pool
    ).env(
        templates=dict(
            job_root='//home/videolog/offline_tv_monitoring/assessors'
        ),
        parallel_operations_limit=10
    )

    if from_ and to_:
        dates_to_process = date_range(from_, to_)
    else:
        headers = get_stat_headers()
        print('getting dates from report')
        dimensions = [
            'platform', 'moderated', 'stream_title', 'channel_type',
            'network', 'os', 'browser', 'error_type', 'comment'
        ]
        dim_totals = '&'.join(
            '{}=_total_'.format(x) for x in dimensions
        )
        # Query the report that will actually be published to, so that
        # --report also drives the choice of dates.
        req = requests.get(
            'https://upload.stat.yandex-team.ru/{}?{}&_type=json'.format(
                args.report, dim_totals
            ),
            headers=headers, verify=False
        )
        req.raise_for_status()
        print('parsing response')

        values = sorted(
            req.json()['values'], key=lambda x: x['fielddate'], reverse=True
        )
        # An empty report simply has no last date.
        if values:
            last_date = get_date(
                values[0]['fielddate'].split(' ')[0]
            )
        else:
            last_date = None

        print('last date: {}'.format(last_date))

        today = datetime.date.today()
        table_dates = (
            jst_get_date(s) for s in cluster.driver.client.search(
                root=js_tracer_root, node_type="table"
            )
        )
        available_dates = sorted(
            d for d in table_dates if d and d < today
        )

        if available_dates:
            print('last available date: {}'.format(available_dates[-1]))
        else:
            print('no available dates')

        if last_date:
            dates_to_process = [
                x for x in available_dates if x > last_date
            ]
        else:
            dates_to_process = available_dates

    print('dates to process: {}'.format(dates_to_process))

    for date in dates_to_process:
        process_date(date, args.report)


# Script entry point: run the pipeline only when executed directly.
if __name__ == "__main__":
    main()
