# coding: utf-8

from __future__ import division

import datetime
import logging
import re
import time

import yt.wrapper as yt
from bm.yt_tools import get_attribute
from bannerland.yql.tools import do_yql, list_yql_result
from os.path import basename

from irt.bannerland.options import get_option
from irt.monitoring.solomon.sensors import SolomonAgentSensorsClient

from bannerland.archive_workers.common import BLYTWorker
from bannerland.archive_workers.full_state import FSWorker
from sandbox.projects.common import requests_wrapper

# Names of the attributes read (via get_attribute) from the full-state tables
# in MonitorFSWorker: the chunk count, and the resource-usage mapping from
# which the disk-space entry is taken.
CHUNK_COUNT_YT_ATTR = 'chunk_count'
RESOURCE_USAGE_YT_ATTR = 'resource_usage'
DISK_SPACE_YT_SUBATTR = 'disk_space'


def get_ts_datetime(dir_time):
    """Return the sensor datetime in Solomon format for a directory name.

    :param dir_time: directory name that encodes a time; it is parsed with
        the format stored in the ``bannerland_pocket_name_format`` option.
    :return: ``str()`` of the parsed datetime shifted forward by
        ``time.timezone`` seconds (the local non-DST offset west of UTC).
    :raises ValueError: if ``dir_time`` does not match the configured format.
    """
    name_format = get_option('bannerland_pocket_name_format')
    parsed = datetime.datetime.strptime(dir_time, name_format)
    shifted = parsed + datetime.timedelta(seconds=time.timezone)
    return str(shifted)


def get_input_bf_query(bf_table):
    """Return a YQL query that selects every column of ``bf_table``.

    :param bf_table: table path; it is substituted between backticks as-is.
    :return: the query text.
    """
    query_template = """
        SELECT
            *
        FROM
            `{bf_table}`
    """
    return query_template.format(bf_table=bf_table)


def get_active_fs_info(fs_info, task_type):
    """Build YQL sub-queries that restrict a full state to active tasks.

    :param fs_info: mapping with the keys ``'banners'`` and ``'phrases'``
        holding the paths of the corresponding full-state tables.
    :param task_type: ``'perf'`` or ``'dyn'``; selects the option that names
        the table with the active tasks.
    :return: dict with the same two keys whose values are YQL query texts:
        ``'banners'`` is the banners table LEFT SEMI JOINed with the active
        task ids on ``task_id``; ``'phrases'`` is the phrases table
        LEFT SEMI JOINed with that banners query on ``BannerID``.
    :raises ValueError: for any other ``task_type``.
    """
    if task_type == 'perf':
        active_tasks_yt_table_option = 'perf_tasks_source_table'
    elif task_type == 'dyn':
        active_tasks_yt_table_option = 'dyn_tasks_source_table'
    else:
        # Exceptions do not interpolate logging-style arguments, so the
        # message has to be formatted explicitly.
        raise ValueError('bad task_type = {}'.format(task_type))
    active_tasks_yt_table = get_option(active_tasks_yt_table_option)

    # Both supported task types build the task id the same way.
    active_tasks_yql_query = """
            SELECT
                Cast(OrderID as String) || '_' || Cast(GroupingID as String) as task_id
            FROM
                `{tasks_yt_table}`
        """.format(tasks_yt_table=active_tasks_yt_table)

    active_req_template = """
        SELECT
            *
        FROM
            `{fs_info}` as fs_info
            LEFT SEMI JOIN
            ({active_fs_info}) as active_fs_info
        USING(
            `{join_column}`
        )
    """

    res_info = {
        'banners': active_req_template.format(
            fs_info=fs_info['banners'],
            active_fs_info=active_tasks_yql_query,
            join_column='task_id'
        )
    }
    # Phrases are filtered by the banners that survived the filter above.
    res_info['phrases'] = active_req_template.format(
        fs_info=fs_info['phrases'],
        active_fs_info=res_info['banners'],
        join_column='BannerID'
    )

    return res_info


def get_counts_query(banners_fs, phrases_fs):
    """Build the YQL that computes the counters of a full state.

    :param banners_fs: YQL sub-query text yielding the banners rows.
    :param phrases_fs: YQL sub-query text yielding the phrases rows.
    :return: ``(definitions, select)`` - the text defining ``$counts``
        (phrases, task-offers, domains, orders, tasks and banners counts)
        and the text selecting its rows as ``res``.
    """
    counts_template = """
        $tao_count = (
            SELECT
                COUNT(*) AS taskoffers_count
            FROM (
                SELECT
                    DISTINCT task_id, row_id
                FROM
                    ({banners_fs})
            )
        );

        $phrases_count = (
        SELECT
            COUNT(*)
        FROM
            ({phrases_fs})
        );

        $counts = (
            SELECT
                $phrases_count AS phrases_count,
                $tao_count as taskoffers_count,
                COUNT(DISTINCT Domain) AS domains_count,
                COUNT(DISTINCT OrderID) AS orders_count,
                COUNT(DISTINCT task_id) AS tasks_count,
                COUNT(DISTINCT BannerID) AS children_count
            FROM
                ({banners_fs}));
    """
    select_counts = """
        SELECT TableRow() as res FROM $counts
    """
    definitions = counts_template.format(banners_fs=banners_fs, phrases_fs=phrases_fs)
    return definitions, select_counts


def get_sd_stat_query(phrase_fs):
    """Build the YQL that computes the share of phrases per ``SimDistance``.

    :param phrase_fs: YQL sub-query text yielding the phrases rows.
    :return: ``(definitions, select)`` - the text defining ``$sd_percents``
        (rows of ``key``/``val`` with keys ``SD_<SimDistance>_percent``) and
        the text selecting its rows as ``res``.
    """
    sd_template = """
        $phrases_count = (SELECT COUNT(*) FROM ({phrase_fs}));
        $sd_percents = (
            SELECT
                'SD_' || cast(SimDistance as String) || '_percent' AS key,
                100.0 * COUNT(*) / $phrases_count AS val
            FROM
                ({phrase_fs})
            GROUP BY SimDistance);
    """
    select_sd = """
        SELECT TableRow() as res FROM $sd_percents
    """
    return sd_template.format(phrase_fs=phrase_fs), select_sd


def get_retargs_stat_query(phrase_fs):
    """Build the YQL for the retargeting / new-audience phrase percentages.

    Phrases whose ``NormType`` equals ``"offer"`` are counted into
    ``retargeting_percent``; ``new_audience_percent`` is its complement
    to 100.

    :param phrase_fs: YQL sub-query text yielding the phrases rows.
    :return: ``(definitions, select)`` - the text defining both percentages
        and the text selecting them as ``res`` rows.
    """
    retarg_template = """
        $phrases_count = (SELECT COUNT(*) FROM ({phrase_fs}));
        $retarg_percent = (
            SELECT
                100.0 * COUNT(*) / $phrases_count AS retargeting_percent
            FROM
                ({phrase_fs})
            WHERE
                `NormType` == "offer");
        $new_audience_percent = (SELECT  100.0 - $retarg_percent as new_audience_percent);
    """
    select_retarg = """
        SELECT TableRow() as res FROM $retarg_percent
        UNION ALL
        SELECT TableRow() as res FROM $new_audience_percent
    """
    return retarg_template.format(phrase_fs=phrase_fs), select_retarg


def get_new_auditory_retarg_banners_percent_stat_query(phrase_fs):
    """Build the YQL for the per-banner new-audience / retargeting shares.

    The query groups phrases by ``(BannerID, PhraseID)``, counts rows with
    ``NormType == "offer"`` in each group, and reports the percentage of
    banners having at least one group without such rows
    (``new_auditory_banners_percent``) and at least one group with them
    (``retarg_banners_percent``).

    :param phrase_fs: YQL sub-query text yielding the phrases rows.
    :return: ``(definitions, select)`` - the text defining
        ``$new_auditory_retarg_banners_percent`` and the text selecting its
        rows as ``res``.
    """
    banners_template = """
        $banners_count = (SELECT COUNT(DISTINCT BannerID) FROM ({phrase_fs}));
        $new_auditory_retarg_banners_percent = (
            SELECT
                100.0 * COUNT_IF(new_audience_phid_cc > 0) / $banners_count AS new_auditory_banners_percent,
                100.0 * COUNT_IF(retarg_phid_cc > 0) / $banners_count AS retarg_banners_percent
            FROM (
                SELECT
                    BannerID,
                    COUNT_IF(retarg_cc = 0) AS new_audience_phid_cc,
                    COUNT_IF(retarg_cc > 0) AS retarg_phid_cc
                FROM (
                    SELECT
                        BannerID,
                        PhraseID,
                        COUNT_IF(`NormType` == "offer") AS retarg_cc
                    FROM
                        ({phrase_fs})
                    GROUP BY
                        BannerID, PhraseID
                )
                GROUP BY
                    BannerID
            ));
    """
    select_banners = """
        SELECT TableRow() as res FROM $new_auditory_retarg_banners_percent
    """
    return banners_template.format(phrase_fs=phrase_fs), select_banners


def gen_calc_query(get_metric_tuples):
    """Combine several metric queries into a single YQL text.

    :param get_metric_tuples: iterable of ``(definitions, select)`` pairs.
    :return: all definitions joined with newlines, immediately followed by
        all selects joined with ``'\\nUNION ALL'``.
    """
    # Materialise once so that a one-shot iterable is traversed a single time.
    pairs = [(definitions, select) for definitions, select in get_metric_tuples]
    definitions_text = '\n'.join(definitions for definitions, _ in pairs)
    selects_text = '\nUNION ALL'.join(select for _, select in pairs)
    return definitions_text + selects_text


def get_metrics_from_yql_result(yql_result):
    """Flatten YQL result rows into a ``{metric_name: value}`` dict.

    Every row must have a ``'res'`` item whose ``__dict__`` holds the
    columns. A row with a truthy ``key`` column contributes
    ``{key: val}``; any other row contributes all of its columns whose
    value is not ``None``.

    :param yql_result: iterable of rows as described above.
    :return: dict with the collected metrics.
    """
    cur_metrics = {}
    # The placeholder must be a complete '%s': a bare '%' is an invalid
    # format and the message is lost with a logging error instead.
    logging.debug('yql_res=%s', yql_result)
    for row in yql_result:
        res = row['res'].__dict__
        if res.get('key'):
            cur_metrics[res['key']] = res['val']
        else:
            cur_metrics.update({k: v for k, v in res.items() if v is not None})
    return cur_metrics


def send_metrics_to_solomon(metrics, fs_datetime):
    """Push the given metrics to Solomon as one batch of sensors.

    :param metrics: mapping ``sensor name -> value``.
    :param fs_datetime: value placed into ``ts_datetime`` of every sensor.
    :raises ValueError: if ``push_sensors`` returns anything other than 0.
    """
    sensors_batch = [
        {
            'ts_datetime': fs_datetime,
            'cluster': 'yt_hahn',
            'service': 'bannerland_yt',
            'sensor': sensor_name,
            'value': sensor_value,
        }
        for sensor_name, sensor_value in metrics.items()
    ]

    push_status = SolomonAgentSensorsClient().push_sensors(sensors_batch)
    if push_status == 0:
        return
    raise ValueError('ERROR: Fail with sending FS sensors')


# Pocket monitoring
class MonitorWorker(BLYTWorker):
    """Worker that collects the metrics of one pocket and sends them to Solomon."""

    def __init__(self, attr_name, is_avatars_enabled=True, **kwargs):
        """Store the avatars flag; everything else goes to the base class.

        :param attr_name: forwarded to the base class as ``attr_name``.
        :param is_avatars_enabled: when true, ``do_work`` also reports the
            avatars cache miss percentage.
        :param kwargs: forwarded to the base class unchanged.
        """
        super(MonitorWorker, self).__init__(attr_name=attr_name, **kwargs)
        self.is_avatars_enabled = is_avatars_enabled

    def do_work(self, pocket):
        """Compute the pocket metrics and push them to Solomon.

        Reported (under the ``<task_type>.make_banners`` prefix): row counts
        of the tasks, tasks-and-offers and generated-banners tables, the
        ratio of tasks with ``banners_count == 0``, the duration of every
        step from the ``steps_duration`` attribute and, if enabled, the
        avatars cache miss percentage.

        :param pocket: path of the pocket directory; its basename is parsed
            as the sensors' timestamp.
        """
        logging.warning('do_work for pocket %s', pocket)

        client = self.yt_client
        sensor_prefix = '{}.make_banners'.format(self.task_type)

        tasks_table = yt.ypath_join(pocket, 'tasks.final')
        offers_table = yt.ypath_join(pocket, 'tasks_and_offers')
        banners_table = yt.ypath_join(pocket, 'generated_banners.final')

        tasks_total = client.row_count(tasks_table)
        pocket_metrics = {'pocket_tasks_count': tasks_total}
        pocket_metrics['pocket_offers_count'] = client.row_count(offers_table)
        pocket_metrics['pocket_phrases_count'] = client.row_count(banners_table)

        # Tasks that produced no banners at all.
        empty_tasks = sum(
            1 for task in client.read_table(tasks_table) if task['banners_count'] == 0
        )
        if tasks_total > 0:
            pocket_metrics['pocket_del_tasks_ratio'] = empty_tasks / tasks_total
        else:
            pocket_metrics['pocket_del_tasks_ratio'] = 0.0

        for step, seconds in get_attribute(pocket, 'steps_duration', client).items():
            pocket_metrics['pocket_step_{}_duration'.format(step)] = seconds

        if self.is_avatars_enabled:
            hit_table = yt.ypath_join(pocket, 'avatars.cache_hit')
            miss_table = yt.ypath_join(pocket, 'avatars.cache_miss')
            if not self.yt_client.exists(hit_table):
                # No cache-hit table: reported as a 100% miss.
                miss_percent = 100
            else:
                hits = client.row_count(hit_table)
                misses = client.row_count(miss_table)
                avatars_total = hits + misses
                if avatars_total > 0:
                    miss_percent = 100.0 * misses / avatars_total
                else:
                    miss_percent = 0
                    logging.warning('The number of avatars is 0')
            pocket_metrics['pocket_avatars_miss_percent'] = miss_percent

        logging.warning('Metrics for the pocket: %s', pocket_metrics)

        prefixed_metrics = {}
        for name, value in pocket_metrics.items():
            prefixed_metrics['{}.{}'.format(sensor_prefix, name)] = value

        send_metrics_to_solomon(prefixed_metrics, get_ts_datetime(basename(pocket)))


# Full state (FS) monitoring
class MonitorFSWorker(FSWorker):
    """Worker that collects the metrics of one full state and sends them to Solomon."""

    def do_work(self, fs):
        """Compute the full-state metrics and push them to Solomon in one batch.

        Chunk count and disk space (in units of 2**30 bytes) are read from
        the attributes of the ``final/banners`` and ``final/phrases``
        tables. The YQL metrics are computed twice: over the whole tables
        (prefix ``<task_type>.full_state``) and over the part restricted by
        ``get_active_fs_info`` (prefix ``<task_type>.full_state_active``).

        :param fs: path of the full-state directory; its basename is parsed
            as the sensors' timestamp.
        """
        logging.warning('do_work for fullstate %s', fs)

        yt_client = self.yt_client
        task_type = self.task_type
        sensor_prefix = '{}.full_state'.format(task_type)

        fs_datetime = get_ts_datetime(basename(fs))
        table_paths = {
            'banners': yt.ypath_join(fs, 'final', 'banners'),
            'phrases': yt.ypath_join(fs, 'final', 'phrases'),
        }

        # Number of chunks and amount of disk space the full state occupies on YT.
        chunks_total = sum(
            get_attribute(table_paths[name], CHUNK_COUNT_YT_ATTR, yt_client)
            for name in table_paths
        )
        disk_space_total = sum(
            get_attribute(table_paths[name], RESOURCE_USAGE_YT_ATTR, yt_client)[DISK_SPACE_YT_SUBATTR]
            for name in table_paths
        )

        # All sensors are accumulated here and sent as a single batch at the end.
        all_sensors = {
            '{}.{}'.format(sensor_prefix, 'chunks_count'): chunks_total,
            '{}.{}'.format(sensor_prefix, 'gb_space'): disk_space_total / (2 ** 30),
        }

        active_queries = get_active_fs_info(table_paths, task_type)
        full_queries = {}
        for name in table_paths:
            full_queries[name] = "SELECT * FROM `{}`".format(table_paths[name])

        query_sets = (
            (sensor_prefix, full_queries),
            (sensor_prefix + '_active', active_queries),
        )
        for sensor_header, queries in query_sets:
            phrases_query = queries['phrases']
            banners_query = queries['banners']

            metric_queries = [
                get_counts_query(banners_query, phrases_query),
                get_sd_stat_query(phrases_query),
                get_retargs_stat_query(phrases_query),
                get_new_auditory_retarg_banners_percent_stat_query(phrases_query),
            ]
            yql_text = gen_calc_query(metric_queries)

            yql_request = do_yql(self.yql_client, yql_text, yt_pool=self.yt_pool)
            fs_metrics = get_metrics_from_yql_result(list_yql_result(yql_request))
            logging.warning('Metrics for the FS: %s', fs_metrics)
            for name, value in fs_metrics.items():
                all_sensors['{}.{}'.format(sensor_header, name)] = value

        send_metrics_to_solomon(all_sensors, fs_datetime)


# Statistics on the number of errors/warnings on YT and in Sandbox with
# one-second precision (finer precision may cause problems in Solomon)
class LogWorker(BLYTWorker):
    """Worker that counts error/warning log lines of a pocket and sends them to Solomon."""

    def mapper(self, row):
        """Classify one log row; yield ``{'err_type': ...}`` for errors and warnings.

        A row is an error if its ``log`` starts with ``ERR:``/``ERROR:`` or
        its ``ns`` starts with ``error``; a warning if ``log`` starts with
        ``WARN:``/``WARNING:``. Rows with ``log is None`` and all other rows
        yield nothing.
        """
        log_text = row['log']
        if log_text is None:
            return
        if log_text.startswith(('ERR:', 'ERROR:')) or row['ns'].startswith('error'):
            yield {'err_type': 'error'}
        elif log_text.startswith(('WARN:', 'WARNING:')):
            yield {'err_type': 'warning'}

    def reducer(self, key, rows):
        """Yield the number of rows (``cc``) for the ``err_type`` of ``key``."""
        err_type = key['err_type']
        rows_count = 0
        for _ in rows:
            rows_count += 1
        yield {'err_type': err_type, 'cc': rows_count}

    def do_work(self, pocket):
        """Count errors/warnings in the YT and Sandbox logs of ``pocket`` and push them.

        :param pocket: path of the pocket directory; its basename is parsed
            as the timestamp of the YT sensors.
        :raises Exception: if a Sandbox log download returns a status other
            than 200/404, or if pushing the sensors reports a failure.
        """
        sensors = []
        pocket_ts_datetime = get_ts_datetime(basename(pocket))

        # Look at the logs in YT (the 'log_merged' table)
        with self.yt_client.TempTable() as counts_table:
            self.yt_client.run_map_reduce(
                self.mapper,
                self.reducer,
                yt.ypath_join(pocket, 'log_merged'),
                counts_table,
                reduce_by=['err_type']
            )

            # Start from zeros so both types are reported even when absent.
            yt_counts = {'error': 0, 'warning': 0}
            for count_row in self.yt_client.read_table(counts_table):
                yt_counts[count_row['err_type']] = count_row['cc']

            for err_type, count in yt_counts.items():
                sensors.append({
                    'cluster': 'yt_hahn',
                    'service': 'scripts',
                    'sensor': 'logs_count',
                    'labels': {
                        'task_type': self.task_type,
                        'err_type': err_type,
                    },
                    'value': count,
                    'ts_datetime': pocket_ts_datetime,
                })

        # Look at the logs of the Sandbox tasks that ran on this pocket
        sandbox_warnings = 0
        sandbox_errors = 0
        latest_dt = None

        re_dt_format = re.compile(r'\d{4}-\d{2}-\d{2}\s+\d{2}\:\d{2}\:\d{2}')
        re_err_format = re.compile(r'\s+(ERROR(\s|\:)|ERR\:)')
        re_warn_format = re.compile(r'\s+(WARNING(\s|\:)|WARN\:)')

        pocket_node = self.yt_client.get(pocket, attributes=['sandbox_task_ids'])
        for sandbox_task_id in pocket_node.attributes.get('sandbox_task_ids', []):
            response = requests_wrapper.get('https://proxy.sandbox.yandex-team.ru/last/TASK_LOGS/common.log?task_id={}'.format(sandbox_task_id))

            if response.status_code == 404:
                logging.warning('Not found logs for Sandbox task with id %s: %s', sandbox_task_id, response.content)
                continue
            if response.status_code != 200:
                raise Exception('Failed to download Sandbox log (task_id={}, response_code={})'.format(sandbox_task_id, response.status_code))

            for line in response.text.split('\n'):
                timestamps = re_dt_format.findall(line)
                if not timestamps:
                    # Lines without a timestamp are not counted.
                    continue
                # The last timestamp of the line is used, shifted by time.timezone.
                line_dt = datetime.datetime.strptime(timestamps[-1], '%Y-%m-%d %H:%M:%S')
                line_dt = line_dt + datetime.timedelta(seconds=time.timezone)
                if latest_dt is None or line_dt > latest_dt:
                    latest_dt = line_dt

                if re_err_format.search(line):
                    sandbox_errors += 1
                elif re_warn_format.search(line):
                    sandbox_warnings += 1

        # Sandbox sensors are stamped with the latest log time seen, or with
        # the pocket time when no timestamped line was found.
        if latest_dt is None:
            sandbox_ts_datetime = pocket_ts_datetime
        else:
            sandbox_ts_datetime = str(latest_dt)

        for err_type, count in (('error', sandbox_errors), ('warning', sandbox_warnings)):
            sensors.append({
                'cluster': 'sandbox',
                'service': 'scripts',
                'sensor': 'logs_count',
                'labels': {
                    'task_type': self.task_type,
                    'err_type': err_type,
                },
                'value': count,
                'ts_datetime': sandbox_ts_datetime,
            })

        push_status = SolomonAgentSensorsClient().push_sensors(sensors)
        if push_status:
            raise Exception('Fail to send Solomon sensors!')
