import crypta.audience.lib.tasks as tasks
import yt.wrapper as yt
import retry

import logging
logger = logging.getLogger(__name__)


# Quantile levels passed as ``quantiles=`` to the state storage when
# requesting processing-time statistics (see extract_time_stats).
QUANTILES = [0.5, 0.95, 1.0]
# Segment type that metrics() passes on when collecting time statistics.
SEGMENT_TYPE = 'audience'


def size(dir_):
    """Return the ``count`` attribute of ``dir_``."""
    count = dir_.count
    return count


@retry.retry(exceptions=yt.errors.YtHttpResponseError, tries=10)
def size_batched(batches):
    """Return the sum of ``batch_size`` over every item of ``batches``.

    The whole computation is retried (up to 10 tries) when it raises
    ``yt.errors.YtHttpResponseError``.
    """
    total = 0
    for item in batches:
        total = total + item.batch_size
    return total


def extract_time_stats(state_storage, segment_type=None):
    """Query ``state_storage`` for processing-time quantiles.

    Both queries use the module-level ``QUANTILES`` and the given
    ``segment_type``.

    Returns a 2-tuple: the result of ``get_processing_time_q`` followed by
    the result of ``get_unfinished_processing_time_q``.
    """
    query = dict(quantiles=QUANTILES, segment_type=segment_type)
    finished = state_storage.get_processing_time_q(**query)
    unfinished = state_storage.get_unfinished_processing_time_q(**query)
    return finished, unfinished


def _extend_with_percentiles(points, quantiles, prefix):
    """Return a copy of ``points`` extended with one point per quantile.

    :param points: iterable of existing ``(name, value)`` points; it is
        copied, never modified.
    :param quantiles: mapping of quantile level (e.g. ``0.95``) to value.
    :param prefix: metric name prefix; each new point is named
        ``'<prefix>_p<percent>'`` where ``percent`` is the whole-number
        percent of the quantile level.
    :return: new list with the original points followed by the new ones,
        in the iteration order of ``quantiles``.
    """
    result = list(points)
    for p, value in quantiles.items():
        # 100 * p is not always exact in binary floating point
        # (100 * 0.57 == 56.99999999999999), so plain truncation would label
        # that quantile p56. Snap away the representation noise first;
        # genuinely fractional percents (99.9) are still truncated as before.
        percent = int(round(100 * p, 9))
        result.append(('{}_p{}'.format(prefix, percent), value))
    return result


def extend_with_time_stats(points, state_storage, segment_type=None):
    """Return ``points`` extended with processing-time percentile points.

    Quantiles are obtained via ``extract_time_stats`` and added under the
    ``processing_time`` prefix first, then under the
    ``unfinished_processing_time`` prefix.
    """
    finished_q, unfinished_q = extract_time_stats(state_storage, segment_type)

    prefixed = (
        ('processing_time', finished_q),
        ('unfinished_processing_time', unfinished_q),
    )
    for prefix, quantiles in prefixed:
        points = _extend_with_percentiles(points, quantiles, prefix)
    return points


def extend_with_lookalike_stats(points, stats_storage):
    """Append lookalike statistics to ``points`` in place and return it.

    ``stats_storage.get_stats`` is called twice: first with ``days=1``, then
    with ``days=None``. Each call must yield exactly five values, which are
    stored as mean/max rate by input time, mean/max rate by output time and
    count. Points from the ``days=1`` call carry a ``_per_day`` suffix.
    """
    (
        day_mean_in,
        day_mean_out,
        day_max_in,
        day_max_out,
        day_count,
    ) = stats_storage.get_stats(days=1)
    (
        all_mean_in,
        all_mean_out,
        all_max_in,
        all_max_out,
        all_count,
    ) = stats_storage.get_stats(days=None)

    # Points from the ``days=1`` call go first.
    points.extend([
        ('mean_rate_by_input_time_per_day', day_mean_in),
        ('mean_rate_by_output_time_per_day', day_mean_out),
        ('max_rate_by_input_time_per_day', day_max_in),
        ('max_rate_by_output_time_per_day', day_max_out),
        ('count_per_day', day_count),
    ])
    # Then the points from the ``days=None`` call.
    points.extend([
        ('mean_rate_by_input_time', all_mean_in),
        ('mean_rate_by_output_time', all_mean_out),
        ('max_rate_by_input_time', all_max_in),
        ('max_rate_by_output_time', all_max_out),
        ('count', all_count),
    ])
    return points


def size_if_exists(directory):
    """Return ``directory.size`` when ``directory.exists()`` is truthy, else 0."""
    if not directory.exists():
        return 0
    return directory.size


def metrics(client):
    """Collect lookalike metric points as a list of ``(name, value)`` tuples.

    The result starts with four size points (``input_size``, ``output_size``,
    ``batches_size``, ``waiting_size``), followed by the processing-time
    percentile points for ``SEGMENT_TYPE`` and then the lookalike statistics.

    :param client: client object handed to the path and storage helpers and
        used directly for ``search``.
    """
    paths = tasks.lookalike.paths.Paths(client)

    def _is_meta(path):
        return path.endswith('Meta')

    input_size = size_if_exists(paths.input_segments)
    output_size = size_if_exists(paths.output_segments)
    batches_size = size_batched(paths.batched_inputs)

    # Sum the row counts of every node under the waiting-segments root whose
    # path ends with 'Meta'.
    meta_nodes = client.search(
        root=paths.waiting_segments,
        path_filter=_is_meta,
        attributes=['row_count'],
    )
    waiting_size = sum(node.attributes['row_count'] for node in meta_nodes)

    points = [
        ('input_size', input_size),
        ('output_size', output_size),
        ('batches_size', batches_size),
        ('waiting_size', waiting_size),
    ]

    state_storage = tasks.audience.tables.LookalikeSegmentStateStorage(client)
    points = extend_with_time_stats(points, state_storage, SEGMENT_TYPE)

    stats_storage = tasks.audience.tables.LookalikeStatsStorage(client)
    points = extend_with_lookalike_stats(points, stats_storage)

    return points
