# coding: utf-8
import argparse
import math
import time
from functools import partial
from os import listdir
from os.path import isfile, join

from nile.api.v1 import (
    aggregators as na,
    extractors as ne,
    filters as nf,
    clusters,
    files
)
from qb2.api.v1 import (
    filters as qf,
    extractors as qe
)

from common import TEMPLATES
from geo_utils import get_bounds_distance
from metrics_conditions import CLICKS
from metrics_utils import *


def get_all_answers(
        ll,
        spn,
        request_rubrics,
        date,
        answers_and_metrics,
        rubric2permalinks,
        permalink2info
):
    """Collect potential and actually shown target answers for one request.

    The request viewport is rebuilt from its centre ``ll`` and its span
    ``spn``.  A company is a *potential* answer when it is listed in
    ``rubric2permalinks[date]`` for one of the request rubrics and its
    coordinates fall inside the viewport.  An entry of
    ``answers_and_metrics`` is a *fact* answer when its integer id is a key of
    ``permalink2info[date]``.  When a permalink is both, the fact fields are
    merged into the potential answer.

    Args:
        ll: viewport centre as the string ``"lon,lat"``.
        spn: viewport size as the string ``"lon_diff,lat_diff"``.
        request_rubrics: comma-separated rubric ids of the request.
        date: key used to pick the per-date slice of both lookup tables.
        answers_and_metrics: iterable of dicts describing the shown answers;
            the keys ``id``, ``metrics``, ``clicks``, ``position``, ``base``
            and ``rubric_res`` are read.
        rubric2permalinks: ``{date: {rubric_id: iterable of permalinks}}``.
        permalink2info: ``{date: {permalink: company info dict}}``; the info
            dict must contain ``lat`` and ``lon``.

    Returns:
        A list of answer dicts (empty when ``ll`` or ``spn`` cannot be parsed).
        Potential answers carry ``is_potential_answer=True`` plus the company
        info, fact answers carry ``is_fact_answer=True`` plus the shown-answer
        fields.
    """
    try:
        lon, lat = map(float, ll.split(','))
        lon_diff, lat_diff = map(float, spn.split(','))
    except ValueError:
        # Malformed or incomplete coordinates: nothing can be matched.
        return []
    lat1, lat2 = lat - lat_diff / 2, lat + lat_diff / 2
    lon1, lon2 = lon - lon_diff / 2, lon + lon_diff / 2
    # NOTE: a Russia-only bounding-box filter (41.185, 19.639, 81.843, 179.999)
    # used to be applied here and is currently disabled.

    # Resolve the per-date slices once instead of on every loop iteration.
    permalinks_by_rubric = rubric2permalinks.get(date, {})
    info_by_permalink = permalink2info.get(date, {})

    permalinks = set()
    for backa_rubric_id in request_rubrics.split(','):
        permalinks.update(permalinks_by_rubric.get(backa_rubric_id, []))

    all_answers = {}
    # Among all advertising permalinks keep those that could have been shown,
    # i.e. those located inside the request viewport.
    for permalink in permalinks:
        company_info = info_by_permalink.get(permalink)
        if company_info and lat1 <= company_info['lat'] <= lat2 and lon1 <= company_info['lon'] <= lon2:
            all_answers[permalink] = dict(
                is_potential_answer=True,
                permalink=permalink,
                **company_info
            )

    # Walk over the shown answers and add click/position info for the
    # advertisers that were really displayed.
    for ans in answers_and_metrics:
        try:
            permalink = int(ans.get('id'))
        except (TypeError, ValueError):
            # Missing (None) or non-numeric id: not a usable permalink.
            continue
        if permalink not in info_by_permalink:
            continue
        rubric_res = ans.get('rubric_res')
        fact_answer_info = dict(
            permalink=permalink,
            metrics=ans.get('metrics'),
            clicks=ans.get('clicks'),
            position=ans.get('position'),
            base=ans.get('base'),
            # An answer without rubric data yields no rubrics instead of
            # crashing the whole request.
            backa_rubrics=rubric_res.split(',') if rubric_res is not None else [],
            is_fact_answer=True,
        )
        potential_answer = all_answers.get(permalink)
        if potential_answer is not None:
            potential_answer.update(fact_answer_info)
        else:
            all_answers[permalink] = fact_answer_info

    # Always hand back a real list, matching the early-exit return type.
    return list(all_answers.values())


def count_clicks(clicks, conditions):
    """Count how many clicks satisfy each named condition.

    Args:
        clicks: iterable of click objects; ``None`` is treated as "no clicks".
        conditions: mapping ``metric name -> predicate``; every predicate is
            called with a single click and its truthiness decides whether the
            click is counted for that metric.

    Returns:
        A plain dict ``metric name -> number of matching clicks`` that only
        contains metrics with at least one match.  If ``deep_use`` was counted
        but ``good_use`` was not, ``good_use`` is set to 1.
    """
    # Materialise the pairs once; ``items()`` works on both Python 2 and 3.
    named_hooks = list(conditions.items())
    clicks_count = {}
    for click in clicks or ():
        for click_name, click_hook in named_hooks:
            if click_hook(click):
                clicks_count[click_name] = clicks_count.get(click_name, 0) + 1
    # A deep use implies a good use even when no good-use condition fired.
    if 'deep_use' in clicks_count and 'good_use' not in clicks_count:
        clicks_count['good_use'] = 1
    return clicks_count


def add_metrics_for_answers(answers_and_clicks, service):
    """Replace the raw ``clicks`` of every answer with counted ``metrics``.

    The click conditions are taken from ``CLICKS[service]``.  Each answer dict
    is modified in place: its ``clicks`` entry is removed (an empty list is
    used when it is absent) and the result of ``count_clicks`` is stored under
    ``metrics``.  The same, now mutated, sequence is returned.
    """
    service_conditions = CLICKS[service]
    for ans in answers_and_clicks:
        raw_clicks = ans.pop('clicks', [])
        ans['metrics'] = count_clicks(raw_clicks, service_conditions)
    return answers_and_clicks


def create_stream(job, service, rubric2permalinks, permalink2info, debug_input, debug_output):
    """Declare the whole metrics pipeline on ``job``.

    Stages, in order:
      1. read ``$geocube/@dates/$service`` and keep search requests whose
         ``bus_query_type`` is ``rubric`` and that have non-empty
         ``request_rubrics``, ``ll`` and ``spn``;
      2. turn the clicks of every answer into metrics
         (``add_metrics_for_answers``) and sum them per request;
      3. build the list of potential and fact target answers for each request
         (``get_all_answers``) and drop requests without any;
      4. unfold the answers into one record per answer, extract the answer and
         company fields, compute the viewport distance and its power-of-two
         rounding;
      5. group by ``date``, ``permalink`` and ``answers_bounds_distance`` and
         aggregate the demand, show and click metrics;
      6. write the result to
         ``$task_root/metrics/periods/$start_date:$end_date/$service``.

    Args:
        job: job object on which the stream is declared (``job.table`` is
            called on it).
        service: service name; used as a key of ``CLICKS`` and stored in the
            ``service`` field.
        rubric2permalinks: lookup bound into ``get_all_answers``.
        permalink2info: lookup bound into ``get_all_answers``.
        debug_input: value passed to ``debug_input`` of the source table.
        debug_output: value passed to ``debug_output`` of the final stream.

    Nothing is returned; the function only registers operations on ``job``.
    """
    # rubric_requests_with_target_answers_and_metrics =
    #  Fetch rubric requests from the geocube
    rubric_requests = job.table(
        '$geocube/@dates/$service'
    ).debug_input(
        debug_input
    ).filter(
        nf.equals('usecase', 'search'),
        nf.equals('bus_query_type', 'rubric'),
        # intensity='cpu'
    ).project(
        ne.all(
            exclude=['yandex_login', 'test_buckets', "suggest", "suggested_rubrics_selected", 'iot', 'msp', 'serp_url',
                     'slices']
        )
    ).filter(
        qf.nonzero('request_rubrics', 'll', 'spn'),
    )

    #  Compute metrics for every answer and sum them per request
    requests_metrics = rubric_requests.project(
        ne.all(
            exclude=['answers_and_clicks']
        ),
        service=ne.const(service),
        # Derive the click metrics (good_use, deep_use, ...)
        # TODO: the deep-use paths need to be refreshed periodically
        answers_and_metrics=ne.custom(
            add_metrics_for_answers,
            'answers_and_clicks',
            'service'
        ),
        request_good_use_count=ne.custom(
            lambda answers_and_metrics: sum((ans['metrics'].get('good_use', 0) for ans in answers_and_metrics))
        ),
        request_good_use_map_count=ne.custom(
            lambda answers_and_metrics: sum((ans['metrics'].get('good_use_map', 0) for ans in answers_and_metrics))
        ),
        request_good_use_serp_count=ne.custom(
            lambda answers_and_metrics: sum((ans['metrics'].get('good_use_serp', 0) for ans in answers_and_metrics))
        ),
        request_deep_use_count=ne.custom(
            lambda answers_and_metrics: sum((ans['metrics'].get('deep_use', 0) for ans in answers_and_metrics))
        ),
        request_make_route_count=ne.custom(
            lambda answers_and_metrics: sum((ans['metrics'].get('make_route', 0) for ans in answers_and_metrics))
        ),
        request_open_site_count=ne.custom(
            lambda answers_and_metrics: sum((ans['metrics'].get('open_site', 0) for ans in answers_and_metrics))
        ),
        request_make_call_count=ne.custom(
            lambda answers_and_metrics: sum((ans['metrics'].get('make_call', 0) for ans in answers_and_metrics))
        ),
        # Number of shown answers whose ``base`` is 'yabs'
        fact_geoproduct_answers_count=ne.custom(
            lambda answers_and_clicks: sum((_.get('base') == 'yabs' for _ in answers_and_clicks))
        ),
    )

    # Keep only the target answers, compute the potential
    rubric_requests_with_target_answers_and_metrics = requests_metrics.project(
        ne.all(
            exclude=['answers_and_metrics']
        ),
        # ``date`` is the string form of the local date of ``timestamp``
        date=ne.custom(lambda timestamp: str(datetime.date.fromtimestamp(timestamp))),
        all_answers_and_metrics=ne.custom(
            partial(
                get_all_answers,
                rubric2permalinks=rubric2permalinks, permalink2info=permalink2info
            ),
            'll', 'spn',  # 'bounds',
            'request_rubrics',
            'date',
            'answers_and_metrics'
        ),
        intensity='cpu',
        memory_limit=1000
    ).filter(
        qf.nonzero('all_answers_and_metrics')
    )

    rubric_requests_with_target_answers_and_metrics.project(
        'date',
        'normal_query', 'request_rubrics', 'bus_query_type',
        'll', 'bounds', 'spn',
        'reg', 'request_region', 'result_region',
        'reqid', 'serpid',
        'answers_count',
        'fact_geoproduct_answers_count',
        'request_good_use_count', 'request_good_use_map_count',  'request_good_use_serp_count', 'request_deep_use_count',
        'request_make_route_count', 'request_open_site_count', 'request_make_call_count',
        qe.unfold('answer_and_metrics', 'all_answers_and_metrics'),  # Unfold the answers
        user_id='yandexuid',
    ).project(
        ne.all(exclude=['answer_and_metrics']),
        qe.dictitem('is_potential_answer', from_='answer_and_metrics', default=False),
        qe.dictitem('is_fact_answer', from_='answer_and_metrics', default=False),
        # company info
        qe.dictitem('permalink', from_='answer_and_metrics').rename('answer_permalink'),
        qe.dictitem('cluster_permalink', from_='answer_and_metrics'),
        qe.dictitem('lat', from_='answer_and_metrics').rename('answer_lat'),
        qe.dictitem('lon', from_='answer_and_metrics').rename('answer_lon'),
        qe.dictitem('backa_rubrics', from_='answer_and_metrics').hide(),
        qe.custom('answer_rubrics', lambda backa_rubrics: ','.join(sorted(backa_rubrics))),
        qe.dictitem('backa_main_rubric', from_='answer_and_metrics').rename('answer_main_rubric'),
        # answer info
        qe.dictitem('position', from_='answer_and_metrics'),
        qe.dictitem('base', from_='answer_and_metrics'),
        qe.dictitem('metrics', from_='answer_and_metrics').hide(),
        qe.dictitem('good_use', from_='metrics'),
        qe.dictitem('good_use_map', from_='metrics'),
        qe.dictitem('good_use_serp', from_='metrics'),
        qe.dictitem('deep_use', from_='metrics'),
        qe.dictitem('make_route', from_='metrics'),
        qe.dictitem('open_site', from_='metrics'),
        qe.dictitem('make_call', from_='metrics'),
    ).project(
        ne.all(),
        permalink=ne.custom(
            lambda cluster_permalink, answer_permalink: cluster_permalink or answer_permalink
        ).allow_null_dependency(),  # Fix up permalink changes
        duplicate_permalink='answer_permalink',
        bounds_distance=ne.custom(get_bounds_distance, 'll', 'spn'),
        bounds_distance_round_log2=ne.custom(
            lambda bounds_distance: math.pow(2, round(math.log(bounds_distance, 2))) if bounds_distance > 0 else None
        ),  # Determine the zoom level of the request (distance rounded to a power of two)
    ).filter(
        # filter that leaves out searches made at a huge zoom
        qf.compare('bounds_distance_round_log2', '<', 600 * 1000)
    ).project(
        ne.all(),
        qe.unfold_with_total(
            'answers_bounds_distance', 'bounds_distance_round_log2'
        )  # Unfold the zoom levels so that metrics are computed for each level
    ).groupby(
        'date', 'permalink', 'answers_bounds_distance',
    ).aggregate(
        # demand metrics
        potential_show_search_requests=na.count_distinct('reqid', in_memory=True),
        potential_show_search_tasks=na.count_distinct('serpid', in_memory=True),
        potential_show_users=na.count_distinct('user_id', in_memory=True),
        potential_good_use_search_requests=na.count_distinct('reqid', predicate=qf.compare('request_good_use_count', '>', 0), in_memory=True),
        potential_deep_use_search_requests=na.count_distinct('reqid', predicate=qf.compare('request_deep_use_count', '>', 0), in_memory=True),
        potential_good_use_search_tasks=na.count_distinct('serpid', predicate=qf.compare('request_good_use_count', '>', 0), in_memory=True),
        potential_deep_use_search_tasks=na.count_distinct('serpid', predicate=qf.compare('request_deep_use_count', '>', 0), in_memory=True),
        potential_good_use_users=na.count_distinct('user_id', predicate=qf.compare('request_good_use_count', '>', 0), in_memory=True),
        potential_deep_use_users=na.count_distinct('user_id', predicate=qf.compare('request_deep_use_count', '>', 0), in_memory=True),
        fact_answers_good_use_sum=na.sum('request_good_use_count'),
        fact_answers_good_use_map_sum=na.sum('request_good_use_map_count'),
        fact_answers_good_use_serp_sum=na.sum('request_good_use_serp_count'),
        fact_answers_deep_use_sum=na.sum('request_deep_use_count'),
        fact_answers_make_route_sum=na.sum('request_make_route_count'),
        fact_answers_open_site_sum=na.sum('request_open_site_count'),
        fact_answers_make_call_sum=na.sum('request_make_call_count'),

        # metrics of actual shows
        show_search_requests=na.count_distinct('reqid', in_memory=True, predicate=qf.nonzero('is_fact_answer')),
        show_search_tasks=na.count_distinct('serpid', in_memory=True, predicate=qf.nonzero('is_fact_answer')),
        show_users=na.count_distinct('user_id', in_memory=True, predicate=qf.nonzero('is_fact_answer')),
        # supply metrics
        mean_answers_count=na.mean('answers_count'),
        mean_fact_geoproduct_answers_count=na.mean('fact_geoproduct_answers_count'),
        # median_fact_geoproduct_answers_count=na.median('fact_geoproduct_answers_count'),
        # click metrics over all real answers

        # metrics of the target answers
        mean_position=na.mean('position', predicate=qf.nonzero('is_fact_answer')),
        # median_position=na.median('position', predicate=qf.nonzero('is_fact_answer')),

        good_use=na.sum('good_use'),
        good_use_map=na.sum('good_use_map'),
        good_use_serp=na.sum('good_use_serp'),
        deep_use=na.sum('deep_use'),
        make_route=na.sum('make_route'),
        make_call=na.sum('make_call'),
        open_site=na.sum('open_site'),

        good_use_search_tasks=na.count_distinct('serpid', predicate=qf.nonzero('good_use'), in_memory=True),
        good_use_map_search_tasks=na.count_distinct('serpid', predicate=qf.nonzero('good_use_map'), in_memory=True),
        good_use_serp_search_tasks=na.count_distinct('serpid', predicate=qf.nonzero('good_use_serp'), in_memory=True),
        deep_use_search_tasks=na.count_distinct('serpid', predicate=qf.nonzero('deep_use'), in_memory=True),
        make_route_search_tasks=na.count_distinct('serpid', predicate=qf.nonzero('make_route'), in_memory=True),
        make_call_search_tasks=na.count_distinct('serpid', predicate=qf.nonzero('make_call'), in_memory=True),
        open_site_search_tasks=na.count_distinct('serpid', predicate=qf.nonzero('open_site'), in_memory=True),

        good_use_users=na.count_distinct('user_id', predicate=qf.nonzero('good_use'), in_memory=True),
        good_use_map_users=na.count_distinct('user_id', predicate=qf.nonzero('good_use_map'), in_memory=True),
        good_use_serp_users=na.count_distinct('user_id', predicate=qf.nonzero('good_use_serp'), in_memory=True),
        deep_use_users=na.count_distinct('user_id', predicate=qf.nonzero('deep_use'), in_memory=True),
        make_route_users=na.count_distinct('user_id', predicate=qf.nonzero('make_route'), in_memory=True),
        make_call_users=na.count_distinct('user_id', predicate=qf.nonzero('make_call'), in_memory=True),
        open_site_users=na.count_distinct('user_id', predicate=qf.nonzero('open_site'), in_memory=True),

        # distinct original answer permalinks merged under one ``permalink``
        duplicate_permalink_count=na.count_distinct('duplicate_permalink', in_memory=True),
        fact_answers_duplicate_permalink_count=na.count_distinct('duplicate_permalink', in_memory=True, predicate=qf.nonzero('is_fact_answer')),
    ).debug_output(
        debug_output
    ).put(
        '$task_root/metrics/periods/$start_date:$end_date/$service'
    )


def run_job(cluster, service, rubric2permalinks, permalink2info):
    """Create a job on ``cluster``, declare the metrics stream on it and run it.

    Fresh empty lists are passed as the debug input and output of the stream.
    """
    job = cluster.job()
    create_stream(
        job, service, rubric2permalinks, permalink2info,
        debug_input=[],
        debug_output=[],
    )
    job.run()


def get_compute_periods(start_date, end_date, delta):
    """Split the inclusive range of dates into consecutive batches.

    Args:
        start_date: first day, formatted as ``YYYY-MM-DD``.
        end_date: last day (inclusive), formatted as ``YYYY-MM-DD``.
        delta: maximum number of days in one batch; must be at least 1.

    Yields:
        ``(batch_start, batch_end)`` tuples of ``YYYY-MM-DD`` strings, both
        ends inclusive, in chronological order.  Nothing is yielded when
        ``start_date`` is later than ``end_date``.

    Raises:
        ValueError: if ``delta`` is smaller than 1 (raised on the first
            iteration), or if a date does not match the expected format.
    """
    if delta < 1:
        # With a non-positive batch size the cursor would never advance and
        # the generator would yield forever.
        raise ValueError('delta must be at least 1 day, got %r' % (delta,))
    one_day = datetime.timedelta(days=1)
    batch_start = datetime.datetime.strptime(start_date, '%Y-%m-%d').date()
    last_day = datetime.datetime.strptime(end_date, '%Y-%m-%d').date()
    while batch_start <= last_day:
        # A batch covers ``delta`` days but never runs past ``last_day``.
        span_days = min((last_day - batch_start).days, delta - 1)
        batch_end = batch_start + datetime.timedelta(days=span_days)
        yield (str(batch_start), str(batch_end))
        batch_start = batch_end + one_day



def main(start_date, end_date, service, history_from_backup, num_days_in_batch):
    cluster = clusters.Hahn().env(templates=TEMPLATES)
    if not history_from_backup:
        get_company_info_history(cluster, start_date, end_date, history_from_backup)

    failed_dates = []
    for start_date_, end_date_ in get_compute_periods(start_date, end_date, num_days_in_batch):
        print 'start', start_date_, end_date_, service
        try:
            cluster = cluster.env(
                templates=dict(
                    service=service,
                    start_date=start_date_,
                    end_date=end_date_,
                    dates='{$start_date..$end_date}',
                    **TEMPLATES
                ),
                yt_spec_defaults={
                    'scheduling_tag_filter': 'yamaps',
                    'pool_trees': ["yamaps"],
                },
                files=[files.LocalFile(f) for f in listdir('.') if isfile(join('.', f)) and f.endswith('.py')],
                parallel_operations_limit=10,
            )

            rubric2permalinks, permalink2info = get_company_info_history(cluster, start_date_, end_date_, True)
            run_job(cluster, service, rubric2permalinks, permalink2info)

            print 'end', start_date_, end_date_, service
        except Exception as e:
            failed_dates.append((start_date_, end_date_, service))
            print 'failed_dates', failed_dates
            print e
            print 'sleep 10 mins'
            time.sleep(60 * 10)
        print 'failed_dates', failed_dates


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--start_date', required=True, help='Первая дата за которую нужно посчитать метрики')
    parser.add_argument('--end_date', required=True)
    parser.add_argument('--service', required=True)
    parser.add_argument('--history_from_backup', action='store_true')
    parser.add_argument('--num_days_in_batch', default=14, type=int)
    args = parser.parse_args()
    print args
    main(args.start_date, args.end_date, args.service, args.history_from_backup, args.num_days_in_batch)
