# -*- encoding: utf-8 -*-
import csv
import dateutil.parser
import logging
import os
from decimal import Decimal

from collections import defaultdict
from copy import copy
from datetime import datetime, timedelta
from optparse import Option, OptionParser, OptionValueError
from pytz import timezone

# Point Django at the project settings (unless DJANGO_SETTINGS_MODULE is
# already set in the environment) and initialise it before the project
# modules below are imported.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'travel.avia.stat_admin.local_settings')
import django
django.setup()

from travel.avia.stat_admin.lib.logs import create_current_file_run_log

from travel.avia.stat_admin.data.models import OhmUtm, OhmIncoming, OhmRedirect


# Module-level logger named after this module.
logger = logging.getLogger(__name__)


def check_date(option, opt, value):
    """optparse type checker that converts an option string into a date.

    The string is parsed with ``dateutil.parser.parse`` and only the date
    part of the result is kept.

    :param option: the option being checked (not used here).
    :param opt: option name as typed on the command line; used in the error.
    :param value: raw option value to parse.
    :return: ``datetime.date`` taken from the parsed value.
    :raises OptionValueError: if ``value`` cannot be parsed as a date.
    """
    try:
        date_time = dateutil.parser.parse(value)
    except (ValueError, OverflowError):
        # dateutil reports an unrecognised format with ValueError and an
        # out-of-range component with OverflowError; both mean "bad date".
        raise OptionValueError(
            "option %s: bad date: %r" % (opt, value))

    return date_time.date()


def make_data_linked(eventdate, cohorts, version=2):
    """Count redirects per UTM and weekly cohort for incomings of one day.

    Selects ``OhmRedirect`` rows with the given ``version`` whose linked
    incoming has ``eventdate`` equal to the argument and a UTM source starting
    with ``'ohm'``, and whose own ``eventdate`` lies in
    ``[eventdate, eventdate + (cohorts + 1) * 7 days)``.

    :param eventdate: date of the incoming visits to analyse.
    :param cohorts: number of weekly cohorts to count.
    :param version: value used for the ``version`` filter.
    :return: ``{utm_id: {cohort_num: redirect_count}}``; every UTM present
        has a key for each cohort ``0..cohorts - 1``.
    """
    redirects = OhmRedirect.objects.filter(
        version=version,
        eventdate__gte=eventdate,
        eventdate__lt=eventdate + timedelta(days=(cohorts + 1) * 7),
        incoming__utm__source__startswith='ohm',
        incoming__eventdate=eventdate,
    ).select_related(
        'incoming'
    )

    utms = {}

    for r in redirects:
        logging.debug('Redirect: %r', r)
        utm_id = int(r.incoming.utm_id)
        # Whole weeks between the incoming and the redirect.  Floor division
        # keeps the cohort number an int (a valid key of the cohort dict) even
        # when true division is in effect.
        cohort_num = (r.eventdatetime - r.incoming.eventdatetime).days // 7

        if 0 <= cohort_num < cohorts:
            if utm_id not in utms:
                utms[utm_id] = {c: 0 for c in range(cohorts)}

            utms[utm_id][cohort_num] += 1

    return utms


def make_data_linked_and_prices(eventdate, cohorts, version=2):
    """Count redirects and sum click prices per UTM and weekly cohort.

    Uses the same selection as the count-only report: ``OhmRedirect`` rows of
    the given ``version`` whose linked incoming has ``eventdate`` equal to the
    argument and a UTM source starting with ``'ohm'``, with the redirect's own
    ``eventdate`` in ``[eventdate, eventdate + (cohorts + 1) * 7 days)``.

    :param eventdate: date of the incoming visits to analyse.
    :param cohorts: number of weekly cohorts to count.
    :param version: value used for the ``version`` filter.
    :return: ``{utm_id: {cohort_num: {'count': int, 'price': Decimal}}}``;
        every UTM present has a key for each cohort ``0..cohorts - 1``.
    """
    redirects = OhmRedirect.objects.filter(
        version=version,
        eventdate__gte=eventdate,
        eventdate__lt=eventdate + timedelta(days=(cohorts + 1) * 7),
        incoming__utm__source__startswith='ohm',
        incoming__eventdate=eventdate,
    ).select_related(
        'incoming'
    )

    utms = {}

    for r in redirects:
        utm_id = int(r.incoming.utm_id)
        # Whole weeks between the incoming and the redirect.  Floor division
        # keeps the cohort number an int (a valid key of the cohort dict) even
        # when true division is in effect.
        cohort_num = (r.eventdatetime - r.incoming.eventdatetime).days // 7

        if 0 <= cohort_num < cohorts:
            if utm_id not in utms:
                utms[utm_id] = {
                    c: {'count': 0, 'price': Decimal(0)} for c in range(cohorts)
                }

            cell = utms[utm_id][cohort_num]
            cell['count'] += 1
            cell['price'] += r.click_price

    return utms


def read_data(left_date, right_date, cohorts, version=1):
    """Load 'ohm' incomings and the redirects of the same visitors.

    :param left_date: first date (inclusive) for incomings and redirects.
    :param right_date: last date (inclusive) for incomings.
    :param cohorts: number of weekly cohorts; defines the redirect windows.
    :param version: value used for the ``version`` filter of every query.
    :return: tuple ``(incoming_data, redirects)`` where ``incoming_data`` is a
        ``defaultdict(list)`` mapping yandexuid to a sorted list of
        ``(eventdatetime, utm_id)`` tuples and ``redirects`` is an
        ``OhmRedirect`` queryset.
    """
    # Visitors that produced any redirect within cohorts + 1 weeks.
    uids_with_redirects = OhmRedirect.objects.filter(
        version=version,
        eventdate__gte=left_date,
        eventdate__lt=left_date + timedelta(days=(cohorts + 1) * 7),
    ).values_list('yandexuid')

    ohm_incomings = OhmIncoming.objects.filter(
        version=version,
        eventdate__gte=left_date,
        eventdate__lte=right_date,
        yandexuid__in=uids_with_redirects,
        utm__source__startswith='ohm',
    )

    # Remember the incomings of every visitor.
    incoming_data = defaultdict(list)
    for incoming in ohm_incomings:
        visit = (incoming.eventdatetime, incoming.utm_id)
        incoming_data[incoming.yandexuid].append(visit)

    # Order each visitor's incomings.
    for visits in incoming_data.values():
        visits.sort()

    # Take all redirects of the visitors that came from the ads.
    redirects_window_end = left_date + timedelta(days=cohorts * 7)
    redirects = OhmRedirect.objects.filter(
        version=version,
        eventdate__gte=left_date,
        eventdate__lt=redirects_window_end,
        yandexuid__in=incoming_data.keys(),
    )

    return incoming_data, redirects


def read_data_with_attr(left_date, right_date, cohorts, version=1):
    """Load incomings of any UTM source and the redirects of the same visitors.

    Unlike the 'ohm'-only reader, incomings are not filtered by UTM source.

    :param left_date: first date (inclusive) for incomings and redirects.
    :param right_date: last date (inclusive) for incomings.
    :param cohorts: number of weekly cohorts; defines the redirect windows.
    :param version: value used for the ``version`` filter of every query.
    :return: tuple ``(incoming_data, redirects)`` where ``incoming_data`` is a
        ``defaultdict(list)`` mapping yandexuid to a sorted list of
        ``(eventdatetime, utm_id)`` tuples and ``redirects`` is an
        ``OhmRedirect`` queryset.
    """
    wide_window_end = left_date + timedelta(days=(cohorts + 1) * 7)
    narrow_window_end = left_date + timedelta(days=cohorts * 7)

    # Visitors that produced any redirect within the wide window.
    redirect_uids = OhmRedirect.objects.filter(
        version=version,
        eventdate__gte=left_date,
        eventdate__lt=wide_window_end,
    ).values_list('yandexuid')

    all_incomings = OhmIncoming.objects.filter(
        version=version,
        eventdate__gte=left_date,
        eventdate__lte=right_date,
        yandexuid__in=redirect_uids,
    )

    # Remember the incomings of every visitor.
    incoming_data = defaultdict(list)
    for item in all_incomings:
        incoming_data[item.yandexuid].append((item.eventdatetime, item.utm_id))

    # Order each visitor's incomings.
    for uid_visits in incoming_data.values():
        uid_visits.sort()

    # Take all redirects of the visitors collected above.
    redirects = OhmRedirect.objects.filter(
        version=version,
        eventdate__gte=left_date,
        eventdate__lt=narrow_window_end,
        yandexuid__in=incoming_data.keys(),
    )

    return incoming_data, redirects


def make_csv_data(utms, cohorts):
    """Turn per-UTM cohort counters into a CSV header and rows.

    Only UTMs whose source starts with ``'ohm'`` are reported.

    :param utms: ``{utm_id: {cohort_num: count}}``.
    :param cohorts: number of cohort columns.
    :return: tuple ``(header, rows)``; each row is the UTM ``tuple_key()``
        fields, the total of the counts, then one count per cohort.  Rows are
        sorted by the UTM key.
    """
    utm_by_id = {}
    for utm in OhmUtm.objects.filter(id__in=utms.keys()):
        utm_by_id[int(utm.id)] = utm

    counts_by_key = {}
    for utm_id, cohorts_data in utms.items():
        counts = [cohorts_data[num] for num in range(cohorts)]
        utm = utm_by_id[utm_id]
        if not utm.source.startswith('ohm'):
            continue
        counts_by_key[utm.tuple_key()] = counts

    header = [u'source', u'campaign', u'medium', u'term', u'redirects']
    header += [str(num) for num in range(cohorts)]

    rows = [
        list(key) + [sum(counts)] + list(counts)
        for key, counts in sorted(counts_by_key.items())
    ]

    return header, rows


def make_csv_data_with_prices(utms, cohorts):
    """Turn per-UTM cohort counters and prices into a CSV header and rows.

    Only UTMs whose source starts with ``'ohm'`` are reported.

    :param utms: ``{utm_id: {cohort_num: {'count': ..., 'price': ...}}}``.
    :param cohorts: number of cohorts; each gives a count and a price column.
    :return: tuple ``(header, rows)``; each row is the UTM ``tuple_key()``
        fields, total count, total price as float, then a count/price pair
        per cohort.  Rows are sorted by the UTM key.
    """
    utm_by_id = {}
    for utm in OhmUtm.objects.filter(id__in=utms.keys()):
        utm_by_id[int(utm.id)] = utm

    cells_by_key = {}
    for utm_id, cohorts_data in utms.items():
        cells = [cohorts_data[num] for num in range(cohorts)]
        utm = utm_by_id[utm_id]
        if not utm.source.startswith('ohm'):
            continue
        cells_by_key[utm.tuple_key()] = cells

    header = [u'source', u'campaign', u'medium', u'term', u'redirects', u'price']
    for num in range(cohorts):
        header.append('cohort_{}_count'.format(num))
        header.append('cohort_{}_price'.format(num))

    rows = []
    for key, cells in sorted(cells_by_key.items()):
        prices = [cell['price'] for cell in cells]
        counts = [cell['count'] for cell in cells]

        row = list(key)
        row.append(sum(counts))
        row.append(float(sum(prices)))
        # `cells` holds exactly one entry per cohort, in cohort order.
        for count, price in zip(counts, prices):
            row.append(count)
            row.append(float(price))
        rows.append(row)

    return header, rows


def make_report_data(incoming_data, redirects, cohorts, need_prices=False):
    """Attribute every redirect to the latest earlier incoming and bucket it.

    For each redirect the incomings of the same yandexuid are scanned in order
    and the last one strictly earlier than the redirect is taken.  The scan
    stops at the first incoming that is not earlier, so each list in
    ``incoming_data`` must be sorted ascending.  The cohort number is the
    number of whole weeks between that incoming and the redirect.

    :param incoming_data: mapping yandexuid -> list of
        ``(eventdatetime, utm_id)`` tuples sorted ascending.
    :param redirects: iterable of objects with ``yandexuid``,
        ``eventdatetime`` and, when ``need_prices`` is set, ``click_price``.
    :param cohorts: number of weekly cohorts to report.
    :param need_prices: when true, collect ``{'count', 'price'}`` per cohort
        instead of a plain counter.
    :return: ``{utm_id: {cohort_num: value}}`` where ``value`` is an int, or a
        ``{'count': int, 'price': Decimal}`` dict when ``need_prices`` is set.
        A UTM that was matched only outside the cohort range is still present,
        with all cohorts empty.
    """
    def init_cohorts():
        # The shape of a cohort cell must match how it is updated below.
        if need_prices:
            return {
                c: {'count': 0, 'price': Decimal(0)} for c in range(cohorts)
            }

        return {c: 0 for c in range(cohorts)}

    utms = {}
    for redirect in redirects:
        matched = None
        # .get() keeps a redirect of an unknown visitor from failing on a
        # plain dict and from adding empty entries to a defaultdict.
        for eventdatetime, utm_id in incoming_data.get(redirect.yandexuid, ()):
            if eventdatetime < redirect.eventdatetime:
                matched = (utm_id, eventdatetime)
            else:
                break

        if matched is None:
            continue

        utm_id, eventdatetime = matched
        # Floor division keeps the cohort number an int, so it is a valid
        # key of the cohort dict regardless of the division mode.
        cohort_num = (redirect.eventdatetime - eventdatetime).days // 7

        if utm_id not in utms:
            utms[utm_id] = init_cohorts()

        # Count only cohorts in the range 0..cohorts - 1.
        if 0 <= cohort_num <= cohorts - 1:
            if need_prices:
                utms[utm_id][cohort_num]['price'] += redirect.click_price
                utms[utm_id][cohort_num]['count'] += 1
            else:
                utms[utm_id][cohort_num] += 1

    return utms


# Remove after RASPTICKETS-9837
def make_report_data_decoded(incoming_data, redirects, cohorts):
    """Group redirects by the UTM of the latest earlier incoming.

    For each redirect the incomings of the same yandexuid are scanned in order
    and the last one strictly earlier than the redirect is taken.  The scan
    stops at the first incoming that is not earlier, so each list in
    ``incoming_data`` must be sorted ascending.  Redirects with no earlier
    incoming are dropped.

    :param incoming_data: mapping yandexuid -> list of
        ``(eventdatetime, utm_id)`` tuples sorted ascending.
    :param redirects: iterable of objects with ``yandexuid`` and
        ``eventdatetime``.
    :param cohorts: not used; kept so the signature matches the other
        report builders.
    :return: ``{utm_id: [redirect, ...]}``.
    """
    utms = {}
    for redirect in redirects:
        found = False
        matched_utm_id = None
        for eventdatetime, utm_id in incoming_data.get(redirect.yandexuid, ()):
            if eventdatetime < redirect.eventdatetime:
                # Remember the UTM of the matching incoming: after a `break`
                # the loop variable refers to the first incoming that is NOT
                # earlier than the redirect.
                found = True
                matched_utm_id = utm_id
            else:
                break

        if found:
            utms.setdefault(matched_utm_id, []).append(redirect)

    return utms


def save_results(header, rows, full_file_name):
    """Write the report as a ';'-separated CSV file with every field quoted.

    :param header: sequence of column titles, written as the first row.
    :param rows: iterable of row sequences.
    :param full_file_name: path of the file to create or overwrite.
    """
    # open() instead of the Python-2-only file() builtin: same behaviour on
    # Python 2 and still available on Python 3.
    with open(full_file_name, 'w') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=';', quotechar='"', quoting=csv.QUOTE_ALL)
        csv_writer.writerow(header)
        csv_writer.writerows(rows)


def main():
    """Build the weekly cohort report and save it as a CSV file.

    Command-line options:
      -l/--left_date  first day of the report week; defaults to the current
                      Moscow date minus ``default_cohorts`` weeks.
      -c/--cohorts    number of weekly cohorts (default 8).
      -d/--directory  destination directory; required, otherwise a warning is
                      logged and nothing is written.

    The report covers ``left_date .. left_date + 6 days`` (capped at today) and
    is saved as ``<left_date>_<right_date>.csv`` in the destination directory.
    """
    optparser = OptionParser(option_class=Yoption)

    log = logging.getLogger(__name__)
    create_current_file_run_log()

    default_cohorts = 8
    moscow_timezone = timezone('Europe/Moscow')
    # Current date in Moscow.  datetime.now(tz) converts the current instant
    # to the zone, whereas localize(datetime.now()) would only label the
    # server's local wall time as Moscow time.
    today = datetime.now(moscow_timezone).date()

    optparser.add_option(
        "-l", "--left_date", dest="left_date", type="date",
        default=today - timedelta(days=default_cohorts * 7)
    )
    optparser.add_option("-c", "--cohorts", dest="cohorts", type="int", default=default_cohorts)
    optparser.add_option("-d", "--directory", dest="directory")

    options, args = optparser.parse_args()

    log.info('Start')

    if not options.directory:
        log.warning('Please set destination directory first')
        return

    left_date = options.left_date
    # A report spans one week, but never reaches past today.
    right_date = min(left_date + timedelta(days=6), today)

    file_name = '{left_date}_{right_date}.csv'.format(
        left_date=left_date.strftime('%Y-%m-%d'),
        right_date=right_date.strftime('%Y-%m-%d'),
    )
    report_file_name = os.path.join(options.directory, file_name)

    incoming_data, redirects = read_data(left_date, right_date, options.cohorts)
    utms = make_report_data(incoming_data, redirects, options.cohorts)

    header, rows = make_csv_data(utms, options.cohorts)
    save_results(header, rows, report_file_name)

    log.info('Done')


class Yoption(Option):
    """optparse ``Option`` that additionally understands ``type="date"``."""

    TYPES = Option.TYPES + ("date", )
    # Own copy of the checker table, so the base class mapping stays untouched.
    TYPE_CHECKER = dict(Option.TYPE_CHECKER, date=check_date)


# Build the report only when the file is executed as a script.
if __name__ == '__main__':

    main()
