# -*- coding: utf-8 -*-
import os
import re
from datetime import datetime, timedelta
from optparse import OptionParser
from pytz import timezone

from cachetools.func import lru_cache
import yt.wrapper as yt

# YT directory holding one UTM log table per day; main() appends "/<YYYY-MM-DD>".
UTM_LOG_PATH = "//home/rasp/logs/rasp-utm-log"
# YT directory holding one redirect log table per day; main() appends "/<YYYY-MM-DD>".
REDIR_LOG_PATH = "//home/avia/logs/avia-redir-balance-by-day-log"
# A UTM log row is imported only if its utm_source starts with one of these prefixes.
VALID_SOURCES_STARTSWITH = (
    'adwords',
    'google',
    'ohm',
    'serp',
    'travelaudience',
    'ya',
)
# Time zone attached to the naive timestamps parsed from the log rows.
SERVER_TIMEZONE = timezone('Europe/Moscow')
# A yandexuid is accepted when it is 1 to 32 characters from [0-9a-z].
REGEXP_UID = re.compile('^[0-9a-z]{1,32}$')

# Key of the Setting row that stores the time of the last successful import.
SETTING_KEY = 'UTM_LAST_IMPORT_TIME'
# Field defaults used when that Setting row has to be created.
SETTING_DEFAULTS = {
    'value': '',
}
# Recipient of the "import finished" notification e-mail.
EMAIL = u'avia-process@yandex-team.ru'
# Timestamp string computed once, when the module is imported.
# NOTE(review): localize() only attaches the Moscow zone to the host's naive
# local time and does not convert it, so this is Moscow time only if the host
# clock runs in Moscow time -- confirm.
MSK_NOW_STR = SERVER_TIMEZONE.localize(
    datetime.now()
).strftime(
    '%Y-%m-%d %H:%M:%S'
)


def is_valid_uid(yandexuid):
    """Return True if *yandexuid* matches REGEXP_UID, False otherwise.

    Empty and missing values (``''``, ``None``) are reported as invalid
    instead of being passed to the regular expression, where ``None`` would
    raise ``TypeError``.
    """
    if not yandexuid:
        return False

    return bool(REGEXP_UID.match(yandexuid))


def main():
    """Import UTM visits and redirects from per-day YT tables into the database.

    Command-line options:
      -d / --days          number of consecutive days to process (default 3)
      -r / --right_date    latest day to process, YYYY-MM-DD (default: yesterday);
                           the other days are obtained by stepping back one day
      --skip_utms_import   import only the redirect log
      --check_last_run     return immediately if the stored last-run date is
                           today or later; after a run in which no table was
                           missing, store the run time and, in the production
                           environment, send a notification e-mail

    For every day the existing version=1 rows of that day are replaced with
    the rows built from the YT table. The table is read completely before the
    old rows are deleted, so a failure while reading leaves the previously
    imported data in place.
    """
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'travel.avia.stat_admin.local_settings')
    import django
    django.setup()

    # Everything below is imported after django.setup() has run.
    import logging

    from django.conf import settings
    from django.db.utils import IntegrityError

    from travel.avia.stat_admin.lib.logs import create_current_file_run_log
    from travel.avia.stat_admin.lib.mail import SmailikClient
    from travel.avia.stat_admin.lib.yt_helpers import configure_wrapper
    from travel.avia.stat_admin.data.models import OhmUtm, OhmIncoming, OhmRedirect, Setting

    configure_wrapper(yt)

    log = logging.getLogger(__name__)
    create_current_file_run_log()

    @lru_cache(maxsize=None)
    def get_utm(utm_source, utm_campaign, utm_medium, utm_term):
        """Return the OhmUtm row for this UTM combination, creating it if needed."""
        utm_db, _created = OhmUtm.objects.get_or_create(
            source=utm_source,
            campaign=utm_campaign,
            medium=utm_medium,
            term=utm_term,
        )

        return utm_db

    def send_notify():
        """Send the "data uploaded" e-mail to EMAIL."""
        subject = u'Данные для когортного отчета залиты'
        content = u'Данные для когортного отчета залиты: {now}'.format(now=MSK_NOW_STR)

        c = SmailikClient(
            service=u'Flights.Yandex',
            send_from=u'Яндекс.Авиабилеты &lt;avia-partner@support.yandex.ru&gt;',
            reply_to=u'Яндекс.Авиабилеты &lt;avia-info@yandex-team.ru&gt;',
            smailik_dir='/var/spool/smailik-preparer/avia-stats/'
        )

        c.send_email(
            EMAIL,
            subject,
            content,
        )

    def parse_eventtime(raw_value):
        """Parse a 'YYYY-MM-DD HH:MM:SS' string and attach SERVER_TIMEZONE to it."""
        return SERVER_TIMEZONE.localize(
            datetime.strptime(raw_value, '%Y-%m-%d %H:%M:%S')
        )

    def read_incomings(utm_log):
        """Build unsaved OhmIncoming objects from the rows of the UTM log table."""
        incomings = []
        for record in yt.read_table(utm_log, format=yt.JsonFormat(), raw=False):
            utm_source = record.get('utm_source')

            # A missing/empty source can never match a prefix; checking it first
            # also keeps None away from .startswith().
            if not utm_source or not utm_source.startswith(VALID_SOURCES_STARTSWITH):
                continue

            # Parsed only for rows that passed the source filter.
            iso_eventtime = parse_eventtime(record.get('datetime'))

            try:
                utm = get_utm(
                    utm_source,
                    record.get('utm_campaign'),
                    record.get('utm_medium'),
                    record.get('utm_term'),
                )
            except IntegrityError:
                log.error('BAD UTM\'s in row: %r', record)
                continue

            yandexuid = record.get('yandexuid', '')

            # The explicit emptiness check covers a key that is present but null.
            if not yandexuid or not is_valid_uid(yandexuid):
                log.info('Bad UID: %s', yandexuid)
                continue

            incomings.append(OhmIncoming(
                eventdate=iso_eventtime.date(),
                eventdatetime=iso_eventtime,
                utm=utm,
                yandexuid=yandexuid
            ))

        return incomings

    def read_redirects(redir_log):
        """Build unsaved OhmRedirect objects from the rows of the redirect log table."""
        redirects = []
        for record in yt.read_table(redir_log, format=yt.JsonFormat(), raw=False):
            # Only rows whose FILTER value is 0 are imported.
            if int(record["FILTER"]) != 0:
                continue

            yandexuid = record.get('YANDEXUID')
            iso_eventtime = parse_eventtime(record.get('ISO_EVENTTIME'))

            if not yandexuid or not is_valid_uid(yandexuid):
                log.info('Bad UID: %s', yandexuid)
                continue

            redirects.append(OhmRedirect(
                eventdate=iso_eventtime.date(),
                eventdatetime=iso_eventtime,
                yandexuid=yandexuid
            ))

        return redirects

    yesterday_str = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

    optparser = OptionParser()
    optparser.add_option("-d", "--days", dest="days", type="int", help="number of days", default=3)
    optparser.add_option("-r", "--right_date", dest="right_date", default=yesterday_str)
    optparser.add_option('--skip_utms_import', action='store_true')
    optparser.add_option('--check_last_run', action='store_true')

    options, _args = optparser.parse_args()

    setting, _created = Setting.objects.get_or_create(key=SETTING_KEY, defaults=SETTING_DEFAULTS)
    all_tables_found = True

    if options.check_last_run and setting.value:
        # If a successful import already happened today, there is nothing to do.
        last_run_date = datetime.strptime(setting.value, '%Y-%m-%d %H:%M:%S').date()
        if last_run_date >= datetime.now().date():
            log.info('Last run on %s, skip running', setting.value)
            return

    # The right edge of the range is the same for every day, so parse it once.
    right_date = datetime.strptime(options.right_date, '%Y-%m-%d')

    for days_back in range(options.days):
        process_day = SERVER_TIMEZONE.localize(right_date - timedelta(days=days_back))
        log_day = process_day.strftime("%Y-%m-%d")

        utm_log = "{utm_log_path}/{log_day}".format(
            utm_log_path=UTM_LOG_PATH, log_day=log_day
        )
        redir_log = "{redir_log_path}/{log_day}".format(
            redir_log_path=REDIR_LOG_PATH, log_day=log_day
        )

        # Both logs are required for the day to be processed correctly.
        if not yt.exists(redir_log):
            log.info('Skip %s, table not found', redir_log)
            all_tables_found = False
            continue

        if not options.skip_utms_import:
            if not yt.exists(utm_log):
                log.info('Skip %s, table not found', utm_log)
                all_tables_found = False
                continue

            log.info('Process %s', utm_log)
            ohm_incomings = read_incomings(utm_log)

            # Old rows are removed only after the table has been read in full.
            OhmIncoming.objects.filter(
                eventdate=process_day,
                version=1,
            ).delete()

            log.info('Bulk create %s records', len(ohm_incomings))
            OhmIncoming.objects.bulk_create(ohm_incomings, batch_size=1000)

        log.info('Process %s', redir_log)
        ohm_redirects = read_redirects(redir_log)

        # Old rows are removed only after the table has been read in full.
        OhmRedirect.objects.filter(
            eventdate=process_day,
            version=1
        ).delete()

        log.info('Bulk create %s records', len(ohm_redirects))
        OhmRedirect.objects.bulk_create(ohm_redirects, batch_size=1000)

    if options.check_last_run and all_tables_found:
        # Remember the run time so that the next --check_last_run call can skip.
        setting.value = MSK_NOW_STR
        setting.save()

        if settings.YANDEX_ENVIRONMENT_TYPE == 'production':
            log.info('Send notify to: %s', EMAIL)
            send_notify()


# Run the import only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
