# -*- encoding: utf-8 -*-
import csv
import dateutil.parser
import logging
import os
import sys
from collections import defaultdict
from copy import copy
from datetime import datetime
from optparse import Option, OptionParser, OptionValueError

import yt.wrapper as yt


def logs_tables(path, from_date, to_date):
    """Return the tables found under *path* whose name is a date within the range.

    The last component of every table path returned by ``yt.search`` is
    parsed as ``YYYY-MM-DD``; a table is kept when that date lies between
    *from_date* and *to_date*, both bounds inclusive.

    Raises:
        ValueError: if a table name is not in ``YYYY-MM-DD`` form.
    """
    def in_range(table_path):
        # The table name is the final path component.
        name = table_path.split('/')[-1]
        day = datetime.strptime(name, '%Y-%m-%d').date()
        return from_date <= day <= to_date

    return [
        table_path
        for table_path in yt.search(path, node_type="table")
        if in_range(table_path)
    ]


def fast_datetime_convert(pydate):
    """
    Build a datetime from a compact ``YYYYMMDDhhmmss`` string,
    e.g. ``20131203010651``.

    The fields (year, month, day, hour, minute, second) are taken by
    fixed-position slicing; anything after the 14th character is ignored.
    """
    # Slice boundaries of the six consecutive fields.
    bounds = (0, 4, 6, 8, 10, 12, 14)
    fields = [int(pydate[start:stop]) for start, stop in zip(bounds, bounds[1:])]
    return datetime(*fields)


def parse_line(text):
    """Parse a ``key=value@@key=value`` line into a dict of strings.

    Surrounding whitespace is stripped from *text* first. Each ``@@``-separated
    segment is split on its first ``=`` only, so values may themselves
    contain ``=`` (URLs with query strings, for example).

    Raises:
        ValueError: if a segment contains no ``=`` at all (this includes an
            empty line).
    """
    return dict(var.split('=', 1) for var in text.strip().split('@@'))


def check_number(option, opt, value):
    """optparse type checker: convert *value* to an int.

    Args:
        option: the Option instance being processed (unused).
        opt: the option string as given on the command line; used in the
            error message.
        value: the raw argument string.

    Raises:
        OptionValueError: if *value* is not a valid integer literal.
    """
    try:
        return int(value)
    except ValueError:
        message = "option %s: bad int: %r" % (opt, value)
        raise OptionValueError(message)


def check_date(option, opt, value):
    """optparse type checker: parse *value* with dateutil and return its date.

    Args:
        option: the Option instance being processed (unused).
        opt: the option string as given on the command line; used in the
            error message.
        value: the raw argument string.

    Returns:
        The ``datetime.date`` part of the parsed value.

    Raises:
        OptionValueError: if *value* cannot be parsed as a date.
    """
    try:
        parsed = dateutil.parser.parse(value)

    # dateutil raises OverflowError (not ValueError) when a numeric component
    # is too large, so catch both and report them as a bad option value
    # instead of a traceback.
    except (ValueError, OverflowError):
        raise OptionValueError(
            "option %s: bad date: %r" % (opt, value))

    return parsed.date()


def fix_point(point_key, stations_map):
    """Translate keys starting with ``'s'`` through *stations_map*.

    Keys with the ``'s'`` prefix are looked up in *stations_map* (``None``
    is returned when the key is absent); any other key is returned as is.
    """
    needs_lookup = point_key.startswith('s')
    return stations_map.get(point_key) if needs_lookup else point_key


def check_options(options):
    """Validate the parsed command-line options.

    The checks run in a fixed order: start date present, end date present,
    start date not after end date, recipient present. On the first failing
    check the corresponding message is printed and the process exits with
    status 1. Returns ``None`` when every check passes.

    Args:
        options: object with ``start_date``, ``end_date`` and ``mailto``
            attributes, as produced by the option parser.
    """
    if not options.start_date:
        error = u'Задайте начальную дату: -s'
    elif not options.end_date:
        error = u'Задайте конечную дату: -e'
    elif options.start_date > options.end_date:
        error = u'Конечная дата должна быть больше начальной'
    elif not options.mailto:
        error = u'Задайте получателя: -m'
    else:
        return

    # Single-argument call form: prints the same on Python 2 (parenthesised
    # expression after the print statement) and is valid Python 3 syntax.
    print(error)
    sys.exit(1)


def main():
    """Count partner "returnings" in the redirect logs and email them as a CSV.

    Steps:
      1. Set up Django, parse and validate the command-line options.
      2. Select the daily log tables that fall into the requested date range.
      3. Run map / sort / reduce operations on YT, writing into a temporary
         table, to get one ``returnings`` total per partner.
      4. Write the totals to a CSV file in the job storage directory and
         send that file as an email attachment.
      5. Remove the temporary table (also when a step above fails).
    """
    def map_partners_queries(record):
        """Mapper: keep records that have a yandexuid, reduced to the fields used later."""
        if record.get('yandexuid'):
            yield {
                'partner': record['partner'],
                'direction': '%s_%s' % (record['from_id'], record['to_id']),
                'dates': '%s_%s' % (record['date_forward'], record['date_backward']),
                'yandexuid': record['yandexuid'],
                'unixtime': record['unixtime'],
            }

    def reduce_partners_queries(key, records):
        """Reducer over one (yandexuid, direction, dates) group.

        Walks the group's redirects in time order. Whenever a redirect goes to
        a different partner less than 1200 time units (presumably seconds,
        i.e. 20 minutes - confirm against the log format) after the previous
        one, one "returning" is counted for the previous partner. Yields one
        record per partner that collected at least one returning.
        """
        # Only the time and the partner are needed. Sorting these pairs orders
        # by time and breaks ties by partner name, so whole record dicts are
        # never compared with each other.
        redirects = sorted((int(r['unixtime']), r['partner']) for r in records)

        last_time = None
        last_partner = None
        returnings = defaultdict(int)

        for unixtime, partner in redirects:
            # "is not None" rather than truthiness, so a timestamp of 0 still
            # counts as a previous redirect.
            if last_time is not None and unixtime - last_time < 1200 and last_partner != partner:
                returnings[last_partner] += 1

            last_time = unixtime
            last_partner = partner

        for partner, count in returnings.items():
            yield {
                'partner': partner,
                'returnings': count
            }

    def reduce_aggregate(key, records):
        """Reducer over one partner: sum the per-group returnings into a single total."""
        yield {
            'partner': key['partner'],
            'returnings': sum(int(r['returnings']) for r in records)
        }

    def build_partners_queries(yt_tables, tmp_table):
        """Run the YT pipeline; *tmp_table* ends up holding one row per partner.

        After the initial map every operation reads from and writes back to
        *tmp_table*.
        """
        yt.run_map(
            map_partners_queries,
            source_table=yt_tables,
            destination_table=tmp_table,
            format=yt.DsvFormat(),
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

        # The reduce below groups by these columns, so sort by them first.
        yt.run_sort(
            source_table=tmp_table,
            sort_by=['yandexuid', 'direction', 'dates']
        )

        yt.run_reduce(
            reduce_partners_queries,
            tmp_table,
            tmp_table,
            format=yt.DsvFormat(),
            reduce_by=['yandexuid', 'direction', 'dates'],
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

        yt.run_sort(
            source_table=tmp_table,
            sort_by=['partner']
        )

        yt.run_reduce(
            reduce_aggregate,
            tmp_table,
            tmp_table,
            format=yt.DsvFormat(),
            reduce_by=['partner'],
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

    def send_results(tmp_table, storage_dir, options):
        """Dump the per-partner totals to a CSV file and email it as an attachment."""
        results = dict(
            (record['partner'], record['returnings'])
            for record in yt.read_table(tmp_table, format=yt.JsonFormat(), raw=False)
        )

        file_name = os.path.join(
            storage_dir,
            'returnings_%s.csv' % range_key
        )

        # The context manager closes the file even if writing a row fails.
        with open(file_name, 'w') as csvfile:
            csv_writer = csv.writer(csvfile, delimiter=';', quotechar='"', quoting=csv.QUOTE_ALL)
            csv_writer.writerow([u'Partner', u'Возвраты'.encode('utf-8')])

            for partner in sorted(results):
                csv_writer.writerow([partner, results[partner]])

        range_label = range_key.replace('_', ' - ')

        mail = EmailMultiAlternatives(
            subject=u'Возвраты партнерам %s' % range_label,
            body=u'Даты: %s\n\nФайлы во вложении:\n\n' % range_label,
            from_email=settings.SERVER_EMAIL,
            to=['%s@yandex-team.ru' % options.mailto],
        )

        # The file is written as UTF-8 but attached re-encoded as cp1251.
        with open(file_name, 'r') as f:
            data_tab = f.read().decode('utf-8')

        attachment = MIMEText(data_tab.encode('cp1251'), 'csv', 'cp1251')
        attachment.add_header(
            'Content-Disposition', 'attachment',
            filename=os.path.basename(file_name)
        )

        mail.attach(attachment)

        # Sending is best-effort: a delivery failure is logged, not raised.
        try:
            mail.send()

        except Exception:
            log.exception("ERROR")

    # START MAIN
    from email.mime.text import MIMEText

    # Django has to be configured before the project modules are imported.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'travel.avia.stat_admin.local_settings')
    import django
    django.setup()

    from django.conf import settings
    from django.core.mail.message import EmailMultiAlternatives

    from travel.avia.stat_admin.lib.jobs import get_or_create_job_storage_dir
    from travel.avia.stat_admin.lib.yt_helpers import configure_wrapper

    optparser = OptionParser(option_class=Yoption)

    log = logging.getLogger(__name__)

    optparser.add_option("-s", "--start_date", dest="start_date", type="date", help="date from")
    optparser.add_option("-e", "--end_date", dest="end_date", type="date", help="date to", default=datetime.now().date())
    optparser.add_option("-m", "--mailto", dest="mailto", type="email", help="Send resultts to")
    optparser.add_option("-p", "--partner", dest="partner", help="partner")
    optparser.add_option("-j", "--job", dest="job", type="number", help="job ID")

    options, args = optparser.parse_args()

    check_options(options)

    log.info('Start')

    log.info('Check output dir')
    storage_dir = get_or_create_job_storage_dir(options.job)

    # Used in the CSV file name and, with '_' replaced, in the log and email.
    range_key = '%s_%s' % (options.start_date, options.end_date)

    log.info('Dates: %s', range_key.replace('_', ' - '))
    log.info('Email: %s', options.mailto)

    configure_wrapper(yt)

    log.info('Prepare data')

    yt_tables = logs_tables(
        '//home/rasp/logs/rasp-popular-flights-log',
        options.start_date,
        options.end_date,
    )

    tmp_table = yt.create_temp_table(
        path=settings.TOP_DIRECTIONS_TMP_PATH,
        prefix='rasp_min_price_'
    )

    # Remove the temporary table on failure as well as on success.
    try:
        log.info('Build top')
        build_partners_queries(yt_tables, tmp_table)
        log.info('Save results positions')

        log.info('Send results to email')
        send_results(tmp_table, storage_dir, options)

        log.info('Done')

    finally:
        yt.remove(tmp_table)


class Yoption(Option):
    """optparse Option that also accepts the "date", "number" and "email" types.

    "date" and "number" values are converted by ``check_date`` and
    ``check_number``; no checker is registered for "email" here.
    """

    TYPES = Option.TYPES + ("date", "number", "email")
    # A new dict, so the checker table of the base Option class stays untouched.
    TYPE_CHECKER = dict(Option.TYPE_CHECKER, date=check_date, number=check_number)


# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':

    main()
