# -*- encoding: utf-8 -*-
import csv
import dateutil.parser
import functools
import logging
import os
import requests
import sys
from collections import defaultdict
from copy import copy
from datetime import datetime
from optparse import Option, OptionParser, OptionValueError

from django.conf import settings

import yt.wrapper as yt


def logs_tables(path, from_date, to_date):
    """Return the YT tables under *path* whose name is a date in the given range.

    The last path component of every table found by ``yt.search`` is parsed
    as ``YYYY-MM-DD``; tables with ``from_date <= date <= to_date`` (both
    bounds inclusive) are returned in the order the search yields them.

    Raises ValueError if a table name does not match ``%Y-%m-%d``.
    """
    def name_as_date(table_path):
        # Only the final path component carries the date.
        table_name = table_path.split('/')[-1]
        return datetime.strptime(table_name, '%Y-%m-%d').date()

    return [
        table_path
        for table_path in yt.search(path, node_type="table")
        if from_date <= name_as_date(table_path) <= to_date
    ]


def fast_datetime_convert(pydate):
    """Convert a compact ``YYYYMMDDhhmmss`` string into a ``datetime``.

    Example input: ``20131203010651`` -> year, month, day, hour, minute,
    second are taken from fixed character positions.
    """
    # Consecutive pairs of offsets delimit the six fixed-width fields.
    offsets = (0, 4, 6, 8, 10, 12, 14)
    fields = [
        int(pydate[start:stop])
        for start, stop in zip(offsets[:-1], offsets[1:])
    ]
    return datetime(*fields)


def parse_line(text):
    """Parse a ``key=value@@key=value`` log line into a dict of strings.

    Surrounding whitespace of the whole line is stripped first. Each field is
    split on the FIRST ``=`` only, so values that themselves contain ``=``
    (for example URLs with query strings) are kept intact.

    Raises ValueError if a field contains no ``=`` at all.
    """
    return dict(field.split('=', 1) for field in text.strip().split('@@'))


def check_number(option, opt, value):
    """optparse type checker: convert *value* to ``int``.

    *option* is unused; *opt* is the option string used in the error text.
    Raises OptionValueError when *value* is not a valid integer literal.
    """
    try:
        return int(value)
    except ValueError:
        message = "option %s: bad int: %r" % (opt, value)
        raise OptionValueError(message)


def check_date(option, opt, value):
    """optparse type checker: parse *value* with dateutil and return a ``date``.

    *option* is unused; *opt* is the option string used in the error text.
    Raises OptionValueError when the string cannot be parsed as a date.
    """
    try:
        date_time = dateutil.parser.parse(value)

    # dateutil reports unparsable strings as ValueError and out-of-range
    # numeric components as OverflowError; both are bad user input and must
    # surface as a regular option error rather than a traceback.
    except (ValueError, OverflowError):
        raise OptionValueError(
            "option %s: bad date: %r" % (opt, value))

    return date_time.date()


def check_national(option, opt, value):
    """optparse type checker: validate and normalise a national version.

    The value is lower-cased and must be a member of
    ``settings.AVIA_NATIONAL_VERSIONS``; the lower-cased value is returned.
    *option* is unused; *opt* is the option string used in the error text.
    Raises OptionValueError for an unknown national version.
    """
    national_version = value.lower()

    if national_version in settings.AVIA_NATIONAL_VERSIONS:
        return national_version

    # The error message shows the value exactly as the user typed it.
    raise OptionValueError(
        "option %s: bad national_version: %r" % (opt, value))


def fix_point(point_key, stations_map):
    """Translate a point key starting with ``'s'`` through *stations_map*.

    Keys that are empty/None or do not start with ``'s'`` are returned
    unchanged. For ``'s…'`` keys the mapped value is returned, or None when
    the key is absent from *stations_map*.
    """
    if not point_key:
        return point_key

    if not point_key.startswith('s'):
        return point_key

    return stations_map.get(point_key)


def check_options(options):
    """Validate the parsed command-line options.

    Checks, in order: start date present, end date present, start date not
    later than end date, recipient present. On the first failed check a
    message is printed and the process exits with status 1.
    """
    def abort(message):
        # Single-argument print(...) behaves the same as the print statement.
        print(message)
        sys.exit(1)

    if not options.start_date:
        abort(u'Задайте начальную дату: -s')

    if not options.end_date:
        abort(u'Задайте конечную дату: -e')

    if options.start_date > options.end_date:
        abort(u'Конечная дата должна быть больше начальной')

    if not options.mailto:
        abort(u'Задайте получателя: -m')


def main():
    """Build the "top directions" report and e-mail it as CSV attachments.

    Driven entirely by command-line options (see the ``add_option`` calls):

    1. Count searches per direction in the users-search log tables on YT and
       keep the ``--quantity`` most frequent directions.
    2. For those directions, count show-log records per partner and compute
       per-day and per-month average positions.
    3. Write three CSV files into the job storage directory and send them to
       ``<mailto>@yandex-team.ru``.

    Any exception raised after the temporary result table is created is
    logged and turns into ``sys.exit(1)``; a failure of the final
    ``mail.send()`` is only logged.

    The nested helpers are closures: they read ``options``, ``yt_tables``,
    ``show_log_tables``, ``tmp_table``, ``partner_codes``, ``stations_map``,
    ``settlements_map`` and ``top_directions`` from this function's scope,
    so they must only be called after those names are bound below.
    """

    def map_top_100(national_version, stations_map, record):
        """Mapper: emit ``{'direction': 'from_to'}`` for a qualifying search record."""
        from_id = fix_point(record.get('from_id'), stations_map)
        to_id = fix_point(record.get('to_id'), stations_map)

        # Every condition must be truthy: ticket service, matching national
        # version, a yandexuid, and both end points resolved.
        conditions = [
            record.get('service') == 'ticket',
            record.get('national_version', '').lower() == national_version.lower(),
            record.get('yandexuid'),
            from_id,
            to_id,
        ]

        if all(conditions):
            yield {
                'direction': '%s_%s' % (from_id, to_id)
            }

    def reduce_top_100(key, records):
        """Reducer: emit the number of records for one direction."""
        yield {
            'direction': key['direction'],
            'count': sum(1 for _ in records),
        }

    def human_direction(direction):
        """Render ``'from_to'`` as ``'From title - To title'`` (UTF-8 bytes).

        Point keys missing from ``settlements_map`` are shown as-is.
        """
        from_point_key, to_point_key = direction.split('_')
        from_title = settlements_map.get(from_point_key, from_point_key)
        to_title = settlements_map.get(to_point_key, to_point_key)

        return '%s - %s' % (
            from_title.encode('utf-8'),
            to_title.encode('utf-8'),
        )

    def build_top_directions():
        """Run the map-reduce that writes per-direction search counts to ``tmp_table``."""
        yt.run_map_reduce(
            mapper=functools.partial(map_top_100, options.national_version, stations_map),
            reducer=reduce_top_100,
            source_table=yt_tables,
            destination_table=tmp_table,
            reduce_by='direction',
        )

    def get_sorted_top_directions():
        """Read ``tmp_table`` and return the top ``(count, direction)`` pairs, largest first."""
        counted_directions = [
            (int(record['count']), record['direction'])
            for record in yt.read_table(tmp_table, format=yt.JsonFormat(), raw=False)
        ]

        return sorted(counted_directions, reverse=True)[:options.quantity]

    def append_result(result_key, direction, partner_code, position, results):
        """Store *position* at ``results[result_key][direction][partner_code]``."""
        per_direction = results.setdefault(result_key, {})
        per_partner = per_direction.setdefault(direction, defaultdict(float))
        per_partner[partner_code] = position

    def save_top_directions(top_directions, shows_count_results, file_name):
        """Write the top directions CSV and return the file name.

        *top_directions* is a sequence of ``(count, direction)`` pairs;
        *shows_count_results* maps direction -> partner -> show count.
        One column is written per partner seen in *shows_count_results*.
        """
        all_partners = sorted({
            partner
            for partners in shows_count_results.values()
            for partner in partners
        })

        # The context manager guarantees the data is flushed and the handle
        # closed before the file is re-read for the e-mail attachment.
        with open(file_name, 'w') as csvfile:
            csv_writer = csv.writer(csvfile, delimiter=';', quotechar='"', quoting=csv.QUOTE_ALL)
            csv_writer.writerow([u'Direction', u'Searches'] + all_partners)

            for count, direction in top_directions:
                shows = shows_count_results[direction]
                row = [human_direction(direction), count]
                # .get() does not insert missing partners into the mapping.
                row.extend(shows.get(partner, 0) for partner in all_partners)
                csv_writer.writerow(row)

        return file_name

    def save_results(results, file_name, date_format):
        """Write average positions per date/direction/partner and return the file name.

        *results* maps date -> direction -> partner -> average position.
        Rows are written for every date (sorted) and every top direction;
        a missing or zero average is rendered as ``'-'``, otherwise with one
        decimal and a comma as decimal separator.
        """
        with open(file_name, 'w') as csvfile:
            csv_writer = csv.writer(csvfile, delimiter=';', quotechar='"', quoting=csv.QUOTE_ALL)
            header = [u'Date', u'Direction'] + [partner_codes[k].encode('utf-8') for k in partner_codes]
            csv_writer.writerow(header)

            for event_key in sorted(results.keys()):
                for direction in top_directions:
                    pos = []
                    for partner_code in partner_codes:
                        try:
                            avg_positions = float(results[event_key][direction][partner_code])
                        except KeyError:
                            # No data for this date/direction combination.
                            avg_positions = 0

                        pos_str = '%.1f' % (avg_positions) if avg_positions else '-'
                        pos.append(pos_str.replace('.', ','))

                    row = [event_key.strftime(date_format), human_direction(direction)] + pos
                    csv_writer.writerow(row)

        return file_name

    def get_partner_codes(partner_code):
        """Fetch ``{code: title}`` for partners and dohop vendors from the RASP API.

        Partners are kept when ``billing_datasource_id_production`` is truthy,
        vendors when ``enabled`` is truthy. When *partner_code* is given only
        that code is considered.
        """
        partner_codes = {}
        partners_url = '%s/partner/codes/' % settings.RASP_API_HOST
        vendors_url = '%s/dohop_vendors/codes/' % settings.RASP_API_HOST

        # NOTE(review): verify=False disables TLS certificate verification
        # for these requests — confirm this is intended for the API host.
        r = requests.get(
            partners_url,
            verify=False
        )

        for partner in r.json():
            billing_datasource_id_production = partner['billing_datasource_id_production']
            code = partner['code']
            if partner_code and partner_code != code:
                continue

            if billing_datasource_id_production:
                partner_codes[code] = partner['title']

        r = requests.get(
            vendors_url,
            verify=False
        )

        for partner in r.json():
            code = partner['code']

            if partner_code and partner_code != code:
                continue

            if partner['enabled']:
                partner_codes[code] = partner['title']

        return partner_codes

    def show_map(top_directions, national_version, record):
        """Mapper: emit show-log records for top directions of the national version.

        ``day_key`` / ``month_key`` are the first 10 / 7 characters of
        ``iso_eventtime``.
        """
        direction = '%s_%s' % (
            record['city_from_key'],
            record['city_to_key'],
        )

        if direction in top_directions and national_version == record.get('national'):
            yield {
                'direction': direction,
                'partner': record['partner'],
                'position': record['position'],
                'day_key': record['iso_eventtime'][:10],
                'month_key': record['iso_eventtime'][:7],
            }

    def show_day_reduce(event_key, key, records):
        """Reducer: average the integer positions of one direction/partner/period.

        *event_key* is the name of the period column (``'day_key'`` or
        ``'month_key'``) copied from the reduce key into the output row.
        """
        positions = []
        for r in records:
            try:
                position = int(r.get('position'))

            # ValueError: non-numeric text; TypeError: position is missing
            # (None). Both kinds of record are skipped.
            except (TypeError, ValueError):
                continue

            positions.append(position)

        yield {
            'direction': key['direction'],
            'partner': key['partner'],
            'avg_position': '%.1f' % (float(sum(positions)) / len(positions)) if positions else 0,
            event_key: key[event_key]
        }

    def parse_yt_show_logs(show_log_tables, top_directions):
        """Compute average positions per day and per month from the show logs.

        Returns ``(day_results, month_results)``, each mapping
        date -> direction -> partner -> average position. Returns two empty
        dicts when there are no show-log tables.
        """
        if not show_log_tables:
            return {}, {}

        def collect(table, key_field, key_format):
            # Read one reduced table into the nested result mapping.
            collected = {}
            for record in yt.read_table(table, format=yt.JsonFormat(), raw=False):
                key = datetime.strptime(record[key_field], key_format).date()
                append_result(
                    key,
                    record['direction'],
                    record['partner'],
                    float(record['avg_position']),
                    collected,
                )
            return collected

        map_tmp_table = yt.create_temp_table()
        day_tmp_table = yt.create_temp_table()
        month_tmp_table = yt.create_temp_table()

        yt.run_map(
            functools.partial(show_map, top_directions, options.national_version),
            source_table=show_log_tables,
            destination_table=map_tmp_table,
        )

        # No destination is given — presumably the table is sorted in place;
        # confirm against the yt.wrapper documentation.
        yt.run_sort(
            source_table=map_tmp_table,
            sort_by=['direction', 'partner', 'day_key'],
        )

        yt.run_reduce(
            functools.partial(show_day_reduce, 'day_key'),
            source_table=map_tmp_table,
            destination_table=day_tmp_table,
            reduce_by=['direction', 'partner', 'day_key'],
        )

        # The monthly aggregation needs a different grouping key, so it goes
        # through map-reduce with no mapper instead of the day-sorted reduce.
        yt.run_map_reduce(
            source_table=map_tmp_table,
            destination_table=month_tmp_table,
            reduce_by=['direction', 'partner', 'month_key'],
            reducer=functools.partial(show_day_reduce, 'month_key'),
            mapper=None,
        )

        day_results = collect(day_tmp_table, 'day_key', '%Y-%m-%d')
        month_results = collect(month_tmp_table, 'month_key', '%Y-%m')

        return day_results, month_results

    def get_stations_map():
        """Fetch ``{station key: settlement point key}`` for airports from the RASP API."""
        stations = requests.get(
            '%s/station/airports/' % settings.RASP_API_HOST,
            verify=False
        ).json()

        return {k: v['settlement_point_key'] for k, v in stations.items()}

    def get_settlements_map():
        """Fetch ``{settlement key: title}`` from the RASP API."""
        settlements = requests.get(
            '%s/settlement/list/' % settings.RASP_API_HOST,
            verify=False
        ).json()

        return {k: v['title'] for k, v in settlements.items()}

    def show_count_reduce(key, records):
        """Reducer: emit the number of show records for one direction/partner."""
        yield {
            'direction': key['direction'],
            'partner': key['partner'],
            'count': sum(1 for _ in records),
        }

    def build_shows_count(top_directions):
        """Return direction -> partner -> number of show-log records.

        The result is a ``defaultdict`` of ``defaultdict(int)``; it is empty
        when there are no show-log tables.
        """
        shows_count_results = defaultdict(lambda: defaultdict(int))
        show_tmp_table = yt.create_temp_table()

        if show_log_tables:
            yt.run_map_reduce(
                mapper=functools.partial(show_map, top_directions, options.national_version),
                # The keyword was previously misspelled, so the reducer was
                # never passed under the name the other run_map_reduce calls
                # in this file use.
                reducer=show_count_reduce,
                source_table=show_log_tables,
                destination_table=show_tmp_table,
                reduce_by=['direction', 'partner'],
            )

        for record in yt.read_table(show_tmp_table, format=yt.JsonFormat(), raw=False):
            shows_count_results[record['direction']][record['partner']] = int(record['count'])

        return shows_count_results

    # START MAIN
    from email.mime.text import MIMEText

    # Django must be configured before its mail/settings modules are used.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'travel.avia.stat_admin.local_settings')
    import django
    django.setup()

    from django.conf import settings
    from django.core.mail.message import EmailMultiAlternatives

    from travel.avia.stat_admin.lib.jobs import get_or_create_job_storage_dir
    from travel.avia.stat_admin.lib.yt_helpers import configure_wrapper

    optparser = OptionParser(option_class=Yoption)

    log = logging.getLogger(__name__)

    optparser.add_option("-s", "--start_date", dest="start_date", type="date", help="date from")
    optparser.add_option("-e", "--end_date", dest="end_date", type="date", help="date to", default=datetime.now().date())
    optparser.add_option("-n", "--national_version", dest="national_version", type="national", help="national version", default='ru')
    optparser.add_option("-q", "--quantity", dest="quantity", type="number", help="TOP quantity", default=100)
    optparser.add_option("-m", "--mailto", dest="mailto", type="email", help="Send resultts to")
    optparser.add_option("-p", "--partner", dest="partner", help="partner")
    optparser.add_option("-j", "--job", dest="job", type="number", help="job ID")

    options, args = optparser.parse_args()

    check_options(options)

    log.info('Start')

    # range_key goes into file names; range_title is its human-readable form.
    range_key = '%s_%s' % (options.start_date, options.end_date)
    range_title = range_key.replace('_', ' - ')

    log.info('Dates: %s', range_title)
    log.info('Email: %s', options.mailto)

    configure_wrapper(yt)

    log.info('Check output dir')
    storage_dir = get_or_create_job_storage_dir(options.job)

    log.info('Prepare data')

    partner_codes = get_partner_codes(options.partner)
    stations_map = get_stations_map()
    settlements_map = get_settlements_map()

    yt_tables = logs_tables(
        '//home/rasp/logs/rasp-users-search-log',
        options.start_date,
        options.end_date,
    )

    show_log_tables = logs_tables(
        '//home/rasp/logs/rasp-tickets-show-log',
        options.start_date,
        options.end_date,
    )

    tmp_table = yt.create_temp_table(
        path=settings.TOP_DIRECTIONS_TMP_PATH,
        prefix='rasp_min_price_'
    )

    try:
        log.info('Build top')
        if yt_tables:
            build_top_directions()
            sorted_top_directions = get_sorted_top_directions()
        else:
            sorted_top_directions = []

        # Direction strings only, in ranking order; also read by save_results.
        top_directions = [direction for _, direction in sorted_top_directions]

        shows_count_results = build_shows_count(top_directions)

        log.info('Calculate positions')
        day_results, month_results = parse_yt_show_logs(
            show_log_tables, top_directions
        )

        log.info('Save results positions to %s', storage_dir)

        day_results_filename = save_results(
            day_results,
            os.path.join(storage_dir, 'results_day_%s.csv' % range_key),
            '%m.%d.%Y',
        )
        month_results_filename = save_results(
            month_results,
            os.path.join(storage_dir, 'results_month_%s.csv' % range_key),
            '%B %Y',
        )
        top_results_filename = save_top_directions(
            sorted_top_directions,
            shows_count_results,
            os.path.join(storage_dir, 'top_directions_%s.csv' % range_key),
        )

        log.info('Send results to email')

        mail = EmailMultiAlternatives(
            subject=u'Топ-%s + места в выдаче %s, %s' % (options.quantity, range_title, options.national_version),
            body=u'Даты: %s\n\nФайлы во вложении:\n\n' % (range_title),
            from_email=settings.SERVER_EMAIL,
            to=['%s@yandex-team.ru' % options.mailto],
        )

        for file_name in [day_results_filename, month_results_filename, top_results_filename]:
            with open(file_name, 'r') as f:
                data_tab = f.read().decode('utf-8')

            # The CSV files are written as UTF-8 but attached as cp1251.
            attachment = MIMEText(data_tab.encode('cp1251'), 'csv', 'cp1251')
            attachment.add_header(
                'Content-Disposition', 'attachment',
                filename=os.path.basename(file_name)
            )

            mail.attach(attachment)

        # Best effort: the CSV files already exist in storage_dir, so a mail
        # failure is logged without failing the whole job.
        try:
            mail.send()

        except Exception:
            log.exception("ERROR")

        log.info('Done')

    except Exception:
        log.exception('Error')
        sys.exit(1)


class Yoption(Option):
    """optparse ``Option`` extended with the custom types used by this script.

    Adds the type names ``date``, ``number``, ``national`` and ``email``.
    Checkers are registered for the first three only.
    NOTE(review): ``email`` has no checker here — presumably optparse passes
    such values through unchanged; confirm against the optparse docs.
    """
    TYPES = Option.TYPES + ("date", "number", "national", "email")

    # A fresh dict, so the base class's checker table is left untouched.
    TYPE_CHECKER = dict(
        Option.TYPE_CHECKER,
        date=check_date,
        number=check_number,
        national=check_national,
    )


# Run the report only when the file is executed as a script, not on import.
if __name__ == '__main__':

    main()
