# -*- encoding: utf-8 -*-
import csv
import dateutil.parser
import functools
import logging
import operator
import os
import sys
import requests
from collections import defaultdict
from copy import copy
from datetime import datetime
from optparse import Option, OptionParser, OptionValueError

import yt.wrapper as yt


def logs_tables(path, from_date, to_date):
    """Return the YT tables under ``path`` whose name is a date in range.

    The last component of every table path found by ``yt.search`` is parsed
    as ``YYYY-MM-DD``; the table is kept when that date lies between
    ``from_date`` and ``to_date`` (both inclusive).

    :param path: YT node under which tables are searched.
    :param from_date: first ``datetime.date`` to include.
    :param to_date: last ``datetime.date`` to include.
    :return: list of matching table paths, in search order.
    :raises ValueError: if a table name is not a ``YYYY-MM-DD`` date.
    """
    def table_day(table_path):
        # The date is encoded in the table name, i.e. the last path component.
        table_name = table_path.split('/')[-1]
        return datetime.strptime(table_name, '%Y-%m-%d').date()

    return [
        table_path
        for table_path in yt.search(path, node_type="table")
        if from_date <= table_day(table_path) <= to_date
    ]


def fast_datetime_convert(pydate):
    """Convert a compact ``YYYYMMDDhhmmss`` string into a ``datetime``.

    Example: ``20131203010651`` -> year 2013, month 12, day 3,
    hour 1, minute 6, second 51.

    Only the first 14 characters are read; the fields are cut out by fixed
    offsets instead of going through ``strptime``.
    """
    # Boundaries of the year, month, day, hour, minute and second fields.
    offsets = (0, 4, 6, 8, 10, 12, 14)
    fields = [
        int(pydate[start:stop])
        for start, stop in zip(offsets, offsets[1:])
    ]

    return datetime(*fields)


def parse_line(text):
    """Parse a ``key=value@@key=value`` log line into a dict.

    Surrounding whitespace is stripped, fields are separated by ``@@`` and
    every field is split on its FIRST ``=`` only, so values that themselves
    contain ``=`` (for example URLs with a query string) are kept intact
    instead of making ``dict()`` fail.

    :param text: raw log line.
    :return: dict mapping field names to their string values.
    :raises ValueError: if a field contains no ``=`` at all (this includes an
        empty line).
    """
    return dict(field.split('=', 1) for field in text.strip().split('@@'))


def check_number(option, opt, value):
    """optparse type checker: convert ``value`` to ``int``.

    :param option: the Option instance (unused).
    :param opt: option string as given on the command line, used in the
        error message.
    :param value: raw string value to convert.
    :return: the value as an ``int``.
    :raises OptionValueError: if ``value`` is not a valid integer literal.
    """
    try:
        return int(value)
    except ValueError:
        message = "option %s: bad int: %r" % (opt, value)
        raise OptionValueError(message)


def check_date(option, opt, value):
    """optparse type checker: parse ``value`` into a ``datetime.date``.

    The string is parsed with ``dateutil.parser.parse`` and the time part of
    the result is dropped.

    :param option: the Option instance (unused).
    :param opt: option string as given on the command line, used in the
        error message.
    :param value: raw string value to parse.
    :return: the parsed ``datetime.date``.
    :raises OptionValueError: if ``value`` cannot be parsed as a date.
    """
    try:
        parsed = dateutil.parser.parse(value)
    except (ValueError, OverflowError):
        # dateutil documents OverflowError for values that do not fit a C
        # integer; report it as a bad option value rather than a traceback.
        raise OptionValueError(
            "option %s: bad date: %r" % (opt, value))

    return parsed.date()


def check_options(options):
    """Validate the parsed command-line options; exit with status 1 on error.

    The checks run in this order and only the first failure is reported:
    start date present, end date present, start date not after end date,
    recipient present.  The (Russian) message is printed to stdout before
    ``sys.exit(1)`` is called.

    :param options: object with ``start_date``, ``end_date`` and ``mailto``
        attributes (the optparse values object).
    :return: ``None`` when every check passes.
    :raises SystemExit: with code 1 when a check fails.
    """
    if not options.start_date:
        error = u'Задайте начальную дату: -s'
    elif not options.end_date:
        error = u'Задайте конечную дату: -e'
    elif options.start_date > options.end_date:
        error = u'Конечная дата должна быть больше начальной'
    elif not options.mailto:
        error = u'Задайте получателя: -m'
    else:
        return

    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print(error)
    sys.exit(1)


def fix_point(point_key, stations_map):
    """Replace a station point key with the key of its settlement.

    Keys that start with ``'s'`` are looked up in ``stations_map`` and the
    entry's ``'settlement_point_key'`` value is returned.  Every other key is
    returned unchanged, and so is a key that is missing/empty, unknown to
    ``stations_map``, or whose entry has no ``'settlement_point_key'``.

    :param point_key: point key string; may be ``None`` or empty when the
        source record has no such field.
    :param stations_map: mapping of station point keys to dicts.
    :return: the settlement point key if one is found, else ``point_key``.
    """
    # Callers pass record.get(...) straight in, so tolerate a missing value.
    if not point_key or not point_key.startswith('s'):
        return point_key

    station = stations_map.get(point_key)

    try:
        return station['settlement_point_key']
    except (TypeError, KeyError):
        # TypeError: station is None (unknown key);
        # KeyError: the entry carries no settlement reference.
        return point_key


def main():
    """Build the partner-query statistics report and e-mail it as CSV files.

    Steps, in order:

    1. Set up Django, parse and validate the command line.
    2. Aggregate the partner query log into per partner/status counters.
    3. Count searches per direction in the user search log and keep the
       100 most frequent directions.
    4. Compute the average reply time per day, partner and top direction.
    5. Write both results as CSV files into the job storage directory and
       mail them to ``<mailto>@yandex-team.ru``.

    The map/reduce callbacks are nested functions; several of them close over
    names that only exist inside ``main`` (``options``, ``settings``,
    ``parse_qid``, ``range_key``, ``log`` ...), which are bound further down,
    before the callbacks are first invoked.
    """
    def map_top_100(national_version, stations_map, record):
        """Mapper: emit one ``direction`` row per matching search record.

        A record matches when its service is ``ticket``, its national version
        equals ``national_version`` (compared case-insensitively) and it has
        a yandexuid and both end points.  Both end points are passed through
        ``fix_point`` before the direction is built.
        """
        from_id = fix_point(record.get('from_id'), stations_map)
        to_id = fix_point(record.get('to_id'), stations_map)

        service = record.get('service')
        yandexuid = record.get('yandexuid')

        conditions = [
            service == 'ticket',
            record.get('national_version', '').lower() == national_version.lower(),
            yandexuid,
            from_id,
            to_id
        ]

        if all(conditions):
            yield {
                'direction': '%s_%s' % (from_id, to_id)
            }

    def reduce_top_100(key, records):
        """Reducer: count the rows of one direction."""
        yield {
            'direction': key['direction'],
            'count': sum(1 for _ in records)
        }

    def build_top_100(airports):
        """Count searches per direction over the requested date range.

        Reads the user search log tables selected by ``options.start_date``
        and ``options.end_date`` and returns a temporary table with
        ``direction`` / ``count`` rows.
        """
        tmp_table = yt.create_temp_table()

        yt.run_map(
            functools.partial(map_top_100, options.national_version, airports),
            source_table=logs_tables(
                '//home/rasp/logs/rasp-users-search-log', options.start_date, options.end_date
            ),
            destination_table=tmp_table,
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

        yt.run_sort(
            source_table=tmp_table,
            sort_by=['direction']
        )

        yt.run_reduce(
            reduce_top_100,
            tmp_table,
            tmp_table,
            format=yt.DsvFormat(),
            reduce_by='direction',
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

        return tmp_table

    def get_top(tmp_table):
        """Read the direction counters into a ``{direction: count}`` dict."""
        top = {}

        for record in yt.read_table(tmp_table, format=yt.JsonFormat(), raw=False):
            direction = record['direction']
            count = int(record['count'])
            top[direction] = count

        return top

    def map_partners_queries(national_version, record):
        """Mapper: reduce a partner query record to partner/status/query_time.

        Records are dropped when the national version cannot be extracted
        from the qid, when it differs from ``national_version``, or when the
        importer string has no ``[...]`` part.  Every status other than
        ``got_reply`` is folded into ``other_status``; a ``got_reply`` with
        zero variants becomes ``got_reply_empty``.
        """
        try:
            qid = record.get('qid')
            # The national version is the last "_"-separated token of the
            # fourth "."-separated qid component.
            q_key = qid.split('.')[3]
            nv = q_key.split('_')[-1]
        except (AttributeError, IndexError):
            # AttributeError: no qid in the record; IndexError: qid has fewer
            # than four components.
            return

        # NOTE(review): unlike map_top_100 this comparison is case-sensitive.
        if nv != national_version:
            return

        variants_len = int(record.get('variants_len', 0))
        if 'importer' in record:
            importer = record.get('importer')
            if importer.startswith('dohop'):
                partner = 'dohop'
            else:
                try:
                    partner = importer.split('[')[1].strip(']')
                except IndexError:
                    return
                # Normalise the order of a comma-separated partner list.
                partner = ','.join(sorted(partner.split(",")))
        else:
            partner = record['partner']

        status = record['status']

        if status == "got_reply":
            if variants_len == 0:
                status = "{status}_empty".format(status=status)
        else:
            status = "other_status"

        yield {
            'partner': partner,
            'status': status,
            'query_time': record['query_time'],
        }

    def reduce_partners_queries(key, records):
        """Reducer: count rows per partner/status.

        For the ``got_reply`` status an extra row with the status
        ``avg_query_time_got_reply`` is emitted; its ``count`` column holds
        the average of the integer-truncated query times.
        """
        count = 0
        query_time_sum = 0

        for r in records:
            count += 1
            query_time_sum += int(float(r['query_time']))

        yield {
            'partner': key['partner'],
            'status': key['status'],
            'count': count,
        }

        if key['status'] == 'got_reply':
            yield {
                'partner': key['partner'],
                'status': 'avg_query_time_got_reply',
                'count': int(query_time_sum / count),
            }

    def uniq_qids(key, records):
        """Reducer: keep only the record with the greatest ``unixtime``.

        The group is streamed instead of being collected and sorted, so only
        one record is held in memory at a time.
        """
        latest = None
        latest_time = None

        for r in records:
            r_time = int(r['unixtime'])
            # ">=" keeps the LAST of several equally-timed records, matching
            # the result of a stable sort followed by taking the last item.
            if latest is None or r_time >= latest_time:
                latest = r
                latest_time = r_time

        if latest is not None:
            yield latest

    def map_dohop(record):
        """Mapper: rewrite every ``dohop*`` importer to ``dohop[dohop]``."""
        r = record.copy()

        if r.get('importer', '').startswith('dohop'):
            r['importer'] = 'dohop[dohop]'

        yield r

    def build_partners_queries(yt_tables, tmp_table, national_version):
        """Aggregate partner query logs into per partner/status counters.

        All intermediate results are written to ``tmp_table`` in place:
        normalise dohop importers, keep the newest record per
        (qid, importer), map to partner/status rows, then count them.
        """
        yt.run_map(
            map_dohop,
            source_table=yt_tables,
            destination_table=tmp_table,
            format=yt.DsvFormat(),
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

        yt.run_sort(
            source_table=tmp_table,
            destination_table=tmp_table,
            sort_by=['qid', 'importer']
        )

        yt.run_reduce(
            uniq_qids,
            tmp_table,
            tmp_table,
            format=yt.DsvFormat(),
            reduce_by=['qid', 'importer'],
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
            memory_limit=3 * 1024 * 1024 * 1024,
        )

        yt.run_map(
            functools.partial(map_partners_queries, national_version),
            source_table=tmp_table,
            destination_table=tmp_table,
            format=yt.DsvFormat(),
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

        yt.run_sort(
            source_table=tmp_table,
            sort_by=['partner', 'status']
        )

        yt.run_reduce(
            reduce_partners_queries,
            tmp_table,
            tmp_table,
            format=yt.DsvFormat(),
            reduce_by=['partner', 'status'],
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

    def human_direction(direction, settlements):
        """Render a ``from_to`` direction key as ``"<from> - <to>"``.

        Point keys found in ``settlements`` are replaced with the UTF-8
        encoded ``'title'`` of the entry; unknown keys are shown as is.
        Assumes ``settlements`` maps point keys to dicts with a unicode
        ``'title'`` -- TODO confirm against the settlement list API.
        """
        from_point_key, to_point_key = direction.split('_')
        from_title = settlements.get(from_point_key)
        to_title = settlements.get(to_point_key)

        return '%s - %s' % (
            from_title['title'].encode('utf-8') if from_title else from_point_key,
            to_title['title'].encode('utf-8') if to_title else to_point_key,
        )

    def attach_csv(mail, file_name):
        """Attach the file ``file_name`` to ``mail``, re-encoded as cp1251.

        The file content is decoded as UTF-8 first; the attachment is named
        after the base name of the file.
        """
        with open(file_name, 'r') as f:
            data = f.read().decode('utf-8')

        attachment = MIMEText(data.encode('cp1251'), 'csv', 'cp1251')

        attachment.add_header(
            'Content-Disposition', 'attachment',
            filename=os.path.basename(file_name)
        )

        mail.attach(attachment)

    def send_results(tmp_table, responce_table, options, top_100_list, storage_dir):
        """Write both CSV reports into ``storage_dir`` and mail them.

        :param tmp_table: table with partner/status/count rows.
        :param responce_table: table with per day/partner/direction average
            query times.
        :param options: parsed command-line options (``partner`` filters the
            first report, ``mailto`` is the recipient login).
        :param top_100_list: directions to report, in output order.
        :param storage_dir: directory the CSV files are written to.

        A failure to send the mail is logged and not re-raised.
        """
        results = defaultdict(lambda: defaultdict(int))
        statuses = set()

        for record in yt.read_table(tmp_table, format=yt.JsonFormat(), raw=False):
            partner = record['partner']
            status = record['status']
            count = int(record['count'])

            results[partner][status] += count

            statuses.add(status)

        sorted_statuses = sorted(statuses)

        results_file_name = os.path.join(
            storage_dir,
            'partners_%s.csv' % range_key
        )

        # The context manager closes the file even if writing a row fails.
        with open(results_file_name, 'w') as csvfile:
            csv_writer = csv.writer(csvfile, delimiter=';', quotechar='"', quoting=csv.QUOTE_ALL)
            header = [u'Partner'] + sorted_statuses
            csv_writer.writerow(header)

            for partner in sorted(results.keys()):
                if options.partner and options.partner != partner:
                    continue

                row = [partner]
                for status in sorted_statuses:
                    row.append(results[partner].get(status, 0))
                csv_writer.writerow(row)

        # Average reply time by day
        responce_result = {}
        partners = set()

        for record in yt.read_table(responce_table, format=yt.JsonFormat(), raw=False):
            partner = record['partner']
            direction = record['direction']
            iso_eventdate = record['iso_eventdate']
            avg_query_time = record['avg_query_time']

            partners.add(partner)

            # setdefault keeps the first value seen for a
            # (date, partner, direction) triple.
            by_partner = responce_result.setdefault(iso_eventdate, {})
            by_direction = by_partner.setdefault(partner, {})
            by_direction.setdefault(direction, avg_query_time)

        responce_results_file_name = os.path.join(storage_dir, 'partners_by_day_%s.csv' % range_key)
        # NOTE(review): TLS certificate verification is disabled for this call.
        settlements = requests.get('%s/settlement/list/' % settings.RASP_API_HOST, verify=False).json()

        with open(responce_results_file_name, 'w') as csvfile:
            csv_writer = csv.writer(csvfile, delimiter=';', quotechar='"', quoting=csv.QUOTE_ALL)
            header = [u'Date', u'Partner', u'Direction', u'Average Query Time']
            csv_writer.writerow(header)

            for iso_eventdate in sorted(responce_result.keys()):
                for partner in partners:
                    # FIXME: sorted top
                    for direction in top_100_list:
                        try:
                            avg_query_time = responce_result[iso_eventdate][partner][direction]
                        except KeyError:
                            continue
                        row = [iso_eventdate, partner, human_direction(direction, settlements), avg_query_time]
                        csv_writer.writerow(row)

        mail = EmailMultiAlternatives(
            subject=u'Количество запросов к партнерам %s' % (range_key.replace('_', ' - ')),
            body=u'Даты: %s\n\nФайлы во вложении:\n\n' % (range_key.replace('_', ' - ')),
            from_email=settings.SERVER_EMAIL,
            to=['%s@yandex-team.ru' % options.mailto],
        )

        attach_csv(mail, responce_results_file_name)
        attach_csv(mail, results_file_name)

        try:
            mail.send()

        except Exception:
            # Best effort: the CSV files stay in storage_dir either way.
            log.exception("ERROR")

    def map_responce_time(top_100_list, airports, record):
        """Mapper: emit the query time of successful top-direction queries.

        Only ``got_reply`` records with a qid are considered; the direction
        is rebuilt from the qid's query key with both points passed through
        ``fix_point``.  Rows are emitted for the ``ticket`` service when the
        direction is contained in ``top_100_list``.
        """
        qid = record.get('qid')
        status = record.get('status')

        if qid and status == 'got_reply':
            when_created, service, t_code, query_key, lang = parse_qid(qid)
            from_id, to_id = query_key.split('_')[0:2]

            direction = '%s_%s' % (
                fix_point(from_id, airports),
                fix_point(to_id, airports),
            )

            if service == 'ticket' and direction in top_100_list:
                # Without an explicit partner, use the first name listed in
                # the importer's "[...]" part.
                partner = record.get('partner') or record['importer'].split('[')[1].split(',')[0].strip(']')
                yield {
                    'iso_eventdate': record['iso_eventtime'].split(' ')[0],
                    'partner': partner,
                    'direction': direction,
                    'query_time': record['query_time']
                }

    def reduce_responce_time(key, records):
        """Reducer: average query time (truncated to int) of one group."""
        query_time_count = 0
        query_time_sum = 0

        for r in records:
            query_time_count += 1
            query_time_sum += float(r['query_time'])

        yield {
            'partner': key['partner'],
            'direction': key['direction'],
            'iso_eventdate': key['iso_eventdate'],
            'avg_query_time': int(float(query_time_sum) / query_time_count)
        }

    def build_responce_time(top_100_list, airports):
        """Compute average reply times per partner/direction/day.

        Reads the partner query log tables for the requested date range and
        returns a temporary table with the averaged rows.
        """
        tmp_table = yt.create_temp_table()

        # The mapper only tests membership, once per record, so hand it a
        # set instead of the ordered list.
        top_directions = frozenset(top_100_list)

        yt.run_map(
            functools.partial(map_responce_time, top_directions, airports),
            source_table=logs_tables(
                '//home/rasp/logs/rasp-partners-query-log', options.start_date, options.end_date
            ),
            destination_table=tmp_table,
            format=yt.DsvFormat(),
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

        yt.run_sort(
            source_table=tmp_table,
            sort_by=['partner', 'direction', 'iso_eventdate']
        )

        yt.run_reduce(
            reduce_responce_time,
            tmp_table,
            tmp_table,
            format=yt.DsvFormat(),
            reduce_by=['partner', 'direction', 'iso_eventdate'],
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

        return tmp_table

    # START MAIN
    from email.mime.text import MIMEText

    # Django has to be configured before settings and project modules are
    # imported.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'travel.avia.stat_admin.local_settings')
    import django
    django.setup()
    from django.conf import settings
    from django.core.mail.message import EmailMultiAlternatives

    from travel.avia.stat_admin.lib.jobs import get_or_create_job_storage_dir
    from travel.avia.stat_admin.lib.yt_helpers import configure_wrapper
    from travel.avia.stat_admin.utils.query import parse_qid

    optparser = OptionParser(option_class=Yoption)

    log = logging.getLogger(__name__)

    optparser.add_option("-s", "--start_date", dest="start_date", type="date", help="date from")
    optparser.add_option("-e", "--end_date", dest="end_date", type="date", help="date to", default=datetime.now().date())
    optparser.add_option("-m", "--mailto", dest="mailto", type="email", help="Send resultts to")
    optparser.add_option("-p", "--partner", dest="partner", help="partner")
    optparser.add_option("-n", "--national_version", dest="national_version", help="national_version", default='ru')
    optparser.add_option("-j", "--job", dest="job", type="number", help="job ID")

    options, args = optparser.parse_args()

    check_options(options)

    log.info('Start')

    # Used in the CSV file names and in the mail subject/body.
    range_key = '%s_%s' % (options.start_date, options.end_date)

    log.info('Dates: %s', range_key.replace('_', ' - '))
    log.info('Email: %s', options.mailto)

    configure_wrapper(yt)

    log.info('Check output dir')
    storage_dir = get_or_create_job_storage_dir(options.job)

    log.info('Prepare data')

    yt_tables = logs_tables(
        '//home/rasp/logs/rasp-partners-query-log',
        options.start_date,
        options.end_date,
    )

    tmp_table = yt.create_temp_table(
        path=settings.TOP_DIRECTIONS_TMP_PATH,
        prefix='rasp_min_price_'
    )

    try:
        # NOTE(review): TLS certificate verification is disabled for this call.
        airports = requests.get('%s/station/airports/' % settings.RASP_API_HOST, verify=False).json()

        log.info('Build top')
        build_partners_queries(yt_tables, tmp_table, options.national_version)
        log.info('Save results positions')

        top_table = build_top_100(airports)
        # Directions ordered by descending search count, cut to the top 100.
        ranked_directions = sorted(get_top(top_table).items(), key=operator.itemgetter(1), reverse=True)
        top_100_list = [d[0] for d in ranked_directions][:100]
        responce_table = build_responce_time(top_100_list, airports)

        log.info('Send results to email')
        send_results(tmp_table, responce_table, options, top_100_list, storage_dir)

        log.info('Done')
    finally:
        # Remove the explicitly placed temp table even when a step fails.
        yt.remove(tmp_table)


class Yoption(Option):
    """optparse ``Option`` extended with ``date``, ``number`` and ``email`` types.

    ``date`` values are converted by ``check_date`` and ``number`` values by
    ``check_number``.  ``email`` is only declared as a valid type name; no
    checker is registered for it in this class.
    """
    TYPES = Option.TYPES + ("date", "number", "email")
    # Build a new dict so the checker table of the base class is not mutated.
    TYPE_CHECKER = dict(
        Option.TYPE_CHECKER,
        date=check_date,
        number=check_number
    )


# Run the report only when the file is executed as a script.
if __name__ == '__main__':
    main()
