# -*- encoding: utf-8 -*-
import csv
import dateutil.parser
import functools
import logging
import os
import sys

from copy import copy
from datetime import datetime
from optparse import Option, OptionParser, OptionValueError

import yt.wrapper as yt

# Percentile ranks (in percent) computed for every partner's query times;
# each one becomes a `perc_<N>` field in the reduced table and a `<N>%`
# column in the CSV report.
PERCENTIL = [50, 75, 80, 85, 90, 95, 98, 99]


def logs_tables(path, from_date, to_date):
    """Return the tables under *path* whose name is a date inside the range.

    The last path component of every table found by ``yt.search`` is parsed
    as ``YYYY-MM-DD``; tables dated within ``[from_date, to_date]`` (both
    bounds inclusive) are returned in the order the search produced them.

    Raises:
        ValueError: if a table name does not match ``%Y-%m-%d``.
    """
    def _table_date(table_path):
        # The table name is whatever follows the final '/'.
        table_name = table_path.rsplit('/', 1)[-1]
        return datetime.strptime(table_name, '%Y-%m-%d').date()

    return [
        table_path
        for table_path in yt.search(path, node_type="table")
        if from_date <= _table_date(table_path) <= to_date
    ]


def check_number(option, opt, value):
    """optparse type checker for the custom ``number`` option type.

    Args:
        option: the Option instance being processed (unused).
        opt: the option string as given on the command line, e.g. ``-j``.
        value: the raw string value to convert.

    Returns:
        The value converted with ``int``.

    Raises:
        OptionValueError: if the value is not a valid integer literal.
    """
    try:
        return int(value)
    except ValueError:
        message = "option %s: bad int: %r" % (opt, value)
        raise OptionValueError(message)


def check_date(option, opt, value):
    """optparse type checker for the custom ``date`` option type.

    Args:
        option: the Option instance being processed (unused).
        opt: the option string as given on the command line, e.g. ``-s``.
        value: the raw string value to parse.

    Returns:
        The ``datetime.date`` part of the value parsed by
        ``dateutil.parser.parse``.

    Raises:
        OptionValueError: if the value cannot be parsed as a date.
    """
    try:
        parsed = dateutil.parser.parse(value)
    # dateutil documents OverflowError (numbers too large for a C integer)
    # in addition to ValueError; both mean "bad user input" here and must
    # surface as a normal option error rather than a traceback.
    except (ValueError, OverflowError):
        raise OptionValueError(
            "option %s: bad date: %r" % (opt, value))

    return parsed.date()


def check_options(options):
    """Validate the parsed command-line options, exiting on the first problem.

    Checks, in order: a start date is given, an end date is given, the start
    date is not later than the end date, and a mail recipient is given.
    On the first failed check the corresponding message is printed and the
    process exits with status 1.

    Args:
        options: the options object returned by ``OptionParser.parse_args``;
            its ``start_date``, ``end_date`` and ``mailto`` attributes are read.

    Returns:
        None when every check passes.
    """
    error = None

    if not options.start_date:
        error = u'Задайте начальную дату: -s'
    elif not options.end_date:
        error = u'Задайте конечную дату: -e'
    elif options.start_date > options.end_date:
        error = u'Конечная дата должна быть больше начальной'
    elif not options.mailto:
        error = u'Задайте получателя: -m'

    if error is not None:
        # Single-argument parenthesised form behaves the same as the print
        # statement on Python 2 and is also valid on Python 3.
        print(error)
        sys.exit(1)


def percentile(value_list, p):
    """Return the *p*-th percentile of *value_list*.

    The values are sorted and the element at index ``len * p // 100`` is
    returned, clamped to the last element so that ``p == 100`` yields the
    maximum.

    Args:
        value_list: a non-empty iterable of comparable values.
        p: the percentile rank, expected in the range 0..100.

    Returns:
        The element of *value_list* found at the percentile position.

    Raises:
        IndexError: if *value_list* is empty.
    """
    sorted_value_list = sorted(value_list)
    size = len(sorted_value_list)

    # Multiply before dividing: dividing by 100 first floors to 0 for any
    # list shorter than 100 items, which would always select the minimum.
    # Floor division keeps the index an int under both division semantics.
    position = min(size * p // 100, size - 1)

    return sorted_value_list[position]


def average(value_list):
    """Return the arithmetic mean of *value_list*.

    The result is ``sum(value_list) / len(value_list)``, so the kind of
    division (and therefore whether integer inputs are truncated) is
    whatever the ``/`` operator does for the operands on the running
    interpreter.

    Raises:
        ZeroDivisionError: if *value_list* is empty.
    """
    total = sum(value_list)
    count = len(value_list)
    return total / count


def main():
    """Build a per-partner query-time report from YT logs and e-mail it as CSV.

    Steps:
      1. Parse and validate the command-line options.
      2. Select the daily tables of
         ``//home/rasp/logs/rasp-partners-query-log`` inside the date range.
      3. Run a map / sort / reduce pipeline over a temporary YT table that
         leaves one row per partner with count, min, max, average and the
         ``PERCENTIL`` percentiles of the query time.
      4. Write those rows to a CSV file in the job storage directory and
         send it as an attachment to ``<mailto>@yandex-team.ru``.

    The temporary table is removed when the pipeline finishes, whether it
    succeeded or raised.
    """
    def map_queries(national_version, record):
        """Mapper: emit ``{partner, query_time}`` rows for successful replies.

        Records are dropped when the qid cannot be parsed, when the national
        version encoded in the qid differs from *national_version*, or when
        the record is not a ``got_reply`` with a truthy ``variants_len``.
        """
        qid = record.get('qid', '')
        try:
            # Only query_key is needed; the 5-way unpacking is kept so that a
            # result of unexpected length is rejected via ValueError.
            _, _, _, query_key, _ = parse_qid(qid)
        except ValueError:
            return

        try:
            qid_national_version = query_key.split('_')[8]
        except (KeyError, IndexError):
            return

        if qid_national_version != national_version:
            return

        if record.get('status') != 'got_reply' or not record.get('variants_len'):
            return

        if 'importer' in record:
            # The partner list is the bracketed part of "<importer>[<partners>]".
            try:
                partner = record['importer'].split('[')[1].strip(']')
            except IndexError:
                return
        else:
            partner = record['partner']

        if partner.startswith('dohop'):
            partner = 'dohop'

        query_time = int(float(record['query_time']))

        for p in partner.split(','):
            yield {
                'partner': p,
                'query_time': query_time,
            }

    def uniq_qids(key, records):
        """Reducer: keep only the record with the greatest ``unixtime``.

        Done in a single pass so the whole group never has to be held in
        memory. Among records with equal ``unixtime`` the last one seen wins.
        """
        latest = None
        latest_time = None

        for r in records:
            r_time = int(r['unixtime'])
            if latest is None or r_time >= latest_time:
                latest = r
                latest_time = r_time

        if latest is not None:
            yield latest

    def reduce_queries(key, records):
        """Reducer: aggregate one partner's query times into a statistics row."""
        all_query_times = [r['query_time'] for r in records]

        record = {
            'partner': key['partner'],
            'min_time': min(all_query_times),
            'max_time': max(all_query_times),
            'avg_time': average(all_query_times),
            'count': len(all_query_times)
        }

        for p in PERCENTIL:
            record['perc_%s' % p] = percentile(all_query_times, p)

        yield record

    def map_dohop(record):
        """Mapper: collapse every ``dohop*`` importer into ``dohop[dohop]``."""
        r = record.copy()

        if r.get('importer', '').startswith('dohop'):
            r['importer'] = 'dohop[dohop]'

        yield r

    def send_results(tmp_table, options, storage_dir):
        """Dump *tmp_table* to a CSV file in *storage_dir* and e-mail it.

        A failure while sending the mail is logged, not raised.
        """
        results_file_name = os.path.join(
            storage_dir,
            'partners_%s.csv' % range_key
        )
        date_range = range_key.replace('_', ' - ')

        # The context manager closes the file even if reading the table or
        # converting a value fails half-way through.
        with open(results_file_name, 'w') as csvfile:
            csv_writer = csv.writer(csvfile, delimiter=';', quotechar='"', quoting=csv.QUOTE_ALL)
            header = [u'Partner', 'count', 'min', 'max', 'average'] + ['%s%%' % p for p in PERCENTIL]
            csv_writer.writerow(header)

            for record in yt.read_table(tmp_table, format=yt.JsonFormat(), raw=False):
                row = [
                    record['partner'],
                    int(record['count']),
                    int(float(record['min_time'])),
                    int(float(record['max_time'])),
                    int(float(record['avg_time'])),
                ]
                row.extend(int(float(record.get('perc_%s' % p))) for p in PERCENTIL)

                csv_writer.writerow(row)

        mail = EmailMultiAlternatives(
            subject=u'Время ответов от парнеров %s' % date_range,
            body=u'Даты: %s\n\nФайлы во вложении:\n\n' % date_range,
            from_email=settings.SERVER_EMAIL,
            to=['%s@yandex-team.ru' % options.mailto],
        )

        with open(results_file_name, 'r') as f:
            data_tab = f.read().decode('utf-8')

        # The attachment is re-encoded from UTF-8 to cp1251.
        attachment = MIMEText(data_tab.encode('cp1251'), 'csv', 'cp1251')

        attachment.add_header(
            'Content-Disposition', 'attachment',
            filename=os.path.basename(results_file_name)
        )

        mail.attach(attachment)

        try:
            mail.send()

        except Exception:
            log.exception("ERROR")

    # START MAIN
    from email.mime.text import MIMEText

    # Django must be configured before the project modules below are imported.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'travel.avia.stat_admin.local_settings')
    import django
    django.setup()

    from django.conf import settings
    from django.core.mail.message import EmailMultiAlternatives

    from travel.avia.stat_admin.lib.jobs import get_or_create_job_storage_dir
    from travel.avia.stat_admin.lib.yt_helpers import configure_wrapper
    from travel.avia.stat_admin.utils.query import parse_qid

    optparser = OptionParser(option_class=Yoption)

    log = logging.getLogger(__name__)

    optparser.add_option("-s", "--start_date", dest="start_date", type="date", help="date from")
    optparser.add_option("-e", "--end_date", dest="end_date", type="date", help="date to", default=datetime.now().date())
    optparser.add_option("-m", "--mailto", dest="mailto", type="email", help="Send resultts to")
    optparser.add_option("-p", "--partner", dest="partner", help="partner")
    optparser.add_option("-n", "--national_version", dest="national_version", help="national_version", default='ru')
    optparser.add_option("-j", "--job", dest="job", type="number", help="job ID")

    options, args = optparser.parse_args()

    check_options(options)

    log.info('Start')

    # Used both in log/mail texts and in the name of the CSV file.
    range_key = '%s_%s' % (options.start_date, options.end_date)

    log.info('Dates: %s', range_key.replace('_', ' - '))
    log.info('Email: %s', options.mailto)

    configure_wrapper(yt)

    log.info('Check output dir')
    storage_dir = get_or_create_job_storage_dir(options.job)

    log.info('Prepare data')

    source_tables = logs_tables(
        '//home/rasp/logs/rasp-partners-query-log',
        options.start_date,
        options.end_date,
    )
    tmp_table = yt.create_temp_table()

    try:
        # Dohop logs every batch separately, so its importers are merged
        # into a single one before de-duplication.
        yt.run_map(
            map_dohop,
            source_table=source_tables,
            destination_table=tmp_table,
            format=yt.DsvFormat(),
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

        yt.run_sort(
            source_table=tmp_table,
            destination_table=tmp_table,
            sort_by=['qid', 'importer']
        )

        # Keep one (the latest) record per (qid, importer).
        yt.run_reduce(
            uniq_qids,
            tmp_table,
            tmp_table,
            format=yt.JsonFormat(),
            reduce_by=['qid', 'importer'],
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
            memory_limit=3 * 1024 * 1024 * 1024,
        )

        yt.run_map(
            functools.partial(map_queries, options.national_version),
            source_table=tmp_table,
            destination_table=tmp_table,
            format=yt.JsonFormat(),
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB},
        )

        yt.run_sort(
            source_table=tmp_table,
            sort_by=['partner']
        )

        yt.run_reduce(
            reduce_queries,
            tmp_table,
            tmp_table,
            format=yt.JsonFormat(),
            reduce_by='partner',
            spec={"data_size_per_job": settings.YT_DATA_SIZE_PER_JOB / 3},
            memory_limit=3 * 1024 * 1024 * 1024,
        )

        send_results(tmp_table, options, storage_dir)

        log.info('Done')

    finally:
        # Do not leave the temporary table behind when a step fails.
        yt.remove(tmp_table)


class Yoption(Option):
    """optparse Option extended with the ``date``, ``number`` and ``email`` types.

    ``date`` values are converted by ``check_date`` and ``number`` values by
    ``check_number``. ``email`` is only registered as a valid type name; no
    checker is attached to it here.
    """
    TYPES = Option.TYPES + ("date", "number", "email")
    # Build a new mapping so the base class's checker table is not mutated.
    TYPE_CHECKER = dict(
        Option.TYPE_CHECKER,
        date=check_date,
        number=check_number,
    )


# Run the report only when the file is executed as a script, not on import.
if __name__ == '__main__':

    main()
