# -*- coding: utf-8 -*-
import os
os.environ['MPFS_PACKAGE'] = 'disk'

import sys
# DO NOT REMOVE this import.
# __main__.__file__ is the path of the script being run; Job.report_name
# derives the report name from it.
import __main__
import yt.wrapper as yt
from datetime import datetime, timedelta
from optparse import OptionParser, Option
from mpfs.core.mrstat.stat_utils import StatPublisher

import mpfs.core.mrstat as mr

from mpfs.config import settings

# Absolute path of the directory that contains the mrstat package.
MRSTAT_DIR = os.path.dirname(mr.__file__)

# Values taken from the "mrstat" section of the MPFS settings.
MRSTAT_ENABLED = settings.mrstat['enabled']
MRSTAT_PUBLISH_USER = settings.mrstat['publish_user']
MRSTAT_PUBLISH_HOST = settings.mrstat['publish_host']
MRSTAT_YT_TOKEN = settings.mrstat['yt_token']
MRSTAT_YT_PROXY = settings.mrstat['yt_proxy']

# Point the yt.wrapper client at the configured proxy and token.
yt.config['proxy']['url'] = MRSTAT_YT_PROXY
yt.config['token'] = MRSTAT_YT_TOKEN


class JobError(Exception):
    """Base class for all job errors."""


class OperationError(JobError):
    """A specific operation did not complete successfully."""


class ArgumentError(JobError):
    """An invalid argument was supplied."""


class TableNotExistsError(JobError):
    """A required table does not exist."""


class Job(object):
    """Runs the sequence of operations that make up one report.

    Subclasses override ``operations`` with a sequence of dicts. Each dict
    has a ``'type'`` key that selects the handler, plus type-specific keys:

    * ``'map'`` / ``'reduce'``: ``'cmd'``, ``'src'``, ``'dst'`` and
      optionally ``'files'``, ``'yt_files'``, ``'jobcount'``, ``'memlimit'``,
      ``'outputformat'``.
    * ``'merge'``: ``'src'`` (list of templates), ``'dst'`` (one template).
    * ``'exist'``: ``'src'`` (list of table templates).
    * ``'publish'``: ``'config_path'``, ``'yt_path'``, ``'report_path'``,
      ``'report_title'``.
    * ``'cmd'``: ``'func'`` and optionally ``'args'``, ``'kwargs'``.

    String templates are expanded with ``str.format`` using ``report_vars``
    (and ``date`` for path templates, see ``build_pathes``).
    """

    # Descriptions of the MR steps. Overridden in subclasses.
    operations = ()
    # Value passed to mapreduce-yt as ``-server``.
    yt_server = MRSTAT_YT_PROXY
    # Environment variables passed through to mapreduce-yt.
    binary_env = {
        'YT_PRINT_STDERRS': 0,
        'YT_DETACHED': 0,
        'YT_SMART_FORMAT': 1,
        'YT_TOKEN': MRSTAT_YT_TOKEN,
    }

    def __init__(self, dates=None, scale='daily'):
        """Create a job for the given dates and scale.

        :param dates: ``None`` (yesterday is used), a single ``YYYY-MM-DD``
            string, or a list/tuple of such strings.
        :param scale: one of ``'hourly'``, ``'daily'``, ``'weekly'``,
            ``'monthly'``.
        :raises ArgumentError: if ``dates`` has an unsupported type or
            ``scale`` is not one of the allowed values.
        :raises ValueError: if a date string does not match ``%Y-%m-%d``.
        """
        if dates is None:
            self.dates = [datetime.now().date() - timedelta(days=1)]
        elif isinstance(dates, basestring):
            # basestring, not str: unicode dates are valid input as well.
            self.dates = [self._parse_date(dates)]
        elif isinstance(dates, (list, tuple)):
            self.dates = [self._parse_date(date) for date in dates]
        else:
            raise ArgumentError("Unsupported dates value: %r" % (dates,))

        if scale not in ('hourly', 'daily', 'weekly', 'monthly'):
            raise ArgumentError("Unsupported scale: %r" % (scale,))
        self.scale = scale

    @staticmethod
    def _parse_date(value):
        """Convert a ``YYYY-MM-DD`` string to a ``datetime.date``."""
        return datetime.strptime(value, "%Y-%m-%d").date()

    @property
    def report_vars(self):
        """Variables available to every template of the report."""
        return {
            'first_date': self.dates[0],
            'prev_date': self.dates[0] - timedelta(days=1),
            'scale': self.scale,
            'server': self.yt_server,
            'mrstat_dir': MRSTAT_DIR,
            'report_name': self.report_name,
            # Rendered as "K=V K=V ..." and placed in front of the command.
            'binary_env': " ".join(["%s=%s" % (k, str(v)) for k, v in self.binary_env.iteritems()]),
            'yt_home_prefix': '//home/mpfs-stat'
        }

    @property
    def report_name(self):
        """Report name, equal to the name of the report directory.

        Computed once from the directory of the script being run
        (``__main__.__file__``) and cached on the instance.
        """
        if hasattr(self, '_report_name'):
            return self._report_name
        script_path = os.path.abspath(__main__.__file__)
        self._report_name = os.path.basename(os.path.dirname(script_path))
        return self._report_name

    def run(self, publish_only=False):
        """Execute ``operations`` in order.

        Does nothing except print a notice when MRstat is disabled in the
        settings. With ``publish_only=True`` the ``map``, ``reduce``,
        ``exist`` and ``merge`` operations are skipped; ``publish`` and
        ``cmd`` operations still run.

        :raises ArgumentError: on an operation with an unknown type.
        """
        if not MRSTAT_ENABLED:
            print('MRstat calculation is disabled. Check "settings->mrstat->enabled"')
            return

        for operation in self.operations:
            op_type = operation['type']
            if op_type in ('map', 'reduce'):
                if not publish_only:
                    self.process_mr_operation(operation)
            elif op_type == 'publish':
                self.process_publish_operation(operation)
            elif op_type == 'exist':
                if not publish_only:
                    self.process_exist_operation(operation)
            elif op_type == 'merge':
                if not publish_only:
                    self.process_merge_operation(operation)
            elif op_type == 'cmd':
                self.process_cmd(operation)
            else:
                raise ArgumentError("Unknown operation type: %r" % (op_type,))

    def process_cmd(self, operation):
        """Call ``operation['func']`` with template-expanded arguments.

        String values in ``'args'`` and ``'kwargs'`` are expanded with
        ``report_vars``; other values are passed as is. The operation
        description itself is not modified, so the templates stay intact
        for later runs and for other instances sharing ``operations``.
        """
        func = operation['func']
        report_vars = self.report_vars

        def expand(value):
            if isinstance(value, basestring):
                return value.format(**report_vars)
            return value

        args = [expand(value) for value in operation.get('args', [])]
        kwargs = dict((key, expand(value))
                      for key, value in operation.get('kwargs', {}).iteritems())

        print("Exec cmd: func: %s, args: %s, kwargs: %s." % (func, args, kwargs))
        func(*args, **kwargs)

    def process_merge_operation(self, operation):
        """Merge the ``'src'`` tables into the ``'dst'`` table.

        :raises OperationError: if the merge command exits with a non-zero
            status (same contract as ``process_mr_operation``).
        """
        src = self._join_flags("-src", operation['src'])
        # Only the first expanded destination path is used.
        dst = "-dst %s" % self.build_pathes([operation['dst']])[0]
        cmd = ("{binary_env} "
               "/usr/bin/mapreduce-yt -server {server} "
               "-merge -mode unordered {src} {dst} -subkey"
               "").format(src=src,
                          dst=dst,
                          **self.report_vars)
        print(cmd)
        exit_code = os.system(cmd)
        if exit_code != 0:
            raise OperationError("Failed cmd: %s" % cmd)

    def process_exist_operation(self, operation):
        """Check that every table listed in ``'src'`` exists.

        For each template only the first expanded path is checked.

        :raises TableNotExistsError: if the check prints ``false``.
        :raises OperationError: if the check prints anything other than
            ``true`` or ``false``.
        """
        for table_template in operation['src']:
            table = self.build_pathes([table_template])[0]
            print("Check table: %s" % (table,))
            cmd = ("{binary_env} "
                   "/usr/bin/mapreduce-yt -server {server} "
                   "-exists {table}"
                   "").format(table=table,
                              **self.report_vars)
            pipe = os.popen(cmd)
            try:
                result = pipe.read()
            finally:
                # Do not leave the pipe open when reading fails.
                pipe.close()

            if result == 'true\n':
                continue
            if result == 'false\n':
                raise TableNotExistsError("Table does not exist: %s" % (table,))
            raise OperationError(
                "Unexpected output %r while checking table: %s" % (result, table))

    def process_publish_operation(self, operation):
        """Read the result table and upload it through ``StatPublisher``.

        Every value of every row is split on tabs into ``key=value`` pairs,
        and the pairs of one row are merged into one report document.
        """
        config_path = self.build_pathes([operation['config_path']])[0]
        yt_path = self.build_pathes([operation['yt_path']])[0]

        report_data = []
        for row in yt.read_table(yt_path, format='json'):
            report_doc = {}
            for parts in row.values():
                report_doc.update(dict(kv.split('=', 1) for kv in parts.split('\t')))
            report_data.append(report_doc)
        print("Publish to: https://stat.yandex-team.ru/%s?scale=%s" % (operation['report_path'], self.scale[0]))

        # Close the config file deterministically instead of leaking it.
        with open(config_path) as config_file:
            config = config_file.read()
        StatPublisher.create_and_upload(
            operation['report_path'],
            operation['report_title'],
            config,
            report_data,
            scale=self.scale
        )

    def process_mr_operation(self, operation):
        """Run a ``map`` or ``reduce`` operation through mapreduce-yt.

        :raises OperationError: if the command exits with a non-zero status.
        """
        src = self._join_flags("-src", operation['src'])
        dst = self._join_flags("-dst", operation['dst'])
        files = ''
        if 'files' in operation:
            files = self._join_flags("-file", operation['files'])
        yt_files = ''
        if 'yt_files' in operation:
            yt_files = self._join_flags("-ytfile", operation['yt_files'])

        # A tuple keeps the order of the pass-through flags fixed.
        as_is_options = " ".join(
            "-%s %s" % (option, operation[option])
            for option in ('jobcount', 'memlimit', 'outputformat')
            if option in operation)
        cmd = ("{binary_env} "
               "/usr/bin/mapreduce-yt -server {server} "
               "-subkey -{type} '{cmd}' {src} {dst} {files} {yt_files} {as_is_options} "
               "-ytspec '{{\"{spec_type}\":{{\"environment\":{{\"MRPROC_SCALE\":\"{scale}\"}}}}}}' "
               "").format(src=src,
                          dst=dst,
                          files=files,
                          yt_files=yt_files,
                          cmd=operation['cmd'],
                          spec_type={'map': 'mapper', 'reduce': 'reducer'}[operation['type']],
                          type=operation['type'],
                          as_is_options=as_is_options,
                          **self.report_vars)
        print(cmd)
        exit_code = os.system(cmd)
        if exit_code != 0:
            raise OperationError("Failed cmd: %s" % cmd)

    def _join_flags(self, flag, templates):
        """Expand ``templates`` and render them as ``"<flag> <path> ..."``."""
        return " ".join(["%s %s" % (flag, path) for path in self.build_pathes(templates)])

    def build_pathes(self, templates):
        """Expand path templates for every date of the job.

        Each template is formatted once per date with ``date`` and
        ``report_vars``. Duplicate paths are dropped; the order of first
        occurrence is kept.

        :param templates: iterable of ``str.format`` templates.
        :return: list of unique expanded paths.
        """
        seen = set()
        result = list()
        for template in templates:
            for date in self.dates:
                path = template.format(date=date, **self.report_vars)
                if path not in seen:
                    result.append(path)
                    seen.add(path)
        return result


def parse_args():
    """Parse the command line of a job script.

    The date range starts at ``--start_date`` (yesterday by default). With
    ``--end_date`` it covers every day up to and including that date;
    otherwise with ``--delta N`` it covers ``|N|`` days starting at
    ``start_date`` and going forward (``N > 0``) or backward (``N < 0``).

    :return: tuple ``(dates, scale, publish_only)`` where ``dates`` is a
        sorted list of ``YYYY-MM-DD`` strings, ``scale`` is one of
        ``'hourly'``, ``'daily'``, ``'weekly'``, ``'monthly'`` and
        ``publish_only`` is a bool.
    :raises ValueError: if a date does not match ``%Y-%m-%d``.
    """
    usage = "usage: %prog -h"

    option_list = (
        Option(
            '-c', '--scale',
            dest='scale',
            default='d',
            choices=['h', 'd', 'w', 'm'],
            help=u'Детализация отчета: "h"(часы), "d"(день), "w"(неделя), "m"(месяц)'
        ),
        Option(
            '--po',
            action='store_true',
            dest='publish_only',
            default=False,
            help=u'Запустить только операции публикации'
        ),
        Option(
            '-s', '--start_date',
            dest='start_date',
            help=u'Начальная дата',
            default=str(datetime.now().date() - timedelta(days=1)),
        ),
        Option(
            '-e', '--end_date',
            dest='end_date',
            help=u'Конечная дата',
        ),
        Option(
            '-d', '--delta',
            type='int',
            dest='delta',
            help=u'Отклонение от start_date',
        ),
    )

    parser = OptionParser(usage, option_list=option_list)
    # Skip the script name so it is not treated as a positional argument.
    options, _ = parser.parse_args(sys.argv[1:])

    start_dt = datetime.strptime(options.start_date, "%Y-%m-%d")
    # Keep the normalised form of the start date: strptime also accepts
    # non-padded input such as "2020-1-5", which would otherwise end up in
    # the result next to "2020-01-05".
    dates = {str(start_dt.date())}
    if options.end_date:
        end_dt = datetime.strptime(options.end_date, "%Y-%m-%d")
        offsets = range((end_dt - start_dt).days + 1)
    elif options.delta:
        # Integer step of +1/-1; avoids "/" which yields a float under
        # true division and is rejected by range().
        step = 1 if options.delta > 0 else -1
        offsets = range(0, options.delta, step)
    else:
        offsets = ()

    for offset in offsets:
        dates.add(str((start_dt + timedelta(days=offset)).date()))

    scale = {'h': 'hourly', 'd': 'daily', 'w': 'weekly', 'm': 'monthly'}[options.scale]
    return (sorted(dates), scale, options.publish_only)
