# -*- coding: utf-8 -*-
import csv
import logging
from multiprocessing.dummy import Pool
from os.path import join
from io import StringIO
import pandas as pd
import requests
import traceback as tb
from yt import wrapper as yt
from yql.api.v1.client import YqlClient
from yql.client.list import YqlListRunningRequest
from yql.client.operation import YqlOperationResultsRequest, YqlOperationShareIdRequest
from yql.client.parameter_value_builder import YqlParameterValueBuilder as ValB

from const import CONFIG, SECRET
from robot.errs import BigDataError, WorkerError, YqlError
from sources import make_report_mini_sources
from utils import create_s3_connection, normalize_login

logger = logging.getLogger(__name__)


def calculate_mini_sources(task):
    """Run a report worker for every report of *task* on a small thread pool.

    Each key of ``task.reports`` is handed to :func:`report_worker` together
    with the matching entry of ``task.mini_src``; up to three reports are
    processed concurrently.

    :param task: task object exposing ``reports`` and ``mini_src`` mappings.
    :raises WorkerError: if at least one worker reported a status other than 'OK'.
    """
    pool = Pool(3)
    try:
        # Drain the iterator completely so every worker has finished (and posted its
        # own failure comment, if any) before the statuses are inspected.
        results = list(pool.imap_unordered(
            lambda report_name: report_worker(task, report_name, task.mini_src[report_name]),
            task.reports.keys()))
    finally:
        # Without close()/join() the pool's threads would outlive this call.
        pool.close()
        pool.join()

    if any(status != 'OK' for status, _worker in results):
        raise WorkerError('some of report workers is not OK')


def report_worker(task, report_name, report_mini_src):
    """Build the missing mini sources of one report and report failures on the issue.

    Logins that already have an entry in *report_mini_src* are skipped; when
    nothing is left to do, no YQL query is started at all. Errors never
    propagate as ``Exception``: they are logged, turned into a 'FAIL' status
    and reported through ``task.stop_progress``.

    :param task: parent task (provides ``params``, ``issue``, ``debug``,
        ``was_in_progress``, ``stop_progress`` and ``get_issue``).
    :param report_name: name of the report to calculate.
    :param report_mini_src: mapping login -> already built mini source for this report.
    :return: tuple ``(status, worker)``; status is 'OK' or 'FAIL', worker is the
        :class:`YQLWorker` used, or ``None`` if none was created.
    """
    status, worker = 'OK', None
    try:
        # Membership is tested on the mapping itself instead of a materialised key list.
        logins = [login for login in task.params['logins'] if login not in report_mini_src]
        if not logins:
            logger.debug('%s: all mini sources for %s-report ready', task.issue.key, report_name)
            return status, worker

        worker = YQLWorker(report_name=report_name, logins=logins, parent=task)
        worker.configure_query()
        # The "query is running" comment is posted only outside debug mode and only
        # when the task was not already in progress.
        if not task.debug and not task.was_in_progress:
            task.issue.comments.create(text=u'Выполняется %%{0}%%: {1}\nПромежуточные данные будут тут: '
                                            u'((https://yt.yandex-team.ru/hahn/navigation?path={2} {2}))'
                                       .format(report_name, worker.get_public_url(), worker.path))
        worker.get_results()
        worker.make_mini_sources()

    except YqlError as exc:
        # The YQL operation finished unsuccessfully: attach its public URL and errors.
        status = 'FAIL'
        logger.error('%s: %s-report\n%s', task.issue.key, report_name, exc.message)
        task.stop_progress('need_info',
                           comment={'text': u'Завершился ошибкой %%%%%s%%%%: %s.\n<{Подробнее:\n'
                                            u'%%%%\n%s\n%%%%\n}>' % (report_name, exc.pub_url, exc.errs),
                                    'summonees': ['vbatraev']})
    except BigDataError as exc:
        # The result exceeds the configured row limit: tag the issue and ask the
        # author to simplify the request.
        status = 'FAIL'
        logger.error('%s: %s-report\n%s', task.issue.key, report_name, exc.message)

        if not task.debug:
            issue = task.get_issue()
            issue.update(tags=issue.tags + ['too_much_data'])
        task.stop_progress('need_info', comment={
            'text': u'{issue} (%%{report_name}%%): слишком много данных, для сборки отчётов не хватило выделенного '
                    u'времени.\n\nПожалуйста, упростите задание:\n1. Сократите количество заказанных дней\n'
                    u'2. Создайте отдельную задачу на каждый логин, если их несколько\n3. Используйте опцию '
                    u'**Дополнительно ограничить кампаниями**'.format(issue=task.issue.key, report_name=report_name),
            'summonees': [task.issue.createdBy.login, 'vbatraev']
        })
    except Exception:
        # Catch-all boundary of the worker: log the traceback and publish it on the
        # issue. ``Exception`` rather than a bare ``except`` so that SystemExit and
        # KeyboardInterrupt are not swallowed.
        status = 'FAIL'
        exc_head = '%s: %s-report' % (task.issue.key, report_name)
        logger.exception(exc_head)
        task.stop_progress('need_info', comment={'text': u'Что-то сломалось...<{Подробнее:\n%%%%\n%s\n%s\n%%%%\n}>'
                                                         % (exc_head, tb.format_exc()),
                                                 'summonees': ['vbatraev']})

    logger.info('%s: %s-report worker %s', task.issue.key, report_name, status)
    return status, worker


class YQLWorker(object):
    """Runs the YQL query of one report and loads its result tables from YT.

    Typical call order (see ``report_worker``): ``configure_query()`` ->
    ``get_results()`` -> ``make_mini_sources()``.
    """

    def __init__(self, report_name=None, logins=None, parent=None):
        """Download the query text and prepare query parameters and clients.

        :param report_name: key of the report in ``parent.reports``.
        :param logins: logins the report has to be calculated for.
        :param parent: owning task; ``reports``, ``params`` and ``issue`` are read here.
        :raises Exception: if the query text could not be downloaded.
        """
        super(YQLWorker, self).__init__()
        self.report_name = report_name
        self.logins = logins
        self.parent = parent

        report_cfg = parent.reports[report_name]
        self.arc_name = report_cfg['arc_name']
        self.rev = report_cfg['rev']
        self.syntax_version = report_cfg['syntax_version']

        self.query = self._get_query()
        self.query_params = ValB.build_json_map({
            '$DATE_BEGIN': ValB.make_string(report_cfg['first_date']),
            '$DATE_FINISH': ValB.make_string(report_cfg['last_date']),
            '$LOGINS': ValB.make_list([ValB.make_string(login) for login in logins]),
            '$CAMPAIGN_IDS_WL': ValB.make_list([ValB.make_uint64(cid) for cid in parent.params['cids']]),
            # The login from SECRET is passed here, not the issue author's
            # (parent.issue.createdBy.login).
            '$TASK_AUTHOR_LOGIN': ValB.make_string(SECRET['PASSPORT']['login']),
            '$YT_BASE_PATH': ValB.make_string(CONFIG['yt_tables_path']),
            '$YT_TASK_FOLDER': ValB.make_string(parent.issue.key)
        })
        # YT folder that holds the result tables of this task.
        self.path = '%s/%s' % (CONFIG['yt_tables_path'], self.parent.issue.key)
        self.client = YqlClient(token=SECRET['token'])

        self.yt_client = yt.YtClient(token=SECRET['token'], proxy='hahn')

        self.request, self.result = None, None
        # '<report_name>.<info|data|errors>' -> pandas.DataFrame, filled by get_results().
        self.res_tables = {}

    def _get_query(self):
        """Download the query text of ``self.arc_name`` at revision ``self.rev``.

        Up to four attempts are made; an attempt counts as failed on a transport
        error, a non-success HTTP status or an empty body.

        :return: query text as unicode.
        :raises Exception: if no attempt produced a non-empty query.
        """
        url = CONFIG['queries_path'].format(report_name=self.arc_name, rev=self.rev)
        headers = {'Authorization': 'OAuth %s' % SECRET['token']}
        attempts = 4

        for attempt in range(1, attempts + 1):
            try:
                # NOTE(review): verify=False disables TLS certificate verification while an
                # OAuth token is being sent, and no timeout is set - confirm both are intended.
                response = requests.get(url=url, headers=headers, verify=False)
            except requests.RequestException:
                logger.warning('%s: %s-report query download failed (attempt %i/%i)',
                               self.parent.issue.key, self.report_name, attempt, attempts, exc_info=True)
                continue

            if not response.ok:
                # An error page must never be submitted to YQL as if it were the query.
                logger.warning('%s: %s-report query download got HTTP %s (attempt %i/%i)',
                               self.parent.issue.key, self.report_name, response.status_code,
                               attempt, attempts)
                continue

            query = response.content.decode('utf-8')
            if query:
                return query

        raise Exception('can\'t get revision')

    def configure_query(self):
        """Attach to an already running request for this issue or start a new one.

        The resulting request object is stored in ``self.request``.
        """
        request = self._get_request()

        if not request:
            request = self.client.query(self.query,
                                        title=u'%s_YQL' % self.parent.issue.key, syntax_version=self.syntax_version)
            request.run(parameters=self.query_params)
        else:
            request.run()

        self.request = request
        logger.info('%s: %s-report run %s (rev=%s)', self.parent.issue.key, self.report_name, self.get_public_url(),
                    self.rev)

    def _get_request(self):
        """Look for a running YQL operation whose title contains the issue key.

        :return: ``YqlOperationResultsRequest`` for the first match, ``None`` if there is none.
        """
        running = YqlListRunningRequest()
        running.run()

        issue_key = self.parent.issue.key
        # NOTE(review): this is a substring match, and configure_query() titles every
        # report of an issue identically ('<issue key>_YQL'), so reports of the same
        # issue cannot be told apart here - confirm this is intended.
        for req in running.json.get('result', []):
            # A missing or None title must not break the scan with a TypeError.
            title = req.get('title') or ''
            if issue_key in title:
                logger.debug('%s: running %s-report request found', issue_key, self.report_name)
                return YqlOperationResultsRequest(req.get('id'))

        logger.debug('%s: running %s-report request not found', issue_key, self.report_name)
        return None

    def get_results(self):
        """Wait for the request and load the info/data/errors tables into ``self.res_tables``.

        :raises YqlError: if the YQL result is not successful.
        :raises BigDataError: if the data table has more rows than ``CONFIG['data_limit']``.
        """
        self.result = self.request.get_results()
        if not self.result.is_success:
            raise YqlError(
                'result isn\'t success!', errs=self.result.errors, pub_url=self.get_public_url())

        # The row limit is checked before anything is downloaded.
        data_path = '{table_path}/{report_name}.data'.format(table_path=self.path, report_name=self.report_name)
        if self.yt_client.row_count(data_path) > CONFIG['data_limit']:
            raise BigDataError('data rows limit %i exceeded!' % CONFIG['data_limit'])

        columns = CONFIG['REPORTS'][self.report_name]['columns']
        for table_ext in ['info', 'data', 'errors']:
            table_name = '%s.%s' % (self.report_name, table_ext)
            table_path = '%s/%s' % (self.path, table_name)

            if table_ext == 'data':
                # The data table is read raw as schemaful DSV (missing values printed
                # as 'null') and parsed by pandas' C parser.
                table = self.yt_client.read_table(
                    table_path,
                    raw=True,
                    format=yt.SchemafulDsvFormat(columns=columns,
                                                 attributes={'missing_value_mode': 'print_sentinel',
                                                             'missing_value_sentinel': 'null'}
                                                 )
                )
                df = pd.read_table(StringIO('\n'.join(table).decode('utf-8')),
                                   names=columns,
                                   quoting=csv.QUOTE_NONE, encoding='utf-8', low_memory=True,
                                   na_values=('Null', 'null', 'NULL'), na_filter=True, engine='c')

                df['Login'] = df['Login'].apply(normalize_login)

            else:
                table = self.yt_client.read_table(table_path, format=yt.format.YsonFormat())
                df = pd.DataFrame(list(table))

            self.res_tables[table_name] = df

    def get_public_url(self):
        """Request a share id for the current operation and return its public URL."""
        share_req = YqlOperationShareIdRequest(self.request.operation_id)
        share_req.run()
        return 'https://yql.yandex-team.ru/Operations/{}'.format(share_req.json)

    def make_mini_sources(self):
        """Create a tar.gz per report/login pair, upload it to MDS and register it on the task."""
        logger.info('%s: make mini-sources for %s-report', self.parent.issue.key, self.report_name)

        conn = create_s3_connection()
        try:
            bucket = conn.get_bucket(CONFIG['S3_API']['mini_bucket'])
            for login, obj in make_report_mini_sources(self.parent, self, bucket).iteritems():
                self.parent.mini_src[self.report_name][login] = obj
                logger.debug('%s: %s created', self.parent.issue.key, obj.key)
        finally:
            # Release the connection even if building or uploading a mini source failed.
            conn.close()

    def make_files(self, path=None):
        """Dump the info and errors tables and the public URL into files under *path*.

        :param path: target directory; ``self.parent.path`` is used when empty.
        :return: list of written file paths (info, errors, public URL - in this order).
        """
        if not path:
            path = self.parent.path

        files = []
        for table_ext in ['info', 'errors']:
            table_name = '%s.%s' % (self.report_name, table_ext)
            table_path = join(path, table_name)
            self.res_tables[table_name].to_csv(table_path, sep='\t', index=False, encoding='utf-8',
                                               quoting=csv.QUOTE_NONE)
            files.append(table_path)

        url_path = join(path, '%s_public_url' % self.report_name)
        with open(url_path, 'w') as fd:
            fd.write(self.get_public_url())
        files.append(url_path)

        return files
