#!/usr/bin/python -tt
# -*- coding: utf-8 -*-

import json
import os
try:
    from cStringIO import StringIO
except ImportError:
    from io import StringIO

import luigi
import requests
import tvmauth

from crypta.lib.python import retryable_http_client
import crypta.lib.python.audience.client as audience
from crypta.lib.python.yt import yt_helpers

from crypta.profile.utils.api import get_api
from crypta.profile.utils.config import config
from crypta.profile.utils.config.secrets import get_secrets
from crypta.profile.utils.luigi_utils import (
    BaseYtTask,
    YtDailyRewritableTarget,
    YtTarget,
    OldNodesByNameCleaner,
    ExternalInputDate,
)

# Upper bound on the number of ids sent for a single segment: the upload
# reducer stops reading a segment's records once it has collected this many.
MAX_IDS_IN_SEGMENT = 40000000

# YQL template that flattens the `audience_segments` column of a profiles table
# into (id, segment_id) rows and overwrites the output table with them.
# Placeholders:
#   {output_table}   - table to (re)write, truncated before insert
#   {id_type}        - name of the id column to carry over
#   {profiles_table} - source table; rows with NULL audience_segments are skipped
#   {condition}      - WHERE clause applied to the flattened rows
# The result is ordered by segment_id.
yql_query_template = """
INSERT INTO `{output_table}` WITH TRUNCATE
SELECT
    {id_type},
    segment_id
FROM (
    SELECT
        {id_type},
        segment_id
    FROM (
        SELECT
            {id_type},
            Yson::ConvertToUint64List(audience_segments) AS audience_segments
        FROM `{profiles_table}`
        WHERE audience_segments is not Null
    )
    FLATTEN BY audience_segments AS segment_id
)
{condition}
ORDER BY segment_id
"""


class PrepareAudienceSegments(BaseYtTask):
    """Build the flat (id, segment_id) export table for one data source.

    Renders ``yql_query_template`` against the export-profiles table for
    ``date`` and writes the result into a daily rewritable target.
    """

    date = luigi.Parameter()
    data_source = luigi.Parameter()  # yandexuid or crypta_id
    priority = 100
    task_group = 'export_profiles'

    def requires(self):
        """Return the external profiles input matching ``data_source``.

        Raises ``KeyError`` for a data source other than the two known ones.
        """
        yandexuid_input = ExternalInputDate(config.YANDEXUID_EXPORT_PROFILES_14_DAYS_TABLE, self.date)
        crypta_id_input = ExternalInputDate(config.CRYPTAID_EXPORT_PROFILES_14_DAYS_TABLE, self.date)
        known_inputs = {
            'yandexuid': yandexuid_input,
            'crypta_id': crypta_id_input,
        }
        return known_inputs[self.data_source]

    def output(self):
        """Return the daily rewritable target holding the flattened segments."""
        table_name = 'audience_segments_{}_flat_export'.format(self.data_source)
        table_path = os.path.join(
            config.PROFILES_INTERNAL_EXPORT_YT_DIRECTORY,
            'audience',
            table_name,
        )
        return YtDailyRewritableTarget(table_path, self.date)

    def get_crypta_id_segments(self):
        """Return a tuple of segment ids exported with type ``crypta_id``.

        Only exports with ``keywordId == 557`` are considered; duplicates are
        removed, so the order of the returned tuple is not defined.
        """
        unique_segment_ids = {
            export.segmentId
            for segment in get_api().lab.getAllSegments().result()
            for export in segment.exports.exports
            if export.keywordId == 557 and export.exportTypeId == 'crypta_id'
        }
        return tuple(unique_segment_ids)

    def run(self):
        """Run the flattening query in a transaction and stamp the output table."""
        with self.yt.Transaction() as transaction:
            # yandexuid export takes everything except the crypta_id segments,
            # crypta_id export takes only them.
            condition_template = (
                'WHERE segment_id not in {crypta_id_segments}'
                if self.data_source == 'yandexuid'
                else 'WHERE segment_id in {crypta_id_segments}'
            )
            # The tuple's repr is substituted into the query text as is.
            condition = condition_template.format(crypta_id_segments=self.get_crypta_id_segments())

            query = yql_query_template.format(
                id_type=self.data_source,
                condition=condition,
                profiles_table=self.input().table,
                output_table=self.output().table,
            )
            self.yql.query(query_string=query, transaction=transaction)

            self.yt.set_attribute(self.output().table, 'generate_date', self.date)


class UploadToApiReducer(object):
    """Reducer that uploads the ids of each segment to the Audience API.

    Called once per ``segment_id`` group, it yields exactly one log row with
    the keys ``segment_id``, ``segment_info``, ``status`` and ``log``.

    Args:
        id_column_name: name of the record field that holds the id to upload.
        tvm_src_id: TVM id this job authenticates as.
        tvm_dst_id: TVM id of the Audience API.
        audience_segment_info: mapping from segment id to the segment
            description; its ``status`` field decides whether to upload.
    """

    # API message meaning "segment data has not changed"; a 400 response
    # carrying it is treated as a successful upload.
    _UNCHANGED_DATA_MESSAGE = u"Данные сегмента не изменились"

    def __init__(self, id_column_name, tvm_src_id, tvm_dst_id, audience_segment_info):
        # The client is created in start(), not here.
        self.audience_client = None
        self.id_column_name = id_column_name
        self.tvm_src_id = tvm_src_id
        self.tvm_dst_id = tvm_dst_id
        self.audience_segment_info = audience_segment_info

    def start(self):
        """Create the TVM-authenticated Audience client.

        The TVM secret is read from the environment variable whose name is
        given by ``yt_helpers.get_yt_secure_vault_env_var_for('AUDIENCE_TVM_SECRET')``.
        NOTE(review): presumably invoked by the YT job runtime before the first
        ``__call__`` - confirm.
        """
        tvm_client = tvmauth.TvmClient(tvmauth.TvmApiClientSettings(
            self_tvm_id=self.tvm_src_id,
            self_secret=os.environ.get(yt_helpers.get_yt_secure_vault_env_var_for('AUDIENCE_TVM_SECRET')),
            dsts=[self.tvm_dst_id],
        ))

        self.audience_client = audience.PrivateApiAudienceClient(
            tvm_client=tvm_client,
            tvm_dst_id=self.tvm_dst_id,
        )

    @classmethod
    def _is_unchanged_data_error(cls, exc):
        """Tell whether ``exc`` is the API's "segment data has not changed" reply.

        Never raises: a missing, non-JSON or non-object error body simply means
        "no", so the caller can re-raise the original HTTP error untouched.
        """
        try:
            body = json.loads(exc.text)
        except (AttributeError, TypeError, ValueError):
            return False

        if not isinstance(body, dict):
            return False

        return (
            body.get("message") == cls._UNCHANGED_DATA_MESSAGE
            and body.get("code") == requests.codes.bad_request
        )

    def __call__(self, key, records):
        """Upload the ids of one segment and yield a single log row.

        Args:
            key: reduce key; ``key['segment_id']`` identifies the segment.
            records: iterable of rows; ``record[id_column_name]`` is the id.

        Yields:
            One dict with ``segment_id``, ``segment_info`` (JSON), ``status``
            (``ok``, ``warn``, ``skipped`` or ``error``) and ``log``.
        """
        ids_string = StringIO()
        ids_number = 0
        for record in records:
            ids_string.write(str(record[self.id_column_name]))
            ids_string.write('\n')

            ids_number += 1
            # Ids beyond the limit are dropped.
            if ids_number >= MAX_IDS_IN_SEGMENT:
                break

        segment_id = key['segment_id']
        result_record = {
            'segment_id': segment_id,
            'segment_info': json.dumps(self.audience_segment_info.get(segment_id, {})),
        }

        data = ids_string.getvalue()
        try:
            segment_info = self.audience_segment_info.get(segment_id)

            if not segment_info:
                status = 'warn'
                log = 'no such segment'
            elif segment_info['status'] in ('processed', 'few_data'):
                try:
                    result = self.audience_client.modify_segment_with_data(
                        data=data,
                        audience_segment_id=segment_id,
                        modification_type='replace',
                        ulogin=config.CRYPTA_AUDIENCE_LOGIN,
                        check_size=False,
                    )
                    status = 'ok'
                    log = json.dumps(result, ensure_ascii=False)
                except retryable_http_client.RetryableHttpClientError as exc:
                    # The body is inspected in a helper that swallows its own
                    # parsing errors, so this bare ``raise`` re-raises the
                    # original HTTP error and that error reaches the log.
                    if not self._is_unchanged_data_error(exc):
                        raise
                    status = 'ok'
                    log = self._UNCHANGED_DATA_MESSAGE
            else:
                # Segment exists but is not in an uploadable state.
                log = json.dumps(segment_info, ensure_ascii=False)
                status = 'skipped'
        except Exception as exception:
            # One failed segment is recorded in the log table instead of
            # failing the whole job.
            status = 'error'
            log = str(exception)

        result_record.update({
            'log': log,
            'status': status,
        })
        yield result_record


class UploadAudienceSegmentsToApi(BaseYtTask):
    """Upload prepared segments to the Audience API and store a per-segment log.

    Runs ``UploadToApiReducer`` over the flattened segments table, reducing by
    ``segment_id``, and writes one log row per segment to the output table.
    """

    date = luigi.Parameter()
    data_source = luigi.Parameter()  # yandexuid or crypta_id
    priority = 100
    task_group = 'export_profiles'

    def __init__(self, *args, **kwargs):
        super(UploadAudienceSegmentsToApi, self).__init__(*args, **kwargs)

        # TVM source id and secret to use, keyed by data source.
        self.tvm_src_ids = dict(
            yandexuid=config.UNICORN_AUDIENCE_TVM_ID,
            crypta_id=config.CRYPTAIDOR_AUDIENCE_TVM_ID,
        )
        self.tvm_secrets = dict(
            yandexuid=get_secrets().get_secret('UNICORN_AUDIENCE_TVM_SECRET'),
            crypta_id=get_secrets().get_secret('CRYPTAIDOR_AUDIENCE_TVM_SECRET'),
        )

    def requires(self):
        """Return the prepared segments task and a cleaner for the log directory."""
        log_directories = {
            'yandexuid': config.AUDIENCE_API_LOGS_DIRECTORY,
            'crypta_id': config.CRYPTAID_AUDIENCE_API_LOGS_DIRECTORY,
        }

        prepared_segments = PrepareAudienceSegments(date=self.date, data_source=self.data_source)
        old_logs_cleaner = OldNodesByNameCleaner(
            self.date,
            folder=log_directories[self.data_source],
            lifetime=config.NUMBER_OF_INTERMEDIATE_PROFILES_TABLES_TO_KEEP,
        )
        return {
            'audience_segments': prepared_segments,
            'cleaner': old_logs_cleaner,
        }

    def output(self):
        """Return the log table target named after ``date`` for this data source."""
        yandexuid_target = YtTarget(os.path.join(config.AUDIENCE_API_LOGS_DIRECTORY, self.date))
        crypta_id_target = YtTarget(os.path.join(config.CRYPTAID_AUDIENCE_API_LOGS_DIRECTORY, self.date))
        targets = {
            'yandexuid': yandexuid_target,
            'crypta_id': crypta_id_target,
        }
        return targets[self.data_source]

    def get_audience_segment_info(self):
        """Return the segments of the Crypta audience login, keyed by segment id."""
        client_settings = tvmauth.TvmApiClientSettings(
            self_tvm_id=self.tvm_src_ids[self.data_source],
            self_secret=self.tvm_secrets[self.data_source],
            dsts=[audience.AUDIENCE_API_TVM_ID],
        )
        api_client = audience.PrivateApiAudienceClient(
            tvm_client=tvmauth.TvmClient(client_settings),
            tvm_dst_id=audience.AUDIENCE_API_TVM_ID,
        )

        segments_by_id = {}
        for segment in api_client.list_segments(ulogin=config.CRYPTA_AUDIENCE_LOGIN):
            segments_by_id[segment['id']] = segment
        return segments_by_id

    def run(self):
        """Create the log table, run the upload reduce and merge the result."""
        with self.yt.Transaction():
            log_schema = {
                'segment_id': 'uint64',
                'status': 'string',
                'log': 'string',
                'segment_info': 'string',
            }
            self.yt.create_empty_table(self.output().table, schema=log_schema)

            reducer = UploadToApiReducer(
                tvm_src_id=self.tvm_src_ids[self.data_source],
                tvm_dst_id=audience.AUDIENCE_API_TVM_ID,
                id_column_name=self.data_source,
                audience_segment_info=self.get_audience_segment_info(),
            )
            segments_table = self.input()['audience_segments'].table
            log_table = self.output().table
            reduce_spec = {
                'max_speculative_job_count_per_task': 0,
                'resource_limits': {'user_slots': 10},
                'reducer': {
                    'memory_limit': 6 * 1024 * 1024 * 1024,
                    'memory_reserve_factor': 1,
                },
                # The reducer reads this secret back inside the job.
                'secure_vault': {'AUDIENCE_TVM_SECRET': self.tvm_secrets[self.data_source]},
            }
            self.yt.run_reduce(
                reducer,
                source_table=segments_table,
                destination_table=log_table,
                reduce_by='segment_id',
                spec=reduce_spec,
            )

            # Merge the log table into itself to combine its chunks.
            self.yt.run_merge(
                self.output().table,
                self.output().table,
                mode='unordered',
                spec={'combine_chunks': True},
            )
