import datetime
import os

from cached_property import cached_property
import json
from library.python.protobuf.json import proto2json

from crypta.lib.python import (
    crypta_env,
    swagger,
    time_utils,
)
from crypta.lib.python.yt import yt_helpers
from crypta.lib.python.yt.schema_utils.proto import get_schema_from_proto
from crypta.profile.lib.socdem_helpers import socdem_groups
from crypta.siberia.bin.custom_audience.suggester.bin.service.lib.proto import segment_pb2

# Id of the group used to recognise "orgvisits" segments: a segment whose
# parents include this group gets the 'orgvisits' type.
ORGVISITS_GROUP = "group-8130912a"

# Campaign type sets attached to exported segments.
ALL_CAMPAIGNS = (segment_pb2.TExport.ECampaignType.media, segment_pb2.TExport.ECampaignType.performance)
MEDIA_ONLY = (segment_pb2.TExport.ECampaignType.media,)

# Export tags of the form 'ca_type:<type>' carry the CA type of a segment;
# the length is precomputed so the prefix can be sliced off cheaply.
CA_TYPE_TAG_PREFIX = 'ca_type:'
CA_TYPE_TAG_PREFIX_LEN = len(CA_TYPE_TAG_PREFIX)


class UploadDirectExportsTankerKeysToYt(object):
    """Upload segments exported to Direct, together with their Tanker texts, to YT.

    Segments are fetched from the ``lab`` API. Every non-socdem segment that has
    at least one export tagged ``'direct'`` and resolvable Tanker texts becomes
    one row of ``config.DirectExportsTankerNamesTable``, written to both the main
    and the replica YT cluster. Problems found along the way are logged, collected
    in ``self.errors`` and stored in a timestamped table under
    ``config.SegmentErrorsDir``.
    """

    def __init__(self, config, logger):
        """Store the task config and logger; no I/O happens here."""
        self.config = config
        self.logger = logger
        # Dicts with 'segment_id' and 'message' keys, refilled by every run().
        self.errors = []

    @cached_property
    def yt(self):
        """YT client for the main cluster (``config.Yt``)."""
        return yt_helpers.get_yt_client(self.config.Yt.Proxy, self.config.Yt.Pool)

    @cached_property
    def replica_yt(self):
        """YT client for the replica cluster (``config.ReplicaYt``)."""
        return yt_helpers.get_yt_client(self.config.ReplicaYt.Proxy, self.config.ReplicaYt.Pool)

    @cached_property
    def target_yts(self):
        """Tuple of YT clients the output table is written to."""
        return self.yt, self.replica_yt

    @cached_property
    def api(self):
        """API client authorised with the OAuth token from the environment.

        Raises:
            AssertionError: the token environment variable is unset or empty.
        """
        env_name = crypta_env.EnvNames.crypta_api_oauth
        api_token = os.environ.get(env_name)
        if not api_token:
            # Raised explicitly instead of via `assert` so the check is not
            # stripped when Python runs with -O; the exception type is unchanged.
            raise AssertionError(f'{env_name} environment variable is not set')

        return swagger.swagger(self.config.ApiUrl, api_token)

    @cached_property
    def segments(self):
        """All segments returned by the ``lab`` API (fetched once)."""
        return self.api.lab.getAllSegments().result()

    @cached_property
    def socdem_groups(self):
        """Socdem groups derived from the segment groups tree (fetched once)."""
        return socdem_groups.socdem_groups(self.api.lab.getSegmentGroupsTree().result())

    @cached_property
    def orgvisits(self):
        """Set of segments whose parents include ``ORGVISITS_GROUP``."""
        parents_per_segment = self.api.lab.getParentsPerSegment().result()
        return {segment for segment in parents_per_segment if ORGVISITS_GROUP in parents_per_segment[segment]}

    @cached_property
    def shortterm_interests(self):
        """Set of ``(keyword, value)`` int pairs read from ``config.DirectCryptaGoalsTable``.

        Only rows with ``crypta_goal_type == "interests"`` whose ``scope``
        contains ``"common"`` are taken.
        """
        return {
            (int(row["bb_keyword_short"]), int(row["bb_keyword_value_short"]))
            for row in self.yt.read_table(self.config.DirectCryptaGoalsTable)
            if row["crypta_goal_type"] == "interests" and "common" in row["scope"]
        }

    @property
    def _direct_segments_schema(self):
        """YT schema of the output table, derived from the ``TSegment`` proto."""
        return get_schema_from_proto(segment_pb2.TSegment)

    @property
    def output_table(self):
        """Path of the output table (same path on every target cluster)."""
        return self.config.DirectExportsTankerNamesTable

    @cached_property
    def errors_table(self):
        """Path of the errors table, named after the current Moscow datetime.

        Cached, so the name is fixed on first access for this instance.
        """
        errors_table_name = time_utils.get_current_moscow_datetime().strftime("%Y-%m-%dT%H-%M-%S")
        return os.path.join(self.config.SegmentErrorsDir, errors_table_name)

    def run(self):
        """Build the rows, write them to every target cluster and store collected errors."""
        self.errors = []
        # Materialised up front: the rows are written to several clusters, and
        # producing them is what fills self.errors.
        rows = list(self._yield_direct_segments_info())

        for yt in self.target_yts:
            with yt.Transaction():
                yt_helpers.create_empty_table(
                    yt_client=yt,
                    path=self.output_table,
                    schema=self._direct_segments_schema,
                )
                yt.write_table(self.output_table, rows)

        if not self.errors:
            return

        # Errors go to the main cluster only and expire after ErrorsTtlDays days.
        with self.yt.Transaction():
            yt_helpers.create_empty_table(
                yt_client=self.yt,
                path=self.errors_table,
            )
            self.yt.write_table(self.errors_table, self.errors)
            yt_helpers.set_ttl(
                yt_client=self.yt,
                table=self.errors_table,
                ttl_timedelta=datetime.timedelta(days=self.config.ErrorsTtlDays),
            )

    def _add_error_message(self, segment_id, message):
        """Log ``message`` as an error and remember it for the errors table."""
        self.logger.error(message)
        self.errors.append({
            'segment_id': segment_id,
            'message': message,
        })

    @staticmethod
    def _get_ca_types(export):
        """Return the CA types encoded in the export tags as ``ca_type:<type>``."""
        return [tag[CA_TYPE_TAG_PREFIX_LEN:] for tag in export.tags if tag.startswith(CA_TYPE_TAG_PREFIX)]

    def _collect_direct_exports(self, segment):
        """Return ``(exports, campaigns, ca_types)`` gathered from the segment's 'direct' exports."""
        exports = []
        campaigns = set()
        ca_types = set()

        for export in segment.exports.exports:
            if 'direct' not in export.tags:
                continue

            # Exports listed among short-term interests get all campaign types, the rest media only.
            if (export.keywordId, export.segmentId) in self.shortterm_interests:
                campaigns.update(ALL_CAMPAIGNS)
            else:
                campaigns.update(MEDIA_ONLY)

            ca_types.update(self._get_ca_types(export))
            exports.append(
                segment_pb2.TExport(
                    KeywordId=export.keywordId,
                    SegmentId=export.segmentId,
                )
            )

        return exports, campaigns, ca_types

    def _pick_tagged_ca_type(self, segment_id, ca_types, known_types):
        """Return the single known CA type among ``ca_types``, or None; report unknown or ambiguous types."""
        unknown_types = ca_types - known_types
        if unknown_types:
            self._add_error_message(
                segment_id,
                f'Segment {segment_id} has unknown CA types: {", ".join(sorted(unknown_types))}',
            )

        valid_types = ca_types - unknown_types
        if len(valid_types) > 1:
            self._add_error_message(
                segment_id,
                f'Segment {segment_id} has several different CA types: {", ".join(sorted(valid_types))}',
            )
            return None

        return next(iter(valid_types), None)

    def _get_translations(self, tanker_key):
        """Fetch translations for ``tanker_key`` from the API; None when the key is empty."""
        if not tanker_key:
            return None
        return self.api.lab.getTranslationsByKey(key=tanker_key).result()

    @staticmethod
    def _proto_to_row(proto):
        """Convert a ``TSegment`` proto into a dict with snake_case field names and enum names."""
        json_config = proto2json.Proto2JsonConfig(
            enum_mode=proto2json.EnumValueMode.EnumName,
            field_name_mode=proto2json.FldNameMode.FieldNameSnakeCase,
        )
        return json.loads(proto2json.proto2json(proto, config=json_config))

    def _yield_direct_segments_info(self):
        """Yield one output row (dict) per segment exported to Direct.

        Skipped segments: socdem ones, ones without 'direct' exports, and ones
        whose Tanker texts are missing or could not be fetched (the latter two
        are reported through ``_add_error_message``).
        """
        # The configured types do not change while iterating, so build the set once.
        known_types = set(self.config.SegmentTypeTankerKeys.keys())

        for segment in self.segments:

            # exclude socdem segments
            if segment.parentId in self.socdem_groups:
                continue

            exports, campaigns, ca_types = self._collect_direct_exports(segment)
            if not exports:
                continue

            proto = segment_pb2.TSegment()
            proto.Exports.extend(exports)

            tagged_type = self._pick_tagged_ca_type(segment.id, ca_types, known_types)
            if tagged_type is not None:
                # An explicit CA type makes the segment available to all campaign types.
                ca_type = tagged_type
                campaigns.update(ALL_CAMPAIGNS)
            else:
                ca_type = 'segment'

            # Membership in the orgvisits group overrides any tagged type.
            if segment.id in self.orgvisits:
                ca_type = 'orgvisits'
                campaigns.update(ALL_CAMPAIGNS)

            proto.Type = ca_type
            proto.TankerSegmentTypeKey = self.config.SegmentTypeTankerKeys[ca_type]
            proto.CampaignTypes.extend(sorted(campaigns))

            proto.TankerDescriptionKey = segment.tankerDescriptionKey
            proto.TankerNameKey = segment.tankerNameKey

            try:
                tanker_name = self._get_translations(segment.tankerNameKey)
                tanker_descr = self._get_translations(segment.tankerDescriptionKey)
            except Exception as e:
                # Best effort: a failing lookup drops this segment only, not the whole run.
                self._add_error_message(segment.id, f'Getting tanker keys for {segment.id} failed. Exception: {e}')
                continue

            if tanker_name is None or tanker_descr is None:
                self._add_error_message(segment.id, f'No tanker keys for {segment.id}')
                continue

            for language in ('ru', 'en'):
                proto.TankerNames.append(
                    segment_pb2.TTankerInfo(
                        Language=language,
                        Name=getattr(tanker_name, language).text,
                        Description=getattr(tanker_descr, language).text,
                    )
                )

            yield self._proto_to_row(proto)
