import datetime
from functools import partial
import logging

import tvmauth

import crypta.lib.python.bt.conf.conf as conf
from crypta.lib.python.bt.tasks import (
    YtTask,
)
from crypta.lib.python.bt.workflow import (
    IndependentTask,
)
from crypta.lib.python.bt.workflow.targets.table import (
    HasAttribute,
)
from crypta.lab.lib.common import (
    WithApi,
)
from crypta.lib.proto.identifiers import id_type_pb2
from crypta.siberia.bin.common import sample_stats_getter
import crypta.siberia.bin.common.create_user_set_from_sample_reducer.py as sampler
from crypta.siberia.bin.common.describing.mode.python import describing_mode
from crypta.siberia.bin.common.siberia_client import SiberiaClient

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


# Lookup table used by ``prepare_segment_sample_mapper`` for the socdem part of a row.
#
# Outer key: a field name looked up inside ``row['exact_socdem']``.
# ``keyword_id``: the keyword id (as a string) forming the first element of the
#     ``(keyword_id, segment_id)`` tuple used to index ``segment_lab_ids``.
# ``segment_ids``: maps the field's value in the row to the segment id (as a
#     string) forming the second element of that tuple.
#
# Both ids are stored as strings because the lookup keys built in
# ``PrepareSample.get_segment_lab_ids`` are ``str(...)`` pairs.
SOCDEM_FIELD_TO_KEYWORD_ID = {
    'gender': {
        'keyword_id': '174',
        'segment_ids': {
            'm': '0',
            'f': '1',
        },
    },
    'age_segment': {
        'keyword_id': '543',
        'segment_ids': {
            '0_17': '0',
            '18_24': '1',
            '25_34': '2',
            '35_44': '3',
            '45_54': '4',
            '55_99': '5',
        },
    },
    'income_5_segment': {
        'keyword_id': '614',
        'segment_ids': {
            'A': '0',
            'B1': '1',
            'B2': '2',
            'C1': '3',
            'C2': '4',
        },
    },
}


# Maps a row column holding a collection of segment ids to the integer keyword
# id of that column. ``prepare_segment_sample_mapper`` converts both the keyword
# id and each segment id with ``str(...)`` before looking them up in
# ``segment_lab_ids``.
#
# NOTE(review): 'probabilistic_segments' and 'interests_composite' both map to
# keyword 217 -- confirm this is intentional and not a copy-paste slip.
SEGMENT_FIELD_TO_KEYWORD_ID = {
    'heuristic_segments': 216,
    'probabilistic_segments': 217,
    'interests_composite': 217,
    'marketing_segments': 281,
    'lal_common': 544,
    'lal_private': 545,
    'lal_internal': 546,
    'heuristic_common': 547,
    'heuristic_private': 548,
    'heuristic_internal': 549,
    'audience_segments': 557,
    'longterm_interests': 601,
    'shortterm_interests': 602,
}


def prepare_segment_sample_mapper(row, segment_lab_ids):
    """Expand one profile row into ``(yandexuid, segment_lab_id)`` records.

    Args:
        row: mapping with a ``'yandexuid'`` key, an ``'exact_socdem'`` key
            (a dict, or a falsy value when there is no socdem data) and one
            key per entry of ``SEGMENT_FIELD_TO_KEYWORD_ID`` whose value is
            an iterable of segment ids or a falsy value.
        segment_lab_ids: dict keyed by ``(keyword_id, segment_id)`` tuples of
            strings, valued by the Lab identifier to emit.

    Yields:
        ``{'yandexuid': ..., 'segment_lab_id': ...}`` dicts, one per
        socdem value / segment id of the row that has an entry in
        ``segment_lab_ids``. Values with no known mapping are skipped.
    """
    # Loop-invariant: a missing (falsy) socdem column means "no socdem data".
    exact_socdem = row['exact_socdem'] or {}

    # ``items()`` instead of the Python-2-only ``iteritems()`` keeps the mapper
    # runnable on both Python 2 and Python 3; the tables are tiny.
    for socdem_type, socdem_info in SOCDEM_FIELD_TO_KEYWORD_ID.items():
        socdem_segment = exact_socdem.get(socdem_type)
        if socdem_segment is None:
            continue

        # An unexpected socdem value must not abort the whole map operation.
        socdem_segment_id = socdem_info['segment_ids'].get(socdem_segment)
        if socdem_segment_id is None:
            continue

        # Same tolerant lookup as for the regular segments below: pairs that
        # have no Lab export are skipped instead of raising KeyError.
        segment_lab_id = segment_lab_ids.get((socdem_info['keyword_id'], socdem_segment_id))
        if segment_lab_id:
            yield {
                'yandexuid': row['yandexuid'],
                'segment_lab_id': segment_lab_id,
            }

    for segment_type, segment_keyword_id in SEGMENT_FIELD_TO_KEYWORD_ID.items():
        keyword_id = str(segment_keyword_id)
        # A falsy column (None / empty) contributes nothing.
        for segment_id in row[segment_type] or ():
            segment_lab_id = segment_lab_ids.get((keyword_id, str(segment_id)))
            if segment_lab_id:
                yield {
                    'yandexuid': row['yandexuid'],
                    'segment_lab_id': segment_lab_id,
                }


class PrepareSample(IndependentTask, YtTask, WithApi):
    """Task that maps the profiles table into a ``yandexuid -> segment_lab_id`` table.

    The task is considered done for the day when the destination table carries
    the ``_day`` attribute equal to today's ISO date.
    """

    @property
    def _today(self):
        """Today's date as an ISO-formatted string."""
        current_date = datetime.date.today()
        return current_date.isoformat()

    @property
    def _yesterday(self):
        """Yesterday's date as an ISO-formatted string."""
        one_day = datetime.timedelta(days=1)
        return (datetime.date.today() - one_day).isoformat()

    @property
    def _attribute(self):
        """Name of the table attribute that stores the processing date."""
        return '_day'

    @property
    def source(self):
        """Path of the input table, taken from the configuration."""
        return conf.paths.profiles_for_14days

    @property
    def destination(self):
        """Path of the output table, taken from the configuration."""
        return conf.paths.lab.segments

    def targets(self):
        """Yield the completion target: destination stamped with today's date."""
        yield HasAttribute(self.yt, self.destination, self._attribute, self._today)

    def get_segment_lab_ids(self):
        """Build the ``(keyword_id, segment_id) -> lab id`` lookup from the Lab API.

        Returns:
            dict keyed by ``(str(keywordId), str(segmentId))`` of every export of
            every segment returned by ``getAllSegments``; the value is
            ``segment.id + '__' + export.id``.
        """
        lab_ids_by_export = {}

        all_segments = self.api.lab.getAllSegments().result()
        for segment in all_segments:
            for export in segment.exports.exports:
                export_key = (str(export.keywordId), str(export.segmentId))
                lab_ids_by_export[export_key] = segment.id + '__' + export.id

        return lab_ids_by_export

    def run(self, **kwargs):
        """Create the destination table if needed, run the map, stamp the date."""
        destination_missing = not self.yt.exists(self.destination)
        if destination_missing:
            table_attributes = {
                'schema': [
                    {'name': 'yandexuid', 'type': 'uint64'},
                    {'name': 'segment_lab_id', 'type': 'string'},
                ],
                'optimize_for': 'scan',
            }
            self.yt.create('table', self.destination, attributes=table_attributes)

        # The lookup is fetched once here and bound into the mapper.
        mapper = partial(
            prepare_segment_sample_mapper,
            segment_lab_ids=self.get_segment_lab_ids(),
        )
        self.yt.run_map(mapper, self.source, self.destination)

        self.yt.set_attribute(self.destination, self._attribute, self._today)


class DescribeInSiberia(YtTask):
    """Task that builds user sets from the segments sample and collects their stats.

    Depends on ``PrepareSample``. Done for the day when the destination table
    carries the ``_day`` attribute equal to today's ISO date.
    """

    def requires(self):
        """Yield the upstream task this one depends on."""
        yield PrepareSample()

    @property
    def _today(self):
        """Today's date as an ISO-formatted string."""
        current_date = datetime.date.today()
        return current_date.isoformat()

    @property
    def _attribute(self):
        """Name of the table attribute that stores the processing date."""
        return '_day'

    @property
    def source(self):
        """Path of the input table, taken from the configuration."""
        return conf.paths.lab.segments

    @property
    def destination(self):
        """Path of the output table, taken from the configuration."""
        return conf.paths.lab.sample_user_sets.segments

    def targets(self):
        """Yield the completion target: destination stamped with today's date."""
        yield HasAttribute(self.yt, self.destination, self._attribute, self._today)

    def run(self, **kwargs):
        """Create user sets from the sample, fetch their stats, stamp the date."""
        tvm_settings = {
            "source_id": conf.proto.Tvm.SourceTvmId,
            "destination_id": conf.proto.Siberia.Tvm.DestinationTvmId,
            "secret": conf.proto.Tvm.Secret,
        }

        # Step 1: user sets grouped by ``segment_lab_id`` from the sample table.
        sampler.create_user_set_from_sample(
            self.yt,
            self.native_map_reduce_with_combiner,
            self.native_map,
            source=self.source,
            destination=self.destination,
            tvm_settings=tvm_settings,
            group_id_column="segment_lab_id",
            id_type=id_type_pb2.EIdType.YANDEXUID,
            id_column="yandexuid",
            sample_size=conf.proto.Options.SampleSize,
            siberia_host=conf.proto.Siberia.Host,
            siberia_port=conf.proto.Siberia.Port,
            max_ids_per_second=conf.proto.Options.MaxIdsPerSecond,
            max_jobs=conf.proto.Options.MaxDescribeJobs,
            describing_mode=describing_mode.SLOW,
            experiment="by_crypta_id",
        )

        # Step 2: a TVM client used to obtain a service ticket for "siberia".
        client_settings = tvmauth.TvmApiClientSettings(
            self_tvm_id=conf.proto.Tvm.SourceTvmId,
            self_secret=conf.proto.Tvm.Secret,
            dsts={"siberia": conf.proto.Siberia.Tvm.DestinationTvmId},
        )
        tvm_client = tvmauth.TvmClient(client_settings)

        # Step 3: stats for the destination table via the Siberia client.
        siberia_client = SiberiaClient(conf.proto.Siberia.Host, conf.proto.Siberia.Port)
        service_ticket = tvm_client.get_service_ticket_for("siberia")
        sample_stats_getter.get_stats(
            self.yt,
            siberia_client,
            service_ticket,
            self.destination,
        )

        self.yt.set_attribute(self.destination, self._attribute, self._today)


class UploadIdsToDatabase(YtTask, WithApi):
    """Task that asks the Lab API to update its segment ids table from YT.

    Depends on ``DescribeInSiberia``. Done for the day when the source table
    carries the ``_uploaded_to_database_at`` attribute equal to today's ISO date.
    """

    def requires(self):
        """Yield the upstream task this one depends on."""
        yield DescribeInSiberia()

    @property
    def _today(self):
        """Today's date as an ISO-formatted string."""
        current_date = datetime.date.today()
        return current_date.isoformat()

    @property
    def _attribute(self):
        """Name of the table attribute that stores the upload date."""
        return '_uploaded_to_database_at'

    @property
    def source(self):
        """Path of the table to upload, taken from the configuration."""
        return conf.paths.lab.sample_user_sets.segments

    def targets(self):
        """Yield the completion target: source table stamped with today's date."""
        yield HasAttribute(self.yt, self.source, self._attribute, self._today)

    def run(self, **kwargs):
        """Call ``updateSegmentIdsTable`` for the source table, then stamp the date."""
        update_request = self.api.lab.updateSegmentIdsTable(source=self.source)
        # The attribute is only set after ``result()`` has returned.
        update_request.result()
        self.yt.set_attribute(self.source, self._attribute, self._today)
