import logging
import time
import uuid

from yt import wrapper

from crypta.lab.lib import (
    native_operations,
    specs,
    tables,
)
from crypta.lab.proto import other_pb2
from crypta.lib.proto import user_data
from crypta.lib.python import templater
from crypta.lib.python.bt import (
    tasks,
    workflow,
)
from crypta.lib.python.bt.commons import dates
from crypta.lib.python.bt.conf import conf
from crypta.lib.python.bt.workflow.targets import table as table_targets
from crypta.lib.python.yt import yt_helpers
from crypta.lib.python.yt.yt_helpers.proto.thin_out_config_pb2 import TThinOutConfig
from crypta.lib.python.yt.yt_helpers.thin_out import ThinOutHistoricalData


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class PrepareUserData(tasks.YtTask, tasks.YQLTaskV1):
    """Build the lab ``userdata`` and ``userdata_stats`` tables for one day.

    ``run`` performs three steps in order:

    1. ``run_map_reduce`` creates both tables and fills them with two native
       map-reduce operations.
    2. ``store_daily`` executes the ``store_daily.yql`` query (parameterised
       with the per-day destination path) and then runs the thin-out helper
       over the daily folder.
    3. ``postprocess_tables`` sorts ``userdata`` and stamps the produced
       tables with bookkeeping attributes.
    """

    # Processing day; raw values are parsed with ``dates.parse_day``.
    day = workflow.Parameter(parse=dates.parse_day)

    # Keyword ids whose field names are read from the profiles table
    # attributes and passed to the mapper state
    # (see ``get_extract_user_data_state``).
    KEYWORDS = [216, 217, 601, 602, 544, 546, 547, 549]

    def requires(self):
        """Yield the external input this task depends on.

        The profiles table must carry a ``generate_date`` attribute equal to
        the processing day.
        """
        yield tasks.ExternalInputWithAttribute(
            yt_table=conf.paths.profiles_for_14days,
            attribute='generate_date',
            value=dates.parse_day(self.day)
        )

    def targets(self):
        """Yield completion targets.

        Both ``userdata`` and ``userdata_stats`` must have their
        ``LAST_UPDATE_DATE`` attribute equal to ``str(self.day)``.
        """
        yield table_targets.HasAttribute(
            self.yt, self.userdata, tables.UserData.Attributes.LAST_UPDATE_DATE, str(self.day)
        )
        yield table_targets.HasAttribute(
            self.yt, self.userdata_stats, tables.UserData.Attributes.LAST_UPDATE_DATE, str(self.day)
        )

    @property
    def userdata(self):
        """Path of the main userdata table (taken from config)."""
        return conf.paths.lab.data.userdata

    @property
    def userdata_daily(self):
        """Path of the folder holding per-day userdata tables (taken from config)."""
        return conf.paths.lab.data.userdata_daily

    @property
    def userdata_stats(self):
        """Path of the userdata stats table (taken from config)."""
        return conf.paths.lab.data.userdata_stats

    @property
    def id_type(self):
        """Name of the identifier field used for reducing and sorting.

        NOTE(review): presumably overridden by subclasses working on other id
        types (``get_extract_user_data_state`` compares it with ``CRYPTA_ID``)
        -- confirm.
        """
        return tables.UserData.Fields.YUID

    @property
    def query(self):
        """Return an empty query string.

        NOTE(review): presumably required by ``tasks.YQLTaskV1``; the query
        actually executed is rendered in ``store_daily`` -- confirm.
        """
        return ''

    def get_extract_user_data_state(self, vectors, profiles, sampler):
        """Build the ``TExtractUserDataState`` message for the extract mapper.

        Args:
            vectors: path of the vectors table; its ``generate_date`` attribute
                is the reference date for ``OldestTimestamp``.
            profiles: path of the profiles table; its
                ``bb_keywords_to_field_name`` and
                ``not_strict_bb_keyword_id_to_field_names`` attributes supply
                the field names for every id in ``KEYWORDS``.
            sampler: object exposing ``denominator`` and ``rest``.

        Returns:
            A populated ``other_pb2.TExtractUserDataState`` message.
        """
        keywords = self.yt.get_attribute(profiles, 'bb_keywords_to_field_name')
        nstrict_keywords = self.yt.get_attribute(profiles, 'not_strict_bb_keyword_id_to_field_names')

        def extract_fields(value):
            # An attribute entry is either a single field name, a mapping
            # whose keys are field names, or absent. Always return a real list
            # so the two results can be concatenated with ``+`` on Python 2
            # and Python 3 alike.
            if isinstance(value, str):
                return [value]
            if isinstance(value, dict):
                return list(value.keys())
            return []

        state = other_pb2.TExtractUserDataState()
        state.Sampler.Denominator = sampler.denominator
        state.Sampler.Rest = sampler.rest
        keyword_fields = state.ProfilesLogState.KeywordFields
        for keyword in self.KEYWORDS:
            # The attribute mappings are keyed by the keyword id as a string.
            names = extract_fields(keywords.get(str(keyword))) + extract_fields(nstrict_keywords.get(str(keyword)))
            fields = keyword_fields[keyword]
            fields.Field.extend(names)
        date = self.yt.get_attribute(vectors, 'generate_date')
        # ``time.mktime`` interprets the time tuple in local time.
        timestamp = time.mktime(dates.parse_day(date).timetuple())
        # Go back the configured number of days from the vectors' generate date.
        state.OldestTimestamp = int(timestamp - 24 * 60 * 60 * int(conf.proto.Options.UserData.Days))
        state.UseAppsWeights = self.id_type == tables.UserData.Fields.CRYPTA_ID
        return state

    def postprocess_tables(self, table_to_process):
        """Sort ``userdata`` and stamp the produced tables with attributes.

        For ``userdata``, ``userdata_stats`` and the per-day table this sets a
        fresh random ``DATA_ID``, the YQL proto fields and
        ``LAST_UPDATE_DATE``. ``userdata`` additionally receives
        ``LAST_UPDATE_TIMESTAMP`` with the current time.

        Args:
            table_to_process: table description whose ``Attributes`` supplies
                the attribute names (``tables.UserData`` when called from
                ``run``).
        """
        daily_dst = wrapper.ypath_join(self.userdata_daily, str(self.day))
        self.sort(self.userdata, self.userdata, sort_by=self.id_type)

        tables_proto = {
            self.userdata: user_data.user_data_pb2.TUserData,
            self.userdata_stats: user_data.user_data_stats_pb2.TUserDataStats,
            daily_dst: user_data.user_data_pb2.TUserData,
        }

        # ``items()`` works on both Python 2 and Python 3; ``iteritems()``
        # exists only on Python 2.
        for table_path, proto_message_type in tables_proto.items():
            self.yt.set_attribute(table_path, table_to_process.Attributes.DATA_ID, str(uuid.uuid4()))
            yt_helpers.set_yql_proto_fields(table_path, proto_message_type, self.yt)
            self.yt.set_attribute(table_path, table_to_process.Attributes.LAST_UPDATE_DATE, str(self.day))

        self.yt.set_attribute(self.userdata, table_to_process.Attributes.LAST_UPDATE_TIMESTAMP, int(time.time()))

    def create_table(self, path, schema):
        """Create a table at ``path`` with the given schema.

        The node is created with ``recursive=True`` and ``force=True``, no
        compression and the ``scan`` optimisation mode.
        """
        self.yt.create(
            'table',
            path,
            recursive=True,
            attributes=dict(schema=schema, compression_codec='none', optimize_for='scan'),
            force=True,
        )

    def run_map_reduce(self, sources, vectors, profiles, mapper_files=None):
        """Create and fill ``userdata`` and ``userdata_stats`` in one transaction.

        Args:
            sources: input tables for the extract/merge map-reduce.
            vectors: vectors table path, used to build the mapper state.
            profiles: profiles table path, used to build the mapper state.
            mapper_files: optional list forwarded to ``native_map_reduce``;
                an empty list is used when omitted.
        """
        # Avoid a shared mutable default: build a fresh list for every call.
        if mapper_files is None:
            mapper_files = []

        with self.yt.Transaction():
            self.create_table(path=self.userdata, schema=tables.UserData.SCHEMA)
            self.create_table(path=self.userdata_stats, schema=tables.UserDataStats.SCHEMA)

            # Step 1: sources -> userdata, reduced by the id field.
            self.native_map_reduce(
                mapper_name=native_operations.TExtractUserData,
                reducer_name=native_operations.TMergeUserData,
                source=sources,
                destination=self.userdata,
                reduce_by=self.id_type,
                spec=specs._spec(
                    specs.YtSpecs.MEDIUM_DATA_SIZE_PER_MAP_JOB,
                    specs.YtSpecs.NO_INTERMEDIATE_COMPRESSION,
                    specs.YtSpecs.MAPPER_HIGH_MEMORY_USAGE,
                ),
                mapper_state=self.get_extract_user_data_state(vectors, profiles, conf.userdata_sampler).SerializeToString(),
                mapper_files=mapper_files,
            )

            # Step 2: userdata -> userdata_stats, reduced by group id; the
            # same merge operation is passed twice after the transform.
            self.native_map_reduce_with_combiner(
                native_operations.TTransformUserDataToUserDataStats,
                native_operations.TMergeUserDataStats,
                native_operations.TMergeUserDataStats,
                self.userdata,
                self.userdata_stats,
                reduce_by=tables.UserData.Fields.GROUP_ID,
                spec=specs._spec(
                    specs.YtSpecs.SMALL_DATA_SIZE_PER_SORT_JOB,
                    specs.YtSpecs.SMALL_DATA_SIZE_PER_MAP_JOB,
                    specs.YtSpecs.REDUCER_HIGH_MEMORY_USAGE,
                    specs.YtSpecs.REDUCER_BIG_ROWS,
                    specs.YtSpecs.MAPPER_BIG_ROWS,
                    specs.YtSpecs.NO_INTERMEDIATE_COMPRESSION,
                    specs.YtSpecs.MAPPER_HIGH_MEMORY_USAGE,
                ),
            )

    def store_daily(self):
        """Run the daily-store YQL query and thin out the daily folder.

        Both steps are performed under a single YT transaction: the query is
        executed with the transaction id and the thin-out helper receives the
        transaction object.
        """
        with self.yt.Transaction() as transaction:
            daily_dst = wrapper.ypath_join(self.userdata_daily, str(self.day))

            # Make sure the folder for per-day tables exists.
            self.yt.create('map_node', self.userdata_daily, ignore_existing=True)

            query = templater.render_resource(
                '/crypta/lab/store_daily.yql',
                strict=True,
                vars={
                    'date': str(self.day),
                    'id_type': self.id_type,
                    'userdata_table': self.userdata,
                    'daily_dst': daily_dst,
                },
            )
            self.yql_client.execute(
                query=query,
                transaction=str(transaction.transaction_id),
                title='YQL Store daily userdata',
            )

            # TTL and thin-out settings for the daily folder come from config.
            config = TThinOutConfig(
                DailyFolder=self.userdata_daily,
                ShortTtlDays=conf.paths.lab.data.userdata_month_ttl_days,
                LongTtlDays=conf.paths.lab.data.userdata_year_ttl_days,
                ThinOutDays=conf.paths.lab.data.userdata_thin_out_days,
            )

            thin_out_helper = ThinOutHistoricalData(
                config=config,
                yt=self.yt,
            )

            thin_out_helper.thin_out(transaction=transaction)

    def run(self, **kwargs):
        """Execute the task: map-reduce, daily store, then post-processing.

        ``kwargs`` are accepted but not used by this implementation.
        """
        # Read only the needed columns, renaming ``id`` to ``yuid``.
        yuid_with_all = wrapper.TablePath(
            name=conf.paths.ids_storage.yandexuid.yuid_with_all,
            rename_columns={'id': 'yuid'},
            columns=('yuid', 'main_region_city', 'main_region_country', 'ua_profile', 'ip_activity_type')
        )

        # Matching table exposed as (yuid, cid) pairs.
        yandexuid_cryptaid = wrapper.TablePath(
            name=conf.paths.matching.yandexuid_cryptaid,
            rename_columns={'id': 'yuid', 'target_id': 'cid'},
            columns=('yuid', 'cid')
        )

        vectors = conf.paths.vectors.monthly
        profiles = conf.paths.profiles_for_14days
        yuids_words_weights = conf.paths.affinitives.words
        yuids_hosts_weights = conf.paths.affinitives.hosts

        sources = [vectors, yuid_with_all, profiles, yandexuid_cryptaid, yuids_words_weights, yuids_hosts_weights]

        self.run_map_reduce(sources, vectors, profiles)
        self.store_daily()
        self.postprocess_tables(table_to_process=tables.UserData)
