#!/usr/bin/env python
# -*- coding: utf-8 -*-

from collections import defaultdict, namedtuple
import os

from yt.wrapper import aggregator

from crypta.profile.lib import pandas_yt
from crypta.profile.tasks.common.merge_socdem_storage import join_with_yandexuid_reducer
from crypta.profile.tasks.monitoring.__base__ import Monitoring
from crypta.profile.utils.config import config
from crypta.profile.utils.luigi_utils import ExternalInputDate, OldNodesByNameCleaner, YtTarget
from crypta.profile.utils.socdem import (
    get_age_segment_from_age,
    get_age_segment_from_birth_date,
    get_age_segment_from_year_of_birth,
    socdem_storage_schema,
)
from crypta.profile.utils.utils import report_ml_metrics_to_solomon


# Hashable grouping key: one counter is kept per distinct combination of these
# fields when statistics are aggregated by source.
SocdemStorageInfo = namedtuple(
    'SocdemStorageInfo',
    ('id_type', 'source', 'gender', 'age_segment', 'income_segment'),
)


def get_age_segment_from_row(row):
    """Derive the age segment of a storage row from whichever age field it carries.

    Fields are tried in priority order: ``birth_date``, then ``age``, then
    ``year_of_birth``. A field that is absent or falsy (``None``, ``0``, empty
    string) is skipped.

    Returns:
        The value produced by the matching ``get_age_segment_from_*`` helper,
        or ``None`` when none of the three fields holds a truthy value.
    """
    birth_date = row.get('birth_date')
    if birth_date:
        return get_age_segment_from_birth_date(birth_date)

    age = row.get('age')
    if age:
        return get_age_segment_from_age(age)

    year_of_birth = row.get('year_of_birth')
    if year_of_birth:
        return get_age_segment_from_year_of_birth(year_of_birth)

    return None


@aggregator
def count_by_source_mapper(rows):
    """Pre-aggregate storage rows into counts per socdem combination.

    Consumes the whole iterable ``rows`` in one call and counts how many rows
    share the same (id_type, source, gender, age_segment, income_segment)
    combination, so that only one row per combination is emitted instead of
    one row per input row.

    Args:
        rows: iterable of dict-like storage rows; each must contain the
            ``id_type``, ``source``, ``gender`` and ``income_segment`` keys.

    Yields:
        dict with the five grouping columns plus ``count``, the number of
        input rows seen for that combination.
    """
    info_counter = defaultdict(int)

    for row in rows:
        info = SocdemStorageInfo(
            id_type=row['id_type'],
            source=row['source'],
            gender=row['gender'],
            age_segment=get_age_segment_from_row(row),
            income_segment=row['income_segment'],
        )
        info_counter[info] += 1

    # items() exists on both Python 2 and Python 3 dicts; iteritems() is
    # Python-2-only and raises AttributeError on Python 3.
    for info, cnt in info_counter.items():
        # The namedtuple field names are exactly the output column names.
        yield dict(info._asdict(), count=cnt)


def count_reducer(key, rows):
    """Sum the ``count`` column of all rows that share one reduce key.

    Args:
        key: mapping of the reduce-by columns to their values for this group.
        rows: iterable of dict-like rows, each holding a numeric ``count``.

    Yields:
        A single dict holding the summed ``count`` together with every key
        column.
    """
    total = 0
    for row in rows:
        total += row['count']

    output_row = {'count': total}
    # Key columns are merged in last, so a key column named 'count' would
    # take precedence over the computed total.
    output_row.update(key)
    yield output_row


def _get_socdem_state(values):
    """Classify the non-empty values collected for one socdem attribute of one id.

    Returns:
        'unknown' for no values, 'single' for exactly one value, 'confirmed'
        for several identical values, and 'conflicting' for several values
        that differ.
    """
    if not values:
        return 'unknown'
    if len(values) == 1:
        return 'single'
    if len(set(values)) == 1:
        return 'confirmed'
    return 'conflicting'


def count_by_id_reducer(key, rows):
    """Describe how consistent the socdem data of a single id is.

    For each of gender, age segment and income segment, collects the truthy
    values found in the rows of the group and classifies them as 'unknown',
    'single', 'confirmed' or 'conflicting'.

    Args:
        key: reduce key; only ``key['id_type']`` is copied to the output.
        rows: iterable of dict-like storage rows of the group; each must
            contain the ``gender`` and ``income_segment`` keys.

    Yields:
        One dict with ``id_type``, ``gender_state``, ``age_segment_state``
        and ``income_segment_state``.
    """
    socdem_values = {
        'gender': [],
        'age_segment': [],
        'income_segment': [],
    }

    for row in rows:
        if row['gender']:
            socdem_values['gender'].append(row['gender'])

        if row['income_segment']:
            socdem_values['income_segment'].append(row['income_segment'])

        age_segment = get_age_segment_from_row(row)
        if age_segment:
            socdem_values['age_segment'].append(age_segment)

    result_row = {
        'id_type': key['id_type'],
    }

    # items() exists on both Python 2 and Python 3 dicts; iteritems() is
    # Python-2-only and raises AttributeError on Python 3.
    for socdem_col, values in socdem_values.items():
        result_row['{}_state'.format(socdem_col)] = _get_socdem_state(values)

    yield result_row


@aggregator
def group_by_mapper(rows):
    """Pre-aggregate per-id state rows into counts per state combination.

    Consumes the whole iterable ``rows`` in one call and counts how many rows
    share the same (id_type, gender_state, age_segment_state,
    income_segment_state) combination.

    Args:
        rows: iterable of dict-like rows containing all four grouping columns.

    Yields:
        dict with the four grouping columns plus ``count``, the number of
        input rows seen for that combination.
    """
    group_by_columns = ('id_type', 'gender_state', 'age_segment_state', 'income_segment_state')

    counters = defaultdict(int)
    for row in rows:
        counters[tuple(row[col_name] for col_name in group_by_columns)] += 1

    # items() exists on both Python 2 and Python 3 dicts; iteritems() is
    # Python-2-only and raises AttributeError on Python 3.
    for row_info, cnt in counters.items():
        result_row = dict(zip(group_by_columns, row_info))
        result_row['count'] = cnt
        yield result_row


# Schema of the temporary table that holds the storage rows joined with yandexuids:
# a copy of the base storage schema (so the imported mapping gets no new key)
# extended with a ``yandexuid`` column of type uint64.
yandexuid_socdem_storage_schema = socdem_storage_schema.copy()
yandexuid_socdem_storage_schema['yandexuid'] = 'uint64'


class SocdemStorageMonitoring(Monitoring):
    """Compute statistics over the socdem storage table and report them to Solomon.

    ``run`` builds six statistic tables under ``self.yt_folder`` (see
    ``output``) and, right after each one is built, reads it back and sends
    its contents as metrics.
    """

    name = 'socdem_storage'

    def requires(self):
        """Return the task dependencies: a cleaner for the parent folder and the input table."""
        return {
            # NOTE(review): presumably removes outdated nodes from the parent folder;
            # the unit of ``lifetime`` is not visible here - confirm against the cleaner.
            'cleaner': OldNodesByNameCleaner(
                self.date,
                folder=os.path.dirname(self.yt_folder),
                lifetime=5,
            ),
            'socdem_storage': ExternalInputDate(
                config.SOCDEM_STORAGE_TABLE,
                self.date,
            ),
        }

    def output(self):
        """Return the six statistic tables, keyed by name, as targets under ``self.yt_folder``."""
        return {
            'socdem_count_by_source': YtTarget(
                os.path.join(self.yt_folder, 'socdem_count_by_source'),
            ),
            'socdem_count_by_id': YtTarget(
                os.path.join(self.yt_folder, 'socdem_count_by_id'),
            ),
            'id_count_by_source': YtTarget(
                os.path.join(self.yt_folder, 'id_count_by_source'),
            ),
            'yandexuid_count_by_source': YtTarget(
                os.path.join(self.yt_folder, 'yandexuid_count_by_source'),
            ),
            'id_count_by_id': YtTarget(
                os.path.join(self.yt_folder, 'id_count_by_id'),
            ),
            'yandexuid_count_by_id': YtTarget(
                os.path.join(self.yt_folder, 'yandexuid_count_by_id'),
            ),
        }

    @staticmethod
    def _send_metrics_to_solomon(metrics_to_send):
        """Send a list of ``{'labels': ..., 'value': ...}`` metrics to the socdem storage service."""
        report_ml_metrics_to_solomon(
            service=config.SOLOMON_SOCDEM_STORAGE_SERVICE,
            metrics_to_send=metrics_to_send,
        )

    def _send_socdem_stat_by_source_to_solomon(self, stat_table):
        """Report row counts per (id_type, source), in total and per socdem segment.

        Args:
            stat_table: path of a table with the columns ``id_type``, ``source``,
                ``gender``, ``age_segment``, ``income_segment`` and ``count``.
        """
        df_socdem = pandas_yt.read_into_pandas_dataframe(self.yt, stat_table)
        metrics_to_send = []

        # Send general info about ids count.
        # The group keys are passed as a list: pandas treats a tuple as one
        # single key (older versions only warn about it).
        # Series.items() replaces Series.iteritems(), which newer pandas removed.
        total_counts = df_socdem.groupby(['id_type', 'source'])['count'].sum()
        for (id_type, source), count in total_counts.items():
            metrics_to_send.append({
                'labels': {
                    'divided_by': 'source',
                    'source': source,
                    'id_type': id_type,
                    'socdem_type': 'all',
                    'metric': 'count',
                },
                'value': count,
            })

        # Send info about gender, age and income separately
        for socdem_type in ('gender', 'age_segment', 'income_segment'):
            # Missing values become an explicit 'unknown' segment. The result is
            # assigned back because an in-place fillna on ``df[col]`` is not
            # guaranteed to modify the frame itself.
            df_socdem[socdem_type] = df_socdem[socdem_type].fillna('unknown')
            segment_counts = df_socdem.groupby(['id_type', 'source', socdem_type])['count'].sum()
            for (id_type, source, socdem_segment), count in segment_counts.items():
                metrics_to_send.append({
                    'labels': {
                        'divided_by': 'source',
                        'source': source,
                        'id_type': id_type,
                        'socdem_type': socdem_type,
                        'socdem_segment': socdem_segment,
                        'metric': 'count',
                    },
                    'value': count,
                })

        self._send_metrics_to_solomon(metrics_to_send)

    def _send_socdem_stat_by_id_to_solomon(self, stat_table):
        """Report id counts per id_type, in total and per socdem consistency state.

        Args:
            stat_table: path of a table with the columns ``id_type``,
                ``gender_state``, ``age_segment_state``, ``income_segment_state``
                and ``count``.
        """
        df_socdem = pandas_yt.read_into_pandas_dataframe(self.yt, stat_table)
        metrics_to_send = []

        # Send general info about ids count
        total_counts = df_socdem.groupby('id_type')['count'].sum()
        for id_type, count in total_counts.items():
            metrics_to_send.append({
                'labels': {
                    'divided_by': 'id',
                    'id_type': id_type,
                    'socdem_state_type': 'all',
                    'metric': 'count',
                },
                'value': count,
            })

        # Send info about gender, age and income separately
        for socdem_state_type in ('gender_state', 'age_segment_state', 'income_segment_state'):
            # List of keys, not a tuple: pandas treats a tuple as one single key.
            state_counts = df_socdem.groupby(['id_type', socdem_state_type])['count'].sum()
            for (id_type, socdem_state), count in state_counts.items():
                metrics_to_send.append({
                    'labels': {
                        'divided_by': 'id',
                        'id_type': id_type,
                        'socdem_state_type': socdem_state_type,
                        'socdem_state': socdem_state,
                        'metric': 'count',
                    },
                    'value': count,
                })

        self._send_metrics_to_solomon(metrics_to_send)

    def _send_id_count_stat_by_id_to_solomon(self, stat_table, stat_count_type):
        """Report the ``count`` of every row of ``stat_table`` as a per-id_type metric.

        Args:
            stat_table: path of a table with the columns ``id_type`` and ``count``.
            stat_count_type: name of the counted entity ('id' or 'yandexuid' in
                ``run``); used to build the metric name ``<stat_count_type>s_count``.
        """
        df = pandas_yt.read_into_pandas_dataframe(self.yt, stat_table)
        metrics_to_send = []

        # Send general info about ids count
        for _, row in df.iterrows():
            metrics_to_send.append({
                'labels': {
                    'divided_by': 'id',
                    'id_type': row['id_type'],
                    'metric': '{}s_count'.format(stat_count_type),
                },
                'value': row['count'],
            })

        self._send_metrics_to_solomon(metrics_to_send)

    def _send_id_count_stat_by_source_to_solomon(self, stat_table, stat_count_type):
        """Report the ``count`` of every row of ``stat_table`` as a per-(id_type, source) metric.

        Args:
            stat_table: path of a table with the columns ``id_type``, ``source``
                and ``count``.
            stat_count_type: name of the counted entity ('id' or 'yandexuid' in
                ``run``); used to build the metric name ``<stat_count_type>s_count``.
        """
        df = pandas_yt.read_into_pandas_dataframe(self.yt, stat_table)
        metrics_to_send = []

        # Send general info about ids count
        for _, row in df.iterrows():
            metrics_to_send.append({
                'labels': {
                    'divided_by': 'source',
                    'source': row['source'],
                    'id_type': row['id_type'],
                    'metric': '{}s_count'.format(stat_count_type),
                },
                'value': row['count'],
            })

        self._send_metrics_to_solomon(metrics_to_send)

    def run(self):
        """Build every statistic table inside one YT transaction and report each to Solomon."""
        with self.yt.Transaction(), \
                self.yt.TempTable() as socdem_state_table, \
                self.yt.TempTable() as yandexuid_socdem_storage_table, \
                self.yt.TempTable() as unique_id_table, \
                self.yt.TempTable() as unique_id_table_with_source:
            # Resolve the table paths once instead of rebuilding the targets
            # for every use.
            outputs = self.output()
            socdem_storage_table = self.input()['socdem_storage'].table

            # id_type, source
            socdem_count_by_source_table = outputs['socdem_count_by_source'].table
            self.yt.create_empty_table(
                socdem_count_by_source_table,
                schema={
                    'id_type': 'string',
                    'source': 'string',
                    'gender': 'string',
                    'age_segment': 'string',
                    'income_segment': 'string',
                    'count': 'uint64',
                },
                force=True,
            )

            self.yt.run_map_reduce(
                count_by_source_mapper,
                count_reducer,
                socdem_storage_table,
                socdem_count_by_source_table,
                reduce_by=['id_type', 'source', 'gender', 'age_segment', 'income_segment'],
            )

            self._send_socdem_stat_by_source_to_solomon(socdem_count_by_source_table)

            # id, id_type
            socdem_count_by_id_table = outputs['socdem_count_by_id'].table
            self.yt.create_empty_table(
                socdem_count_by_id_table,
                schema={
                    'id_type': 'string',
                    'gender_state': 'string',
                    'age_segment_state': 'string',
                    'income_segment_state': 'string',
                    'count': 'uint64',
                },
                force=True,
            )

            # First produce one state row per (id, id_type) ...
            self.yt.run_reduce(
                count_by_id_reducer,
                socdem_storage_table,
                socdem_state_table,
                reduce_by=['id', 'id_type'],
            )

            # ... then count the ids per combination of states.
            self.yt.run_map_reduce(
                group_by_mapper,
                count_reducer,
                socdem_state_table,
                socdem_count_by_id_table,
                reduce_by=['id_type', 'gender_state', 'age_segment_state', 'income_segment_state'],
            )

            self._send_socdem_stat_by_id_to_solomon(socdem_count_by_id_table)

            # join with yandexuid
            self.yt.create_empty_table(
                yandexuid_socdem_storage_table,
                schema=yandexuid_socdem_storage_schema,
                force=True,
            )

            self.yt.run_reduce(
                join_with_yandexuid_reducer,
                [socdem_storage_table, config.INDEVICE_YANDEXUID],
                yandexuid_socdem_storage_table,
                reduce_by=['id', 'id_type'],
            )

            # Both the 'id' and the 'yandexuid' statistics are computed from the
            # joined table.
            # NOTE(review): ``unique`` / ``unique_count`` are project helpers on
            # ``self.yt``; presumably they deduplicate rows and count rows per
            # ``unique_by`` group respectively - confirm against their definitions.
            for stat_count_type in ('id', 'yandexuid'):
                count_by_source_table = outputs['{}_count_by_source'.format(stat_count_type)].table
                count_by_id_table = outputs['{}_count_by_id'.format(stat_count_type)].table

                # id_type, source
                self.yt.create_empty_table(
                    count_by_source_table,
                    schema={
                        'id_type': 'string',
                        'source': 'string',
                        'count': 'uint64',
                    },
                    force=True,
                )

                self.yt.unique(
                    self.yt.TablePath(
                        yandexuid_socdem_storage_table,
                        columns=[stat_count_type, 'id_type', 'source'],
                    ),
                    unique_id_table_with_source,
                    unique_by=[stat_count_type, 'id_type', 'source'],
                )

                self.yt.unique_count(
                    source_table=unique_id_table_with_source,
                    destination_table=count_by_source_table,
                    unique_by=['id_type', 'source'],
                )

                self._send_id_count_stat_by_source_to_solomon(count_by_source_table, stat_count_type)

                # id_type, id
                self.yt.create_empty_table(
                    count_by_id_table,
                    schema={
                        'id_type': 'string',
                        'count': 'uint64',
                    },
                    force=True,
                )

                # Reuses the table written by the by-source step above and
                # drops the source dimension from the uniqueness key.
                self.yt.unique(
                    unique_id_table_with_source,
                    unique_id_table,
                    unique_by=[stat_count_type, 'id_type'],
                )

                self.yt.unique_count(
                    source_table=unique_id_table,
                    destination_table=count_by_id_table,
                    unique_by=['id_type'],
                )

                self._send_id_count_stat_by_id_to_solomon(count_by_id_table, stat_count_type)
