from __future__ import print_function

import logging
import os
from collections import defaultdict
from functools import partial

import luigi
import yt.wrapper as yt
from crypta.graph.soup.config.python import (  # N811 # noqa
    SOURCE_TYPE as source_type,
    LOG_SOURCE as log_source,
    EDGE_TYPE as edges,
    Edges,
)
from crypta.graph.v1.python.data_imports.import_active_dumps.market.market_orders import ImportMarketOrders
from crypta.graph.v1.python.data_imports.import_active_dumps.metrika_user_params.user_params import (
    ImportMetrikaUserParams,
    PreprocessMetrikaUserParamsOwners,
)
from crypta.graph.v1.python.data_imports.import_dumps import graph_passport_dump, graph_tickets, graph_yamoney
from crypta.graph.v1.python.data_imports.import_dumps.partners import import_other_dumps
from crypta.graph.v1.python.data_imports.import_dumps.sherlock import import_vkid_mail
from crypta.graph.v1.python.data_imports.import_dumps.social import graph_social_auth
from crypta.graph.v1.python.data_imports.import_logs import (
    chevent_log,
    device_yuids_oauth,
    device_yuids_redir,
    device_yuids_sdk,
    device_yuids_tracking,
    graph_access,
    graph_barnavig,
    graph_dmp_ditmsk,
    graph_eal,
    graph_import_fp,
    graph_passport,
    graph_passport_sensitive,
    graph_postclicks,
    graph_rassilyator,
    graph_sbapi_lookup,
    graph_sovetnik,
    graph_watch_log,
    import_kinopoisk_access,
    rtb_log,
)
from crypta.graph.v1.python.data_imports.import_logs.autoru import graph_autoru
from crypta.graph.v1.python.data_imports.import_logs.bs import xuniq
from crypta.graph.v1.python.data_imports.import_logs.visit_log import graph_visit_log
from crypta.graph.v1.python.data_imports.import_logs.webvisor import graph_webvisor
from crypta.graph.v1.python.data_imports.import_logs.app_metrica_day import WaitMetricaSoup
from crypta.graph.v1.python.data_imports.import_logs.postback_log import WaitPostbackLogSoup

from crypta.graph.v1.python.lib import graphite_sender
from crypta.graph.v1.python.lib.luigi import base_luigi_task

from crypta.graph.v1.python.matching.device_matching.app_metrica import account_manager
from crypta.graph.v1.python.rtcconf import config

from crypta.graph.v1.python.utils import utils, mr_utils as mr
from crypta.graph.v1.python.utils import yt_clients
from crypta.graph.v1.python.utils.yql_utils import run_yql

from crypta.graph.v1.python.v2.soup import soup_utils, soup_dirs, indevice_fuzzy2
from crypta.graph.v1.python.v2.soup.soup_tables import (
    edge_key_columns,
    edge_type_key_columns,
    SoupDailyDumpTable,
    SoupDailyTable,
    SoupDumpTable,
    SoupStorageTable,
)

from crypta.graph.v1.python.lib.luigi.yt_luigi import YtDateAttributeTarget, YtFolderTarget


def soup_update_edge_types():
    """Return the edge types whose ``Usage.SoupUpdate`` flag is truthy.

    The result is a new list that keeps the order in which
    ``edges.values()`` yields the edge types.
    """
    selected = []
    for edge_type in edges.values():
        if edge_type.Usage.SoupUpdate:
            selected.append(edge_type)
    return selected


def add_day_to_soup_python(date, daily_soup_tables, tx, throw_before_date=None):
    """Merge daily soup tables into their storage tables with one map-reduce.

    Args:
        date: day being added; used to build the per-day validation paths.
        daily_soup_tables: daily tables to read; each must provide
            ``table_path()``, ``corresponding_storage_table()`` and ``edge_type``.
        tx: transaction object handed to ``create()`` of every storage table.
        throw_before_date: forwarded unchanged to
            ``soup_utils.increment_day_activity``.
    """
    logging.info("Running add_day_to_soup_python")

    daily_paths = []
    storages = []
    storage_paths = []
    # Maps a tuplized edge type to the position of its storage table in the
    # map-reduce output list.
    edge_to_output_index = {}

    for daily in daily_soup_tables:
        daily_paths.append(daily.table_path())

        storage = daily.corresponding_storage_table()
        if storage.table_path() in storage_paths:
            # This storage table is already registered as an output.
            continue

        output_index = len(storage_paths)
        storages.append(storage)
        storage_paths.append(storage.create(tx, recreate_if_exists=False))
        edge_to_output_index[edges.tuplize(daily.edge_type)] = output_index

    validation_dir = soup_dirs.SOUP_DAY_DIR + date + "/validation"
    mr.mkdir(validation_dir)
    validation_errors = validation_dir + "/incremental_errors"

    # assume sorted (remark kept from the original code; TODO confirm which
    # tables it refers to)
    client = yt_clients.get_yt_client()
    reducer = partial(
        soup_utils.increment_day_activity,
        out_table_indexes=edge_to_output_index,
        throw_before_date=throw_before_date,
    )
    # Storage tables are both inputs and outputs; the validation errors table
    # is the last output.
    client.run_map_reduce(
        soup_utils.normalize_edge,
        reducer,
        daily_paths + storage_paths,
        storage_paths + [validation_errors],
        reduce_by=edge_key_columns,
        sort_by=edge_key_columns,
    )

    SoupStorageTable.finalize_all(storages)


def add_day_to_soup_yql(date, tx, log_source, edge_types, throw_before_date=None):
    """Run the ``AddDaySoup`` YQL query for one log source.

    Note that the ``log_source`` parameter shadows the module-level
    ``log_source`` alias inside this function.

    Args:
        date: day being added; passed to the query as ``date``.
        tx: transaction object; only its ``transaction_id`` is passed on.
        log_source: log source object; its ``Name`` is logged and the object
            itself is passed to the query.
        edge_types: edge types passed to the query.
        throw_before_date: passed to the query unchanged (may be ``None``).
    """
    logging.info("Running add_day_to_soup_yql: {}".format(log_source.Name))

    query_params = {
        "date": date,
        "throw_before_date": throw_before_date,
        "transaction_id": tx.transaction_id,
        "soup_dir": soup_dirs.SOUP_DIR,
        "log_source": log_source,
        "edge_types": edge_types,
        "normalize_lazy": config.SOUP_NORMALIZE_LAZY,
    }
    run_yql("AddDaySoup", query_params)


# Registry of import tasks per log source, keyed by ``log_source.<NAME>.Type``.
# A value is either a single task class or a list of task classes.
# ``AddLogToSoup.requires`` instantiates every class of the selected entry with
# ``date`` and ``run_date``; ``AddDayToSoup.requires`` schedules one
# ``AddLogToSoup`` per key of this dict.
# Note: PASSPORT_LOG and REQANS_LOG both map to ``graph_import_fp.ImportFPDayTask``.
LOG_IMPORTERS = {
    log_source.ACCESS_LOG.Type: [
        graph_access.ImportAccessLogsDayTask,
        import_kinopoisk_access.ImportKinopoiskAccessLog,
    ],
    log_source.AUTORU.Type: graph_autoru.ImportAutoRuDayTask,
    log_source.BAR_NAVIG_LOG.Type: graph_barnavig.ImportBarNavigDayTask,
    log_source.BS_RTB_LOG.Type: rtb_log.WaitRtbLogSoup,
    log_source.BS_XUNIQS_LOG.Type: xuniq.ImportBsXuniqsLog,
    log_source.DITMSK.Type: graph_dmp_ditmsk.ImportDitMskTask,
    log_source.EVENT_LOG.Type: chevent_log.ImportCheventDayTask,
    log_source.EXPORT_ACCESS_LOG.Type: graph_eal.ImportEalDayTask,
    log_source.METRIKA_MOBILE_LOG.Type: WaitMetricaSoup,
    log_source.MOBILE_REDIRECT_BIND_ID_LOG.Type: device_yuids_sdk.ImportSdkLogsDayTask,
    log_source.MOBILE_TRACKING_LOG.Type: device_yuids_tracking.ImportMobileTrackingLogsDayTask,
    log_source.OAUTH_LOG.Type: device_yuids_oauth.ImportOauthLogsDayTask,
    log_source.PASSPORT_PHONE_LOG.Type: graph_passport.ImportPassportPhoneDayTask,
    log_source.PASSPORT_SENSITIVE_LOG.Type: graph_passport_sensitive.ImportPassportPhoneBindingsDayTask,
    log_source.POSTCLICK_LOG.Type: graph_postclicks.ImportPostclicksDailyTask,
    log_source.REDIR_LOG.Type: device_yuids_redir.ImportRedirLogsDayTask,
    log_source.SBAPI_MITB_LOG.Type: graph_sbapi_lookup.ImportSbApiMitbLogDayTask,
    log_source.SENDER_LOG.Type: graph_rassilyator.ImportSenderDayTask,
    log_source.SOVETNIK.Type: graph_sovetnik.ImportSovetnikDayTask,
    log_source.VISIT_LOG.Type: graph_visit_log.ImportVisitLogDayTask,
    log_source.WATCH_LOG.Type: [graph_watch_log.ImportWatchLogDayTask, graph_watch_log.WaitWatchLogSoup],
    log_source.WEBVISOR_LOG.Type: graph_webvisor.ImportWebvisorDayTask,
    log_source.PASSPORT_LOG.Type: graph_import_fp.ImportFPDayTask,
    log_source.REQANS_LOG.Type: graph_import_fp.ImportFPDayTask,
    log_source.POSTBACK_LOG.Type: WaitPostbackLogSoup,
}


class PrepareDirs(base_luigi_task.BaseTask):
    """Luigi task that creates the validation directories used by soup tasks.

    Two directories are handled: the per-day ``validation`` folder under
    ``soup_dirs.SOUP_DAY_DIR`` and the ``validation`` folder under
    ``soup_dirs.SOUP_DIR``.
    """

    date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(PrepareDirs, self).__init__(*args, **kwargs)
        day_validation_dir = soup_dirs.SOUP_DAY_DIR + self.date + "/validation"
        storage_validation_dir = os.path.join(soup_dirs.SOUP_DIR, "validation")
        self.dirs = [day_validation_dir, storage_validation_dir]

    def output(self):
        """Return one folder target per directory; empty folders are accepted."""
        targets = []
        for path in self.dirs:
            targets.append(YtFolderTarget(path, allow_empty=True))
        return targets

    def run(self):
        """Create every directory listed in ``self.dirs``."""
        for path in self.dirs:
            mr.mkdir(path)


class AddLogToSoup(base_luigi_task.BaseTask, yt_clients.YtClientMixin):
    """Luigi task that runs the ``AddDaySoup`` YQL query for one log source.

    Parameters:
        date: day to add.
        log_source_type: integer type of the log source; resolved to a log
            source object with ``log_source.by_type``.
    """

    date = luigi.Parameter()
    log_source_type = luigi.IntParameter()

    def __init__(self, *args, **kwargs):
        super(AddLogToSoup, self).__init__(*args, **kwargs)
        self.log_source = log_source.by_type(self.log_source_type)
        # Daily, log-supplied edge types of this log source. ACCOUNT_MANAGER
        # edges are excluded here (AddDumpsToSoup selects them explicitly).
        self.edge_types = [
            et
            for et in soup_update_edge_types()
            if (
                et.Props.ActivityType == et.Props.DAILY
                and et.Props.SupplyMethod == et.Props.LOG  # W503 # noqa
                and et.LogSource == self.log_source  # W503 # noqa
                and et.SourceType != source_type.ACCOUNT_MANAGER  # W503 # noqa
            )
        ]
        self.daily_soup_tables = [SoupDailyTable(et, self.date) for et in self.edge_types]

    def requires(self):
        """Return the import tasks of this log source followed by ``PrepareDirs``.

        A ``LOG_IMPORTERS`` entry may be a single task class, a list of task
        classes or ``None``. All three shapes are normalised to a list so that
        the method always returns a list and ``PrepareDirs`` is always
        required (previously a ``None`` entry made the method return ``None``
        and drop ``PrepareDirs``).

        Raises:
            KeyError: if the log source type has no entry in ``LOG_IMPORTERS``.
        """
        importers = LOG_IMPORTERS[self.log_source.Type]
        if importers is None:
            importers = []
        elif not isinstance(importers, list):
            importers = [importers]

        dependencies = [imp(date=self.date, run_date=self.date) for imp in importers]
        dependencies.append(PrepareDirs(date=self.date))
        return dependencies

    def run(self):
        """Run the YQL update in a transaction, then report validation errors."""
        # DANGER: don't try in prod, you may lose soup data
        if config.CRYPTA_ENV == "testing":
            throw_before_date = utils.get_date_before(self.date, 7)
        else:
            throw_before_date = None

        with self.yt.Transaction() as tx:
            add_day_to_soup_yql(
                self.date,
                tx=tx,
                log_source=self.log_source,
                throw_before_date=throw_before_date,
                edge_types=self.edge_types,
            )

            # Make sure every storage table exists and carries the generate
            # date, even if the query did not create it.
            for daily_table in self.daily_soup_tables:
                storage_table = daily_table.corresponding_storage_table()
                if not self.yt.exists(storage_table.table_path()):
                    storage_table.create(tx, recreate_if_exists=False)
                mr.set_generate_date(storage_table.table_path(), self.date)

        # Reported after the transaction block has been left.
        validation_errors = (
            soup_dirs.SOUP_DAY_DIR + self.date + "/validation/incremental_errors_" + self.log_source.Name
        )
        report_validation_errors(validation_errors, self.log_source.Name, self.date)

    def output(self):
        """Return the targets of the storage tables of all daily soup tables."""
        return self.log_yt_targets_check([t.corresponding_storage_table().as_target() for t in self.daily_soup_tables])


class AddDayToSoup(base_luigi_task.BaseTask):
    """Luigi task that marks a day as added once every log source is processed.

    Requires one ``AddLogToSoup`` per key of ``LOG_IMPORTERS`` and then sets
    the ``daily_generate_date`` attribute target on the soup directory.
    """

    date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(AddDayToSoup, self).__init__(*args, **kwargs)
        # The target path is the soup directory without trailing slashes.
        soup_root = soup_dirs.SOUP_DIR.rstrip("/")
        self.target = YtDateAttributeTarget(soup_root, "daily_generate_date", self.date)

    def requires(self):
        """Return an ``AddLogToSoup`` task for each registered log source type."""
        per_log_tasks = []
        for log_type in LOG_IMPORTERS:
            per_log_tasks.append(AddLogToSoup(date=self.date, log_source_type=log_type))
        return per_log_tasks

    def run(self):
        """Record ``self.date`` on the target."""
        self.target.set_date(self.date)

    def output(self):
        """Return the date attribute target built in ``__init__``."""
        return self.target


def report_validation_errors(errors_table, soup_type, date):  # C901 # noqa
    """Count validation errors per edge type and send the counts to graphite.

    Runs a map-reduce over ``errors_table`` that groups records by
    ``edge_type_key_columns`` and writes the counts to
    ``errors_table + "_per_source_count"``. That table is then read back and
    the counts are sent as the metric
    ``soup.validation_errors.per_source.<soup_type>``.

    Uses ``dict.items()`` instead of the Python 2 only ``iteritems()`` so the
    function also works on Python 3.

    Args:
        errors_table: path of the table with validation error records.
        soup_type: suffix of the graphite metric name.
        date: date passed to the graphite sender.
    """
    per_source_table = errors_table + "_per_source_count"

    def mk_key(rec):
        # Values are stringified to form the grouping key.
        return tuple([str(rec[x]) for x in edge_type_key_columns])

    @yt.aggregator
    def mapper(recs):
        # Pre-aggregate inside the mapper: one output row per distinct key
        # seen in ``recs`` instead of one per input record.
        counters = defaultdict(int)

        for r in recs:
            counters[mk_key(r)] += 1

        for k, v in counters.items():
            result = dict(zip(edge_type_key_columns, k))
            result["count"] = v
            yield result

    def reducer(key, recs):
        # Sum the partial counts produced by the mappers for this key.
        count = 0
        for r in recs:
            count += r["count"]

        result = dict()
        result.update(key)
        result["count"] = count
        yield result

    yt_client = yt_clients.get_yt_client()
    yt_client.run_map_reduce(mapper, reducer, errors_table, per_source_table, reduce_by=edge_type_key_columns)

    metrics = dict()
    for r in yt_client.read_table(per_source_table):
        et = soup_utils.get_rec_edge_type(r)
        metrics[edges.name(et)] = str(r["count"])

    metric_name = "soup.validation_errors.per_source." + soup_type
    # NOTE(review): assumes the sender only iterates over the (name, value)
    # pairs; ``items()`` is a list on Python 2 and a view on Python 3.
    graphite_sender.to_graphite_sender_batch(metric_name, metrics.items(), date)


class AddDumpsToSoup(base_luigi_task.BaseTask, yt_clients.YtClientMixin):
    """Luigi task that loads dump tables into soup storage tables.

    Parameters:
        date: day of the dumps.
        active_dump: selects the mode by truthiness. When truthy, daily
            DUMP-supplied edge types are read from ``SoupDailyDumpTable``;
            otherwise edge types without activity are read from
            ``SoupDumpTable``.
            NOTE(review): this is a plain ``luigi.Parameter``, so a non-empty
            string such as "False" is truthy — confirm how callers pass it.
    """

    date = luigi.Parameter()
    active_dump = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(AddDumpsToSoup, self).__init__(*args, **kwargs)
        if self.active_dump:
            # One filter pass feeds both the input and the output tables.
            active_edge_types = [
                edge_type
                for edge_type in soup_update_edge_types()
                if edge_type.Props.ActivityType == edge_type.Props.DAILY
                and edge_type.Props.SupplyMethod == edge_type.Props.DUMP  # W503 # noqa
            ]
            self.dump_tables = [SoupDailyDumpTable(edge_type.LogSource, self.date) for edge_type in active_edge_types]
            # some active dump edges have same log_source and same table_path
            self.in_table_paths = list(set(table.table_path() for table in self.dump_tables))
            self.out_tables = [SoupStorageTable(edge_type, self.date) for edge_type in active_edge_types]
        else:
            # Explicit parentheses: ACCOUNT_MANAGER edges are always included,
            # the other conditions only apply to edges without activity.
            self.dump_tables = [
                SoupDumpTable(et, self.date)
                for et in soup_update_edge_types()
                if (
                    et.Props.ActivityType == et.Props.NONE
                    and et.SourceType != source_type.PROBABILISTIC2  # W503 # noqa
                    and et.LogSource != log_source.HOUSEHOLDS  # W503 # noqa
                    and et.LogSource != log_source.SOUP_PREPROCESSING  # W503 # noqa
                )
                or et.SourceType == source_type.ACCOUNT_MANAGER  # W503 # noqa
            ]
            self.in_table_paths = [t.table_path() for t in self.dump_tables]
            self.out_tables = [t.corresponding_storage_table() for t in self.dump_tables]

    def requires(self):
        """Return the import tasks of the selected mode plus ``PrepareDirs``."""
        if self.active_dump:
            return [
                ImportMarketOrders(date=self.date),
                ImportMetrikaUserParams(date=self.date),
                PreprocessMetrikaUserParamsOwners(date=self.date),
                PrepareDirs(date=self.date),
            ]
        else:
            return [
                account_manager.AccountManagerUpdateDictTask(date=self.date),
                graph_passport_dump.ImportPassportDump(date=self.date),
                graph_yamoney.ImportYandexMoneyDump(date=self.date),
                graph_tickets.ImportYandexTicketsDump(date=self.date),
                graph_social_auth.ImportSocialAuthDump(date=self.date),
                import_vkid_mail.ImportVkidPuidMailDump(date=self.date),
                import_other_dumps.ImportEmailPhoneDumpsToSoup(date=self.date),
                PrepareDirs(date=self.date),
            ]

    def run(self):
        """Rebuild the storage tables from the dumps, then report errors."""
        dump_name = "active_dump" if self.active_dump else "dump"
        validation_errors = os.path.join(soup_dirs.SOUP_DIR, "validation", dump_name + "_errors")

        out_table_paths = []
        # Maps a tuplized edge type to the position of its output table.
        out_table_indexes = dict()
        with self.yt.Transaction() as tr:
            for table in self.out_tables:
                out_table_paths.append(table.create(tr))
                out_table_indexes[edges.tuplize(table.edge_type)] = len(out_table_paths) - 1

            # The validation errors table is the last output.
            self.yt.run_map_reduce(
                soup_utils.normalize_edge,
                partial(soup_utils.uniq_dump, out_table_indexes=out_table_indexes, add_dates=self.active_dump),
                self.in_table_paths,
                out_table_paths + [validation_errors],
                reduce_by=edge_key_columns,
            )

            SoupStorageTable.finalize_all(self.out_tables)
            mr.set_attribute(soup_dirs.SOUP_DIR, dump_name + "_generate_date", self.date)

            if self.active_dump:
                for table in self.dump_tables:
                    table.finalize()

        # Report after the transaction block, as AddLogToSoup and
        # AddFuzzyToSoup do: the reporter obtains its own YT client, so the
        # errors table should be committed before it is read, and a metrics
        # failure should not roll back the dump itself.
        report_validation_errors(validation_errors, dump_name, self.date)

    def output(self):
        """Return the targets of the storage tables written by ``run``."""
        if self.active_dump:
            return self.log_yt_targets_check([table.as_target() for table in self.out_tables])
        else:
            return self.log_yt_targets_check([t.corresponding_storage_table().as_target() for t in self.dump_tables])


class AddFuzzyToSoup(base_luigi_task.BaseTask, yt_clients.YtClientMixin):
    """Luigi task that loads fuzzy and household dump tables into soup storage.

    Selects soup-update edge types without activity whose source type is
    ``PROBABILISTIC2`` or whose log source is ``HOUSEHOLDS``.

    Parameters:
        date: day of the dumps.
    """

    date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(AddFuzzyToSoup, self).__init__(*args, **kwargs)
        self.dump_tables = [
            SoupDumpTable(et, self.date)
            for et in soup_update_edge_types()
            if et.Props.ActivityType == et.Props.NONE
            and (  # W503 # noqa
                et.SourceType == source_type.PROBABILISTIC2  # W503 # noqa
                or et.LogSource == log_source.HOUSEHOLDS  # W503 # noqa
            )
        ]

    def requires(self):
        """Return the fuzzy month task and ``PrepareDirs``."""
        return [indevice_fuzzy2.Fuzzy2MonthTask(date=self.date), PrepareDirs(date=self.date)]

    def run(self):
        """Rebuild the storage tables from the dumps, then report errors."""
        # Built with os.path.join, like the directory PrepareDirs creates, so
        # the path does not depend on SOUP_DIR ending with a slash.
        validation_errors = os.path.join(soup_dirs.SOUP_DIR, "validation", "fuzzy_dump_errors")
        with self.yt.Transaction() as tr:
            soup_storage_tables = [t.corresponding_storage_table() for t in self.dump_tables]

            in_table_paths = [t.table_path() for t in self.dump_tables]
            out_table_paths = []
            # Maps a tuplized edge type to the position of its output table.
            out_table_indexes = dict()

            for t in soup_storage_tables:
                out_table_paths.append(t.create(tr))
                out_table_indexes[edges.tuplize(t.edge_type)] = len(out_table_paths) - 1

            # The validation errors table is the last output.
            self.yt.run_map_reduce(
                soup_utils.normalize_edge,
                partial(soup_utils.uniq_dump, out_table_indexes=out_table_indexes),
                in_table_paths,
                out_table_paths + [validation_errors],
                reduce_by=edge_key_columns,
            )

            SoupStorageTable.finalize_all(soup_storage_tables)
            mr.set_attribute(soup_dirs.SOUP_DIR, "fuzzy_generate_date", self.date)

        # NOTE(review): the metric suffix "dump" is the same one AddDumpsToSoup
        # uses for non-active dumps, so both tasks report under one metric
        # name. This looks like a copy-paste; kept unchanged because renaming
        # would affect existing dashboards — confirm.
        report_validation_errors(validation_errors, "dump", self.date)

    def output(self):
        """Return the targets of the storage tables of all dump tables."""
        return self.log_yt_targets_check([t.corresponding_storage_table().as_target() for t in self.dump_tables])
