import luigi
from crypta.graph.soup.config.python import (
    ID_TYPE as ids,
    LOG_SOURCE as log_source,
    EDGE_TYPE as edges,
    SOURCE_TYPE as source_type,
)
from crypta.graph.v1.python.lib.luigi import base_luigi_task
from crypta.graph.v1.python.lib.luigi import yt_luigi
from crypta.graph.v1.python.rtcconf import config
from crypta.graph.v1.python.utils import mr_utils as mr
from crypta.graph.v1.python.utils import yt_clients
from crypta.graph.v1.python.v2.ids_storage.eternal import UpdateIdStorageApps
from crypta.graph.v1.python.v2.soup import soup_dirs
from crypta.graph.v1.python.v2.soup.graph_soup_cook_yql import cook_soup_yql
from crypta.graph.v1.python.v2.soup.graph_soup_update import AddDayToSoup, AddDumpsToSoup
from crypta.graph.v1.python.v2.soup.soup_tables import SoupStorageTable


class SoupPreprocessing(base_luigi_task.BaseTask, yt_clients.YtClientMixin):
    """Task that runs ``cook_soup_yql`` over the soup directory for one date.

    Outputs are the per-id-type tables under ``<SOUP_DIR>ids/`` and the soup
    storage tables of every edge type whose log source is
    ``SOUP_PREPROCESSING``.
    """

    date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        """Precompute the output table paths and soup table handles."""
        super(SoupPreprocessing, self).__init__(*args, **kwargs)

        self.soup_dir = soup_dirs.SOUP_DIR
        self.soup_ids_out_dir = soup_dirs.SOUP_DIR + "ids/"

        # One table per known id type, named after the id type.
        uniq_ids_tables = []
        for id_type in ids.values():
            uniq_ids_tables.append(self.soup_ids_out_dir + id_type.Name)
        self.out_uniq_ids_tables = uniq_ids_tables

        # Only edge types attributed to this preprocessing step are produced here.
        soup_tables = []
        for edge_type in edges.values():
            if edge_type.LogSource == log_source.SOUP_PREPROCESSING:
                soup_tables.append(SoupStorageTable(edge_type, self.date))
        self.out_soup_tables = soup_tables

    def requires(self):
        """Return the upstream tasks, all parameterized by ``self.date``."""
        day = self.date
        upstream = [AddDayToSoup(day)]
        # Both the inactive and the active dump variants are required, in that order.
        upstream.extend(AddDumpsToSoup(day, active_dump=is_active) for is_active in (False, True))
        upstream.append(UpdateIdStorageApps(day))
        return upstream

    def run(self):
        """Cook the soup from the eligible edge types and finalize the outputs."""
        # The ids output directory is created before the transaction is opened.
        mr.mkdir(self.soup_ids_out_dir)

        # Edge types coming from these log sources are not fed into cooking.
        excluded_log_source_types = {
            source.Type
            for source in (
                log_source.SOUP_PREPROCESSING,
                log_source.FUZZY2_INDEVICE,
                log_source.HOUSEHOLDS,
                log_source.CRYPTA_BAYES,
                log_source.CRYPTA_INDEVICE,
                log_source.HEURISTIC,
            )
        }
        historical_distr_type = source_type.DISTR_HISTORICAL.Type

        input_edge_types = []
        for edge_type in edges.values():
            if edge_type.LogSource.Type in excluded_log_source_types:
                continue
            if edge_type.SourceType.Type == historical_distr_type:
                continue
            input_edge_types.append(edge_type)

        # affects shared directory, thus input should be consistent
        with self.yt.Transaction() as tx:
            cook_soup_yql(
                config.MR_SERVER,
                tx,
                self.date,
                self.soup_dir,
                self.soup_ids_out_dir,
                self.soup_dir,
                input_edge_types,
            )

            SoupStorageTable.finalize_all(self.out_soup_tables)
            for ids_table in self.out_uniq_ids_tables:
                mr.set_generate_date(ids_table, self.date)

    def output(self):
        """Return the date targets of the ids tables followed by the soup table targets."""
        targets = [yt_luigi.YtDateTarget(ids_table, self.date) for ids_table in self.out_uniq_ids_tables]
        targets.extend(soup_table.as_target() for soup_table in self.out_soup_tables)
        return self.log_yt_targets_check(targets)
