import time
import luigi

import yt.wrapper as yt
from yt.wrapper import ypath_join

from crypta.graph.soupy_indevice.lib import config


class BuildIndeviceParams(object):
    """Plain holder of the settings read by the in-device build tasks.

    Fields that are not assigned on an instance fall back to the class-level
    defaults declared below.
    """

    # Soup tables handed to preprocess_soup by PreprocessSoup.
    full_soup_tables = None

    # Passed to preprocess_soup as its second argument.
    idstorage_root = None

    # Read by FilterInsignificant. Declared here so that a params object
    # which never sets it yields None instead of raising AttributeError.
    # NOTE(review): confirm filter_insignificant_edges accepts None here.
    prev_indevice_table = None

    # Destination paths reported by FinalizeIndevice.output_tables().
    output_table = None
    output_table_comp_sizes = None
    output_bad_edges = None
    output_bad_ids = None

    # Directory under which the intermediate task tables are placed.
    workdir = None
    # When true, CombinedPreprocess additionally requires CollapseUuids and
    # exposes its 'pairs' / 'uuid_apps' tables.
    collapse_uuids = False

    def __str__(self):
        """Return ``str()`` of the attributes set on this instance.

        Only instance-level attributes are included; class-level defaults
        that were never overridden do not appear in the result.
        """
        return str(self.__dict__)


class YtTableOutput(luigi.Target):
    """Luigi target backed by a YT path."""

    def __init__(self, path):
        # YT path whose presence is checked by exists().
        self.path = path

    def exists(self):
        """Return the result of ``YtClient.exists`` for ``self.path``.

        A new client is built from ``config.YT_PROXY`` / ``config.YT_TOKEN``
        on every call.
        """
        client = yt.YtClient(proxy=config.YT_PROXY, token=config.YT_TOKEN)
        path_is_present = client.exists(self.path)
        return path_is_present


class PrepareWorkdir(luigi.Task):
    """Create the working directory in YT and set an expiration time on it."""

    workdir = luigi.Parameter()

    def output(self):
        """Target that exists once the working directory path exists."""
        return YtTableOutput(self.workdir)

    def run(self):
        """Create ``workdir`` recursively and set its ``expiration_time``.

        The attribute value is the current Unix time plus three days,
        multiplied by 1000 (presumably milliseconds -- confirm against the
        YT documentation).
        """
        lifetime_seconds = 3 * 24 * 3600

        client = yt.YtClient(proxy=config.YT_PROXY, token=config.YT_TOKEN)
        expires_at = int(time.time()) + lifetime_seconds

        client.mkdir(self.workdir, recursive=True)
        client.set_attribute(self.workdir, 'expiration_time', expires_at * 1000)


class BaseTask(luigi.Task):
    """Shared scaffolding for the pipeline tasks.

    Subclasses name their tables in ``output_tables()`` and do their work in
    ``do_run()``; ``run()`` invokes ``do_run()`` inside a YT transaction.
    """

    params = luigi.Parameter()

    def requires(self):
        """No upstream tasks by default."""
        return list()

    def output(self):
        """Wrap every path from ``output_tables()`` in a ``YtTableOutput``."""
        targets = []
        for table_path in self.output_tables().values():
            targets.append(YtTableOutput(table_path))
        return targets

    def run(self):
        """Build a YT client, open a transaction and delegate to ``do_run``."""
        client = yt.YtClient(proxy=config.YT_PROXY, token=config.YT_TOKEN)
        with client.Transaction() as transaction:
            self.do_run(client, transaction)

    def do_run(self, ytclient, tx):
        """Hook for the actual work; the base implementation does nothing."""
        return None

    def output_tables(self):
        """Mapping of logical table name to YT path; empty by default."""
        return dict()


class PreprocessSoup(BaseTask):
    """Call ``preprocess_soup`` on the configured soup tables."""

    def requires(self):
        """Needs the working directory to be prepared first."""
        return [PrepareWorkdir(workdir=self.params.workdir)]

    def do_run(self, ytclient, tx):
        """Pass the soup tables, idstorage root and both outputs to the lib."""
        from crypta.graph.soupy_indevice.lib.preprocess import preprocess_soup

        destinations = self.output_tables()
        preprocess_soup(
            self.params.full_soup_tables,
            self.params.idstorage_root,
            destinations['pairs'],
            destinations['bad_ids'],
            tx
        )

    def output_tables(self):
        """Paths of the 'pairs' and 'bad_ids' tables inside the workdir."""
        workdir = self.params.workdir
        pairs_path = yt.ypath_join(workdir, 'preprocessed_pairs')
        bad_ids_path = yt.ypath_join(workdir, 'bad_ids_emulators')
        return {'pairs': pairs_path, 'bad_ids': bad_ids_path}


class CollapseUuids(BaseTask):
    """Call ``collapse_app_uuids`` on the pairs produced by PreprocessSoup."""

    def requires(self):
        """Depends on the preprocessed soup."""
        upstream = PreprocessSoup(self.params)
        return upstream

    def do_run(self, ytclient, tx):
        """Feed the preprocessed pairs in; write 'uuid_apps' and 'pairs'."""
        from crypta.graph.soupy_indevice.lib.preprocess import collapse_app_uuids

        source_pairs = PreprocessSoup(self.params).output_tables()['pairs']
        destinations = self.output_tables()
        collapse_app_uuids(
            source_pairs,
            destinations['uuid_apps'],
            destinations['pairs'],
            tx
        )

    def output_tables(self):
        """Paths of the 'uuid_apps' and 'pairs' tables inside the workdir."""
        workdir = self.params.workdir
        uuid_apps_path = yt.ypath_join(workdir, 'uuid_apps')
        pairs_path = yt.ypath_join(workdir, 'preprocessed_pairs_collapse_uuids')
        return {'uuid_apps': uuid_apps_path, 'pairs': pairs_path}


class CombinedPreprocess(BaseTask):
    """Facade over PreprocessSoup and, optionally, CollapseUuids.

    It does no work of its own; it only re-exports the tables of the
    preprocessing tasks, choosing the 'pairs' table according to
    ``params.collapse_uuids``.
    """

    def requires(self):
        """PreprocessSoup always; CollapseUuids when ``collapse_uuids`` is set."""
        upstream = [PreprocessSoup(self.params)]
        if self.params.collapse_uuids:
            upstream += [CollapseUuids(self.params)]
        return upstream

    def do_run(self, ytclient, tx):
        """Nothing to compute: all tables come from the required tasks."""
        return None

    def output_tables(self):
        """Return 'bad_ids' and 'pairs', plus 'uuid_apps' when collapsing.

        'bad_ids' always comes from PreprocessSoup. 'pairs' comes from
        CollapseUuids when ``collapse_uuids`` is set, otherwise from
        PreprocessSoup.
        """
        preprocessed = PreprocessSoup(self.params).output_tables()
        exported = {'bad_ids': preprocessed['bad_ids']}

        if not self.params.collapse_uuids:
            exported['pairs'] = preprocessed['pairs']
            return exported

        collapsed = CollapseUuids(self.params).output_tables()
        exported['pairs'] = collapsed['pairs']
        exported['uuid_apps'] = collapsed['uuid_apps']
        return exported


class CalcLinkCounts(BaseTask):
    """Call ``calculate_link_counts`` on the combined preprocessed pairs."""

    def requires(self):
        """Depends on the combined preprocessing step."""
        upstream = CombinedPreprocess(self.params)
        return upstream

    def do_run(self, ytclient, tx):
        """Pass the pairs table and the 'link_counts' destination to the lib."""
        from crypta.graph.soupy_indevice.lib import calculate_link_counts

        pairs_table = CombinedPreprocess(self.params).output_tables()['pairs']
        link_counts_table = self.output_tables()['link_counts']
        calculate_link_counts(pairs_table, link_counts_table, tx)

    def output_tables(self):
        """Path of the 'link_counts' table inside the workdir."""
        link_counts_path = yt.ypath_join(self.params.workdir, 'link_counts')
        return {'link_counts': link_counts_path}


class CalcLinkCountPercentiles(BaseTask):
    """Call ``calculate_link_count_percentiles`` on the link counts table."""

    def requires(self):
        """Depends on the link counts."""
        upstream = CalcLinkCounts(self.params)
        return upstream

    def do_run(self, ytclient, tx):
        """Pass the link counts table and the percentiles destination on."""
        from crypta.graph.soupy_indevice.lib import calculate_link_count_percentiles

        link_counts_table = CalcLinkCounts(self.params).output_tables()['link_counts']
        percentiles_table = self.output_tables()['link_count_percentiles']
        calculate_link_count_percentiles(link_counts_table, percentiles_table, tx)

    def output_tables(self):
        """Path of the 'link_count_percentiles' table inside the workdir."""
        percentiles_path = yt.ypath_join(self.params.workdir, 'link_count_percentiles')
        return {'link_count_percentiles': percentiles_path}


class FilterByIdInfo(BaseTask):
    """Call ``filter_by_id_info`` on the combined preprocessed pairs."""

    def requires(self):
        """Depends on the combined preprocessing step."""
        upstream = CombinedPreprocess(self.params)
        return upstream

    def do_run(self, ytclient, tx):
        """Pass the pairs table plus the 'bad_ids' / 'bad_edges' destinations."""
        from crypta.graph.soupy_indevice.lib import filter_by_id_info

        pairs_table = CombinedPreprocess(self.params).output_tables()['pairs']
        destinations = self.output_tables()
        filter_by_id_info(
            pairs_table,
            destinations['bad_ids'],
            destinations['bad_edges'],
            tx
        )

    def output_tables(self):
        """Paths of the 'bad_ids' and 'bad_edges' tables inside the workdir."""
        workdir = self.params.workdir
        bad_ids_path = yt.ypath_join(workdir, 'filter_idinfo_ids')
        bad_edges_path = yt.ypath_join(workdir, 'filter_idinfo_edges')
        return {'bad_ids': bad_ids_path, 'bad_edges': bad_edges_path}


class FilterYpCookie(BaseTask):
    """Call ``filter_yp_cookie`` on the pairs and link counts tables."""

    def requires(self):
        """Depends on the combined preprocessing step and the link counts."""
        preprocess_task = CombinedPreprocess(self.params)
        link_counts_task = CalcLinkCounts(self.params)
        return [preprocess_task, link_counts_task]

    def do_run(self, ytclient, tx):
        """Pass pairs, link counts and the 'bad_edges' destination to the lib."""
        from crypta.graph.soupy_indevice.lib import filter_yp_cookie

        pairs_table = CombinedPreprocess(self.params).output_tables()['pairs']
        link_counts_table = CalcLinkCounts(self.params).output_tables()['link_counts']
        bad_edges_table = self.output_tables()['bad_edges']

        filter_yp_cookie(pairs_table, link_counts_table, bad_edges_table, tx)

    def output_tables(self):
        """Path of the 'bad_edges' table inside the workdir."""
        bad_edges_path = yt.ypath_join(self.params.workdir, 'filter_yp_cookie')
        return {'bad_edges': bad_edges_path}


class FilterSingleDayCookies(BaseTask):
    """Call ``filter_single_day_cookies`` on the combined preprocessed pairs."""

    def requires(self):
        """Depends on the combined preprocessing step."""
        upstream = CombinedPreprocess(self.params)
        return upstream

    def do_run(self, ytclient, tx):
        """Pass the pairs table and the 'bad_edges' destination to the lib."""
        from crypta.graph.soupy_indevice.lib import filter_single_day_cookies

        pairs_table = CombinedPreprocess(self.params).output_tables()['pairs']
        bad_edges_table = self.output_tables()['bad_edges']

        filter_single_day_cookies(pairs_table, bad_edges_table, tx)

    def output_tables(self):
        """Path of the 'bad_edges' table inside the workdir."""
        bad_edges_path = yt.ypath_join(self.params.workdir, 'filter_single_day_cookies')
        return {'bad_edges': bad_edges_path}


class FilterByPercentiles(BaseTask):
    """Call ``filter_by_percentiles`` on pairs, link counts and percentiles."""

    def requires(self):
        """Depends on preprocessing, link counts and their percentiles."""
        upstream_classes = (CombinedPreprocess, CalcLinkCounts, CalcLinkCountPercentiles)
        return [task_cls(self.params) for task_cls in upstream_classes]

    def do_run(self, ytclient, tx):
        """Pass the three input tables and both destinations to the lib."""
        from crypta.graph.soupy_indevice.lib import filter_by_percentiles

        percentiles_table = CalcLinkCountPercentiles(self.params).output_tables()['link_count_percentiles']
        pairs_table = CombinedPreprocess(self.params).output_tables()['pairs']
        link_counts_table = CalcLinkCounts(self.params).output_tables()['link_counts']
        destinations = self.output_tables()

        filter_by_percentiles(
            pairs_table,
            link_counts_table,
            percentiles_table,
            destinations['bad_edges'],
            destinations['bad_edges_extra'],
            tx
        )

    def output_tables(self):
        """Paths of the 'bad_edges' and 'bad_edges_extra' tables in the workdir."""
        workdir = self.params.workdir
        bad_edges_path = yt.ypath_join(workdir, 'filter_perc_edges')
        bad_edges_extra_path = yt.ypath_join(workdir, 'filter_perc_edges_extra')
        return {'bad_edges': bad_edges_path, 'bad_edges_extra': bad_edges_extra_path}


class FilterInsignificant(BaseTask):
    """Call ``filter_insignificant_edges`` with the collected bad ids/edges.

    NOTE(review): ``do_run`` reads ``params.prev_indevice_table``; make sure
    the params object passed to this task provides that attribute.
    """

    def requires(self):
        """Depends on the percentile filter, the id-info filter and preprocessing."""
        upstream_classes = (FilterByPercentiles, FilterByIdInfo, CombinedPreprocess)
        return [task_cls(self.params) for task_cls in upstream_classes]

    def do_run(self, ytclient, tx):
        """Gather bad ids and bad edges from upstream and hand them to the lib."""
        from crypta.graph.soupy_indevice.lib import filter_insignificant_edges

        preprocess_tables = CombinedPreprocess(self.params).output_tables()
        id_info_tables = FilterByIdInfo(self.params).output_tables()
        percentile_tables = FilterByPercentiles(self.params).output_tables()

        # Bad ids: preprocessing first, then the id-info filter.
        bad_ids = [preprocess_tables['bad_ids'], id_info_tables['bad_ids']]
        # Bad edges: id-info filter first, then the percentile filter.
        bad_edges = [id_info_tables['bad_edges'], percentile_tables['bad_edges']]

        filter_insignificant_edges(
            self.params.prev_indevice_table,
            preprocess_tables['pairs'],
            bad_ids,
            bad_edges,
            self.output_tables()['edges'],
            tx
        )

    def output_tables(self):
        """Path of the 'edges' table inside the workdir."""
        edges_path = yt.ypath_join(self.params.workdir, 'insignificant_edges')
        return {'edges': edges_path}


class PrepareForCc(BaseTask):
    """Call ``prepare_for_cc`` with the pairs and all filter outputs."""

    # Tasks whose 'bad_edges' tables are passed to prepare_for_cc.
    edge_filters = [FilterByIdInfo, FilterYpCookie, FilterSingleDayCookies, FilterByPercentiles]
    # Tasks whose 'bad_ids' tables are passed to prepare_for_cc.
    id_filters = [CombinedPreprocess, FilterByIdInfo]

    def requires(self):
        """One instance of every edge filter followed by every id filter."""
        upstream = []
        for task_cls in self.edge_filters + self.id_filters:
            upstream.append(task_cls(self.params))
        return upstream

    def do_run(self, ytclient, tx):
        """Collect the filter tables and write the 'soup_full' table."""
        from crypta.graph.soupy_indevice.lib import prepare_for_cc

        bad_ids = []
        for task_cls in self.id_filters:
            bad_ids.append(task_cls(self.params).output_tables()['bad_ids'])

        bad_edges = []
        for task_cls in self.edge_filters:
            bad_edges.append(task_cls(self.params).output_tables()['bad_edges'])

        pairs_table = CombinedPreprocess(self.params).output_tables()['pairs']
        soup_full_table = self.output_tables()['soup_full']

        prepare_for_cc(
            pairs_table,
            bad_ids,
            bad_edges,
            None,  # no table of insignificant edges is supplied
            soup_full_table,
            tx
        )

    def output_tables(self):
        """Path of the 'soup_full' table inside the workdir."""
        soup_full_path = ypath_join(self.params.workdir, 'soup_full')
        return {'soup_full': soup_full_path}


class FindCc(BaseTask):
    """Call ``find_connected_components`` on the 'soup_full' table."""

    def requires(self):
        """Depends on PrepareForCc."""
        upstream = PrepareForCc(self.params)
        return upstream

    def do_run(self, ytclient, tx):
        """Pass 'soup_full' and the 'soup_cc_pre' destination to the lib."""
        from crypta.graph.soupy_indevice.lib import find_connected_components

        soup_full_table = PrepareForCc(self.params).output_tables()['soup_full']
        soup_cc_pre_table = self.output_tables()['soup_cc_pre']
        find_connected_components(soup_full_table, soup_cc_pre_table, tx)

    def output_tables(self):
        """Path of the 'soup_cc_pre' table inside the workdir."""
        soup_cc_pre_path = ypath_join(self.params.workdir, 'soup_cc_pre')
        return {'soup_cc_pre': soup_cc_pre_path}


class AdditionalSplit(BaseTask):
    """Call ``additional_split`` on the connected-components result."""

    def requires(self):
        """Depends on PrepareForCc, FindCc and the percentile filter."""
        upstream_classes = (PrepareForCc, FindCc, FilterByPercentiles)
        return [task_cls(self.params) for task_cls in upstream_classes]

    def do_run(self, ytclient, tx):
        """Pass the inputs, ``HUGE_INDEVICE`` and both destinations to the lib.

        Unlike the other tasks, the YT client itself is also handed over.
        """
        from crypta.graph.soupy_indevice.lib import additional_split, HUGE_INDEVICE

        soup_full_table = PrepareForCc(self.params).output_tables()['soup_full']
        soup_cc_pre_table = FindCc(self.params).output_tables()['soup_cc_pre']
        extra_bad_edges_table = FilterByPercentiles(self.params).output_tables()['bad_edges_extra']
        destinations = self.output_tables()

        additional_split(
            soup_full_table,
            soup_cc_pre_table,
            extra_bad_edges_table,
            HUGE_INDEVICE,
            destinations['soup_cc'],
            destinations['bad_edges'],
            ytclient,
            tx
        )

    def output_tables(self):
        """Paths of the 'soup_cc' and 'bad_edges' tables inside the workdir."""
        workdir = self.params.workdir
        soup_cc_path = ypath_join(workdir, 'soup_cc')
        bad_edges_path = ypath_join(workdir, 'additional_split_bad_edges')
        return {'soup_cc': soup_cc_path, 'bad_edges': bad_edges_path}


class FinalizeIndevice(BaseTask):
    """Call ``finalize_indevice`` to write the tables named in the params."""

    def requires(self):
        """Depends on AdditionalSplit and PrepareForCc."""
        split_task = AdditionalSplit(self.params)
        prepare_task = PrepareForCc(self.params)
        return [split_task, prepare_task]

    def do_run(self, ytclient, tx):
        """Collect upstream tables and hand them, with the outputs, to the lib.

        The bad ids / bad edges lists reuse the filter lists declared on
        PrepareForCc; the AdditionalSplit 'bad_edges' table is appended to
        the edges. The 'uuid_apps' argument is None when CombinedPreprocess
        does not expose such a table.
        """
        from crypta.graph.soupy_indevice.lib import finalize_indevice

        prepare_task = PrepareForCc(self.params)

        bad_ids = []
        for task_cls in prepare_task.id_filters:
            bad_ids.append(task_cls(self.params).output_tables()['bad_ids'])

        bad_edges = []
        for task_cls in prepare_task.edge_filters:
            bad_edges.append(task_cls(self.params).output_tables()['bad_edges'])
        bad_edges.append(AdditionalSplit(self.params).output_tables()['bad_edges'])

        soup_full_table = PrepareForCc(self.params).output_tables()['soup_full']
        soup_cc_table = AdditionalSplit(self.params).output_tables()['soup_cc']
        uuid_apps_table = CombinedPreprocess(self.params).output_tables().get('uuid_apps')
        destinations = self.output_tables()

        finalize_indevice(
            soup_full_table,
            soup_cc_table,
            uuid_apps_table,
            bad_ids,
            bad_edges,
            destinations['result'],
            destinations['sizes'],
            destinations['bad_ids'],
            destinations['bad_edges'],
            tx
        )

    def output_tables(self):
        """Output paths taken directly from the params object."""
        settings = self.params
        return {
            'result': settings.output_table,
            'sizes': settings.output_table_comp_sizes,
            'bad_ids': settings.output_bad_ids,
            'bad_edges': settings.output_bad_edges,
        }
