import logging
import sys

import luigi
import yt.wrapper as yt

from lib.luigi import base_luigi_task
from lib.luigi import yt_luigi
from rtcconf import config
from utils import mr_utils as mr


class GraphHistorySnapshot(base_luigi_task.BaseTask):
    """Copy a fixed set of graph tables for ``date`` into a history folder.

    The destination folder is ``config.YT_OUTPUT_FOLDER + 'history/' + date + '/'``.
    A "daily" set of tables is always scheduled for copying; an additional
    "monthly" set is added when ``date`` ends with ``'01'``.

    Parameters:
        date: date string used as a path component; its last two characters
            are compared with ``'01'`` to detect the first day of a month.
        parent_task: optional upstream task; when falsy the task has no
            requirements.
    """

    date = luigi.Parameter()
    parent_task = luigi.Parameter()

    def __init__(self, date, parent_task):
        """Build the ``source path -> name inside the history folder`` mapping."""
        day_prefix = config.YT_OUTPUT_FOLDER + date

        # Tables copied every day.
        day_tables = {
            day_prefix + '/exact/cluster/vertices': 'vertices',
            day_prefix + '/exact/cluster/edges': 'edges',
            day_prefix + '/exact/cluster/removed_pairs': 'removed_edges',
            day_prefix + '/tmp/cluster/vertices': 'fuzzy_vertices',
            day_prefix + '/tmp/cluster/edges': 'fuzzy_edges',
            config.GRAPH_YT_DICTS_FOLDER + 'yuid_with_all_bad': 'yuid_with_all_bad',
        }

        # One over-limit pairs table per exact yuid pair type.
        overlimit_pairs_f = day_prefix + '/pairs/limit/'
        overlimit_pairs = ['yuids_' + p.id_type for p in config.YUID_PAIR_TYPES_EXACT]
        day_tables.update({overlimit_pairs_f + t: 'overlimit/' + t for t in overlimit_pairs})

        # Organisation pairs tables for the email and login id types.
        org_pairs_f = day_prefix + '/pairs/orgs/'
        org_pairs = ['yuids_' + id_type
                     for id_type in (config.ID_TYPE_EMAIL, config.ID_TYPE_LOGIN)]
        day_tables.update({org_pairs_f + t: 'org_emails_limit/' + t for t in org_pairs})

        # Tables copied only once a month.
        month_tables = {
            config.GRAPH_YT_DICTS_FOLDER + 'yuid_with_all': 'yuid_with_all',
            config.GRAPH_YT_DICTS_FOLDER + 'dev_info_yt': 'dev_info_yt',
            config.GRAPH_YT_DICTS_FOLDER + 'account_manager': 'account_manager',
            config.GRAPH_YT_DICTS_FOLDER + 'puid_yuid_yt': 'puid_yuid_yt',
            config.GRAPH_YT_DICTS_FOLDER + 'ui_yuid_all': 'ui_yuid_all',
            config.INDEVICE_YT_FOLDER + date + '/perfect/devid_yuid_all': 'devid_yuid_all',
        }

        self.copy_today = day_tables
        if date[-2:] == '01':  # every first day of month
            self.copy_today.update(month_tables)

        super(GraphHistorySnapshot, self).__init__(date=date, parent_task=parent_task)

    def _history_folder(self):
        """Return the history folder path for this task's date (with trailing slash)."""
        return config.YT_OUTPUT_FOLDER + 'history/' + self.date + '/'

    def requires(self):
        """Return the parent task when one was given, otherwise no requirements."""
        if self.parent_task:
            return self.parent_task
        return []

    def run(self):
        """Create the history folders and copy every source table that exists.

        Sources that do not exist are skipped with a warning.
        """
        logging.info('Preparing history snapshot for %s', self.date)

        history_f = self._history_folder()
        mr.mkdir(history_f)
        mr.mkdir(history_f + 'overlimit')
        mr.mkdir(history_f + 'org_emails_limit')

        # .items() (rather than .iteritems()) works on both Python 2 and 3.
        for source, relative_name in self.copy_today.items():
            destination = history_f + relative_name

            if yt.exists(source):
                logging.info('Copying %s to %s', source, destination)
                mr.copy(source, destination)
            else:
                logging.warning("%s doesn't exist", source)

    def output(self):
        """Return one ``YtTarget`` per table scheduled for copying.

        NOTE(review): run() skips missing sources, yet every destination is
        listed here; presumably the task is then never seen as complete when
        a source is absent - confirm this is intended.
        """
        history_f = self._history_folder()
        return [yt_luigi.YtTarget(history_f + target_table, allow_empty=True)
                for target_table in self.copy_today.values()]


if __name__ == '__main__':
    # The snapshot date is the only (required) command-line argument; fail
    # with a usage message instead of a bare IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit('usage: %s <date>' % sys.argv[0])

    yt.config.set_proxy(config.MR_SERVER)

    dt = sys.argv[1]

    logging.basicConfig(level='INFO')
    logging.info('Making history snapshot for %s', dt)

    # Standalone run: no parent task, so the snapshot has no requirements.
    luigi.build([GraphHistorySnapshot(dt, None)], workers=5, scheduler_port=8083)
