# coding=utf-8
from functools import partial

import luigi
import yt.wrapper as yt

from lib.luigi import yt_luigi
from partners_utils import map_partners_audience_dumps
from rtcconf import config
from utils import mr_utils as mr
from v2.soup import soup_config
from v2.soup.soup_tables import SoupDumpTable

# Direct-run overrides.
# SURROGATE_OUTPUT_PATH is set only when this file is executed as a script; when the
# module is imported it stays None and the regular config folders are used.
SURROGATE_OUTPUT_PATH = (
    '//home/crypta/team/artembelov/partners_test' if __name__ == '__main__' else None
)

# TODO: fix cookies
# Disabled for both the direct-run and the imported mode.
RUN_TRACKING_COOKIES_IMPORT = False

if SURROGATE_OUTPUT_PATH:
    # On direct run the partners input folder is overridden with this path.
    config.GRAPH_PARTNERS_DATA = '//home/x-products/production/partners_data/audience'


def get_base_output_path(path=''):
    """Join ``path`` onto the base output folder.

    The base folder is ``SURROGATE_OUTPUT_PATH`` when it is set (direct run only),
    otherwise ``config.GRAPH_YT_DICTS_FOLDER``.

    :param path: sub-path to append; leading slashes are stripped.
    :return: ``<base>/<path>``. With the default empty ``path`` the result ends
        with a trailing ``/``.
    """
    # The surrogate path works only on direct run; `or` falls back to the config folder.
    root = SURROGATE_OUTPUT_PATH or config.GRAPH_YT_DICTS_FOLDER
    return root.rstrip('/') + '/' + path.lstrip('/')


def get_date_output_path(date, path=None):
    """Build the output path for a given date.

    The root is the base output path when ``SURROGATE_OUTPUT_PATH`` is set,
    otherwise ``config.YT_OUTPUT_FOLDER``.

    :param date: date string used as the folder name under the root.
    :param path: optional sub-path inside the date folder; leading slashes are
        stripped. A falsy value (``None`` or ``''``) yields the date folder itself.
    :return: ``<root>/<date>`` or ``<root>/<date>/<path>``.
    """
    root = get_base_output_path() if SURROGATE_OUTPUT_PATH else config.YT_OUTPUT_FOLDER

    parts = [root.rstrip('/'), date]
    if path:
        parts.append(path.lstrip('/'))
    return '/'.join(parts)


def all_partners_sources():
    """Collect the partners audience tables that should be imported.

    Lists ``config.GRAPH_PARTNERS_DATA``, skips nodes whose name ends with
    ``.updates`` and tables with zero rows, then looks up each remaining table
    name (lower-cased) in ``soup_config.AUDIENCE_TABLE_TO_SOURCE_TYPE``.

    :return: dict mapping full table path -> source type. Tables that are absent
        from the mapping, or mapped to a falsy value, are reported via ``print``
        and left out.
    """
    partners_root = config.GRAPH_PARTNERS_DATA.rstrip('/')

    # Audience data. The row count is requested only for names that are not ".updates".
    partners_tables = [
        name for name in yt.list(partners_root)
        if not name.endswith(".updates") and yt.row_count(partners_root + "/" + name) != 0
    ]

    known_source_types = soup_config.AUDIENCE_TABLE_TO_SOURCE_TYPE
    input_config = {}
    for table_name in partners_tables:
        type_key = table_name.lower()

        if type_key not in known_source_types:
            print(
                "WARNING: Invalid source type: \"{}\". This kind of records will be ignored."
                "".format(table_name)
            )
            continue

        source_type = known_source_types[type_key]
        if not source_type:
            print(
                "NOTICE: Skipping \"{}\" partners source type. It's marked as None in "
                "AUDIENCE_TABLE_TO_SOURCE_TYPE that mean it is not supposed to ne included in "
                "processing by design because it's quality metrics related or internal data."
                "".format(table_name)
            )
            continue

        input_config[partners_root + '/' + table_name] = source_type

    return input_config


def perform_import(input_config, invalid_table, soup_tables):
    """Run the partners audience map operation and finalize the soup tables.

    :param input_config: dict mapping source table path -> source type.
    :param invalid_table: path of the table passed as the last map output.
    :param soup_tables: soup table objects; each must provide ``edge_type`` and
        ``create(transaction)``.

    Source tables are passed to the map in sorted order, and the mapper receives
    a dict from that input position to the source type (presumably matched
    against the row's table index inside the mapper -- confirm in
    ``map_partners_audience_dumps``). It also receives a dict from edge type to
    the position of the corresponding soup table in the output list.
    """
    ordered_sources = sorted(input_config.items())
    source_tables_paths = [table_path for table_path, _ in ordered_sources]
    partners_sources_table_indexes = dict(
        (position, source_type)
        for position, (_, source_type) in enumerate(ordered_sources)
    )

    soup_tables_index = dict(
        (table.edge_type, position) for position, table in enumerate(soup_tables)
    )

    mapper = partial(
        map_partners_audience_dumps,
        in_partner_source_by_index=partners_sources_table_indexes,
        out_soup_edge_types_index=soup_tables_index,
    )

    with yt.Transaction() as tr:
        # Soup outputs come first; the invalid table is always the last output.
        output_paths = [table.create(tr) for table in soup_tables]
        output_paths.append(invalid_table)

        yt.run_map(
            mapper,
            source_tables_paths,
            output_paths,
            spec=mr.DATA_SIZE_PER_JOB_20MB_SPEC
        )

        mr.merge(invalid_table)

        SoupDumpTable.finalize_all(soup_tables, tr)


class ImportPartnersDump(luigi.Task):
    """Luigi task that imports partners audience dumps into soup dump tables.

    Inputs are the tables returned by ``all_partners_sources()``; outputs are one
    ``SoupDumpTable`` per edge type from ``soup_config.partners_edges_types`` and
    ``soup_config.partners_hash_edges_types``, built for ``date``.
    """

    # Date string; used to build the soup tables and the per-date output folder.
    date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(ImportPartnersDump, self).__init__(*args, **kwargs)
        edge_types = soup_config.partners_edges_types + soup_config.partners_hash_edges_types
        self.soup_tables = [SoupDumpTable(et, self.date) for et in edge_types]

    @property
    def resources(self):
        """Declare the dicts folder (without trailing slash) as a luigi resource."""
        return {config.GRAPH_YT_DICTS_FOLDER.rstrip('/'): True}

    def requires(self):
        """Require every partners source table as an external input."""
        return [yt_luigi.ExternalInput(path) for path in all_partners_sources()]

    def output(self):
        """Return one target per soup table."""
        return [t.as_target() for t in self.soup_tables]

    def run(self):
        """Create the output folders and run the import for ``self.date``."""
        # The invalid-records table lives inside the per-date folder, so that folder
        # has to be created for this task's date. Previously an empty date was passed,
        # which only produced "<output folder>/" and never the date folder itself.
        # NOTE(review): assumes mr.mkdir creates missing parent folders -- confirm.
        mr.mkdir(get_date_output_path(self.date))
        mr.mkdir(get_base_output_path(''))

        perform_import(
            input_config=all_partners_sources(),
            invalid_table=get_date_output_path(self.date, 'partners_imported.invalid'),
            soup_tables=self.soup_tables
        )


if __name__ == '__main__':
    # Manual debugging entry point: runs the import task for a fixed date and then
    # feeds one hand-written record through the mapper locally.
    yt.config.set_proxy('hahn')
    yt.config["tabular_data_format"] = yt.YsonFormat(process_table_index=True)

    # mr.mkdir(SURROGATE_OUTPUT_PATH + '/2017-12-11')

    luigi.build([ImportPartnersDump('2017-12-11')], workers=1, scheduler_port=8083)

    # Build the edge type -> output index mapping the same way perform_import does.
    debug_edge_types = soup_config.partners_edges_types + soup_config.partners_hash_edges_types
    debug_soup_tables = [SoupDumpTable(edge_type, '') for edge_type in debug_edge_types]
    debug_soup_index = dict(
        (table.edge_type, position) for position, table in enumerate(debug_soup_tables)
    )

    debug_mapper = partial(
        map_partners_audience_dumps,
        in_partner_source_by_index={0: 'partner_alfabank'},
        out_soup_edge_types_index=debug_soup_index,
    )

    # Sample input row; '@table_index' 0 matches the single source declared above.
    sample_record = {
        "external_id": "9524680133",
        "impulses": [
            {"impulse": "CASH_LOAN"},
            {"impulse": "IPOTEKA"},
            {"impulse": "LOAN_FOR_CAR"},
            {"impulse": "TRAVEL_ABROAD"},
            {"impulse": "ENTERTAINMENT"},
            {"impulse": "STOCK_MARKET"},
            {"impulse": "CREDIT_CARD"},
            {"impulse": "BANK_DEPOSIT"},
            {"impulse": "BUY_ELECTRONICS"},
            {"impulse": "BUY_DIY_FURNITURE"},
            {"impulse": "BUY_CLOTHES"},
            {"impulse": "BUY_SPORT_GOODS"},
            {"impulse": "TRAVEL_RU"},
        ],
        "phones": [{"id_value": "+79265366687"}],
        "emails": None,
        "cookies": None,
        "birth_date": None,
        "gender": None,
        "ban_info": None,
        "add_date": "2017-08-22T11:00:00.00Z",
        "yandex_can_use": None,
        "other_fields": [
            {"field_value": True, "field_name": "filename:yandex_sample_21092017.csv"},
        ],
        "impulse_fields": None,
        '@table_index': 0,
    }

    print(list(debug_mapper(sample_record)))
