import luigi
import yt.wrapper as yt

from lib import phone_parser
from lib.luigi import yt_luigi
from rtcconf import config
from utils import mr_utils as mr
from utils import utils
from v2.soup import soup_config
from v2.soup.soup_tables import SoupDumpTable


def change_dump_format_to_soup(rec):
    """Mapper: turn one passport-dump row into a single soup record.

    Reads the ``puid`` and ``id_value`` fields of ``rec`` and yields the
    record built by ``SoupDumpTable.make_rec`` for
    ``soup_config.puid_phhash_passdump``, passing an empty list as the
    last argument.
    """
    puid = rec['puid']
    id_value = rec['id_value']
    soup_type = soup_config.puid_phhash_passdump
    yield SoupDumpTable.make_rec(puid, id_value, soup_type, [])

class ImportPassportDump(yt_luigi.BaseYtTask):
    """Task that deduplicates the external passport phone dumps.

    The dumps are read from ``external_dumps/`` under the 'dict' input
    folder, made distinct into a ``puid_with_<phone>_<passport dump>``
    table under the 'yuid_raw' output folder, and then mapped with
    ``change_dump_format_to_soup`` into the ``puid_phhash_passdump``
    soup table for ``date``.
    """

    date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(ImportPassportDump, self).__init__(*args, **kwargs)
        # Soup table handle; created, finalized and exposed as a target below.
        self.puid_phhash_passdump = SoupDumpTable(soup_config.puid_phhash_passdump, self.date)

    def input_folders(self):
        return {'dict': config.GRAPH_YT_DICTS_FOLDER}

    def output_folders(self):
        return {'yuid_raw': config.GRAPH_YT_DICTS_FOLDER + 'yuid_raw/'}

    def _dump_tables(self):
        """Return the paths of the dump tables this task reads."""
        suffixes = (
            'external_dumps/passport_phone_dump_unknown',
            'external_dumps/passport_phone_phne_dump_01_11_2017',
        )
        return [self.in_f('dict') + suffix for suffix in suffixes]

    def _distinct_table(self):
        """Return the path of the deduplicated puid/phone output table."""
        table_name = 'puid_with_' + config.ID_TYPE_PHONE + '_' + config.ID_SOURCE_TYPE_PASSPORT_DUMP
        return self.out_f('yuid_raw') + table_name

    def requires(self):
        return [yt_luigi.ExternalInput(table) for table in self._dump_tables()]

    def run(self):
        mr.mkdir(self.out_f('yuid_raw'))
        source_tables = self._dump_tables()
        distinct_table = self._distinct_table()
        distinct_columns = ["id_count", "id_prefix", "id_type", "id_value", "key", "puid", "source_type"]

        with yt.Transaction() as tr:
            mr.distinct_by(distinct_columns, source_tables, distinct_table)
            mr.set_generate_date(distinct_table, self.date)

            # The soup table is produced from the deduplicated table, not from the raw dumps.
            soup_table = self.puid_phhash_passdump.create(tr)
            yt.run_map(change_dump_format_to_soup, distinct_table, soup_table)
            self.puid_phhash_passdump.finalize(tr)

    def output(self):
        return [
            yt_luigi.YtDateTarget(self._distinct_table(), self.date),
            self.puid_phhash_passdump.as_target(),
        ]



def map_login_puid(rec):
    """Mapper: emit login/puid pairs from one passport record.

    Reads ``login`` and ``uid`` from ``rec`` (missing keys default to an
    empty string). If either is falsy nothing is emitted; otherwise three
    records are yielded in order:

    1. ``{'id_value': login, 'puid': puid}``
    2. ``{'id_value': puid, 'login': login, '@table_index': 1}``
    3. the record built by ``SoupDumpTable.make_rec`` for
       ``soup_config.puid_login_passport`` with trailing arguments
       ``[]`` and ``2``.

    NOTE(review): '@table_index' and the trailing ``2`` presumably select
    the output table of the map operation -- confirm against the YT
    wrapper and ``SoupDumpTable.make_rec``.
    """
    passport_login = rec.get('login', '')
    passport_uid = rec.get('uid', '')
    if not passport_login or not passport_uid:
        return

    yield {'id_value': passport_login, 'puid': passport_uid}
    yield {'id_value': passport_uid, 'login': passport_login, '@table_index': 1}
    yield SoupDumpTable.make_rec(passport_uid, passport_login, soup_config.puid_login_passport, [], 2)


class ImportPassportStatboxHeavyDict(yt_luigi.BaseYtTask):
    """Task that builds login<->puid dictionaries from the passport log.

    Runs ``map_login_puid`` over the last table of the 'passport' input
    folder, writing ``passport/login_puid`` and ``passport/puid_login``
    under the 'dict' output folder plus the ``puid_login_passport`` soup
    table, then sorts both dictionaries by ``id_value``.
    """

    date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(ImportPassportStatboxHeavyDict, self).__init__(*args, **kwargs)
        # Soup table handle; created, finalized and exposed as a target below.
        self.soup_storage = SoupDumpTable(soup_config.puid_login_passport, self.date)

    def input_folders(self):
        return {'passport': config.STATBOX_SQL_PASSPORT}

    def output_folders(self):
        return {'dict': config.GRAPH_YT_DICTS_FOLDER}

    def requires(self):
        return [yt_luigi.ExternalInputLastDate(config.STATBOX_SQL_PASSPORT)]

    def run(self):
        passport_log = self.in_f('passport')

        dict_folder = self.out_f('dict')
        login_puid_table = dict_folder + 'passport/login_puid'
        puid_login_table = dict_folder + 'passport/puid_login'

        mr.mkdir(dict_folder)
        mr.mkdir(dict_folder + 'passport')
        self.soup_storage.ensure_dir()

        # Collect every puid-login pair from the passport log.
        with yt.Transaction() as tr:
            source_table = mr.get_last_table(passport_log)
            # Destination order is significant: it lines up with the table
            # indices used by the records map_login_puid yields.
            destinations = [
                login_puid_table,
                puid_login_table,
                self.soup_storage.create(tr),
            ]
            yt.run_map(map_login_puid, source_table, destinations, ordered=True)

            self.soup_storage.finalize(tr)

            mr.sort_all([puid_login_table, login_puid_table], sort_by='id_value')

        # Stamp every output target with the generation date.
        for target in self.output():
            mr.set_generate_date(target.table, self.date)

    def output(self):
        dict_folder = self.out_f('dict')
        dictionary_targets = [
            yt_luigi.YtDateTarget(dict_folder + 'passport/puid_login', self.date),
            yt_luigi.YtDateTarget(dict_folder + 'passport/login_puid', self.date),
        ]
        return dictionary_targets + [self.soup_storage.as_target()]



def read_dump(path='phne-phones.txt'):
    """Yield passport-dump rows parsed from a tab-separated phone dump.

    Each line of the file is expected to hold ``puid<TAB>phone``. For
    every such line one dict is yielded with the keys ``id_count``,
    ``id_prefix``, ``id_type``, ``id_value``, ``key``, ``puid`` and
    ``source_type``; ``id_prefix``/``id_value`` come from
    ``utils.prepare_phone_md5(phone)``. Lines that do not split into
    exactly two fields are reported and skipped.

    Progress is printed every 10000 lines, and two counters of phones
    that ``phone_parser.parse_phone_number`` rejected are printed once
    the whole file has been consumed.

    :param path: dump file to read; defaults to the original hard-coded
        ``phne-phones.txt`` in the current working directory.
    """
    errors_7000 = 0
    errors_3700 = 0

    # The context manager closes the dump file as soon as the generator is
    # exhausted or closed, instead of leaving the handle open.
    with open(path) as dump_file:
        for idx, line in enumerate(dump_file):
            # Single-argument print(...) produces the same output under the
            # Python 2 print statement this file otherwise relies on.
            if idx % 10000 == 0:
                print('line ' + str(idx))

            parts = line.strip().split("\t")
            if len(parts) != 2:
                print('missing phone ' + line)
                continue

            puid, phone = parts
            if not phone_parser.parse_phone_number(phone):
                # Two known families of unparsable numbers are only counted;
                # anything else is printed for inspection.
                if '70000' in phone:
                    errors_7000 += 1
                elif phone.startswith(('+37', '+38')):
                    errors_3700 += 1
                else:
                    print("not parsed " + phone)

            # NOTE(review): phones that failed to parse are still hashed and
            # emitted below, as in the original code -- confirm this is intended.
            id_prefix, id_hash = utils.prepare_phone_md5(phone)

            yield {"id_count": 1, "id_prefix": id_prefix, "id_type": config.ID_TYPE_PHONE,
                   "id_value": id_hash, "key": puid, "puid": puid,
                   "source_type": config.ID_SOURCE_TYPE_PASSPORT_DUMP}

    print('errors_7000 ' + str(errors_7000))
    print('errors_3700 ' + str(errors_3700))


if __name__ == '__main__':
    # Ad-hoc entry point: upload the locally parsed phone dump to YT.
    yt.config.set_proxy(config.MR_SERVER)

    # Disabled alternative that ran the import through luigi instead:
    # luigi.run(['ImportPassportDumpDayTask', '--date', dt, '--run-date', dt, '--workers', '10', '--local-scheduler'])

    dump_table = '//home/crypta/production/state/graph/dicts/external_dumps/passport_phone_phne_dump_01_11_2017'
    yt.write_table(dump_table, read_dump())


