import argparse
import logging
import sys


def setup_logging():
    """Attach a DEBUG-level stderr handler to the root logger.

    The root logger's own level is lowered to DEBUG as well, so every record
    reaches the handler. Records are rendered as
    ``<asctime> - <levelname> - <message>``.
    """
    stderr_handler = logging.StreamHandler(sys.stderr)
    stderr_handler.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    )
    stderr_handler.setLevel(logging.DEBUG)

    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    root_logger.addHandler(stderr_handler)


def make_parser():
    """Build the command-line parser for the phone-join tool.

    Arguments defined:
        PASSPORT_DUMPS: one or more paths to passport dataset files.
        -F / --delimiter: field delimiter of the incoming file (required).
        -k / --key: integer index of the phone field in the incoming
            file (required).

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(description='Join uids by phone between datasets')
    parser.add_argument('passport_dumps', nargs='+', metavar='PASSPORT_DUMPS', type=str,
                        help='Path to passport dataset')
    # Both options are consumed unconditionally by the join step
    # (``delimiter.join(...)`` and ``bits[key_position]``), so a missing value
    # would otherwise surface as an AttributeError/TypeError traceback on the
    # first input row. Marking them required yields a usage error instead.
    parser.add_argument('-F', '--delimiter', required=True, type=str,
                        help='Field delimiter in incoming file')
    parser.add_argument('-k', '--key', required=True, type=int,
                        help='Phone key id in incoming file')
    return parser


def read_passport_dataset(filenames):
    """Build a phone -> uids index from passport dump files.

    Every non-blank line is expected to have the form ``uid<TAB>phone``.
    Phones are lower-cased before being used as dictionary keys. Lines that
    do not contain both a uid and a phone are skipped and reported in a
    single warning, so they never end up indexed under an empty phone.

    Args:
        filenames: iterable of paths to passport dump files.

    Returns:
        dict: lower-cased phone string -> set of uid strings.

    Raises:
        OSError: if one of the files cannot be opened.
    """
    log = logging.getLogger()
    dataset = {}
    values = 0
    skipped = 0
    log.info('Reading passport dataset ...')
    for filename in filenames:
        log.info('Reading file %s ...', filename)
        with open(filename, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                uid, sep, phone = line.partition('\t')
                if not sep:
                    # After strip() a line can only lack the tab when the uid
                    # or the phone is missing ("uid", "uid\t", "\tphone").
                    # Indexing it would file the record under the empty phone
                    # and falsely join it to every incoming row whose key
                    # field is empty.
                    skipped += 1
                    continue
                values += 1
                dataset.setdefault(phone.lower(), set()).add(uid)
        log.info('Finished reading file %s ...', filename)
    if skipped:
        log.warning('Skipped %s malformed passport lines (missing uid or phone)', skipped)
    log.info('Finished reading passport datasets, %s keys, %s values read',
             len(dataset), values)
    return dataset


def join_data(incoming_stream, passport_dataset, delimiter, key_position):
    """Append the matching uids to every row of the incoming stream.

    Each non-blank line is split on ``delimiter``; the field at
    ``key_position`` is trimmed, lower-cased and looked up in
    ``passport_dataset``. The matching uids, sorted and comma-separated, are
    appended as one extra field (empty when nothing matches). A summary of
    the match counts is logged once the stream is exhausted.

    Args:
        incoming_stream: iterable of text lines.
        passport_dataset: mapping of lower-cased phone -> collection of uids.
        delimiter: field separator string, used for both splitting and
            re-joining.
        key_position: index of the phone field within a split row.

    Yields:
        str: the original row with the uid field appended, without a line
        terminator.

    Raises:
        IndexError: if a row has no field at ``key_position``.
    """
    keys_matches = 0
    uids_joined = 0
    for raw_line in incoming_stream:
        # Remove only the line terminator. A full strip() would also eat
        # leading/trailing delimiters when the delimiter is whitespace (e.g.
        # a tab), shifting field indices and dropping empty edge columns.
        line = raw_line.rstrip('\r\n')
        if not line.strip():
            continue
        bits = line.split(delimiter)
        # Trim the key on its own so padding around the phone field does not
        # prevent a match, while the emitted row keeps its original fields.
        key = bits[key_position].strip().lower()
        uids = sorted(passport_dataset.get(key, ()))
        if uids:
            uids_joined += len(uids)
            keys_matches += 1
        bits.append(','.join(uids))
        yield delimiter.join(bits)
    logging.getLogger().info('Matched %s uids for %s keys', uids_joined, keys_matches)


def run_app():
    """Entry point: join rows read from stdin against the passport dumps.

    Configures logging, parses the command line, loads the passport dataset
    from the given dump files and prints every joined row to stdout.
    """
    setup_logging()
    options = make_parser().parse_args()
    phone_index = read_passport_dataset(options.passport_dumps)
    joined_rows = join_data(sys.stdin, phone_index, options.delimiter, options.key)
    for row in joined_rows:
        print(row)
