# -*- coding: utf-8 -*-
import csv
import argparse

from datacloud.dev_utils.time.patterns import FMT_DATE
from datacloud.input_pipeline.input_checker.constants import delimiter_vars
from datacloud.input_pipeline.normalizer.xprod_csv_normalizer import (
    XProdCsvNormalizer, normalize_empty_target,
    normalize_multi_phone, normalize_multi_email, normalize_date,
    hard_kill_bad_emails, try_to_int, hard_kill_bad_phones, normalize_md5,
    normalize_multi_hash, normalize_gender, normalize_multi_yuid
)


def get_args():
    """Parse the command-line options of the CSV normalization script.

    Returns a tuple, in this order: the delimiter looked up in
    ``delimiter_vars`` by the chosen name, input file, use-list flag,
    row limit, output file, summary interval, input date format,
    hard-kill flag, max duplicate messages, max empty-id messages.
    """
    # (flags, keyword options) for every supported argument, in help order.
    option_specs = (
        (('-d', '--delimiter'), {
            'default': 'tab',
            'choices': delimiter_vars.keys(),
            'help': 'delimiter of csv file',
        }),
        (('-f', '--file'), {
            'help': 'csv file to parse',
        }),
        (('-ul', '--use-list'), {
            'action': 'store_true',
            'help': 'writeing to file after all rows are normalized',
        }),
        (('-l', '--limit'), {
            'type': int,
            'help': 'limit number of lines',
        }),
        (('-s', '--summary'), {
            'type': int,
            'help': 'write summary every ... rows',
        }),
        (('-o', '--output'), {
            'help': 'csv file to write result',
        }),
        (('-df', '--date-format'), {
            'default': FMT_DATE,
            'help': 'specify fixed format of input retro dates',
        }),
        (('-hk', '--hard-kill'), {
            'action': 'store_true',
            'help': 'hard kill of all bad values',
        }),
        (('-mdp', '--max-dups-to-print'), {
            'type': int,
            'default': 1,
            'help': 'max info messages to print about duplicates found',
        }),
        (('-mep', '--max-empty-to-print'), {
            'type': int,
            'default': 1,
            'help': 'max info messages to print about rows with empty ids found',
        }),
    )

    parser = argparse.ArgumentParser()
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    parsed = parser.parse_args()
    delimiter = delimiter_vars[parsed.delimiter]
    return (
        delimiter,
        parsed.file,
        parsed.use_list,
        parsed.limit,
        parsed.output,
        parsed.summary,
        parsed.date_format,
        parsed.hard_kill,
        parsed.max_dups_to_print,
        parsed.max_empty_to_print,
    )


def get_xprod_normalizer(date_format=FMT_DATE, hard_kill=False, max_dups_to_print=1, max_empty_ids_to_print=1):
    """Build an ``XProdCsvNormalizer`` with the standard set of normalizers attached.

    :param date_format: input format passed to the ``retro_date`` and
        ``birth_date`` date normalizers; their output format is ``FMT_DATE``.
    :param hard_kill: forwarded to the normalizer and the date normalizers;
        when true, the ``hard_kill_bad_emails`` / ``hard_kill_bad_phones``
        value normalizers are registered as well.
    :param max_dups_to_print: forwarded to ``XProdCsvNormalizer``.
    :param max_empty_ids_to_print: forwarded to ``XProdCsvNormalizer``.
    :return: the configured normalizer instance.
    """
    normalizer = XProdCsvNormalizer(
        normalize_if_exists=True,
        hard_kill=hard_kill,
        # Forward the caller's limits; previously both were hard-coded to 1,
        # so the function arguments had no effect.
        max_dups_to_print=max_dups_to_print,
        max_empty_ids_to_print=max_empty_ids_to_print
    )

    normalizer.add_target_normalizer(normalize_empty_target())
    normalizer.add_target_normalizer(try_to_int())

    normalizer.add_value_normalizer('phone', try_to_int())
    normalizer.add_value_normalizer('phone', normalize_multi_phone())
    normalizer.add_value_normalizer('email', normalize_multi_email())
    normalizer.add_value_normalizer('gender', normalize_gender())
    normalizer.add_value_normalizer(
        'retro_date',
        normalize_date(output_format=FMT_DATE, input_date_format=date_format, hard_kill=hard_kill)
    )

    normalizer.add_value_normalizer('phone_id_value', normalize_multi_hash())
    normalizer.add_value_normalizer('email_id_value', normalize_multi_hash())

    # normalizer.add_value_normalizer('phone', normalize_phone_7())
    # normalizer.add_value_normalizer('email', normalize_email_dash())
    # normalizer.add_value_normalizer('email', normalize_email_russian_no())

    normalizer.add_value_normalizer('yuid', normalize_multi_yuid())

    # Hard-kill normalizers are registered after the regular phone/email ones.
    if hard_kill:
        normalizer.add_value_normalizer('email', hard_kill_bad_emails())
        normalizer.add_value_normalizer('phone', hard_kill_bad_phones())

    normalizer.add_record_normalizer(normalize_md5())

    # TODO: make birth date normalizer
    # You should not use this normalizer for birth date!
    # Birth date should be parsed using another var in normalizer
    # Birth date should be translated to str in 3 different ways
    normalizer.add_value_normalizer(
        'birth_date',
        normalize_date(output_format=FMT_DATE, input_date_format=date_format, hard_kill=hard_kill)
    )

    return normalizer


def write_csv(rows, file_path, summary_once=0, delimiter='\t', limit=0):
    """Write ``rows`` to ``file_path`` as delimited text via ``csv.writer``.

    :param rows: iterable of rows, each passed to ``writer.writerow``.
    :param file_path: path of the file to create or truncate.
    :param summary_once: when non-zero, print a progress line after every
        ``summary_once`` rows actually written.
    :param delimiter: field delimiter handed to ``csv.writer``.
    :param limit: when non-zero, stop after writing this many rows.
    """
    import sys

    if sys.version_info[0] >= 3:
        # On Python 3 csv.writer emits str, so the file must be opened in
        # text mode; newline='' leaves line endings to the csv module.
        csv_file = open(file_path, 'w+', newline='')
    else:
        # On Python 2 the csv module expects a binary-mode file.
        csv_file = open(file_path, 'wb+')

    with csv_file:
        writer = csv.writer(csv_file, delimiter=delimiter)
        for i, row in enumerate(rows):
            # Check the limit first so no progress is reported for a row
            # that is never written.
            if limit and i >= limit:
                return
            writer.writerow(row)
            written = i + 1
            if summary_once and written % summary_once == 0:
                print('{0} rows written'.format(written))
