# -*- coding: utf-8 -*-
import os
import glob
import csv
from textwrap import dedent
from datacloud.dev_utils.logging.logger import get_basic_logger
from datacloud.input_pipeline.normalizer.helpers import get_xprod_normalizer, write_csv, get_args
from datacloud.input_pipeline.normalizer.constants import DEFAULT_OUTPUT_FILE

logger = get_basic_logger(__name__)


def normalize_csv(
        in_file,
        out_file,
        delimiter='\t',
        use_list=False,
        limit=None,
        summary_once=None,
        date_format=None,
        hard_kill=False,
        max_dups_to_print=1,
        max_empty_ids_to_print=1):
    """Normalize a delimited input file and write the result to ``out_file``.

    The rows of ``in_file`` are parsed with ``csv.reader``, passed through the
    normalizer returned by ``get_xprod_normalizer`` and handed to
    ``write_csv``.  Afterwards a short summary line is printed for every
    normalizer counter that is truthy.

    Args:
        in_file: path of the file to read.
        out_file: destination passed to ``write_csv``.
        delimiter: field separator given to ``csv.reader``.
        use_list: when true, ``normalizer.normalize`` is called (and also
            receives ``limit`` and ``summary_once``); otherwise
            ``normalizer.inormalize`` is called with the reader only.
        limit: forwarded to ``write_csv`` and, in list mode, to ``normalize``.
        summary_once: forwarded to the same calls as ``limit``.
        date_format: forwarded unchanged to ``get_xprod_normalizer``.
        hard_kill: forwarded unchanged to ``get_xprod_normalizer``.
        max_dups_to_print: forwarded unchanged to ``get_xprod_normalizer``.
        max_empty_ids_to_print: forwarded unchanged to
            ``get_xprod_normalizer``.
    """
    xprod_normalizer = get_xprod_normalizer(
        date_format=date_format,
        hard_kill=hard_kill,
        max_dups_to_print=max_dups_to_print,
        max_empty_ids_to_print=max_empty_ids_to_print
    )

    # (normalizer counter attribute, message template) in reporting order.
    # ``dedent`` strips the shared indentation but keeps the leading newline,
    # so every message is preceded by an empty line.
    summary_reports = (
        ('duplicates_counter', """
                {0} duplicate{1} found and erased"""),
        ('empty_ids_counter', """
                {0} record{1} with empty ids found and erased"""),
        ('dup_contracts_counter', """
                {0} record{1} with duplicate external_id,retro_date found and erased"""),  # noqa
    )

    # NOTE(review): binary mode is what the csv module expects on Python 2;
    # on Python 3 ``csv.reader`` needs a text-mode file -- confirm the target
    # interpreter before touching this.
    with open(in_file, 'rb') as source:
        parsed_rows = csv.reader(source, delimiter=delimiter)

        if use_list:
            normalized = xprod_normalizer.normalize(
                parsed_rows, limit=limit, summary_once=summary_once)
        else:
            normalized = xprod_normalizer.inormalize(parsed_rows)

        # Writing happens while the input is still open because the
        # normalized rows are derived from the open reader.
        write_csv(normalized, out_file, limit=limit, summary_once=summary_once)

        # Counters are read only after writing has finished.
        for counter_name, template in summary_reports:
            found = getattr(xprod_normalizer, counter_name)
            if not found:
                continue
            plural_suffix = 's' if found > 1 else ''
            print(dedent(template).format(found, plural_suffix))


def find_input_file(directory):
    """Return the only ``.csv`` / ``.tsv`` file located directly in *directory*.

    Args:
        directory: path of the directory to search (not recursive).

    Returns:
        The path of the single matching file, as produced by ``glob.glob``.

    Raises:
        RuntimeError: if no matching file exists, or if more than one does
            and the choice would therefore be ambiguous.
    """
    candidates = (
        glob.glob(os.path.join(directory, '*.csv')) +
        glob.glob(os.path.join(directory, '*.tsv'))
    )

    if not candidates:
        raise RuntimeError('No input files found! Looking for .tsv / .csv')

    if len(candidates) > 1:
        raise RuntimeError('Can not determine input file. Candidates are {}'.format(
            candidates))

    return candidates[0]


def main():
    """Command-line entry point: parse arguments and run ``normalize_csv``.

    When no input file is given, the current working directory is searched
    for exactly one ``.csv`` / ``.tsv`` file.  When no output file is given,
    ``DEFAULT_OUTPUT_FILE`` is placed next to the input file.

    Raises:
        RuntimeError: if the input file has to be auto-detected and zero or
            several candidates are found.
    """
    (delimiter, in_file, use_list, limit, out_file, summary_once, date_format,
     hard_kill, max_dups_to_print, max_empty_to_print) = get_args()

    if not in_file:
        in_file = find_input_file(os.getcwd())
        logger.info('Found input file {}'.format(in_file))

    # Default output lives in the same directory as the input.
    out_file = out_file or os.path.join(
        os.path.dirname(in_file),
        DEFAULT_OUTPUT_FILE
    )

    normalize_csv(
        in_file=in_file,
        out_file=out_file,
        delimiter=delimiter,
        use_list=use_list,
        limit=limit,
        summary_once=summary_once,
        date_format=date_format,
        hard_kill=hard_kill,
        max_dups_to_print=max_dups_to_print,
        max_empty_ids_to_print=max_empty_to_print
    )


if __name__ == '__main__':
    main()
