from datacloud.dev_utils.logging.logger import get_basic_logger
from datacloud.batch.lib import validator


# Module-level logger, obtained from the project's get_basic_logger helper
# using this module's name.
logger = get_basic_logger(__name__)


def main(input_file='./sample.csv'):
    """Validate a file with ``validator.validate`` and log the findings.

    The file is opened twice: the first pass counts its lines, the second
    hands a fresh file handle to ``validator.validate``. The results are
    only logged; nothing is returned.

    Args:
        input_file: Path of the file to validate. Defaults to
            ``'./sample.csv'``, the path that used to be hard-coded here.
    """
    # First pass: count the lines so the number of broken lines can be
    # reported against the size of the file.
    with open(input_file, 'r') as fh:
        total_lines = sum(1 for _ in fh)

    # Second pass: a new handle, so the validator starts reading from the
    # beginning of the file.
    with open(input_file, 'r') as fh:
        broken_lines, problems = validator.validate(fh)
        logger.info('Broken lines are: {}'.format(broken_lines))
        logger.info('Problems are: {}'.format(problems))

        if 1 in broken_lines:
            # File is invalid: line number 1 is reported as broken, which
            # this script treats as a broken header.
            # NOTE(review): this branch only logs; execution still falls
            # through to the summary below. Confirm whether an early
            # return is intended here.
            logger.info('Broken header, nothing to do here')

        if broken_lines:
            # TODO: if fraction > 10% - file is invalid
            logger.info('Broken {}/{}'.format(len(broken_lines), total_lines))


# Run the validation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
