from nile.api.v1.clusters.yt import Hahn
from yt.wrapper.ypath import ypath_join

from textwrap import dedent
import argparse
import datetime


def _parse_args():
    """Parse the command line and build the per-date YT table paths.

    Returns:
        A ``(task_tariff_map, logs)`` tuple. The first item is the
        ``--task-tariff-map-dir`` value joined with the requested date; the
        second is a list holding every ``--log-dirs`` value joined with the
        same date, in the order the directories were given.
    """
    cli = argparse.ArgumentParser(
        description=dedent('''
            Prints tariffs used in logs but absent in `task_tariff_map` dictionary for
            the provided date.

            Note. The token to connect to the Hahn YT cluster must be provided
            either by means of YT_TOKEN environment variable or `~/.yt/token`
            file.
        '''),
        formatter_class=argparse.RawTextHelpFormatter
    )
    cli.add_argument(
        '--date',
        required=True,
        type=datetime.date.fromisoformat,
        help='The date (YYYY-MM-DD) absent tariffs must be looked for.',
    )
    cli.add_argument(
        '--task-tariff-map-dir',
        required=True,
        help='YT directory with `task_tariff_map` dictionaries.',
    )
    cli.add_argument(
        '--log-dirs',
        required=True,
        nargs='+',
        help='One or more YT directories with logs.',
    )
    options = cli.parse_args()

    # The parsed date is rendered back to its ISO form (YYYY-MM-DD) and
    # appended to every directory to obtain the path for that date.
    day = options.date.isoformat()
    tariff_map_table = ypath_join(options.task_tariff_map_dir, day)
    log_tables = [ypath_join(directory, day) for directory in options.log_dirs]

    return tariff_map_table, log_tables


def _get_task_ids(cluster, table_name):
    """Return the set of distinct `task_id` values stored in a table.

    Records that have no `task_id` field, or hold ``None`` in it, are
    skipped. ``None`` is not an identifier, and leaving it in the result
    would make sorting it together with real identifiers raise ``TypeError``.

    Args:
        cluster: Object whose ``read(table_name, bytes_decode_mode=...)``
            method yields records supporting ``.get('task_id')``.
        table_name: Path of the table to read.

    Returns:
        A set with every non-``None`` `task_id` found in the table.
    """
    records = cluster.read(table_name, bytes_decode_mode='strict')
    task_ids = {record.get('task_id') for record in records}
    # `.get` yields None for records lacking the field; drop that placeholder.
    task_ids.discard(None)
    return task_ids


def _get_absent_task_ids(cluster, task_tariff_map, logs):
    """Return task ids that occur in the logs but not in the dictionary.

    Args:
        cluster: Cluster object handed to ``_get_task_ids`` for every read.
        task_tariff_map: Path of the `task_tariff_map` table.
        logs: Iterable of log table paths.

    Returns:
        The set of task ids found in at least one log table and missing
        from ``task_tariff_map``.
    """
    # The dictionary is read first, then each log table in the given order.
    known_ids = _get_task_ids(cluster, task_tariff_map)
    seen_in_logs = set().union(
        *(_get_task_ids(cluster, log_table) for log_table in logs)
    )
    return seen_in_logs - known_ids


def main():
    """Print, one per line and in sorted order, the absent task ids.

    The table paths come from the command line (see ``_parse_args``) and the
    tables are read through a ``Hahn`` cluster object.
    """
    tariff_map_table, log_tables = _parse_args()
    cluster = Hahn()

    missing_ids = _get_absent_task_ids(cluster, tariff_map_table, log_tables)
    for missing_id in sorted(missing_ids):
        print(missing_id)
