import sys


# Key under which a table path is stored in each row of the track table.
TABLE_FIELD_NAME = "table"
# Name of the attribute set on the track table to record the tracked source directory.
SOURCE_DIR_ATTRIBUTE = "source_dir"


class ProcessedTablesTracker(object):
    """Keep track of which tables of a source directory were already processed.

    Candidate tables are the children of ``tracked_source.SourceDir``.  The
    processed ones are persisted as rows of ``tracked_source.TrackTable``, one
    row per table, with the table path stored under ``TABLE_FIELD_NAME``.
    """

    def __init__(self, tracked_source):
        """Store the description of the tracked source.

        :param tracked_source: object exposing the ``SourceDir`` and
            ``TrackTable`` attributes read by the other methods.
        """
        self.tracked_source = tracked_source

    def get_unprocessed_tables(self, yt_client, max_tables_count=sys.maxsize):
        """Return source tables that are not recorded as processed.

        :param yt_client: client used to list the source directory and to
            read the track table.
        :param max_tables_count: upper bound on the number of returned paths.
        :return: list of table paths in descending order, truncated to
            ``max_tables_count`` items.
        """
        source_tables = self._list_source_dir(yt_client)
        processed = self._get_processed_tables(yt_client)
        pending = sorted(source_tables - processed, reverse=True)
        return pending[:max_tables_count]

    def add_processed_tables(self, yt_client, new_tables):
        """Record ``new_tables`` as processed and rewrite the track table.

        The track table is created when missing, then overwritten with the
        union of the previously recorded tables and ``new_tables``, restricted
        to tables currently present in the source directory.  Finally the
        source directory is stored in the ``SOURCE_DIR_ATTRIBUTE`` attribute
        of the track table.

        :param yt_client: client used for all reads and writes.
        :param new_tables: iterable of table paths to mark as processed.
        """
        source_tables = self._list_source_dir(yt_client)
        processed = self._get_processed_tables(yt_client)
        processed.update(new_tables)

        track_table = self.tracked_source.TrackTable
        if not yt_client.exists(track_table):
            yt_client.create("table", track_table, recursive=True)

        # Entries for tables that are no longer listed in the source directory
        # are dropped (presumably to keep the track table from accumulating
        # stale paths).  Rows are sorted so the written content is
        # deterministic instead of depending on set iteration order.
        rows = [{TABLE_FIELD_NAME: path} for path in sorted(processed & source_tables)]
        yt_client.write_table(track_table, rows)
        yt_client.set_attribute(track_table, SOURCE_DIR_ATTRIBUTE, self.tracked_source.SourceDir)

    def for_each_unprocessed(self, yt_client, f):
        """Call ``f(tx, table)`` for every unprocessed table, one at a time.

        Each iteration opens ``yt_client.Transaction()``, picks the first
        table returned by :meth:`get_unprocessed_tables` (i.e. the greatest
        path), calls ``f`` and then records the table as processed.  The list
        of unprocessed tables is re-read on every iteration, and the loop
        stops once it is empty.

        An exception raised by ``f`` propagates to the caller before the table
        is recorded; what happens to the open transaction in that case is up
        to the client's context manager.

        :param yt_client: client providing ``Transaction()`` and the
            operations used by the other methods.
        :param f: callable invoked as ``f(tx, table_path)``.
        """
        while True:
            with yt_client.Transaction() as tx:
                unprocessed_tables = self.get_unprocessed_tables(yt_client)
                if not unprocessed_tables:
                    return

                current_table = unprocessed_tables[0]
                f(tx, current_table)
                self.add_processed_tables(yt_client, [current_table])

    def _get_processed_tables(self, yt_client):
        """Return the set of table paths stored in the track table.

        An empty set is returned when the track table does not exist.
        """
        track_table = self.tracked_source.TrackTable
        if not yt_client.exists(track_table):
            return set()
        return {row[TABLE_FIELD_NAME] for row in yt_client.read_table(track_table)}

    def _list_source_dir(self, yt_client):
        """Return the set of absolute paths listed in the source directory."""
        return set(yt_client.list(self.tracked_source.SourceDir, absolute=True))
