import logging
import random
import itertools


class FetchSplitter(object):
    """Partition fetch tags into groups whose estimated size fits a target.

    Each tag maps to a set of tables.  The estimated size of a group of tags
    is ``group_base_size`` plus the sizes of the distinct tables referenced by
    the group's tags (a table shared by several tags in a group is counted
    once).  Tables absent from ``sizes`` are assumed to take
    ``unknown_table_size``.
    """

    def __init__(self, sizes, target_size=20 * (2**30), group_base_size=1 * (2**30), unknown_table_size=2*(2**30)):
        """Create a splitter.

        :param sizes: mapping of table name to its size; only ``.get`` is used.
        :param target_size: size a single group should not exceed.
        :param group_base_size: fixed overhead added to every group's size.
        :param unknown_table_size: size assumed for tables missing in ``sizes``.
        """
        self.tag_tables = {}
        self.sizes = sizes
        self.target_size = target_size
        self.group_base_size = group_base_size
        self.unknown_table_size = unknown_table_size

    def add_tag(self, tag, tables):
        """Register ``tag`` with its tables, replacing any previous entry."""
        self.tag_tables[tag] = set(tables)

    def iter_split(self):
        """Yield ``(group, size)`` pairs for the chosen grouping.

        ``group`` is a list of tags; ``size`` is the group's estimated size
        divided by 2**20 (MiB when sizes are bytes).  Floor division is used
        so integer sizes give the same integer result on Python 2 and 3.
        """
        logging.info("Splitting tags %s into groups for fetch", ', '.join(self.tag_tables))
        groups = self._split(list(self.tag_tables))
        for group in groups:
            sz = self._group_size(group) // 2**20
            logging.info("Group %s: %s MiB", group, sz)
            yield group, sz

    def _group_size(self, tags):
        """Return the estimated size of a group made of ``tags``."""
        tables = set()
        for tag in tags:
            tables |= self.tag_tables[tag]

        return self.group_base_size + sum(self.sizes.get(t, self.unknown_table_size) for t in tables)

    @staticmethod
    def _iter_assignments(n_tags, n_bins, max_attempts):
        """Return an iterable of candidate tag-to-bin index tuples.

        All ``n_bins ** n_tags`` assignments are enumerated when that count
        fits in ``max_attempts``; otherwise ``max_attempts`` random
        assignments are drawn.
        """
        if n_bins ** n_tags <= max_attempts:
            return itertools.product(range(n_bins), repeat=n_tags)

        sampled = (
            tuple(random.randrange(n_bins) for _ in range(n_tags))
            for _ in range(max_attempts)
        )
        if n_bins == n_tags:
            # Random draws almost never produce the one-tag-per-bin layout,
            # which is the only assignment that actually needs this many
            # bins, so try it deterministically before sampling.
            return itertools.chain([tuple(range(n_tags))], sampled)
        return sampled

    def _split(self, tags):
        """Group ``tags`` so that every group fits ``target_size`` if possible.

        Bin counts are tried in increasing order, from one bin up to one bin
        per tag, and the first grouping in which no bin exceeds
        ``target_size`` is returned.  If none is found, the grouping with the
        smallest worst-case excess over ``target_size`` is returned instead.

        :returns: list of non-empty lists of tags.
        """
        tags = list(tags)
        n_tags = len(tags)
        if not tags:
            return []
        if n_tags == 1:
            return [tags]

        best_excess_size = None
        best_split = None

        # Keep at least one attempt so a candidate is always evaluated, even
        # for a very large number of tags.
        max_attempts_per_bin_count = max(1, 2**16 // n_tags)

        # Start at 1 (zero bins cannot hold anything) and include n_tags so
        # that placing every tag in its own group is considered as well.
        for n_bins in range(1, n_tags + 1):
            candidates = self._iter_assignments(n_tags, n_bins, max_attempts_per_bin_count)
            for bin_indices in candidates:
                bins = [[] for _ in range(n_bins)]
                for tag, idx in zip(tags, bin_indices):
                    bins[idx].append(tag)
                excess_size = max(self._group_size(b) - self.target_size for b in bins)
                if excess_size <= 0:
                    logging.info("Found suitable grouping of fetch tags: %s", bins)
                    return [b for b in bins if b]
                if best_excess_size is None or best_excess_size > excess_size:
                    best_excess_size = excess_size
                    best_split = bins

        best_split = [b for b in best_split if b]
        logging.info("Failed to group fetch tags in a satisfactory way, using the best grouping found: %s, best excess size %s MiB", best_split, best_excess_size // 2**20)
        return best_split
