import collections

from crypta.lib.python import templater

# Template for one batched query, rendered via templater.render_template.
#
# Template variables:
#   output            - table name placed after INSERT INTO
#   input             - table name placed after FROM
#   rules             - iterable of (rule_id, hosts, regexps) triples
#   host_field        - column compared against the host list in PREWHERE
#   url_field         - column passed to multiMatchAny together with the regexps
#   is_yandex_referer - when truthy, "AND is_yandex_referer" is added to PREWHERE
#
# Every rule produces one "SELECT DISTINCT yandexuid, <rule_id> AS rule_id"
# statement; the statements are joined with UNION ALL and the whole query ends
# with a single ";".
#
# NOTE(review): hosts and regexps are placed between single quotes as-is by
# this template; confirm whether the templater or the callers escape quotes
# and backslashes.
# NOTE(review): PREWHERE / multiMatchAny look like ClickHouse syntax - confirm
# the target engine.
QUERY_TEMPLATE = u"""
INSERT INTO `{{ output }}`
{% for rule_id, hosts, regexps in rules %}
SELECT
    DISTINCT
    yandexuid,
    {{ rule_id }} AS rule_id
FROM `{{ input }}`
PREWHERE {{ host_field }} IN (
{% for host in hosts %}
    '{{ host }}'{% if not loop.last %},{% endif %}

{% endfor %}
)
{% if is_yandex_referer %}
AND is_yandex_referer
{% endif %}
WHERE multiMatchAny({{ url_field }}, [
{% for regexp in regexps %}
    '{{ regexp }}'{% if not loop.last %},{% endif %}

{% endfor %}
])
{% if not loop.last %}
UNION ALL
{% endif %}
{% endfor %};
"""

# Upper bound on the total number of regexps across all rules in one batch
# (checked in UrlFilter.get_batches).
BATCH_REGEXPS_LIMIT = 2000
# Upper bound on the number of rules in one batch
# (checked in UrlFilter.get_batches).
BATCH_RULE_LIMIT = 50


class UrlRuleRevision(object):
    """Hosts and regexps accumulated for a single rule revision.

    Hosts and regexps are kept in two independent sets: adding a
    (host, regexp) pair does not record which host came with which regexp.
    """

    def __init__(self):
        self.hosts = set()
        self.regexps = set()

    def add(self, host, regexp):
        """Add ``host`` to the host set and ``regexp`` to the regexp set."""
        self.hosts.add(host)
        self.regexps.add(regexp)

    def __eq__(self, other):
        """Return True when ``other`` is a UrlRuleRevision with equal sets."""
        if not isinstance(other, UrlRuleRevision):
            return False
        return self.hosts == other.hosts and self.regexps == other.regexps

    def __ne__(self, other):
        """Return the negation of ``__eq__``.

        Python 2 does not derive ``!=`` from ``__eq__``; without this method
        two equal revisions would still compare as "not equal".
        """
        return not self.__eq__(other)

    def __repr__(self):
        return "UrlRuleRevision(hosts={}, regexps={})".format(repr(self.hosts), repr(self.regexps))


class UrlFilter(object):
    """Collects URL rules per rule revision and renders them as batched queries.

    Each rule revision id maps to a ``UrlRuleRevision`` holding the hosts and
    regexps registered through ``add_url``.
    """

    def __init__(self, logger=None):
        """Create an empty filter.

        :param logger: optional logger, stored as ``self.logger``.
        """
        self.logger = logger
        self.rule_revisions = collections.defaultdict(UrlRuleRevision)

    def add_url(self, rule_revision_id, host, regexp):
        """Register ``host`` and ``regexp`` under ``rule_revision_id``."""
        self.rule_revisions[rule_revision_id].add(host, regexp)

    def get_yql_queries(self, input_table, output_table, source, host_field='host', url_field='url', rule_revision_ids=None):
        """Render one query per batch of rule revisions.

        :param input_table: table name substituted as ``input`` in the template.
        :param output_table: table name substituted as ``output`` in the template.
        :param source: when equal to ``'yandex_referrer'`` the template's
            ``is_yandex_referer`` flag is set.
        :param host_field: column name used for the host condition.
        :param url_field: column name matched against the regexps.
        :param rule_revision_ids: optional container of ids to keep; ``None``
            keeps every registered rule revision.
        :return: list of rendered query strings, one per batch.
        """
        rule_revisions = {
            key: value
            for key, value in self.rule_revisions.items()
            if rule_revision_ids is None or key in rule_revision_ids
        }

        return [
            templater.render_template(
                QUERY_TEMPLATE,
                {
                    "input": input_table,
                    "output": output_table,
                    "rules": batch,
                    "url_field": url_field,
                    "host_field": host_field,
                    "is_yandex_referer": source == 'yandex_referrer'
                }
            )
            for batch in self.get_batches(rule_revisions)
        ]

    @staticmethod
    def get_batches(rule_revisions, regexps_limit=None, rule_limit=None):
        """Split rule revisions into batches bounded by regexp and rule counts.

        A batch is closed before adding a rule that would push its total
        number of regexps above ``regexps_limit`` or once it already holds
        ``rule_limit`` rules. A single rule whose own regexp count exceeds
        ``regexps_limit`` still gets a batch of its own.

        :param rule_revisions: mapping of rule revision id to an object with
            ``hosts`` and ``regexps`` attributes.
        :param regexps_limit: maximum total regexps per batch; ``None`` means
            ``BATCH_REGEXPS_LIMIT``.
        :param rule_limit: maximum rules per batch; ``None`` means
            ``BATCH_RULE_LIMIT``.
        :return: list of batches; each batch is a non-empty list of
            ``(rule_revision_id, hosts, regexps)`` tuples.
        """
        # Resolve defaults at call time so the module constants stay the
        # single source of truth for callers that pass nothing.
        if regexps_limit is None:
            regexps_limit = BATCH_REGEXPS_LIMIT
        if rule_limit is None:
            rule_limit = BATCH_RULE_LIMIT

        batches = []
        current_batch = []
        regexps_in_batch = 0
        for rule_revision_id, rule_revision in rule_revisions.items():
            regexps_count = len(rule_revision.regexps)
            batch_is_full = (
                regexps_in_batch + regexps_count > regexps_limit
                or len(current_batch) >= rule_limit
            )

            # Never flush an empty batch: an oversized first rule used to
            # produce an empty batch, which renders into a query without any
            # SELECT statement.
            if current_batch and batch_is_full:
                batches.append(current_batch)
                current_batch = []
                regexps_in_batch = 0

            current_batch.append((rule_revision_id, rule_revision.hosts, rule_revision.regexps))
            regexps_in_batch += regexps_count

        if current_batch:
            batches.append(current_batch)
        return batches
