import re
from collections import defaultdict
from dataclasses import dataclass
from itertools import tee
from operator import itemgetter
from typing import Iterable, NewType, Optional

import ujson

# Distinct static type for agency identifiers; a plain ``int`` at runtime.
AgencyID = NewType('AgencyID', int)


def main():
    """Build the suggest entries once and write every output file.

    Each writer receives the complete sequence of entries and produces the
    lines of one file; the files are created by relative name, i.e. in the
    current working directory.
    """
    # Every writer consumes the whole sequence before the next one starts,
    # so a list is simpler than ``itertools.tee`` (which would have to
    # buffer all entries anyway) and avoids rebinding ``entries`` in the
    # loop header.
    entries = list(iter_suggest_entries())
    outputs = (
        ('queries', queries_writer),
        ('groups', groups_writer),
        ('streams', streams_writer),
        ('data', answers_writer),
        ('data_params', meta_writer),
    )
    for path, writer in outputs:
        write_lines(path, writer(entries))


def write_lines(path, lines):
    """Write every item of ``lines`` to ``path``, one item per line.

    The file is opened in text mode for writing (an existing file is
    truncated) and a newline is emitted after each item.
    """
    with open(path, 'w') as sink:
        sink.writelines(
            piece
            for text in lines
            for piece in (text, '\n')
        )


def groups_writer(entries: Iterable['SuggestEntry']):
    """Yield one line per entry: all of its queries joined by tabs."""
    for suggest in entries:
        yield '\t'.join(suggest.queries)


def streams_writer(entries: Iterable['SuggestEntry']):
    """Return the lines of the streams file as a tuple.

    The weights of all entries are summed per weight key.  The first line
    is the grand total labelled ``ALL``; it is followed by one
    ``key<TAB>total`` line per key, in the order the keys were first seen.
    """
    totals = {}
    for suggest in entries:
        for key, value in suggest.weights.items():
            totals[key] = totals.get(key, 0) + value
    lines = [f'ALL\t{sum(totals.values())}']
    lines.extend(f'{key}\t{total}' for key, total in totals.items())
    return tuple(lines)


def meta_writer(entries: Iterable['SuggestEntry']):
    """Yield one metadata line for every query of every entry.

    Line layout: ``query<TAB>-1<TAB>name<TAB>id:url:agency`` with ``:logo``
    appended only when the entry's logo is not ``None``.
    """
    for suggest in entries:
        logo_part = '' if suggest.logo is None else ':' + suggest.logo
        payload = f'{suggest.id}:{suggest.url}:agency{logo_part}'
        for query in suggest.queries:
            yield f'{query}\t-1\t{suggest.name}\t{payload}'


def answers_writer(entries: Iterable['SuggestEntry']):
    """Yield ``query<TAB>-1<TAB>name<TAB>id`` for every query of every entry."""
    for suggest in entries:
        tail = f'-1\t{suggest.name}\t{suggest.id}'
        for query in suggest.queries:
            yield f'{query}\t{tail}'


def queries_writer(entries: Iterable['SuggestEntry']):
    """Return the sorted lines of the queries file as a list.

    Each line is ``query<TAB><TAB>total<TAB>key:weight,...`` where ``total``
    is the sum of the entry's weights plus 0.0001 and the last column lists
    the entry's individual weights.
    """
    lines = []
    for suggest in entries:
        total = sum(suggest.weights.values()) + 0.0001
        breakdown = ','.join(
            f'{key}:{value}' for key, value in suggest.weights.items()
        )
        suffix = f'\t\t{total}\t{breakdown}'
        lines.extend(f'{query}{suffix}' for query in suggest.queries)
    lines.sort()
    return lines


def iter_suggest_entries():
    """Return a lazy iterator of ``SuggestEntry`` objects.

    ``make_weights()`` and ``agencies.json`` are read immediately; the
    entries themselves are built on demand, one per agency whose
    ``has_enabled_feeds`` value is greater than zero.

    Query tokens are unique across the whole run: a token belongs to the
    first agency (in iteration order) that produces it, so later agencies
    may end up with fewer queries, or none at all.
    """
    weights = make_weights()
    # Context manager so the file handle is closed right after parsing
    # instead of being leaked until garbage collection.
    with open('agencies.json') as source:
        agencies = ujson.load(source)['agencies']
    return _iter_enabled_entries(agencies.values(), weights)


def _iter_enabled_entries(agencies, weights):
    """Yield a ``SuggestEntry`` for every agency that has enabled feeds."""
    claimed_tokens = set()
    for agency in agencies:
        if not (agency['has_enabled_feeds'] > 0):
            continue
        # Site URL with the scheme / ``www.`` fragments removed; it serves
        # both as a query candidate and as the entry's URL.
        bare_url = url_query_patcher.sub('', agency.get('site_url') or '')
        agency_id = int(agency['aid'])
        yield SuggestEntry(
            queries=_claim_queries(agency, bare_url, claimed_tokens),
            url=bare_url.strip('/'),
            logo=agency.get('logo_square'),
            id=agency_id,
            name=agency['name'],
            weights=weights[AgencyID(agency_id)],
        )


def _claim_queries(agency, bare_url, claimed_tokens):
    """Return the agency's normalized tokens not claimed by an earlier agency.

    Every returned token is also added to ``claimed_tokens``.
    """
    candidates = (
        *(agency.get('aliases') or '').split('|'),
        bare_url,
        agency.get('name') or '',
        agency.get('shortname') or '',
    )
    queries = set()
    for candidate in candidates:
        token = candidate.strip('! \t\r/"').lower()
        # Skip empty tokens and tokens already claimed (by this agency or
        # by a previous one).
        if token and token not in claimed_tokens:
            claimed_tokens.add(token)
            queries.add(token)
    return queries


def make_weights():
    """Load per-agency quality weights from ``ratings.json``.

    Only ratings whose ``rubric`` equals 0 contribute.  For each such
    rating, every listed agency gets ``quality`` stored under the rating's
    ``issue``.

    Returns:
        A ``defaultdict`` mapping agency id to ``{issue: quality}``; looking
        up an agency without ratings yields (and stores) an empty dict.
    """
    # Context manager so the file handle is closed right after parsing
    # instead of being leaked until garbage collection.
    with open('ratings.json') as source:
        ratings = ujson.load(source)

    weights: dict[AgencyID, dict[int, float]] = defaultdict(dict)
    for rating in ratings:
        if rating.get('rubric') != 0:
            continue
        issue = rating['issue']
        for agency in rating['agencies']:
            # NOTE(review): the key is used exactly as stored in the JSON,
            # while iter_suggest_entries looks weights up by ``int`` id;
            # confirm ratings.json stores agency ids as integers.
            weights[agency['agency']][issue] = agency['quality']

    return weights


@dataclass
class SuggestEntry:
    """Suggest record for one agency, consumed by the ``*_writer`` functions.

    Instances are created by ``iter_suggest_entries``.
    """
    # Lower-cased, stripped tokens taken from the agency's aliases, site URL,
    # name and short name.
    queries: set[str]
    # Site URL after ``url_query_patcher`` substitution, with leading and
    # trailing slashes stripped.
    url: str
    # Value of the agency's ``logo_square`` field; ``None`` when absent, in
    # which case ``meta_writer`` emits no logo part.
    logo: Optional[str]
    # Integer form of the agency's ``aid`` field.
    id: int
    # The agency's ``name`` field.
    name: str
    # Mapping of issue -> quality for this agency, as built by make_weights.
    weights: dict[int, float]


# Callable returning ``obj[1:]``; not referenced in the visible part of this
# module.
alias_patcher = itemgetter(slice(1, None))
# Matches an optional ``http://`` / ``https://`` scheme followed by an
# optional ``www.``.
# NOTE(review): the pattern is unanchored, so ``sub('', ...)`` also removes
# these fragments when they occur later in the string; confirm that this
# is intended.
url_query_patcher = re.compile(r'(?:https?://)?(?:www\.)?')
# Matches an optional scheme only; not referenced in the visible part of
# this module.
url_meta_patcher = re.compile(r'(?:https?://)?')

# Run the export only when the file is executed as a script.
if __name__ == '__main__':
    main()
