import time
import logging

import requests

from libraries.topology.mongo import groups, tags, searcher_lookup
from libraries.topology.utils import version_to_tag, is_trunk, trunk_to_commit
from libraries.utils import memoize


# Commit number that get_min_online_commit() always adds to its candidate
# set before taking the minimum, so the value it returns is never greater
# than this constant.
MIN_COMMIT_TO_KEEP = 4306931


@memoize
def tag_to_commit():
    """Build a mapping from tag name to commit, with the commit as a string.

    Every record returned by ``tags().find()`` contributes one entry keyed by
    its ``'tag'`` field; the ``'commit'`` field is converted with ``str``.
    The function is wrapped in ``memoize`` (presumably so the collection is
    read only once per process -- confirm against the helper).
    """
    mapping = {}
    for record in tags().find():
        mapping[record['tag']] = str(record['commit'])
    return mapping


@memoize
def commits_with_tags():
    """Return the set of commit strings that appear in ``tag_to_commit()``."""
    return {commit for commit in tag_to_commit().values()}


def is_group_removed(group):
    res = requests.get(
        'http://api.gencfg.yandex-team.ru/trunk/groups/{}'.format(group),
        timeout=10,
    ).status_code == 404
    if res:
        print '  ! {} is removed in trunk'.format(group)
    else:
        print '  ! {} is not removed in trunk'.format(group)
    return res


def get_min_online_commit(group):
    """Return the smallest commit number still relevant for ``group``.

    Fetches the versions reported alive for the group from clusterstate
    (10 second timeout), converts each one to an integer commit (trunk
    versions via ``trunk_to_commit``, tagged versions via the
    ``tag_to_commit()`` mapping), adds ``MIN_COMMIT_TO_KEEP`` and returns the
    minimum of the resulting set.

    Returns:
        int: the minimum commit; never greater than ``MIN_COMMIT_TO_KEEP``.

    Raises:
        AssertionError: if no version is reported alive and the group has not
            been removed from trunk.
        KeyError: if a tagged version has no entry in ``tag_to_commit()`` or
            the response has no ``'versions'`` key.
    """
    url = 'http://clusterstate.yandex-team.ru/group/{}/alive'.format(group)
    versions = requests.get(url, timeout=10).json()['versions']

    commits = set()
    for ver in versions:
        if is_trunk(ver):
            commits.add(int(trunk_to_commit(ver)))
        else:
            commits.add(int(tag_to_commit()[version_to_tag(ver)]))

    # This check guards the caller's deletion logic, so it must not be an
    # `assert` statement: those are stripped under `python -O`, which would
    # silently disable the guard. AssertionError is kept as the exception
    # type so existing handlers behave the same.
    if not commits and not is_group_removed(group):
        raise AssertionError(
            'no alive versions for {} and the group is not removed in trunk'.format(group)
        )

    commits.add(MIN_COMMIT_TO_KEEP)
    return min(commits)


def main():
    all_groups = searcher_lookup().distinct('group')
    print 'total {} groups'.format(len(all_groups))

    for group in sorted(all_groups):
        print '  - start', group
        try:
            bulk = groups().initialize_unordered_bulk_op()
            min_commit = str(get_min_online_commit(group))
            for rec in groups().find(
                {'group': group, 'commit': {'$lt': min_commit}},
                {'group': 1, 'commit': 1}
            ):
                commit = str(rec['commit'])
                if commit not in commits_with_tags():
                    print '    remove', group, commit
                    bulk.find({'commit': commit, 'group': group}).remove()
                else:
                    print '    skip', commit
            print '  - execute'
            bulk.execute()
            print '  - sleep'
            time.sleep(5)
        except:
            logging.exception('group %s failed', group)
            time.sleep(0.5)


# Run the cleanup only when this file is executed as a script.
if __name__ == '__main__':
    main()
