#!/usr/bin/env python

"""
The script searches the database for "orphan" resources - records which
reference a task record that does not exist - and drops them, together with
the audit records that reference the same missing tasks.
"""

import random
import distutils.util

from common import console
import common.types.database

from yasandbox import manager
from yasandbox.database import mapping


def chunker(data, size):
    """
    Yield consecutive slices of `data`, each at most `size` items long.

    :param data: sliceable sequence supporting `len()` (list, tuple, string, ...).
        Sets and other non-indexable containers must be converted to a list first.
    :param size: maximum length of a chunk; must be a positive integer.
    :return: generator of slices of `data` in original order; the last slice may be
        shorter than `size`. Nothing is yielded for an empty (or falsy) `data`.
    :raises ValueError: if `data` is non-empty and `size` is not positive
        (raised when the generator is first advanced).
    """
    if not data:
        return
    if size <= 0:
        # A non-positive size can never consume the input, so fail loudly
        # instead of looping forever.
        raise ValueError("chunk size must be positive, got {!r}".format(size))
    # Walk by offset instead of re-slicing the remaining tail each time:
    # `data[size:]` copies the whole remainder on every iteration.
    for offset in range(0, len(data), size):
        yield data[offset:offset + size]


def _confirm_deletion(orphans, missing):
    """
    Interactively ask the operator whether the found records may be dropped.

    Answering "r" or "random" prints up to 10 randomly picked IDs from each list
    and asks again; an empty answer counts as "no"; an answer which is not
    recognized as yes/no is reported and the question is repeated.

    :param orphans: non-empty list of orphan resource IDs.
    :param missing: non-empty list of missing task IDs.
    :return: True if the operator confirmed the deletion, False otherwise.
    """
    while True:
        ans = raw_input("Is this OK (yes/NO/r[andom])? ").strip() or "no"
        if ans.lower() in ('r', 'random'):
            # `random.sample` never repeats an element, so cap the sample size by
            # the population size instead of looping until 10 distinct IDs show up.
            rids = random.sample(orphans, min(10, len(orphans)))
            tids = random.sample(missing, min(10, len(missing)))
            print("Random resources: {!r}".format(sorted(rids)))
            print("Random tasks: {!r}".format(sorted(tids)))
            continue
        try:
            return bool(distutils.util.strtobool(ans))
        except ValueError:
            # `strtobool` raises on anything but its known yes/no spellings.
            print("Unrecognized answer {!r}.".format(ans))


def main():
    """
    Find resources which reference non-existent tasks and drop them after confirmation.

    Steps:
      1. Collect all task IDs (read with SECONDARY read preference).
      2. Scan all resources (same read preference) and collect those whose
         `task_id` is not among the collected task IDs.
      3. Print a summary and ask the operator for confirmation.
      4. Delete audit records of the missing tasks and the orphan resources,
         in chunks.
    """
    manager.use_locally()
    mapping.ensure_connection()

    with mapping.switch_db(mapping.Task, common.types.database.ReadPreference.SECONDARY) as Task:
        with console.LongOperation("Fetching tasks count"):
            tasks_count = Task.objects.count()

        pbar = console.ProgressBar('Fetching task IDs', tasks_count)
        tasks = set()
        for tid in Task.objects.scalar('id'):
            tasks.add(tid)
            pbar.add(1)
        pbar.finish()

    # NOTE(review): task IDs are snapshotted before the resources are scanned, so a
    # task created in between (or not yet visible to the reader) would presumably make
    # its resources look orphaned - confirm the script is only run when this is acceptable.
    orphans = set()
    missing = set()
    with mapping.switch_db(mapping.Resource, common.types.database.ReadPreference.SECONDARY) as Resource:
        pbar = console.ProgressBar('Checking resource records', Resource.objects.count())
        for rid, tid in Resource.objects.scalar('id', 'task_id'):
            if tid not in tasks:
                orphans.add(rid)
                missing.add(tid)
            pbar.add(1)
        pbar.finish()

    if not orphans:
        # Nothing to report or delete; `min()`/`max()` below would fail on empty input.
        print("No orphan resources found.")
        return

    # Sets can be neither indexed nor sliced, which both random sampling and
    # `chunker` require - switch to (sorted, for reproducible chunks) lists.
    orphans = sorted(orphans)
    missing = sorted(missing)

    print("Found {} missing tasks. Minimum ID is {}, maximum: {}".format(len(missing), min(missing), max(missing)))
    print("Found {} orphan resources. Minimum ID is {}, maximum: {}".format(len(orphans), min(orphans), max(orphans)))

    if not _confirm_deletion(orphans, missing):
        print("Operation cancelled by user.")
        return

    chunk_size = 1000
    pbar = console.ProgressBar('Dropping database records', len(missing) + len(orphans))
    # Audit records go first: the list of missing tasks is derived from the orphan
    # resources, so after an interrupted run it can still be rebuilt from them.
    for chunk in chunker(missing, chunk_size):
        mapping.Audit.objects(task_id__in=chunk).delete()
        pbar.add(len(chunk))
    # Delete only the current chunk per query, so that each query stays bounded in size.
    for chunk in chunker(orphans, chunk_size):
        mapping.Resource.objects(id__in=chunk).delete()
        pbar.add(len(chunk))
    pbar.finish()


# Run the cleanup only when the file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
