
from django.core.management.base import BaseCommand
from django.db.models import Count
from wiki.pages.models.revision import Revision

import logging

# Module-level logger; all messages of this command go to the 'wiki.management_commands' logger.
logger = logging.getLogger('wiki.management_commands')


class Command(BaseCommand):
    """Management command that removes duplicate ``Revision`` rows.

    Revisions are treated as duplicates when they share the same ``page_id``
    and ``mds_storage_id``. In every such group the row with the highest
    ``id`` is kept and the remaining rows are deleted (or only logged when
    dry-run mode is enabled).
    """

    help = 'Remove duplicate revision records'

    # Number of duplicate groups processed per run when --limit is not given.
    default_limit = 1000

    def add_arguments(self, parser):
        """Register the ``--limit`` and ``--dry-run`` command-line options."""
        super(Command, self).add_arguments(parser)
        # argparse always stores the option (None when omitted), so the default
        # has to be declared here; a fallback in ``options.get`` never fires.
        parser.add_argument('--limit', type=int, default=self.default_limit, help='number or records')
        # nargs='?' keeps the existing "--dry-run 1" form working and also
        # accepts a bare "--dry-run" flag (stored as 1 via ``const``).
        parser.add_argument('--dry-run', type=int, nargs='?', const=1, default=0, help="don't remove dupcicates")

    def handle(self, *args, **options):
        """Find groups of duplicate revisions and remove all but one row of each.

        Options read:
            limit: maximum number of duplicate (page_id, mds_storage_id) groups
                to process; ``default_limit`` is used when missing or None.
            dry_run: when truthy, only log what would be removed.
        """
        limit = options.get('limit')
        if limit is None:
            # Covers direct calls to handle() and an explicit limit=None.
            limit = self.default_limit
        dry_run = bool(options.get('dry_run'))

        started_message = 'Remove duplicate revisions started'
        if dry_run:
            started_message += ' (dry_run mode)'

        logger.info(started_message)

        # Select every (page, storage id) pair that has more than one revision row.
        revisions = (
            Revision.objects.values('mds_storage_id', 'page_id')
            .annotate(rev_num=Count(1))
            .filter(rev_num__gt=1)[:limit]
        )

        for revision in revisions:
            mds_storage_id = revision['mds_storage_id']
            page_id = revision['page_id']
            rev_num = revision['rev_num']
            logger.info('page {} has {} records with mds_storage_id {}'.format(page_id, rev_num, mds_storage_id))

            removed = self._remove_duplicates(page_id, mds_storage_id, dry_run)
            if removed is None:
                continue

            logger.info(
                'removed {} revisions with mds_storage_id {} and page_id {}'.format(removed, mds_storage_id, page_id)
            )

    def _remove_duplicates(self, page_id, mds_storage_id, dry_run):
        """Delete all but the highest-id revision of one duplicate group.

        Returns the number of revisions removed (counted in dry-run mode too,
        although nothing is deleted then), or None when the group no longer
        has any rows.
        """
        dup_revisions = list(Revision.objects.filter(mds_storage_id=mds_storage_id, page_id=page_id).order_by('id'))
        if not dup_revisions:
            # The rows may have disappeared since the aggregate query ran.
            logger.info('no revisions found for page_id {} mds_storage_id {}, skipping'.format(page_id, mds_storage_id))
            return None

        # Keep the revision with the highest id: it is the one referenced from meta in pageevents.
        keep_revision = dup_revisions.pop()
        logger.info('keep revision id {}'.format(keep_revision.id))

        removed = 0
        for dup_revision in dup_revisions:
            logger.info(
                'remove revision id {} page_id {} mds_storage_id {}'.format(
                    dup_revision.id, dup_revision.page_id, dup_revision.mds_storage_id
                )
            )
            removed += 1
            if not dry_run:
                dup_revision.delete()

        return removed
