import time
import logging
from multiprocessing.dummy import Pool

from django.db import transaction
from django.core.management.base import BaseCommand

from intranet.femida.src.attachments.models import Attachment
from intranet.femida.src.utils.itertools import get_chunks


logger = logging.getLogger(__name__)


def saver(_id):
    """Extract and persist the text of a single attachment.

    The attachment row is fetched with ``select_for_update`` inside a
    transaction, ``extract_text()`` is called on it and the instance is saved.

    Any ``Exception`` raised along the way is logged and swallowed so that a
    single bad attachment does not abort the whole batch.

    :param _id: primary key of the attachment to process.
    :return: ``1`` if the attachment was processed and saved, ``0`` otherwise.
    """
    try:
        # The atomic block is deliberately placed *inside* the ``try``:
        # the exception has to propagate through ``atomic`` so the
        # transaction is rolled back before the error is handled here.
        # Catching it inside the atomic block would hide the failure from
        # Django, which would then attempt to commit.
        with transaction.atomic():
            attachment = Attachment.objects.select_for_update().get(id=_id)
            attachment.extract_text()
            attachment.save()
    except Exception:
        logger.exception('Error during handle attachment `%d`', _id)
        return 0
    return 1


class Command(BaseCommand):
    """Run ``saver`` over every attachment whose ``text`` is NULL.

    Attachment ids are collected up front, split into chunks and each chunk
    is processed by a thread pool. After every chunk a progress line is
    written; unless ``--ignore-errors`` is given, processing stops after the
    first chunk in which at least one attachment failed.
    """

    help = 'Extract text from attachments'

    def add_arguments(self, parser):
        """Register the command-line options of the command."""
        parser.add_argument(
            '--min-pk', type=int,
            help='Only handle attachments with pk >= this value',
        )
        parser.add_argument(
            '--max-pk', type=int,
            help='Only handle attachments with pk <= this value',
        )
        parser.add_argument(
            '--chunk-size', type=int, default=5000,
            help='Number of attachments submitted to the pool at once',
        )
        parser.add_argument(
            '--pool-size', type=int, default=12,
            help='Number of worker threads',
        )
        parser.add_argument(
            '--ignore-errors', action='store_true',
            help='Keep going even if some attachments in a chunk fail',
        )

    def handle(self, *args, **options):
        """Process the selected attachments chunk by chunk."""
        # ``int()`` is kept as a safeguard for option values that reach
        # ``handle`` without going through argparse type conversion.
        pool_size = int(options['pool_size'])
        chunk_size = int(options['chunk_size'])
        min_pk = options['min_pk']
        max_pk = options['max_pk']
        ignore_errors = options['ignore_errors']

        qs = Attachment.objects.filter(text__isnull=True).order_by('id')
        if min_pk is not None:
            qs = qs.filter(pk__gte=min_pk)
        if max_pk is not None:
            qs = qs.filter(pk__lte=max_pk)

        attachment_ids = list(qs.values_list('id', flat=True))
        self.stdout.write(
            '{} attachments to handle'.format(len(attachment_ids))
        )

        pool = Pool(pool_size)
        handled_count = 0
        start = time.time()
        try:
            for chunk in get_chunks(attachment_ids, chunk_size):
                results = pool.map_async(saver, chunk).get()
                success_count = sum(results)
                current_chunk_size = len(chunk)

                # Running total of attachments submitted so far; this is
                # accurate for every chunk, including a short final one.
                handled_count += current_chunk_size
                self.stdout.write(
                    '{} attachments was handled in {} seconds'.format(
                        handled_count, time.time() - start
                    )
                )

                failed_count = current_chunk_size - success_count
                if failed_count > 0 and not ignore_errors:
                    self.stderr.write(
                        'Stopping: {} attachments failed in the last chunk '
                        '(use --ignore-errors to continue)'.format(
                            failed_count
                        )
                    )
                    break
        finally:
            # Release the worker threads on normal completion, on an early
            # stop and on interruption alike.
            pool.terminate()
            pool.join()
