from datetime import datetime
import logging

from wiki.utils.dict_keypath import value_by_path
from wiki.utils.tasks.base import LockedCallableTask
from wiki.utils.models import get_chunked
from wiki.utils.timezone import make_aware_utc, now
from wiki.pages.models import CloudPage
from wiki.integrations.ms.utils import get_doc_retriever_client
from wiki.integrations.ms.schema import DriveItem, LastModifiedRequest

logger = logging.getLogger(__name__)


class SyncLastModified(LockedCallableTask):
    """
    Synchronize the last-modified date of cloud pages.

    This is needed so the pages can afterwards be placed into the catalog
    that is indexed by the internal search.

    Cloud pages are processed chunk by chunk: one batch request per chunk is
    sent through the document retriever client, and every page that received
    a usable answer gets its ``page.modified_at`` and its own
    ``lastmodified_sync_at`` updated. Pages whose answer is missing, carries
    an error, or has an unparsable timestamp are logged and skipped, so a
    single bad page never aborts the whole run.
    """

    name = 'wiki.sync_lastmodified'
    time_limit = 60 * 60

    @staticmethod
    def _parse_last_modified(raw):
        """
        Parse a ``lastModifiedDateTime`` string into a naive ``datetime``.

        Accepts the ``...Z`` layout both without and with fractional seconds.
        Returns ``None`` when the value matches neither layout (or is not a
        string) instead of raising.
        """
        for fmt in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S.%fZ'):
            try:
                return datetime.strptime(raw, fmt)
            except (ValueError, TypeError):
                continue
        return None

    def run(self, *args, **kwargs):
        """
        Run the synchronization over all cloud pages with ``page.status > 0``.

        Positional and keyword arguments are accepted but not used.
        """
        client = get_doc_retriever_client()
        # Pages are ordered by the time of their previous sync.
        # NOTE(review): lastmodified_sync_at is also rewritten inside this loop;
        # confirm get_chunked() is not sensitive to the ordering field changing
        # while it iterates.
        page_qs = CloudPage.objects.filter(page__status__gt=0).order_by('lastmodified_sync_at')
        for page_chunk in get_chunked(page_qs):

            pages, requests = _make_batch_request(page_chunk)
            if not requests:
                # No page in this chunk has a usable drive/item reference,
                # so there is nothing to ask the remote service about.
                continue

            try:
                response = client.get_lastmodified(requests)['responses']
            except Exception as e:
                # Best effort: a failed batch must not stop the remaining chunks.
                logger.exception(f'Failed getting batch last modified: {repr(e)}')
                continue

            for cloud_page in pages:
                # Per-page answers are looked up by the stringified page id.
                page_response = response.get(str(cloud_page.page_id))
                if not page_response:
                    logger.warning(f'Failed getting last modified for page {cloud_page.page_id}: no page response')
                    continue

                error = page_response.get('error')
                if error:
                    logger.warning(f'Failed getting last modified for page {cloud_page.page_id}: {error}')
                    continue

                last_modified = page_response.get('lastModifiedDateTime')
                if not last_modified:
                    logger.warning(
                        f'Failed getting last modified for page {cloud_page.page_id}: no data in page response'
                    )
                    continue

                last_modified_dt = self._parse_last_modified(last_modified)
                if last_modified_dt is None:
                    # An unexpected timestamp layout only skips this page.
                    logger.warning(
                        f'Failed getting last modified for page {cloud_page.page_id}: '
                        f'unexpected date format {last_modified!r}'
                    )
                    continue

                # modified_at_for_index is expected to be refreshed automatically inside save()
                cloud_page.page.modified_at = make_aware_utc(last_modified_dt)
                cloud_page.page.save()

                cloud_page.lastmodified_sync_at = now()
                cloud_page.save()


def _make_batch_request(page_chunk):
    pages = []
    requests = []
    for cloud_page in page_chunk:
        if not cloud_page.cloud_src:
            continue

        drive_id = value_by_path('driveitem.drive_id', cloud_page.cloud_src, None)
        item_id = value_by_path('driveitem.item_id', cloud_page.cloud_src, None)
        if not (drive_id and item_id):
            continue

        requests.append(
            LastModifiedRequest(id=cloud_page.page_id, driveitem=DriveItem(drive_id=drive_id, item_id=item_id))
        )
        pages.append(cloud_page)
    return pages, requests
