"""
Скрипт для подсчета статистики использования b2b

WIKI-16605
"""

import json
import math
import statistics
import time

from collections import Counter, defaultdict, OrderedDict
from typing import List, Iterable, Optional, Literal

from ninja import Schema

from django.contrib.auth import get_user_model
from django.db.models import Count, Sum
from django.db.models.functions import Length

from wiki.files.models import File
from wiki.notifications.models import PageEvent, EventTypes
from wiki.pages.models import Revision, Page, Access, Comment, PageWatch
from wiki.subscriptions.models import Subscription, SubscriptionType
from wiki.sync.connect.models import Organization

# User model resolved through Django's `get_user_model()`.
User = get_user_model()

# Usernames of robot accounts; the queries in this module exclude them from user/author/file statistics.
USER_ROBOT_LOGIN = ['yndx-wiki-cnt-robot', 'yndx-forms-cnt-robot', 'robot-wiki']
# Pattern applied to page supertags with `__iregex`; matching pages are left out of the statistics.
# It covers a user's root page (`users/<name>`), its `notes` child, and a few fixed supertags.
# NOTE(review): the bare `sandbox` alternative has no `$` anchor, so any supertag that merely
# starts with "sandbox" matches (making `sandbox/(.*)$` redundant) - confirm this is intended.
REGEX_PAGE_EXCLUDE = r'^(users/([^/]*?)$|users/([^/]*?)/notes$|clusterusers$|homepage$|users$|sandbox|sandbox/(.*)$)'

# File that `main` writes the collected statistics to (as JSON).
SAVE_PATH = '/tmp/stat_org.json'


def avg(values: Iterable[int]) -> int:
    """Return the arithmetic mean of *values*, rounded up to an integer.

    Args:
        values: Non-empty iterable of numbers.

    Returns:
        The smallest integer that is greater than or equal to the mean.

    Raises:
        statistics.StatisticsError: If *values* is empty.
    """
    return math.ceil(statistics.mean(values))


class OrgStat(Schema):
    """Usage statistics of a single organization, as assembled by `calculate_org`."""

    # Primary key of the organization in the wiki database.
    id: int
    # `dir_id` attribute of the organization.
    dir_id: int
    # `name` attribute of the organization.
    name: str
    # Days between the earliest page creation and the latest page modification, divided by 30 and rounded up.
    lifetime_mth: int

    # Number of pages with status > 0 whose supertag does not match REGEX_PAGE_EXCLUDE.
    pages: int
    # Number of the organization's users, robot accounts excluded.
    users: int

    # Month key ('YYYY MM') -> number of pages created in that month, sorted by key.
    pages_create: dict[str, int]
    # Month key ('YYYY MM') -> number of distinct revision authors in that month, sorted by key.
    users_edit: dict[str, int]
    # Left empty by `calculate_org`; filled in separately from logs.
    users_view: dict[str, int]

    # Mean of the `pages_create` values, rounded up; 0 when there is no data.
    avg_pages_create: Optional[int]
    # Mean of the `users_edit` values, rounded up; 0 when there is no data.
    avg_users_edit: Optional[int]
    # Left as None by `calculate_org`; filled in separately from logs.
    avg_users_view: Optional[int]

    # Result of `check_access_control`: whether any qualifying Access record exists.
    access_control: bool
    # Summed size of the organization's files divided by 2**20, rounded to two decimals.
    storage_mb: float

    # Result of `check_sso`.
    sso: bool
    # Number of comments on the organization's pages.
    comments: int
    # Result of `count_subscriptions_all`.
    subscription_all: int
    # Result of `count_subscriptions` with type 'my'.
    subscription_my: int
    # Result of `count_subscriptions` with type 'other'.
    subscription_other: int


def count_page_created(org_id: int) -> dict[str, int]:
    """Count the organization's pages grouped by the month they were created in.

    Only pages with ``status > 0`` that are not autogenerated, do not redirect
    anywhere and whose supertag does not match ``REGEX_PAGE_EXCLUDE`` are counted.

    Args:
        org_id: Primary key of the organization.

    Returns:
        A ``Counter`` mapping a ``'YYYY MM'`` month key to the number of pages
        created in that month. It is empty when no page qualifies; a notice is
        printed in that case.
    """
    eligible_pages = Page.objects.filter(
        status__gt=0,
        org_id=org_id,
        is_autogenerated=False,
        redirects_to__isnull=True,
    ).exclude(supertag__iregex=REGEX_PAGE_EXCLUDE)
    creation_dates = eligible_pages.values_list('created_at', flat=True)

    # The month keys are built in Python from each creation timestamp.
    monthly_totals = Counter(created_at.strftime('%Y %m') for created_at in creation_dates)

    if not monthly_totals:
        print(f'[{org_id}] Pages not found in org')

    return monthly_totals


def count_user_edited_page(org_id: int) -> dict[str, int]:
    """Count, per month, how many different authors produced page revisions.

    Revisions of pages whose supertag matches ``REGEX_PAGE_EXCLUDE`` and
    revisions authored by the robot accounts are ignored.

    Args:
        org_id: Primary key of the organization.

    Returns:
        A dict mapping a ``'YYYY MM'`` month key to the number of distinct
        author ids seen in that month. It is empty when no revision
        qualifies; a notice is printed in that case.
    """
    revisions = Revision.objects.filter(page__org_id=org_id)
    revisions = revisions.exclude(page__supertag__iregex=REGEX_PAGE_EXCLUDE)
    revisions = revisions.exclude(author__username__in=USER_ROBOT_LOGIN)
    rows = revisions.select_related('page', 'author').values_list('author_id', 'created_at')

    # A set per month keeps every author counted once, however many revisions they made.
    editors_by_month = defaultdict(set)
    for editor_id, revision_time in rows:
        month_key = revision_time.strftime('%Y %m')
        editors_by_month[month_key].add(editor_id)

    if not editors_by_month:
        print(f'[{org_id}] Users not found in org')

    return {month_key: len(editors) for month_key, editors in editors_by_month.items()}


def check_access_control(org_id: int) -> bool:
    """Tell whether the organization has at least one qualifying ``Access`` record.

    Records are taken from pages with ``status > 0`` whose supertag does not
    match ``REGEX_PAGE_EXCLUDE``.

    Args:
        org_id: Primary key of the organization.

    Returns:
        True when such a record exists, False otherwise.
    """
    access_records = Access.objects.filter(page__org_id=org_id, page__status__gt=0)
    access_records = access_records.exclude(page__supertag__iregex=REGEX_PAGE_EXCLUDE)
    # Owner records on pages under `users/` are not counted.
    access_records = access_records.exclude(page__supertag__startswith='users/', is_owner=True)
    return access_records.select_related('page').exists()


def count_files_size(org_id: int) -> float:
    """Return the total size of the organization's files in units of 2**20 bytes.

    Only files with ``status == 1`` on pages with ``status > 0`` are summed;
    files uploaded by the robot accounts are skipped.

    Args:
        org_id: Primary key of the organization.

    Returns:
        The summed ``size`` divided by ``1 << 20`` and rounded to two decimals;
        zero when the aggregate is empty.
    """
    org_files = File.objects.filter(status=1, page__org_id=org_id, page__status__gt=0)
    org_files = org_files.exclude(user__username__in=USER_ROBOT_LOGIN)
    totals = org_files.select_related('page').aggregate(file_size=Sum('size'))

    # The aggregate yields None when there is nothing to sum.
    total_size = totals['file_size'] or 0
    return round(total_size / (1 << 20), 2)


def check_sso(org: Organization) -> bool:
    """Tell whether the organization has at least one user that looks like an SSO user.

    Args:
        org: Organization to inspect.

    Returns:
        False when the organization has no ``cloud_id``; otherwise True if any
        of its users has a 16-character ``dir_id`` starting with ``9``.
    """
    if org.cloud_id is not None:
        # SSO users: 9000000000000000 <= user.dir_id <= 9999999999999999,
        # i.e. 16 characters with 9 as the first one.
        users_with_length = org.get_users().annotate(len_dir_id=Length('dir_id'))
        return users_with_length.filter(len_dir_id=16, dir_id__startswith='9').exists()

    return False


def count_comments(org_id: int) -> int:
    """Return the number of comments attached to pages of the given organization.

    Args:
        org_id: Primary key of the organization.
    """
    org_comments = Comment.objects.filter(page__org_id=org_id)
    return org_comments.count()


def count_subscriptions(org_id: int, type_: Literal['my', 'other']) -> int:
    """Count subscriptions of one kind across the organization's pages.

    The total is the sum of two sources: matching ``PageEvent`` rows and
    matching ``Subscription`` rows, both restricted to pages with ``status > 0``.
    Only the ``PageEvent`` part skips pages matching ``REGEX_PAGE_EXCLUDE``.

    Args:
        org_id: Primary key of the organization.
        type_: ``'my'`` selects ``EventTypes.watch`` / ``SubscriptionType.MY``;
            any other value selects ``EventTypes.subscribe_other_user`` /
            ``SubscriptionType.OTHER``.

    Returns:
        Number of matching page events plus number of matching subscriptions.
    """
    is_own = type_ == 'my'
    event_kind = EventTypes.watch if is_own else EventTypes.subscribe_other_user
    subscription_kind = SubscriptionType.MY if is_own else SubscriptionType.OTHER

    page_events = PageEvent.objects.filter(event_type=event_kind, page__org_id=org_id, page__status__gt=0)
    page_events = page_events.exclude(page__supertag__iregex=REGEX_PAGE_EXCLUDE).prefetch_related('page')

    subscriptions = Subscription.objects.filter(
        type=subscription_kind,
        page__org_id=org_id,
        page__status__gt=0,
    ).prefetch_related('page')

    return page_events.count() + subscriptions.count()


def count_subscriptions_all(org_id: int) -> int:
    """Count every subscription on the organization's pages, regardless of kind.

    The total is the sum of ``PageWatch`` rows and ``Subscription`` rows on
    pages with ``status > 0``. Only the ``PageWatch`` part skips pages matching
    ``REGEX_PAGE_EXCLUDE``.

    Args:
        org_id: Primary key of the organization.

    Returns:
        Number of page watches plus number of subscriptions.
    """
    page_watches = PageWatch.objects.filter(page__org_id=org_id, page__status__gt=0)
    page_watches = page_watches.exclude(page__supertag__iregex=REGEX_PAGE_EXCLUDE).prefetch_related('page')

    subscriptions = Subscription.objects.filter(page__org_id=org_id, page__status__gt=0)
    subscriptions = subscriptions.prefetch_related('page')

    return page_watches.count() + subscriptions.count()


def calculate_org(org_id: int) -> OrgStat:
    """Collect all usage statistics for a single organization.

    Args:
        org_id: Primary key of the organization.

    Returns:
        A populated ``OrgStat``. ``users_view`` and ``avg_users_view`` are left
        empty because they are filled in separately from logs.

    Raises:
        Organization.DoesNotExist: If no organization has the given id.
    """
    org = Organization.objects.get(id=org_id)
    page_qs = org.page_set.filter(status__gt=0).exclude(supertag__iregex=REGEX_PAGE_EXCLUDE)

    first_created = page_qs.order_by('created_at').values_list('created_at', flat=True).first()
    last_modified = page_qs.order_by('-modified_at').values_list('modified_at', flat=True).first()

    # `.first()` yields None when no page qualifies; subtracting would raise
    # TypeError, so an organization without pages gets a lifetime of zero.
    if first_created is None or last_modified is None:
        lifetime_mth = 0
    else:
        lifetime = last_modified - first_created
        lifetime_mth = math.ceil(lifetime.days / 30)

    page_created = count_page_created(org_id)
    user_edited = count_user_edited_page(org_id)

    return OrgStat(
        id=org_id,
        dir_id=org.dir_id,
        name=org.name,
        lifetime_mth=lifetime_mth,
        pages=page_qs.count(),
        users=org.user_set.exclude(username__in=USER_ROBOT_LOGIN).count(),
        # Month keys are 'YYYY MM', so sorting them as strings is chronological.
        pages_create=OrderedDict(sorted(page_created.items())),
        users_edit=OrderedDict(sorted(user_edited.items())),
        users_view={},  # filled in separately from logs
        avg_pages_create=avg(page_created.values()) if page_created else 0,
        avg_users_edit=avg(user_edited.values()) if user_edited else 0,
        avg_users_view=None,  # filled in separately from logs
        access_control=check_access_control(org_id),
        storage_mb=count_files_size(org_id),
        sso=check_sso(org),
        comments=count_comments(org_id),
        subscription_all=count_subscriptions_all(org_id),
        subscription_my=count_subscriptions(org_id, type_='my'),
        subscription_other=count_subscriptions(org_id, type_='other'),
    )


def get_alive_org_ids() -> List[int]:
    """Return ids of enabled organizations that have human-made page revisions.

    An organization qualifies when it has at least one revision that is not
    authored by a robot account, on a non-autogenerated page with ``status > 0``
    whose supertag does not match ``REGEX_PAGE_EXCLUDE``.

    Returns:
        List of organization ids.
    """
    enabled_org_ids = Organization.objects.filter(status='enabled').values_list('id', flat=True)

    # Restrict the report to "alive" organizations only.
    human_revisions = (
        Revision.objects.filter(
            page__org_id__in=enabled_org_ids,
            page__status__gt=0,
            page__is_autogenerated=False,
        )
        .exclude(page__supertag__iregex=REGEX_PAGE_EXCLUDE)
        .exclude(author__username__in=USER_ROBOT_LOGIN)
        .select_related('page', 'author')
    )
    # One row per organization with the number of distinct page supertags that were revised.
    pages_per_org = human_revisions.values('page__org_id').annotate(cnt=Count('page__supertag', distinct=True))

    return [row['page__org_id'] for row in pages_per_org if row['cnt'] > 0]


def save(stats: List[OrgStat], path: str):
    """Write the collected statistics to *path* as a JSON array.

    Args:
        stats: Statistics objects; each is serialized through its ``dict()`` method.
        path: Destination file; it is overwritten if it already exists.
    """
    # Serialize first, so a failure here does not leave a truncated file behind.
    data = [item.dict() for item in stats]

    # `ensure_ascii=False` writes non-ASCII text (e.g. organization names) as-is,
    # so the encoding is pinned to UTF-8 instead of the locale default.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    print(f'[{len(stats)}] stats save by path [{path}]')


def main(sleep=0.01):
    """Compute statistics for every alive organization and save them to ``SAVE_PATH``.

    Args:
        sleep: Pause in seconds after each processed organization.

    Whatever has been collected is saved even when the run is interrupted
    with Ctrl+C or stopped by an exception in the loop.
    """
    alive_org_ids = get_alive_org_ids()

    print(f'\n=====================\nGet [{len(alive_org_ids)}] alive orgs\n=====================\n')

    collected: List[OrgStat] = []
    try:
        for i, org_id in enumerate(alive_org_ids):
            print(f'[{i}/{len(alive_org_ids)}] {org_id}')
            collected.append(calculate_org(org_id))
            # Short pause between organizations.
            time.sleep(sleep)
    except KeyboardInterrupt:
        # Ctrl+C ends the loop quietly; the partial result is still saved below.
        print()
    finally:
        save(collected, path=SAVE_PATH)
