import re
from urllib.parse import parse_qs, urlparse

from django.conf import settings
from lxml import etree

from wiki.integrations.ms.client import DocRetrieverClient
from wiki.integrations.ms.exceptions import Ms365BadLink
from wiki.integrations.ms.stub_client import DocRetrieverStubClient

# Matches the path component of a Doc.aspx link:
#   * an optional "/:<app>:/r" prefix, where <app> is one of the letters w, x or p
#     (presumably Word / Excel / PowerPoint -- confirm);
#   * "ns": a lazily matched prefix of word characters, digits, "_", "-" and "/";
#   * "_layouts/<layout_id>/Doc.aspx", where <layout_id> is one or more digits.
NS_REGEX = re.compile(r'^(/:(?P<app>[wxp]):/r)?(?P<ns>[\w\d_\-/]*?)_layouts/(?P<layout_id>\d+)/Doc\.aspx$')
# Hosts from which links are accepted; parse_ms365_url compares the lower-cased netloc against this set.
ALLOWED_NETLOCS = frozenset(['yandexteam.sharepoint.com', 'yandexteam-my.sharepoint.com'])


def is_embed_iframe(smth):
    """Return True when ``smth`` contains an opening ``<iframe`` tag, ignoring case."""
    lowered = smth.lower()
    return lowered.find('<iframe') != -1


def extract_src_from_iframe(html):
    """Return the ``src`` attribute of the first ``<iframe>`` found in ``html``.

    The markup is parsed with ``lxml.etree.HTML`` and searched with the
    ``//iframe`` XPath expression.

    Returns:
        The value of the first iframe's ``src`` attribute, or ``None`` when
        that iframe has no ``src`` attribute.

    Raises:
        Ms365BadLink: if the parsed markup contains no ``<iframe>`` element.
    """
    document = etree.HTML(html)
    found = document.xpath('//iframe')
    if not found:
        raise Ms365BadLink()
    first_iframe = found[0]
    return first_iframe.attrib.get('src')


# Query-string keys (compared in lower case) that are left out of the
# presentation params returned by extract_presentation_params.
reserved_params = frozenset(('sourcedoc', 'action', 'wdorigin'))


def extract_presentation_params(url):
    """Collect the non-reserved query parameters of ``url``.

    Each key keeps its original spelling and is mapped to the first value
    that appears for it in the query string. Keys whose lower-cased form is
    in ``reserved_params`` are skipped.

    Returns:
        dict mapping parameter name to its first value.

    Raises:
        Ms365BadLink: if ``urlparse`` fails on ``url``.
    """
    try:
        parsed = urlparse(url)
    except Exception:
        raise Ms365BadLink()

    presentation = {}
    for name, values in parse_qs(parsed.query).items():
        if name.lower() in reserved_params:
            continue
        presentation[name] = values[0]
    return presentation


def parse_ms365_url(url):
    """Split a ``Doc.aspx`` link into its host, namespace and document id.

    The link is accepted only when all of the following hold:

    * its netloc, lower-cased, is one of ``ALLOWED_NETLOCS``;
    * its path matches ``NS_REGEX``;
    * its query string carries a non-blank ``sourcedoc`` parameter.

    Returns:
        A ``(netloc, ns, sourcedoc)`` tuple: the netloc exactly as written in
        the URL, the ``ns`` group captured by ``NS_REGEX`` and the first
        ``sourcedoc`` query value.

    Raises:
        Ms365BadLink: if the URL cannot be parsed or fails any check above.
    """
    try:
        parts = urlparse(url)
    except Exception:
        raise Ms365BadLink()

    # Cheapest check first: reject foreign hosts before running the regex.
    if parts.netloc.lower() not in ALLOWED_NETLOCS:
        raise Ms365BadLink()

    match = NS_REGEX.match(parts.path)
    if match is None:
        raise Ms365BadLink()

    # parse_qs leaves out keys that are absent or have a blank value, so a
    # link without a usable sourcedoc must be rejected explicitly rather than
    # surfacing as a KeyError to the caller.
    sourcedoc_values = parse_qs(parts.query).get('sourcedoc')
    if not sourcedoc_values:
        raise Ms365BadLink()

    return parts.netloc, match.group('ns'), sourcedoc_values[0]


def get_doc_retriever_client():
    """Build a new doc-retriever client.

    Returns a ``DocRetrieverStubClient`` when ``settings.DOC_RETRIEVER_USE_STUB``
    is truthy, otherwise a ``DocRetrieverClient``.
    """
    if settings.DOC_RETRIEVER_USE_STUB:
        return DocRetrieverStubClient()
    return DocRetrieverClient()
