# coding: utf-8
import codecs
import shortuuid
from socket import timeout
from ssl import SSLError
from http.client import InvalidURL, BadStatusLine

from urllib.parse import urlparse, urlunparse, ParseResult
from urllib.error import URLError
from urllib.parse import quote, unquote

from .logger import logger
from .settings import TRUSTED_DOMAINS, REFERRER_LEAKS, DEFAULT_PROTOCOL, REPLACE_SHEME_FOR
from .language import detect_language
from .ext_urls import get_external_url
from .viruses.main import is_infected
from .cache import WasInCacheException
from .responses import (
    Redirect302Response,
    MetaRedirectResponse,
    InfectedResponse,
    ImageResponse,
    NotModifiedResponse,

)


def is_trusted_host(purl):
    """
    Tell whether the host of a parsed url may be trusted

    A host is trusted when it is a proper subdomain of one of TRUSTED_DOMAINS
    (the bare trusted domain itself does not match) and at the same time
    is neither listed in REFERRER_LEAKS nor a subdomain of a listed entry.
    A leading 'www.' is dropped from the host before both checks.

    @param purl: parsed url, must be exactly a urllib.parse.ParseResult
    @return: True/False
    @raise Exception: when the parsed url has no hostname
    """
    assert type(purl) is ParseResult

    host = purl.hostname
    if not host:
        raise Exception('%s is not a valid url' % str(purl))

    if host.startswith('www.'):
        host = host[len('www.'):]

    # first condition: host lies under one of the trusted domains
    under_trusted_domain = any(
        host.endswith('.' + domain) for domain in TRUSTED_DOMAINS
    )
    if not under_trusted_domain:
        return False

    # second condition: host is not a leak entry and not a subdomain of one;
    # the dot prefix lets an exact match and a subdomain match share one test
    dotted_host = '.' + host
    leaks_referrer = any(
        dotted_host.endswith('.' + leak) for leak in REFERRER_LEAKS
    )
    return not leaks_referrer


def prepare_url(url):
    """
    Build the sanitized url the user shall visit from the raw query value

    Every component of the url is percent-quoted with its own set of
    characters that stay unquoted, the netloc is IDNA-encoded first,
    and CR / LF characters are removed from the final string.
    This is to prevent XSS:
    http://h.yandex.net/?http://wikipedia.org/"/><script>alert(1);</script>

    @param url: destination url as str, or as utf-8 encoded bytes
    @return: sanitized url as str
    """
    raw = url if isinstance(url, str) else url.decode('utf-8')

    # urlparse parses the actual netloc as path if the url has no scheme or //,
    # so prepend // when neither is present before the query part
    before_query = raw.split('?', 1)[0]
    if '://' not in before_query and not raw.startswith('//'):
        raw = '//' + raw

    parts = urlparse(raw, scheme=DEFAULT_PROTOCOL)

    # characters that are left unquoted in path and query
    unreserved = "-_.!~*'()"  # + alphanum
    reserved = ":@&=+$,"
    common_safe = unreserved + reserved

    path = quote(parts.path, '/%;' + common_safe)
    query = quote(parts.query, '/%;?' + common_safe)
    parameters = quote(parts.params, '+;=')
    fragment = quote(parts.fragment, '!/+')

    # replace scheme to https for selected domains
    if parts.hostname and parts.hostname in REPLACE_SHEME_FOR:
        scheme = 'https'
    else:
        scheme = parts.scheme

    # netloc may be non-ascii (e.g. cyrillic 'президент.рф'), encode it with IDNA:
    # 'президент.рф' -> 'xn--d1abbgf6aiiy.xn--p1ai'
    try:
        host_part = parts.netloc
        if not isinstance(host_part, str):
            host_part = host_part.decode('utf-8')
        host_part = codecs.encode(host_part, 'idna').decode('utf-8')
    except Exception:
        # best effort: log and fall back to the netloc as it was parsed
        logger.exception('could not encode netlock: %s', parts.netloc)
        host_part = parts.netloc

    # The last places where CR-LF can still be smuggled in are the netloc and the scheme.
    # In theory CR-LF could simply be cut out, but then some other extraordinary vulnerability
    # based on unescaped characters would show up, so absolutely every section goes through
    # quoting, each with its own exceptions.
    #
    # RFC 3986 is huge and every implementation of it has its own quirks,
    # so in theory this may break something exotic.
    host_part = quote(host_part, ':@')
    scheme = quote(scheme)

    assembled = urlunparse((scheme, host_part, path, parameters, query, fragment))
    return assembled.replace('\n', '').replace('\r', '')


def get_destination_url(request):
    """
    Get destination url from request

    The whole query string is treated as the destination url. It is unquoted
    once, then unquoted again while it still starts with a quoted '%' or '/'
    ('%25' / '%2F'), at most 7 extra times, and finally passed to prepare_url.

    @param request: mapping holding 'QUERY_STRING' and 'HTTP_ACCEPT_LANGUAGE';
                    a bytes 'QUERY_STRING' is decoded as utf-8 and the decoded
                    value is stored back into the mapping
    @return: tuple (destination url, language); destination url is None
             when the query string is missing or empty
    """
    language = detect_language(request.get('HTTP_ACCEPT_LANGUAGE'))

    if not request.get('QUERY_STRING'):
        return None, language

    if not isinstance(request['QUERY_STRING'], str):
        request['QUERY_STRING'] = request['QUERY_STRING'].decode('utf-8')

    # urls in query accidentally may be quoted multiple times
    query = unquote(request['QUERY_STRING'])
    counter = 0
    # The retry limit must guard both prefixes. Written as
    # `a or b and counter < 7` the limit applied to '%2F' only, because `and`
    # binds tighter than `or`, so a long '%2525...' chain was unquoted without bound.
    while counter < 7 and query.startswith(('%25', '%2F')):
        query = unquote(query)
        counter += 1

    if counter > 0:
        logger.warning('reunquoted %s - %d times', query, counter)

    return prepare_url(query), language


def get_external_resource_redirect(dest_url, orig_request):
    """
    Fetch the external resource (possibly an image) and pick the response for it

    An image resource is handed over to ImageResponse; any other successfully
    fetched resource is closed and answered with MetaRedirectResponse.
    A fetch error carrying code 304 gives NotModifiedResponse, every other
    handled error gives MetaRedirectResponse.

    @param dest_url: url to fetch, passed to get_external_url
    @param orig_request: original request, passed to get_external_url
    @return: response object
    """
    try:
        resource = get_external_url(dest_url, orig_request)

    except (URLError, InvalidURL, BadStatusLine) as exc:
        not_modified = getattr(exc, 'code', None) == 304
        if not_modified:
            return NotModifiedResponse(exc)

        # leave client to handle unknown URLError
        return MetaRedirectResponse()

    except (SSLError, timeout):
        # network errors will not be cached
        # https timeout raises SSLError
        logger.warning('response from %s was too slow', dest_url)

        # leave client to handle slow requests
        return MetaRedirectResponse()

    except WasInCacheException:
        logger.info('got response for %s from cache', dest_url)
        return MetaRedirectResponse()

    except ConnectionResetError:
        logger.warning('connection with url %s was reset', dest_url)
        return MetaRedirectResponse()

    # fetched fine: only images are served through, the rest is redirected
    content_type = resource.headers.get('content-type', '')
    if content_type.startswith('image/'):
        return ImageResponse(resource)

    resource.close()
    return MetaRedirectResponse()


def response_strategy(dest_url, orig_request):
    """
    Pick the response for a destination url

    Checks are made in this order:
      1. trusted host                  -> Redirect302Response
      2. url reported by is_infected   -> InfectedResponse
      3. request without HTTP_REFERER  -> Redirect302Response
      4. anything else                 -> result of get_external_resource_redirect

    @param dest_url: destination url
    @param orig_request: original request mapping
    @return: response object
    """
    parsed_dest = urlparse(dest_url)

    # link to a trusted host is redirected straight away
    if is_trusted_host(parsed_dest):
        return Redirect302Response()

    # infected destination gets its own response
    if is_infected(dest_url):
        return InfectedResponse()

    has_referer = 'HTTP_REFERER' in orig_request
    if has_referer:
        # there is a referer in the request, so go through the external resource check
        return get_external_resource_redirect(dest_url, orig_request)

    # request had no referer from the start
    logger.debug('request for url %s had no referer: %s',
                 dest_url, orig_request)
    return Redirect302Response()


def get_request_id(request):
    """
    Get request id from request headers, or generate a new one

    'HTTP_X_REQ_ID' is preferred, then 'HTTP_X_REQUEST_ID'. The value of a
    present key is returned as is. When neither key is present,
    an id of the form 'auto-<shortuuid>' is generated.

    @param request: request mapping
    @return: request id
    """
    # The fallback id is built only when it is needed: as a default argument
    # of dict.get() it would be generated on every call, header or not.
    for header in ('HTTP_X_REQ_ID', 'HTTP_X_REQUEST_ID'):
        if header in request:
            return request[header]

    return 'auto-' + shortuuid.uuid()
