import logging
import urllib.parse
from django.conf import settings

from ..dto import UrlObject

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


class UrlsParser:
    """
    Matches raw url strings against compiled worker patterns and wraps
    every successful match into a UrlObject
    """

    def parse_urls(self, urls, compiled_workers):
        """
        Lazily parses every url and drops the falsy results
        (urls that could not be split or that no worker matched)
        :type urls: collections.abc.Iterable[str]
        :type compiled_workers: dict[worker_file_name: MatchOptions]
        :rtype: collections.abc.Iterator[UrlObject]
        """
        return filter(None, (self.parse_url(url, compiled_workers) for url in urls))

    def parse_url(self, url, compiled_workers):
        """
        Converts raw string url into UrlObject

        Workers are tried in the iteration order of ``compiled_workers``; the
        first one whose hostname and path patterns match (plus its fragment
        and query patterns, when it defines them) wins.

        When the netloc equals ``settings.HIDE_REF_NETLOC`` the unquoted query
        string is treated as the real url, and the result is flagged with
        ``hide_ref=True``.

        :type url: str
        :type compiled_workers: dict[worker_file_name: MatchOptions]
        :rtype: UrlObject | None
        :return: None when the url cannot be split or no worker matches
        """
        split_result = self._split_url(url)
        if split_result is None:
            return None

        hide_ref = split_result.netloc == settings.HIDE_REF_NETLOC
        if hide_ref:
            url = urllib.parse.unquote(split_result.query)
            # The wrapped url is as untrusted as the outer one, so it gets
            # the same guarded split instead of raising to the caller.
            split_result = self._split_url(url)
            if split_result is None:
                return None

        for worker_class_file, options in compiled_workers.items():
            hostname_match = options.hostname_regex.match(split_result.netloc)
            if not hostname_match:
                continue
            path_match = options.path_regex.match(split_result.path)
            if not path_match:
                continue

            # Reset for every worker: a match left over from a previously
            # rejected worker must not leak into this worker's result.
            fragment_match = None
            query_match = None

            fragment_regex = options.fragment_regex
            if fragment_regex:
                fragment_match = fragment_regex.match(split_result.fragment)
                if not fragment_match:
                    continue

            query_regex = options.query_regex
            if query_regex:
                query_match = query_regex.match(split_result.query)
                if not query_match:
                    continue

            return UrlObject(
                url=url,
                split_result=split_result,
                hostname_match=hostname_match,
                path_match=path_match,
                fragment_match=fragment_match,
                query_match=query_match,
                worker_class_file=worker_class_file,
                hide_ref=hide_ref,
            )
        return None

    @staticmethod
    def _split_url(url):
        """
        Splits url into components, logging a warning instead of raising
        :type url: str
        :rtype: urllib.parse.SplitResult | None
        """
        try:
            return urllib.parse.urlsplit(url)
        except Exception as e:
            # Deliberately broad: parsing is best-effort, so one bad url is
            # reported and skipped rather than aborting the whole batch.
            logger.warning('Cannot split url "%s": %s', url, e)
            return None


# Module-level parser instance, created once when this module is imported.
urls_parser = UrlsParser()
