# coding: utf-8



import urllib.parse
from lxml import etree

from django.utils.encoding import force_text

from .base import RepoCrawler, RepoTuple


class HgWebCrawler(RepoCrawler):
    """
    Crawler that discovers Mercurial repositories by parsing an hgweb index page.

    Listing rows are the ``<tr>`` elements with class ``parity0``/``parity1``.
    A row whose second cell has no text is treated as a folder and crawled
    recursively; any other row is reported as a repository.
    """

    # Substrings of ``source.web_url`` that identify the Arcadia Mercurial
    # hosts; their links point to a separate web UI instead of hgweb.
    _ARCADIA_HOSTS = (
        'arcadia-hg.yandex-team.ru',
        'arcadia-hg04.search.yandex.net',
    )
    _ARCADIA_URL = 'https://a.yandex-team.ru'

    def _is_arcadia(self, repo):
        """Return True if the repo's source is one of the Arcadia hosts."""
        web_url = repo.source.web_url
        return any(host in web_url for host in self._ARCADIA_HOSTS)

    def get_repos(self, url=None, prefix=None, **kwargs):
        """
        Yield a ``RepoTuple`` for every repository reachable from an index page.

        :param url: index page to parse; defaults to ``self.source.web_url``.
        :param prefix: path of the enclosing folder, prepended to each name
            as ``prefix/name``; ``None`` or empty for the top-level page.
        :param kwargs: not used here; forwarded unchanged to nested calls.
        :raises IndexError: if a listing row lacks the expected link markup
            or a second cell, i.e. the page is not in the expected layout.

        HTTP errors are surfaced through ``response.raise_for_status()``.
        """
        url = url or self.source.web_url
        auth = self.get_auth()
        response = self.session.get(url=url, **auth)
        response.raise_for_status()

        doc = etree.fromstring(response.content, parser=etree.HTMLParser())

        for row in doc.xpath("//tr[@class='parity0' or @class='parity1']"):
            cells = row.xpath('./td')
            link_cell = cells[0]

            name = force_text(
                link_cell.xpath("./a[@class='list']/b/text()")[0]
            ).strip('/')
            if prefix:
                name = '%s/%s' % (prefix, name)

            # Links may be relative, so resolve them against the current page.
            href = urllib.parse.urljoin(
                url,
                force_text(link_cell.xpath("./a[@class='list']/@href")[0]),
            )

            description = cells[1].text

            if not description:
                # No description: this row is a folder, so descend into it
                # and use its path as the prefix for nested names.
                yield from self.get_repos(href, name, **kwargs)
                continue

            yield RepoTuple(
                name=force_text(self.normalize_name(name)),
                owner='root',
                vcs_name=force_text(name),
                description=force_text(description),
                default_branch='default',
                is_public=True,
            )

    def get_repo_url(self, repo):
        """
        Return the web URL of ``repo``.

        Arcadia gets special handling: every repository on an Arcadia host
        maps to the single Arcadia web UI address. For other sources the URL
        is ``<web_url>/<vcs_name>``; trailing slashes on ``web_url`` are
        dropped so the result never contains a doubled separator.
        """
        if self._is_arcadia(repo):
            return self._ARCADIA_URL
        return '%s/%s' % (repo.source.web_url.rstrip('/'), repo.vcs_name)

    def get_commit_url(self, repo, id_):
        """
        Return the web URL of commit ``id_`` in ``repo``.

        Arcadia hosts use a ``/commit/<id>`` path, all other sources use the
        ``/rev/<id>`` path.
        """
        repo_url = self.get_repo_url(repo)
        if self._is_arcadia(repo):
            return repo_url + '/commit/' + id_
        return repo_url + '/rev/%s' % id_
