import logging

from lxml import html
from django.utils.encoding import force_bytes


# Module-level logger named after this module's import path.
log = logging.getLogger(__name__)


def deserialize_xml(xml, unhighlight=False):
    """Recursively convert an XML element into plain Python data.

    Args:
        xml: An element exposing ``find``, ``get``, ``itertext``,
            ``iterchildren``, ``text`` and ``tag``, or an already
            deserialized ``dict``.
        unhighlight: When true, an element that has an ``hlword`` child is
            flattened to the concatenation of all text found inside it.

    Returns:
        * the same object, if ``xml`` is a ``dict``;
        * a ``str`` for a flattened highlighted element (see ``unhighlight``);
        * ``xml.text`` if the element has no children -- this holds even
          when the element declares ``type="list"`` or ``type="dict"``;
        * a ``list`` of deserialized children if ``type="list"``;
        * a ``dict`` mapping child tag to deserialized child if
          ``type="dict"`` (a repeated tag keeps the last child);
        * otherwise a ``dict`` mapping child tag to the list of all
          deserialized children carrying that tag.
    """
    if isinstance(xml, dict):
        return xml

    # Test the cheap flag first so the child lookup only runs when needed.
    if unhighlight and xml.find('hlword') is not None:
        return ''.join(xml.itertext())

    children = list(xml.iterchildren())
    if not children:
        return xml.text

    node_type = xml.get('type')

    if node_type == 'list':
        return [deserialize_xml(child, unhighlight) for child in children]

    if node_type == 'dict':
        return {
            child.tag: deserialize_xml(child, unhighlight)
            for child in children
        }

    # Untyped element: group children by tag, keeping document order
    # within each group.
    grouped = {}
    for child in children:
        grouped.setdefault(child.tag, []).append(
            deserialize_xml(child, unhighlight)
        )
    return grouped


def parse_html(content):
    """Parse ``content`` into a document via ``html.document_fromstring``.

    The content is first handed to the parser as given. If that raises
    ``ValueError``, it is converted with ``force_bytes`` and parsed again;
    an error from the second attempt propagates to the caller.
    """
    parse = html.document_fromstring
    try:
        document = parse(content)
    except ValueError:
        # NOTE(review): presumably lxml refusing a unicode string that
        # carries an encoding declaration -- confirm against lxml docs.
        document = parse(force_bytes(content))
    return document
