# -*- coding: utf-8 -*-
"""
This module converts JSON Direct geotree into text ADVQ geotree.

ADVQ geotree is a collection of lines where each line is a tab-separated list of numbers:

<node_id>\\t<child_id_1>\\t<child_id_2>\\t...\\t<child_id_N>

where N >= 0.  Lines are sorted by numerical value of <node_id>.

Copied from ADVQ git repository, with minimal modifications.
"""
import re
from collections import defaultdict, deque


# Matches a comma together with any whitespace around it; used to split the
# comma-separated list of child ids.  re.DOTALL has no effect on this pattern
# because it contains no ".".
RE_COMMA = re.compile(r'\s*,\s*', re.DOTALL)


class InvalidGeotreeError(ValueError):
    """Errors were found in the tree downloaded from geoexport.

    The tree is invalid and cannot be used.
    """


def validate_regions(regions):
    """Check that the ``regions`` tree is valid.

    ``regions`` maps a region id to the collection of its child ids.

    The following invariants are enforced, in this order:

    * Region 10000 (Earth) is present.
    * Region 10001 (Eurasia) is present and is a child of 10000.
    * Region 225 (Russia) is present and is a child of 10001.
    * Region 977 (Crimea) is present and is a child of 10000.
    * Every child id is itself a key of ``regions``.
    * Region 10000 is the only region that is nobody's child.
    * Walking the tree from 10000 never reaches a region twice and
      reaches every region.

    Returns None when the tree is valid.

    Raises InvalidGeotreeError on the first violated invariant.
    """
    # (region id, human-readable name, required parent id or None)
    required = (
        (10000, 'Earth', None),
        (10001, 'Eurasia', 10000),
        (225, 'Russia', 10001),
        (977, 'Crimea', 10000),
    )
    for region_id, name, parent_id in required:
        if region_id not in regions:
            raise InvalidGeotreeError(
                'Region %d (%s) is missing in downloaded data' % (region_id, name))
        if parent_id is not None and region_id not in regions[parent_id]:
            raise InvalidGeotreeError(
                'Region %d (%s) is not a child of %d' % (region_id, name, parent_id))

    allchildren = set()
    # .items() rather than .iteritems() so the code runs on Python 2 and 3.
    for region_id, children in regions.items():
        allchildren.update(children)
        for child_id in children:
            if child_id not in regions:
                raise InvalidGeotreeError('Region %d has a missing child %d' % (region_id, child_id))

    roots = sorted(set(regions) - allchildren)
    if roots != [10000]:
        raise InvalidGeotreeError('Region 10000 is not a unique root of the tree: %r' % (roots,))

    # Breadth-first walk from the root: reaching a region a second time means
    # it has more than one path from the root.
    visited = set([10000])
    queue = deque([10000])
    while queue:
        region_id = queue.popleft()
        for child_id in regions[region_id]:
            if child_id in visited:
                raise InvalidGeotreeError('Region %d is participating in a cycle' % (child_id,))
            visited.add(child_id)
            queue.append(child_id)

    # A cycle that is detached from the root is never entered by the walk
    # above, and its members are each other's children, so the unique-root
    # check does not notice it either.  Any region left unvisited therefore
    # belongs to, or hangs off, such a cycle.
    unreachable = sorted(set(regions) - visited)
    if unreachable:
        raise InvalidGeotreeError(
            'Regions are not reachable from 10000 (detached cycle): %r' % (unreachable,))


def parse_json_geotree(response_data, fixup=True):
    """Convert geoexport region records into sorted geotree rows.

    Each record in ``response_data`` is read through its ``'id'``,
    ``'parent'`` and ``'children'`` keys.  ``'children'`` is a comma-separated
    string of ids, or a falsy value when there are none.  Records whose id is
    0 are skipped, and a parent id of 0 is treated as "no parent".

    :param response_data: list of region records.
    :param fixup: when true, add regions that are referenced but have no
        record of their own.  A region known only as somebody's ``'parent'``
        gets the children that named it; a region known only from a
        ``'children'`` list gets an empty child list.
    :returns: list of ``(region_id, sorted_child_ids)`` pairs sorted by
        ``region_id``.
    :raises ValueError: if ``response_data`` is not a list.
    :raises InvalidGeotreeError: if the resulting tree fails
        ``validate_regions``.
    """
    if not isinstance(response_data, list):
        raise ValueError("geoexport should return list of region data")

    regions = {}
    reverse = defaultdict(list)  # parent id -> ids of records naming it as parent
    allchildren = set()

    for regdata in response_data:
        regid = int(regdata['id'])
        if regid == 0:
            continue
        parentid = int(regdata['parent'])
        if parentid != 0:
            reverse[parentid].append(regid)
        if regdata['children']:
            children = [int(childid) for childid in RE_COMMA.split(regdata['children'])]
        else:
            children = []
        children.sort()
        regions[regid] = children
        allchildren.update(children)

    if fixup:
        # Repair problems in the data.
        # .items() rather than .iteritems() so the code runs on Python 2 and 3;
        # only ``regions`` is modified inside the loop, never ``reverse``.
        for regid, children in reverse.items():
            if regid not in regions:
                children.sort()
                regions[regid] = children
        for childid in allchildren:
            if childid not in regions:
                regions[childid] = []

    validate_regions(regions)
    return sorted(regions.items())


def make_geotree_txt(regions):
    """
    Render geotree rows as text.

    ``regions`` is an iterable of ``(node_id, child_ids)`` pairs.  Each pair
    becomes one line: the node id followed by its child ids, separated by
    tabs and terminated by a newline.  Returns the lines concatenated into a
    single string.
    """
    lines = []
    for node_id, child_ids in regions:
        fields = ['%d' % (node_id,)]
        fields.extend('%d' % (child,) for child in child_ids)
        lines.append('\t'.join(fields) + '\n')
    return ''.join(lines)
