import maps.wikimap.stat.libs.geobase_regions as gb
import copy


MAJOR_CFG_TABLE = '//home/maps/core/nmaps/analytics/geo-data/major_cfg'

REGION_FACETS_PATH = '//home/data_coverage/region_fasets'
# format: region_geoid, faset
#   region_geoid - geobase id
#   faset - facet name

FACETS_CFG_TABLE = '//home/maps/core/nmaps/analytics/geo-data/facets_cfg'
# format: facet, name, facet_id, parent_id, with_children
#   facet - facet name, as it is used in the 'faset' column of the 'region_fasets' table
#   name - facet name, used in reports
#   facet_id - fake geobase id for facet
#   parent_id - geobase id of facet parent
#   with_children - show children twice: with their real parent, and with the facet as a parent


ALL_MAJORS_FOR_REGION_SCHEMA = [
    {'name': 'region_id', 'type': 'utf8', 'required': True},
    {'name': 'least_major', 'type': 'boolean'},
    {'name': 'major_id', 'type': 'utf8', 'required': True},
    {'name': 'region_name', 'type': 'utf8', 'required': True},
    {'name': 'region_tree', 'type': 'utf8', 'required': True},
    {'name': 'population', 'type': 'utf8'}
]
# format: region_id, least_major, major_id, region_name, region_tree, population
#    region_id:   geobase id of region
#    least_major: true, if major_id represents the least major region for region_id
#    major_id:    one of the major ancestor regions, including the region itself if it is major.
#                 If the region belongs to any facet, there should be a record with major_id=facet_id
#    region_name: tree-like name of the corresponding major region, including all major ancestors
#    region_tree: region_name represented as tab-separated ids
#    population:  population of the corresponding major region

REGIONS_INFO_SCHEMA = [
    {'name': 'region_id', 'type': 'int64', 'required': True},
    {'name': 'region_name', 'type': 'utf8', 'required': True},
    {'name': 'facet', 'type': 'utf8', 'required': False},
]
# format: region_id, region_name, facet
#   region_id:    geobase id of region
#   region_name:  tree-like name of the least major region (which is either the region itself or its ancestor)
#   facet:        facet to which the region or one of its ancestors belongs

# Geobase id of the top-level region. Kept as an int because it is passed to
# gb.lookup().get_tree(); it is converted with str() wherever it is compared
# with, or used as, a key of the regions dictionary.
EARTH_REGION_ID = 10000
# NOTE(review): not referenced in the visible part of this file - presumably
# the parent id reported for the top-level region; confirm before relying on it.
NULL_REGION_ID = '0'
# Key (stringified geobase id) of the region that gets the RUSSIAN_0_PLUS_FACET
# facet assigned unconditionally in add_facet_data().
RUSSIA_REGION_ID = '225'
# Facet name used for all out-of-town regions in Russia.
RUSSIAN_0_PLUS_FACET = '0+'
# Separator between region names inside 'full_name'.
NAME_DELIMETER = '/'
# Separator between region ids inside 'region_tree'.
TREE_DELIMETER = '\t'
# Value of the geobase 'type' field that, together with MAJOR_POPULATION,
# makes a region major in collect_geobase_regions().
GEOBASE_TYPE_CITY = 6

# Minimal population for a GEOBASE_TYPE_CITY region to be treated as major.
MAJOR_POPULATION = 100000


def collect_geobase_descendants(region_id, sublevel):
    """Return ids of the geobase regions lying `sublevel` levels below `region_id`.

    The tree is walked one level at a time through
    `gb.lookup().get_children_ids`. When `sublevel` is not positive no lookup
    is performed and the result is `[region_id]`.
    """
    current_level = [region_id]
    levels_left = sublevel
    while levels_left > 0:
        # Replace the current level with the concatenated children of its nodes.
        current_level = [
            child_id
            for node_id in current_level
            for child_id in gb.lookup().get_children_ids(node_id)
        ]
        levels_left -= 1
    return current_level


def collect_geobase_regions(client):
    """Build the dictionary of geobase regions.

    Initializes geobase data through `gb.init_geodata_from_yt_copy(client)`,
    reads MAJOR_CFG_TABLE to collect the ids of explicitly configured major
    regions (the descendants of each row's `region_id` at depth `sublevel`),
    then walks the geobase tree starting from EARTH_REGION_ID.

    A region is major when it is explicitly configured, or when its type is
    GEOBASE_TYPE_CITY and its population is at least MAJOR_POPULATION.

    Returns a dict keyed by the stringified region id. Each value holds
    'region_id', 'parent_id', 'name', 'population' (all as strings), the
    'is_major' flag, and placeholders ('full_name', 'region_tree',
    'major_regions', 'facet_id', 'is_facet') that are filled in later.
    """
    gb.init_geodata_from_yt_copy(client)

    # A set keeps the membership test below O(1) per region; with a list the
    # whole collection was rescanned for every region of the geobase tree.
    major_ids = set()
    for row in client.read_table(MAJOR_CFG_TABLE):
        sublevel = row['sublevel']
        region_id = row['region_id']
        major_ids.update(collect_geobase_descendants(region_id, sublevel))

    regions = {}
    for region_data in gb.lookup().get_tree(EARTH_REGION_ID):
        region_id = region_data['id']
        region_type = region_data['type']
        population = region_data['population']
        is_major = region_id in major_ids \
            or (population >= MAJOR_POPULATION and region_type == GEOBASE_TYPE_CITY)

        regions[str(region_id)] = {
            'region_id': str(region_id),
            'parent_id': str(region_data['parent_id']),
            # NOTE(review): assumes geobase returns names as utf-8 byte strings.
            'name': region_data['name'].decode('utf-8'),
            'population': str(population),
            'full_name': None,
            'region_tree': None,
            'major_regions': None,
            'facet_id': None,
            'is_facet': False,
            'is_major': is_major,
        }
    return regions


def add_facet_data(client, regions):
    """Attach facet information to `regions` (modified in place).

    Steps:
      1. read facet definitions from FACETS_CFG_TABLE;
      2. for every row of REGION_FACETS_PATH store the facet id on the
         referenced region, and mark that region as major when the facet is
         configured 'with_children';
      3. assign the RUSSIAN_0_PLUS_FACET facet to RUSSIA_REGION_ID;
      4. add one fake region per facet, keyed by the facet id.

    Raises KeyError when a row refers to an unknown facet name or to a region
    id that is missing from `regions`.
    """
    # 1. facet definitions, plus an index from the facet column to the facet id
    facets = {}
    facet_id_by_altname = {}
    for cfg_row in client.read_table(FACETS_CFG_TABLE):
        facet_id = str(cfg_row['facet_id'])
        altname = str(cfg_row['facet'])
        facets[facet_id] = dict(
            facet_id=facet_id,
            name=str(cfg_row['name']),
            parent_id=str(cfg_row['parent_id']),
            altname=altname,
            with_children=cfg_row['with_children'],
        )
        facet_id_by_altname[altname] = facet_id

    # 2. link geobase regions to their facets
    for link_row in client.read_table(REGION_FACETS_PATH):
        altname = str(link_row['faset'])
        region_id = str(link_row['region_geoid'])
        facet_id = facet_id_by_altname[altname]
        region = regions[region_id]
        region['facet_id'] = facet_id
        if facets[facet_id]['with_children']:
            region['is_major'] = True

    # 3. special facet for all out-of-town regions in Russia
    regions[RUSSIA_REGION_ID]['facet_id'] = facet_id_by_altname[RUSSIAN_0_PLUS_FACET]

    # 4. fake regions representing the facets themselves
    for facet_id, facet in facets.items():
        regions[facet_id] = {
            'region_id': facet_id,
            'parent_id': facet['parent_id'],
            'name': facet['name'],
            'population': None,
            'full_name': None,
            'region_tree': None,
            'major_regions': None,
            'facet_id': None,
            'is_facet': True,
            'is_major': True,
        }


def collect_all_regions(client):
    """Return the regions dictionary: geobase regions extended with facet data."""
    all_regions = collect_geobase_regions(client)
    # add_facet_data() works in place, so the same dictionary is returned.
    add_facet_data(client, all_regions)
    return all_regions


def update_earth_node(regions):
    """Fill in the major-region fields of the top-level (Earth) region in place.

    The Earth node becomes its own least major region and the only entry of
    its 'major_regions' list; its population is set to '0'.
    """
    earth_key = str(EARTH_REGION_ID)
    earth = regions[earth_key]
    earth['least_major_id'] = earth_key
    earth['major_regions'] = [earth_key]
    earth['full_name'] = earth['name']
    # The tree string starts and ends with the delimiter.
    earth['region_tree'] = TREE_DELIMETER + earth_key + TREE_DELIMETER
    earth['population'] = '0'
    earth['is_major'] = True


def update_node(region_id, regions):
    """Compute the major-region fields of `region_id` in place.

    Nothing is done when 'major_regions' is already set. Otherwise the parent
    is resolved first (recursively) and its 'major_regions', 'full_name',
    'region_tree' and 'least_major_id' are inherited. A region whose
    'is_major' flag is exactly True additionally appends itself to each of
    them.
    """
    node = regions[region_id]
    if node['major_regions'] is not None:
        # already resolved
        return

    parent_id = node['parent_id']
    update_node(parent_id, regions)
    parent = regions[parent_id]

    # Start from a copy of the parent's data, so that the parent's list is
    # never shared with (or modified through) the child.
    inherited_majors = copy.deepcopy(parent['major_regions'])
    inherited_name = parent['full_name']
    inherited_tree = parent['region_tree']
    nearest_major_id = parent['least_major_id']

    if node['is_major'] is True:
        nearest_major_id = region_id
        inherited_majors.append(region_id)
        inherited_name = inherited_name + NAME_DELIMETER + node['name']
        inherited_tree = inherited_tree + region_id + TREE_DELIMETER

    node['least_major_id'] = nearest_major_id
    node['major_regions'] = inherited_majors
    node['full_name'] = inherited_name
    node['region_tree'] = inherited_tree


def update_major_data(regions):
    """Fill in the major-region fields of every region in `regions` in place."""
    # The Earth node is resolved first: update_node() walks up the parent
    # chain and stops at a region whose 'major_regions' is already set.
    update_earth_node(regions)
    for current_id in regions:
        update_node(current_id, regions)


def find_region_with_facet(region_id, regions):
    """Return the id of the closest region with a facet, starting at `region_id`.

    The search goes from `region_id` up the 'parent_id' chain and stops at the
    Earth region, which itself is never checked. Returns None when no region
    on the way has a truthy 'facet_id'.
    """
    earth_key = str(EARTH_REGION_ID)
    current_id = region_id
    while current_id != earth_key:
        node = regions[current_id]
        if node['facet_id']:
            return current_id
        current_id = node['parent_id']
    return None


def update_facets_population(regions):
    """Accumulate region populations into their facets' fake regions (in place).

    A region contributes its population to its facet unless it is the Earth
    region, has no facet, belongs to the RUSSIAN_0_PLUS_FACET facet, has the
    same facet as its parent, or has no population. Facet populations are
    stored as strings, like all other populations.
    """
    for region_id, region in regions.items():
        # skip top-level region
        if region_id == str(EARTH_REGION_ID):
            continue

        # skip regions without facet
        facet_id = region['facet_id']
        if facet_id is None:
            continue

        # skip facet Russia/0+
        # NOTE(review): the facet is found by its 'facet' column in
        # add_facet_data() but compared by its report 'name' here - confirm
        # that both hold the same value for this facet.
        facet = regions[facet_id]
        if facet['name'] == RUSSIAN_0_PLUS_FACET:
            continue

        # skip regions that inherit the facet from their parent
        if facet_id == regions[region['parent_id']]['facet_id']:
            continue

        # skip regions without population
        if region['population'] is None:
            continue
        added_population = int(region['population'])

        # a facet that has not been counted yet starts from zero
        counted = facet['population']
        facet_total = 0 if counted is None else int(counted)
        facet['population'] = str(facet_total + added_population)


def all_majors_for_region(regions, region_id):
    """Yield one row per major region that `region_id` has to be reported under.

    Rows come in three groups:
      1. every entry of the region's own 'major_regions', with the
         'least_major' flag set;
      2. the major regions of the facet found by find_region_with_facet(),
         except those already yielded in group 1;
      3. when the region carrying the facet is major and is not the facet's
         own parent - that region once more, named and placed under the facet.

    Rows of groups 2 and 3 have no 'least_major' key.
    """
    region = regions[region_id]
    least_major_id = region['least_major_id']
    own_major_ids = region['major_regions']

    # 1. own major regions
    for major_id in own_major_ids:
        major = regions[major_id]
        yield {
            'region_id': region_id,
            'region_name': major['full_name'],
            'region_tree': major['region_tree'],
            'population': major['population'],
            'major_id': major_id,
            'least_major': major_id == least_major_id
        }

    # 2. the facet and its parents
    facet_owner_id = find_region_with_facet(region_id, regions)
    if facet_owner_id is None:
        return
    facet_owner = regions[facet_owner_id]
    facet_id = facet_owner['facet_id']
    facet = regions[facet_id]
    for facet_major_id in facet['major_regions']:
        if facet_major_id in own_major_ids:
            continue
        facet_major = regions[facet_major_id]
        yield {
            'region_id': region_id,
            'region_name': facet_major['full_name'],
            'region_tree': facet_major['region_tree'],
            'population': facet_major['population'],
            'major_id': facet_major_id,
        }

    # 3. alternative, facet-based name for a major region with a facet
    if not facet_owner['is_major']:
        return
    # no alternative name when the facet hangs off this very region
    # (Russia/0+/Russia)
    if facet['parent_id'] == facet_owner_id:
        return
    alternative_name = facet['full_name'] + NAME_DELIMETER + facet_owner['name']
    alternative_tree = facet['region_tree'] + facet_owner_id + TREE_DELIMETER
    yield {
        'region_id': region_id,
        'region_name': alternative_name.encode('utf-8'),
        'region_tree': alternative_tree,
        'population': facet_owner['population'],
        'major_id': facet_owner_id,
    }


def get_region_info(regions, region_id):
    """Return a row with the region id, its least major region name and facet.

    'region_name' is the 'full_name' of the region's least major region.
    'facet' is the name of the facet found by find_region_with_facet()
    starting from that least major region, or None when there is none.
    """
    least_major_id = regions[region_id]['least_major_id']
    facet_owner_id = find_region_with_facet(least_major_id, regions)
    if facet_owner_id is None:
        facet_name = None
    else:
        owner_facet_id = regions[facet_owner_id]['facet_id']
        facet_name = regions[owner_facet_id]['name']

    return {
        'region_id': int(region_id),
        'region_name': regions[least_major_id]['full_name'],
        'facet': facet_name,
    }
