#!/usr/bin/env python2
# -*- coding: utf-8 -*-

from __future__ import print_function

import argparse
import geobase6
import json
import sys


# Region type codes, compared against the 'type' field of region records
# returned by the geobase lookup.
COUNTRY_TYPE = 3
REGION_TYPE = 5
CITY_TYPE = 6
VILLAGE_TYPE = 7
DISTRICT_TYPE = 10
FOREIGN_REGIONS_TYPE = 12

# Well-known region ids.
EARTH_ROOT_ID = 10000  # root of the tree walked by main()
RU_ID = 225  # presumably Russia ("RU" in the --countries-ids help) -- TODO confirm
CIS_ID = 166  # its direct children form the country list for the 'cis' option

# Ids of settlements whose centre coordinates are considered correct even
# though the lookup by location fails or resolves into another country
# (missing borders, unlinked islands, disputed territories). main() reports
# them with an EXCLUDE-* mark and does not count them as critical.
# The numbers after each name look like parent region ids -- TODO confirm.
center_ok_no_borders_ids = [
    # in the Pacific
    111152,  # Lord Howe Island, 21761,211; geo is OK, no border, not part of Wales|Australia
    21533,   # Pago Pago, 20860; city is not linked, the island is outside Samoa's borders

    # Isle of Man, GB
    112779,  # Douglas, 109540,102; island is not linked to a region, the city has no border
    112780,  # Onchan, 109540,102
    112781,  # Ramsey, 109540,102

    # Jersey island, GB
    153107,  # Saint Brelade, 145365,109541,102 - no border/linkage for the island and the city

    112693,  # Banda Neira, 120825,10095,183; village on an island in Indonesia, island is outside the province borders
    107920,  # Grytviken, 101523 - neither the islands nor the town have borders

    104110,  # Dakhla, 10020; Morocco/Western Sahara dispute
    112783,  # Nablus, city; Palestine/Israel dispute
    114035,  # Rawalakot (and Kashmir as a whole) 144598,10102 - India/Pakistan dispute
    114040,  # Gilgit, same place, Kashmir
    111199,  # 111199, Bolivia, 123430,10015 - our border between the countries is a straight line, but in reality it is not
]

# Region ids with known, tolerated problems. When --apply-excludes is given,
# main() counts such regions as "skip_known_exclude" instead of reporting them.
# Currently empty.
known_excludes_ids = [
]

# Expected minimum values for selected stat counters, checked at the end of main().
# Keys are counter names of the form "t#<region type>_<counter>".
#   min_qty        - minimal acceptable counter value;
#   is_strong      - when True, any value below min_qty is a failure;
#   ok_lost_level% - for non-strong limits: tolerated shortfall in percent of
#                    min_qty (treated as 0 when absent).
wanted_good_limits = {
    "t#6_in_children": {"min_qty":86, "is_strong":False, "ok_lost_level%":2.},
    "t#6_same_country": {"min_qty":14933, "is_strong":True},
    "t#6_same_reg": {"min_qty":11741, "is_strong":True},
    "t#6_total": {"min_qty":14944, "is_strong":True},
    "t#7_same_country": {"min_qty":73666, "is_strong":True},
    "t#7_same_district": {"min_qty":1366, "is_strong":False, "ok_lost_level%":1.},
    "t#7_same_reg": {"min_qty":69914, "is_strong":True},
    "t#7_total": {"min_qty":78461, "is_strong":True},
}

# Module-wide counters: counter name -> number of hits. Filled through
# inc_stat_counter() and dumped as JSON to stderr at the end of main().
stats = {}


def inc_stat_counter(counter_name):
    """Add one to the module-level ``stats`` counter named *counter_name*.

    A counter that does not exist yet is created and starts at zero.
    """
    previous_value = stats.get(counter_name, 0)
    stats[counter_name] = previous_value + 1


def get_stats_as_str(stats):
    """Serialize *stats* to pretty-printed JSON.

    Keys are sorted, nesting is indented by four spaces, separators carry no
    extra whitespace and non-ASCII characters are emitted unescaped.
    """
    dump_options = {
        'sort_keys': True,
        'indent': 4,
        'separators': (',', ':'),
        'ensure_ascii': False,
    }
    return json.dumps(stats, **dump_options)


def parse_args():
    """Build the command-line parser and parse ``sys.argv``.

    Returns an ``argparse.Namespace`` with the attributes ``geodata``,
    ``countries_ids``, ``wanted_regs_ids``, ``prepare_excludes_ids``,
    ``apply_excludes`` and ``verbose_log``.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('-g', '--geodata',
                        default='geodata6.bin',
                        help="path to geodata file")
    parser.add_argument('-c', '--countries-ids',
                        default="cis",
                        help="comma separated ids list of checked countries; 'cis' := RU + CIS-only; 'all' := ids of known countries")
    parser.add_argument('-w', '--wanted-regs-ids',
                        default="",
                        help="comma separated region ids for a special check; when set, only these regions are checked")
    # This is a boolean switch; the previous help text was copy-pasted from -w.
    parser.add_argument('-e', '--prepare-excludes-ids',
                        action="store_true",
                        help="print sorted ids of the collected failed regions to stderr")
    parser.add_argument('-E', '--apply-excludes',
                        action="store_true",
                        help="let's skip known 'possible bad' ids")
    parser.add_argument('-V', '--verbose-log',
                        action="store_true",
                        help="also print regions resolved into their parent or into the same district")

    return parser.parse_args()


def get_first_n_parents_ids(lookup, reg_id, qty):
    """Return the first *qty* parent ids of *reg_id*.

    The first element of ``lookup.get_parents_ids(reg_id)`` is skipped (it is
    the region's own id), so only parents on levels 1..N are returned.
    """
    ancestors_ids = lookup.get_parents_ids(reg_id)[1:]
    return ancestors_ids[:qty]


def prepare_cid2iso(lookup):
    """Build a mapping from region id to ISO name.

    Covers regions of type COUNTRY_TYPE and then FOREIGN_REGIONS_TYPE; on an
    id clash the later type wins.
    """
    cid2iso = {}
    for region_type in (COUNTRY_TYPE, FOREIGN_REGIONS_TYPE):
        for region in lookup.get_regions_by_type(region_type):
            cid2iso[region['id']] = region['iso_name']
    return cid2iso


def parse_countries_ids(lookup, ids_string, cid2iso):
    """Turn the --countries-ids option value into a sorted list of country ids.

    * ``"all"`` - every id known to *cid2iso*;
    * ``"cis"`` - RU_ID plus the children of CIS_ID;
    * anything else - a comma separated list of integer ids.
    """
    if ids_string == "all":
        return sorted(cid2iso.keys())

    if ids_string == "cis":
        return sorted([RU_ID] + lookup.get_children_ids(CIS_ID))

    return sorted(int(token) for token in ids_string.split(","))


def str_without_spaces(v):
    """Return ``str(v)`` with every space character removed."""
    pieces = str(v).split(' ')
    return ''.join(pieces)


def prepare_reg_desc(lookup, r):
    """Format a one-line description of region record *r* for the log.

    The description contains the id, type, name, centre coordinates and the
    space-free list of parent ids obtained from *lookup*.
    """
    parents_repr = str_without_spaces(lookup.get_parents_ids(r['id']))
    fields = (r['id'], r['type'], r['name'], r['latitude'], r['longitude'], parents_repr)
    return "#%d/%d/%s(%f,%f)/pids:(%s)" % fields


def to_lower(v):
    """Return the lower-cased string form of *v*."""
    as_text = str(v)
    return as_text.lower()


def _count_limit_fails(collected_stats, limits):
    """Compare collected counters with their expected minimums; report to stderr.

    *limits* maps a counter name to its traits ('min_qty', 'is_strong' and the
    optional 'ok_lost_level%'). A strong limit fails on any shortfall; a
    non-strong one fails only when the shortfall, in percent of 'min_qty',
    exceeds 'ok_lost_level%'. Values above the minimum are only reported.

    Returns the number of failed limits.
    """
    limit_fails = 0
    for stat_counter, limit_traits in limits.items():
        min_value = limit_traits['min_qty']
        # A counter that was never incremented is absent from the dict. Treat
        # it as zero so that it is reported as a failed limit instead of
        # raising TypeError on the comparison / "%d" formatting of None.
        stat_value = collected_stats.get(stat_counter, 0)
        if stat_value < min_value:
            err_msg = ">>> LIMIT FAILED <<< qty[%s]; MIN == %d; GOT - %d" % (stat_counter, min_value, stat_value)
            if limit_traits['is_strong']:
                limit_fails += 1
            else:
                max_lost_perc = limit_traits.get('ok_lost_level%', 0)
                # "* 1." forces float division under Python 2.
                curr_lost_perc = ((min_value - stat_value) / (min_value * 1.)) * 100
                if max_lost_perc < curr_lost_perc:
                    limit_fails += 1
                err_msg += " +non-strong lost%% fail; %.3f (limit - %.3f)" % (curr_lost_perc, max_lost_perc)
            print(err_msg, file=sys.stderr)
        elif stat_value > min_value:
            print(">>> NOTA BENE! <<< qty[%s]; MIN == %d; GOT - %d (+)" % (stat_counter, min_value, stat_value), file=sys.stderr)
    return limit_fails


def main(args):
    """Check that settlement centres resolve back to a sensible region.

    Walks the region tree from EARTH_ROOT_ID. For every city/village of the
    checked countries, the region found at the centre coordinates is compared
    with the settlement itself, its parents, its district and its country.
    Per-region diagnostics go to stdout; counters and the final verdict go to
    stderr.

    Returns True when the check failed (critical problems were found or a
    limit from ``wanted_good_limits`` was violated), False otherwise.
    """
    lookup = geobase6.Lookup(args.geodata)
    cid2iso = prepare_cid2iso(lookup)
    # Sets keep the membership tests inside the per-region loop cheap.
    checked_countries = set(parse_countries_ids(lookup, args.countries_ids, cid2iso))
    wanted_ids = set(int(s) for s in args.wanted_regs_ids.split(",")) if args.wanted_regs_ids else None

    critical_qty = 0
    fails_ids = []

    # Both helpers read the loop variable ``r`` at call time, so they always
    # refer to the region that is currently being processed.
    def inc_qty(counter_name):
        inc_stat_counter("t#%d_%s" % (r['type'], counter_name))

    def inc_problems_qty():
        if args.apply_excludes and r['id'] in known_excludes_ids:
            inc_stat_counter("t#%d_skip_known_exclude" % r['type'])
            return
        fails_ids.append(r['id'])

    for r in lookup.get_tree(EARTH_ROOT_ID):
        inc_stat_counter("total_regs")

        if r['type'] <= 0:
            inc_stat_counter("skip_obsolete_regs")
            continue

        if r['type'] not in [CITY_TYPE, VILLAGE_TYPE]:
            inc_stat_counter("skip_non_settlements")
            continue

        inc_qty("total")

        if r['latitude'] == 0. and r['longitude'] == 0.:
            inc_qty("skip_zero_center")
            continue

        reg_country_id = lookup.get_country_id(r['id'])
        if reg_country_id not in checked_countries:
            inc_qty("skip_unwanted_countries_%d" % reg_country_id)
            continue

        if wanted_ids and r['id'] not in wanted_ids:
            inc_qty("skip_unwanted_regs")
            continue

        reg_id_by_ll = lookup.get_region_id_by_location(r['latitude'], r['longitude'])
        if reg_id_by_ll in center_ok_no_borders_ids:
            # A listed "no borders" region is resolvable by location now.
            inc_qty("center_ok_no_border_FIX")

        # 1. The centre does not resolve to any region at all.
        if reg_id_by_ll <= 0:
            if r['id'] in center_ok_no_borders_ids:
                inc_qty("exclude_geoloc")
                mark = "EXCLUDE-GEOLOC"
            else:
                inc_qty("bad_geoloc")
                mark = "BAD-GEOLOC"
                inc_problems_qty()
                critical_qty += 1

            print("[%s]\t%s\t=> %d" % (mark, prepare_reg_desc(lookup, r), reg_id_by_ll))
            continue

        # 2. The centre resolves into another (or an unknown) country.
        # NOTE(review): unlike the branch above, nothing is added to fails_ids
        # here -- confirm whether that is intended.
        country_id_by_ll = lookup.get_country_id(reg_id_by_ll)
        is_same_country = reg_country_id == country_id_by_ll
        if not is_same_country:
            if r['id'] in center_ok_no_borders_ids:
                inc_qty("exclude_country")
                mark = "EXCLUDE-COUNTRY"
            elif country_id_by_ll > 0:
                inc_qty("bad_country")
                mark = "BAD-COUNTRY"
                critical_qty += 1
            else:
                inc_qty("bad_geoloc")
                mark = "BAD-GEOLOC"
                critical_qty += 1

            print("[%s]\t%s\t=> r#%d/c#%d" % (mark, prepare_reg_desc(lookup, r), reg_id_by_ll, country_id_by_ll))
            continue

        inc_qty("same_country")

        # 3. Acceptable outcomes: the settlement itself, one of its children
        #    (up to two levels down), its direct parent, or the same district.
        if reg_id_by_ll == r['id']:
            inc_qty("same_reg")
            continue

        if r['id'] in get_first_n_parents_ids(lookup, reg_id_by_ll, 2):
            inc_qty("in_children")
            continue

        if reg_id_by_ll == r['parent_id']:
            inc_qty("in_parent")
            if args.verbose_log:
                print("[IN-PARENT]\t%s" % prepare_reg_desc(lookup, r))
            continue

        reg_district_id = lookup.get_parent_id_with_type(r['id'], DISTRICT_TYPE)
        district_id_by_ll = lookup.get_parent_id_with_type(reg_id_by_ll, DISTRICT_TYPE)
        if reg_district_id > 0 and reg_district_id == district_id_by_ll:
            inc_qty("same_district")
            if args.verbose_log:
                print("[IN-DISTRICT]\t%s\t%d\t%d/%d" % (prepare_reg_desc(lookup, r), reg_district_id, reg_id_by_ll, district_id_by_ll))
            continue

        # 4. Everything else is reported as a (non-critical) problem.
        reg_by_ll = lookup.get_region_by_id(reg_id_by_ll)
        # Same type and same name: possibly a namesake settlement.
        mb_omonim = r['type'] == reg_by_ll['type'] and r['name'] == reg_by_ll['name']
        if mb_omonim:
            inc_qty("mb_omonim")

        if args.apply_excludes and r['id'] in known_excludes_ids:
            # Under this condition the call only bumps "skip_known_exclude".
            inc_problems_qty()
            continue

        # NOTE(review): ids of WITH-PROBLEM regions never reach fails_ids, so
        # --prepare-excludes-ids lists only the BAD-GEOLOC ids collected in
        # step 1 -- confirm whether these regions should be recorded as well.
        print("[WITH-PROBLEM]\t%s\t%s\t%d\t%d"
              % (prepare_reg_desc(lookup, r), prepare_reg_desc(lookup, reg_by_ll), is_same_country, mb_omonim))
        inc_stat_counter("fail_%s_#%d" % (cid2iso[reg_country_id], r['type']))
        inc_qty("with_problem")

    print(get_stats_as_str(stats), file=sys.stderr)
    if args.prepare_excludes_ids:
        print("excludes: %s" % sorted(fails_ids), file=sys.stderr)

    if critical_qty > 0:
        print(">>> CHECK FAILED <<< critical_qty == %d" % critical_qty, file=sys.stderr)

    limit_fails = _count_limit_fails(stats, wanted_good_limits)

    if critical_qty == 0 and limit_fails == 0:
        print(">>> CHECK COMPLETED. VALID DATA <<<", file=sys.stderr)

    return critical_qty > 0 or limit_fails > 0


# main() returns True when the check failed; sys.exit() turns that into exit
# status 1 (and False into 0).
if __name__ == "__main__":
    sys.exit(main(parse_args()))
