#! /usr/bin/env python
# Simple checker of the ISO codes ('iso_name' / 'short_en_name' fields) of regions in a geodata file.

import argparse
import geobase6
import sys


def parse_args():
    """Parse the process command line and return the argparse namespace.

    Recognised options:
        --geodata      path to the geofile to check (None when omitted).
        --skip-no-iso  boolean flag, False unless given.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    cli.add_argument(
        '--geodata',
        help="path to geofile for checking"
    )
    cli.add_argument(
        '--skip-no-iso',
        action='store_true',
        help="skip all regions with no data in 'iso_name'-field"
    )
    return cli.parse_args()


# Id of the root region whose tree is walked by main_check().
EARTH_ID = 10000

# Region type codes referenced by the checks (listed by numeric value).
COUNTRY_TYPE = 3
COUNTRY_PART_TYPE = 5
CITY_TYPE = 6
VILLAGE_TYPE = 7
DISTRICT_TYPE = 10
AIRPORT_TYPE = 11
EXT_TERRITORY_TYPE = 12

# Region types that are expected to carry 'iso_name' / 'short_en_name' data;
# main_check() flags these types without data and other types with data.
VALUABLE_TYPES = [
    COUNTRY_TYPE,
    EXT_TERRITORY_TYPE,
    COUNTRY_PART_TYPE,
    DISTRICT_TYPE,
    CITY_TYPE,
    VILLAGE_TYPE,
    AIRPORT_TYPE,
]

# Length limits used by the is_*_ok() validators.
ISO_3166_1_LEN = 2
ISO_3166_2_MIN_LEN = 4
LOCODE_MIN_LEN = 4

# Longest code accepted by main_check() regardless of region type.
CODE_MAX_LEN = 6

# Named counters, filled in by inc_stat_counter() and dumped at exit.
stats = {}


def inc_stat_counter(counter_name):
    """Add one to the module-level ``stats`` counter called *counter_name*.

    A counter that has not been seen before starts at 1.
    """
    if counter_name in stats:
        stats[counter_name] += 1
    else:
        stats[counter_name] = 1


def is_iso3166_1_ok(iso_code):
    """Return True when *iso_code* is exactly ISO_3166_1_LEN characters long.

    Only the length is checked; the characters themselves are not validated.
    """
    code_len = len(iso_code)
    return code_len == ISO_3166_1_LEN


def is_iso3166_2_ok(iso_code):
    """Return True when *iso_code* has the shape ``XX-<something>``.

    The code must be at least ISO_3166_2_MIN_LEN characters long and consist
    of exactly two '-'-separated parts: a 2-character prefix and a non-empty
    suffix. Only lengths are checked, not the characters themselves.
    """
    pieces = iso_code.split('-')
    if len(iso_code) < ISO_3166_2_MIN_LEN:
        return False
    if len(pieces) != 2:
        return False
    prefix, suffix = pieces
    return len(prefix) == 2 and len(suffix) >= 1


def is_locode_ok(locode):
    """Return True when *locode* has the shape ``XX <something>``.

    The value must be at least LOCODE_MIN_LEN characters long and consist of
    exactly two space-separated parts: a 2-character prefix and a non-empty
    suffix. Only lengths are checked, not the characters themselves.
    """
    chunks = locode.split(' ')
    if len(locode) < LOCODE_MIN_LEN or len(chunks) != 2:
        return False
    prefix, suffix = chunks
    return len(prefix) == 2 and len(suffix) >= 1


def make_warn_note(reg, notes):
    """Print one tab-separated warning line for region *reg* to stdout.

    Columns: id, type, name, short_en_name, iso_name, notes.

    Args:
        reg: mapping-like region record providing the keys 'id', 'type',
            'name', 'short_en_name' and 'iso_name'.
        notes: value rendered with ``%s`` in the last column (main_check()
            passes a list of note strings, so its list repr is printed).
    """
    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("%d\t%d\t%s\t%s\t%s\t%s" % (
        reg['id'],
        reg['type'],
        reg['name'],
        reg['short_en_name'],
        reg['iso_name'],
        notes,
    ))


def main_check(args):
    """Walk every region under EARTH_ID and report suspicious ISO codes.

    For each region yielded by ``lookup.get_tree(EARTH_ID)`` the function
    updates the module-level counters through ``inc_stat_counter`` and, when
    at least one check fails, prints a line through ``make_warn_note``.

    Args:
        args: parsed command-line namespace. ``args.geodata`` is passed to
            ``geobase6.Lookup``; ``args.skip_no_iso`` makes the function skip
            regions that have neither 'iso_name' nor 'short_en_name'.
    """
    lookup = geobase6.Lookup(args.geodata)

    # These groups do not change between regions, so build them once.
    iso3166_1_types = (COUNTRY_TYPE, EXT_TERRITORY_TYPE)
    iso3166_2_types = (COUNTRY_PART_TYPE, DISTRICT_TYPE)
    locode_types = (CITY_TYPE, VILLAGE_TYPE, AIRPORT_TYPE)

    for reg in lookup.get_tree(EARTH_ID):
        inc_stat_counter('regs_total')

        reg_type = reg['type']
        if reg_type < COUNTRY_TYPE:
            # Types numerically below COUNTRY_TYPE are not checked at all.
            inc_stat_counter('useless_regs')
            continue

        # Normalise missing values to '' so the string operations below
        # (endswith / split / len) cannot fail on None.
        iso_field = reg['iso_name'] or ''
        short_en_field = reg['short_en_name'] or ''
        has_data = bool(iso_field or short_en_field)

        if not has_data:
            # NOTE(review): 'no_both_iso' is only counted when the region is
            # actually skipped -- confirm this is the intended statistic.
            if args.skip_no_iso:
                inc_stat_counter('no_both_iso')
                continue
        elif not iso_field:
            inc_stat_counter('no_iso')
        elif not short_en_field:
            inc_stat_counter('no_short_en')

        notes = []
        is_valuable = reg_type in VALUABLE_TYPES

        if is_valuable and not has_data:
            inc_stat_counter('no-data-valuable-type')
            notes.append("/VALUEBLE-NO-DATA")

        if not is_valuable and has_data:
            inc_stat_counter('data-non-valuable-type')
            notes.append("/NON-VALUEBLE-DATA")

        # 'short_en_name' counts as matching when it is empty, equal to
        # 'iso_name', or a suffix of it.
        if short_en_field and short_en_field != iso_field and not iso_field.endswith(short_en_field):
            inc_stat_counter('non-eq-iso')
        else:
            inc_stat_counter('eq-iso')

        if reg_type in iso3166_1_types and not is_iso3166_1_ok(iso_field):
            inc_stat_counter('bad-3166-1')
            notes.append("/BAD-3166-1")

        if reg_type in iso3166_2_types and not is_iso3166_2_ok(iso_field):
            inc_stat_counter('bad-3166-2')
            notes.append("/BAD-3166-2")

        if reg_type in locode_types and not is_locode_ok(iso_field):
            inc_stat_counter('bad-locode')
            notes.append("/BAD-LOCODE")

        if CODE_MAX_LEN < len(iso_field):
            inc_stat_counter('bad-len')
            notes.append("/BAD-LEN")

        if notes:
            make_warn_note(reg, notes)


if __name__ == "__main__":
    # Script entry point: run the check, then dump the collected counters to
    # stderr so they stay separate from the warning lines printed on stdout.
    args = parse_args()
    main_check(args)
    # write() behaves the same on Python 2 and 3, whereas the "print >>"
    # chevron form only works as intended on Python 2.
    sys.stderr.write("%s\n" % (stats,))
