#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import sys


def parse_args():
    """Parse the command line (``sys.argv``) and return the argparse namespace.

    Recognised options: ``--datafile``, ``--traits``, ``--proc`` and
    ``-D/--debug-counter`` (stored as ``debug_counter``, an int).  Defaults
    are shown in ``--help`` output via ArgumentDefaultsHelpFormatter.
    """
    # (flags, keyword settings) pairs, registered in this order.
    option_specs = (
        (('--datafile',), {
            'default': "-",
            'help': "path to file with maxmind-isp-data (GeoIP2-ISP-Blocks-IPv[46].csv); '-' for stdin.",
        }),
        (('--traits',), {
            'default': "",
            'help': "path to file with unique ranges traits list.",
        }),
        (('--proc',), {
            'default': "ipreg",
            'help': "ipreg|asset",
        }),
        (('-D', '--debug-counter'), {
            'type': int,
            'default': 0,
            'help': "print '.' for each K-th row; 0 - disable",
        }),
    )

    cli = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()


# Module-wide run counters: counter name -> number of times it was bumped.
stats = {}


def inc_stat_counter(counter_name):
    """Add one to ``stats[counter_name]``, creating the counter at 1 if absent."""
    if counter_name in stats:
        stats[counter_name] += 1
    else:
        stats[counter_name] = 1


def load_traits(traits_fname):
    """Load the traits list and map each traits string to its row number.

    For every line of the file, the key is the first tab-separated field of
    the whitespace-stripped line and the value is the 1-based line number.
    When the same key occurs on several lines, the last line number wins.

    Args:
        traits_fname: path of the traits file to read.

    Returns:
        dict mapping traits string -> row number (always >= 1).

    Raises:
        Exception: if the file contains no lines at all.
        IOError/OSError: propagated from ``open`` when the file is unreadable.
    """
    all_traits = {}

    # Context manager guarantees the handle is closed, even if parsing fails.
    with open(traits_fname) as traits_file:
        for row_num, line in enumerate(traits_file, 1):
            traits_line = line.strip().split('\t')[0]
            all_traits[traits_line] = row_num

    # Every line adds a key, so an empty dict means the file had no lines.
    if not all_traits:
        raise Exception('no data in %s' % traits_fname)

    return all_traits


def parse_ipreg_row(line):
    """Split an "ipreg" row into ``(ip_range, traits_line)``.

    The row must contain exactly one tab; otherwise the tuple unpacking
    raises ValueError.  From the part after the tab, as many leading
    characters as there are in ``{"region_id":`` are dropped, together with
    the final character.  The prefix is removed by length only -- its
    content is not verified.
    """
    fields = line.split('\t')
    ip_range, payload = fields  # ValueError unless there are exactly two fields

    prefix_len = len('{"region_id":')
    # NOTE(review): the dropped last character is presumably the closing '}' -- confirm.
    return ip_range, payload[prefix_len:-1]


def parse_asset_row(line):
    """Split an "asset" row at its first tab into ``(ip_range, traits_line)``.

    Everything after the first tab (further tabs included) is returned
    untouched as the traits part.

    Raises:
        Exception: if the row contains no tab at all.
    """
    ip_range, separator, traits_line = line.partition('\t')
    if not separator:
        # partition() yields an empty separator when no tab was found
        raise Exception("BAD tab POS in %s" % line)
    return ip_range, traits_line


def process_source(args):
    """Translate each input row's traits into its traits number.

    Rows are read from ``args.datafile`` (stdin when it is '-'), split by the
    parser selected through ``args.proc`` ("ipreg" or "asset"), and the traits
    part is looked up in the table returned by ``load_traits(args.traits)``.
    Every resolved row is written to stdout as ``<ip_range>\\t<traits_num>``.
    Rows that cannot be parsed or resolved are reported on stderr and counted
    in the module-level ``stats``; processing then continues with the next row.
    The collected ``stats`` are written to stderr at the end.

    Args:
        args: namespace with ``datafile``, ``traits``, ``proc`` and
            ``debug_counter`` attributes (as produced by ``parse_args``).

    Returns:
        Number of rows that failed (the "bad_input_lines" counter), 0 if none.

    Raises:
        Exception: if ``args.proc`` names no known processor.
        Whatever ``open`` or ``load_traits`` raise for unusable files.
    """
    data_input = sys.stdin if '-' == args.datafile else open(args.datafile, 'r')
    try:
        all_traits = load_traits(args.traits)

        proc_fn = {"ipreg": parse_ipreg_row, "asset": parse_asset_row}.get(args.proc)
        if not proc_fn:
            raise Exception("NO PROCESSOR")

        for line in data_input:
            inc_stat_counter("lines")
            # Optional progress indicator: one dot per debug_counter rows.
            if args.debug_counter and 0 == stats["lines"] % args.debug_counter:
                sys.stderr.write(".")

            try:
                ip_range, traits_line = proc_fn(line.strip())

                # .get() rather than indexing: an unknown traits string must
                # reach the bad_trait_num branch instead of raising KeyError.
                traits_num = all_traits.get(traits_line)
                if not traits_num:
                    inc_stat_counter("bad_trait_num")
                    raise Exception("unable to detect traits_num")

                sys.stdout.write("%s\t%d\n" % (ip_range, traits_num))

            except Exception as ex:
                # Per-row boundary: report, count, and keep going.
                sys.stderr.write("err[%s] in [%s]\n" % (ex, line.rstrip("\r\n")))
                inc_stat_counter("exceptions")
                inc_stat_counter("bad_input_lines")
    finally:
        # Close only what was opened here; stdin belongs to the process.
        if data_input is not sys.stdin:
            data_input.close()

    sys.stderr.write("\n%s\n" % (stats,))
    return stats.get("bad_input_lines", 0)


if __name__ == "__main__":
    # Exit status is the number of bad input lines (0 == success).  It is
    # capped at 255 because process exit statuses are truncated to 8 bits,
    # so e.g. 256 bad lines would otherwise be reported as success.
    bad_lines = process_source(parse_args()) or 0
    sys.exit(min(bad_lines, 255))
