# -*- coding: utf-8 -*-
import logging
from sklearn.externals.joblib import Parallel, delayed

from datacloud.dev_utils.yt.yt_utils import get_yt_client, create_folders
from datacloud.dev_utils.logging.logger import get_basic_logger
from datacloud.features.geo.constants import ADDRS_TYPES, EXT_ID_KEY
from datacloud.dev_utils.geo.addrs_resolve import AddrsResolver, CoordNotFoundException

# Module-level logger named after this module; used for the progress and
# summary messages emitted by the functions below.
logger = get_basic_logger(name=__name__)


def load_adresses(yt_client, yt_table, addrs_types, verbose=0, ext_id_key=EXT_ID_KEY,
                  max_rows=0):
    """Read address rows from a YT table into a list of dicts.

    Only the external-id column and the requested address columns are kept
    from every row.

    :param yt_client: object exposing ``read_table(yt_table)`` that yields rows.
    :param yt_table: table reference, passed to ``read_table`` unchanged.
    :param addrs_types: iterable of column names that hold addresses.
    :param verbose: if non-zero, log progress after every ``verbose`` loaded rows.
    :param ext_id_key: name of the column holding the external id.
    :param max_rows: if non-zero, stop after exactly this many rows are loaded.
    :return: list of dicts keyed by ``ext_id_key`` and each item of ``addrs_types``.

    A row lacking a requested column surfaces as whatever ``row[key]`` raises
    (``KeyError`` for dict-like rows).
    """
    keys_to_load = [ext_id_key] + list(addrs_types)
    res = []
    # Count from 1 so `loaded` always equals the number of rows stored so far;
    # a 0-based index here made the limit let through one extra row.
    for loaded, row in enumerate(yt_client.read_table(yt_table), 1):
        res.append({key: row[key] for key in keys_to_load})

        if verbose and loaded % verbose == 0:
            logger.info(' loaded {} rows'.format(loaded))

        if max_rows and loaded >= max_rows:
            break

    return res


def resolve_for_eid(eid_dict, addrs_types, resolver, ext_id_key=EXT_ID_KEY):
    """Resolve every address of one external id into coordinates.

    :param eid_dict: mapping holding the external id under ``ext_id_key`` and
        one address per entry of ``addrs_types``.
    :param addrs_types: iterable of address keys to resolve, in output order.
    :param resolver: object whose ``resolve_addr(addr)`` returns ``(lon, lat)``
        or raises ``CoordNotFoundException``.
    :param ext_id_key: key of the external id, used both to read it from
        ``eid_dict`` and to store it in every produced record.
    :return: list of dicts with keys ``ext_id_key``, ``'type'``, ``'lon'`` and
        ``'lat'``; addresses that could not be resolved are omitted.
    """
    coords = []
    # Honour the caller-supplied key instead of the module constant, so the
    # function agrees with whatever key the rows were loaded under.
    external_id = eid_dict[ext_id_key]
    for addr_type in addrs_types:
        addr = eid_dict[addr_type]
        try:
            lon, lat = resolver.resolve_addr(addr)
        except CoordNotFoundException:
            # Best effort: an unresolvable address is skipped, not an error.
            continue

        coords.append({
            ext_id_key: external_id,
            'type': addr_type,
            'lon': lon,
            'lat': lat,
        })

    return coords


def resolve_addrs(build_config, addrs_types=None, yt_client=None, n_jobs=16, memory_on=True,
                  load_verbose=300000, parallel_verbose=2):
    """Load addresses from YT, resolve them to coordinates and store the result.

    :param build_config: object providing ``data_dir``, ``addresses_table``
        and ``resolved_addrs`` attributes.
    :param addrs_types: address keys to resolve; ``ADDRS_TYPES`` when falsy.
    :param yt_client: YT client; obtained via ``get_yt_client()`` when falsy.
    :param n_jobs: number of joblib workers (threading backend).
    :param memory_on: forwarded to ``AddrsResolver`` as ``memory_on``.
    :param load_verbose: progress-logging period passed to ``load_adresses``.
    :param parallel_verbose: verbosity level passed to joblib ``Parallel``.
    """
    if not yt_client:
        yt_client = get_yt_client()
    if not addrs_types:
        addrs_types = ADDRS_TYPES

    create_folders((build_config.data_dir,), yt_client)

    loaded_rows = load_adresses(
        yt_client,
        build_config.addresses_table,
        addrs_types,
        verbose=load_verbose,
    )

    # A single resolver instance is shared by all worker threads.
    resolver = AddrsResolver(memory_on=memory_on, tvm_log_level=logging.WARN)
    worker_pool = Parallel(n_jobs=n_jobs, verbose=parallel_verbose, backend='threading')
    per_eid_coords = worker_pool(
        delayed(resolve_for_eid)(eid_dict, addrs_types, resolver)
        for eid_dict in loaded_rows
    )

    # Flatten the per-id lists into one list of records for the output table.
    data_for_yt = []
    for eid_coords in per_eid_coords:
        data_for_yt.extend(eid_coords)

    logger.info(' resolved {} addresses'.format(len(data_for_yt)))
    yt_client.write_table(build_config.resolved_addrs, data_for_yt)
