import math

from crypta.data_import.proto.realty_pb2 import TRealty
from crypta.lib.python.yql import yql_helpers
from crypta.lib.python.yt import (
    schema_utils,
    yt_helpers,
)


class IdHomeInfo:
    """String keys shared by home records and the parsed user-homes schema.

    The values are used both as dict keys of the rows emitted by the mapper
    and as column names when the temporary table schema is built.
    """

    # Identifier columns.
    id_value = 'id'
    id_type = 'id_type'

    # Coordinate columns.
    latitude = 'latitude'
    longitude = 'longitude'


def get_latest_homework_date(yt_client, config):
    """Return the greatest entry name found in ``config.HomeworkStateDir``.

    The directory is listed through ``yt_client.list`` and the maximum of the
    returned names is taken, so the result is the latest date only as long as
    the names sort chronologically (presumably ISO dates -- confirm).

    Raises:
        ValueError: if the directory listing is empty.
    """
    state_entries = yt_client.list(config.HomeworkStateDir)
    return max(state_entries)


def get_parsed_user_homes_schema():
    """Build the strict schema of the parsed user-homes table.

    The table has four required columns: the identifier value and identifier
    type (strings) followed by latitude and longitude (doubles).
    """
    typed_columns = (
        (IdHomeInfo.id_value, 'string'),
        (IdHomeInfo.id_type, 'string'),
        (IdHomeInfo.latitude, 'double'),
        (IdHomeInfo.longitude, 'double'),
    )
    # Every column is mandatory, so only the name and type vary.
    columns = [
        {'name': column_name, 'type': column_type, 'required': True}
        for column_name, column_type in typed_columns
    ]
    return schema_utils.get_strict_schema(columns)


def great_circle_distance(lat1, lon1, lat2, lon2, earth_radius=6373.0):
    """Return the great-circle distance between two points (haversine formula).

    Args:
        lat1: latitude of the first point, in degrees.
        lon1: longitude of the first point, in degrees.
        lat2: latitude of the second point, in degrees.
        lon2: longitude of the second point, in degrees.
        earth_radius: radius of the sphere; the result is expressed in the
            same unit. The default 6373.0 is presumably kilometres.

    Returns:
        The distance along the sphere surface as a float. NaN coordinates
        propagate to a NaN result.
    """
    lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a_value = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2

    # Floating-point rounding can push the haversine term marginally outside
    # [0, 1] (e.g. for near-antipodal points), which would make math.sqrt
    # raise ValueError. Explicit comparisons are used instead of min/max so
    # that a NaN value is left untouched and still propagates.
    if a_value > 1.0:
        a_value = 1.0
    elif a_value < 0.0:
        a_value = 0.0

    c_value = 2 * math.atan2(math.sqrt(a_value), math.sqrt(1 - a_value))

    return earth_radius * c_value


def get_closest_manual_to_predicted_home_coords(predicted_home, manual_homes):
    """Return the coordinates of the manual home nearest to the predicted home.

    Args:
        predicted_home: mapping with latitude and longitude entries.
        manual_homes: iterable of mappings with latitude and longitude entries.

    Returns:
        A ``(latitude, longitude)`` tuple of the nearest manual home, or
        ``(None, None)`` when ``manual_homes`` yields nothing. On equal
        distances the earliest home wins.
    """
    target_lat = predicted_home[IdHomeInfo.latitude]
    target_lon = predicted_home[IdHomeInfo.longitude]

    # Coordinates are read and scored one home at a time, in input order.
    home_coords = (
        (home[IdHomeInfo.latitude], home[IdHomeInfo.longitude])
        for home in manual_homes
    )
    scored = [
        (great_circle_distance(lat, lon, target_lat, target_lon), lat, lon)
        for lat, lon in home_coords
    ]

    if not scored:
        return None, None

    # min() keeps the first of several equally distant candidates.
    _, nearest_lat, nearest_lon = min(scored, key=lambda entry: entry[0])
    return nearest_lat, nearest_lon


def parse_user_identification(row):
    """Map an input row to at most one row with the user's home coordinates.

    The home is chosen as follows:

    * manual homes present and either there is no predicted home or there is
      exactly one manual home -- the first manual home is used;
    * several manual homes and a predicted home -- the manual home closest to
      the predicted one is used;
    * no manual homes (missing, ``None`` or an empty list) -- the predicted
      home is used if there is one;
    * neither is available -- nothing is yielded.

    Args:
        row: mapping with ``unified_id`` and ``source_unified_id`` entries and
            optional ``manual_homes`` / ``predicted_home`` entries.

    Yields:
        A dict keyed by the ``IdHomeInfo`` names.

    Raises:
        KeyError: if ``unified_id`` or ``source_unified_id`` is missing.
    """
    manual_homes = row.get('manual_homes')
    predicted_home = row.get('predicted_home')
    unified_id = row['unified_id']
    source_unified_id = row['source_unified_id']

    # An empty list is treated like a missing value: indexing it would raise
    # IndexError, and the closest-home lookup would produce None coordinates
    # for columns that the output schema declares as required.
    if manual_homes:
        if predicted_home is None or len(manual_homes) == 1:
            manual_home = manual_homes[0]
            latitude = manual_home[IdHomeInfo.latitude]
            longitude = manual_home[IdHomeInfo.longitude]
        else:
            latitude, longitude = get_closest_manual_to_predicted_home_coords(predicted_home, manual_homes)
    elif predicted_home is not None:
        latitude = predicted_home[IdHomeInfo.latitude]
        longitude = predicted_home[IdHomeInfo.longitude]
    else:
        # No home information at all: emit nothing for this row.
        return

    yield {
        IdHomeInfo.id_value: unified_id,
        IdHomeInfo.id_type: source_unified_id,
        IdHomeInfo.latitude: latitude,
        IdHomeInfo.longitude: longitude,
    }


def update_data(yt_client, date, config, logger):
    """Rebuild the realty data table and its statistics for ``date``.

    Steps, all but the first performed inside one YT transaction:

    1. make sure ``config.Yt.TmpDir`` exists;
    2. run ``parse_user_identification`` as a map operation over
       ``config.HomeworkUnifiedIdTable`` formatted with ``date``, writing to a
       temporary table;
    3. create ``config.DataTable`` empty (with ``force=True``) using the schema
       derived from ``TRealty`` and store ``date`` in the
       ``config.DataOutputAttributeName`` attribute;
    4. run the ``realty_import.yql`` and ``calc_stats.yql`` queries.

    Args:
        yt_client: YT client used for every YT call.
        date: date value substituted into the input table path and passed on
            to the table attribute and the statistics query.
        config: job configuration object.
        logger: logger handed over to the YQL helper.
    """
    yt_client.create("map_node", config.Yt.TmpDir, recursive=True, ignore_existing=True)

    with yt_client.Transaction() as tx:
        tmp_table_attributes = {
            'schema': get_parsed_user_homes_schema(),
            'optimize_for': 'scan',
        }
        with yt_client.TempTable(attributes=tmp_table_attributes) as homes_tmp_table:
            source_table = config.HomeworkUnifiedIdTable.format(date)
            yt_client.run_map(parse_user_identification, source_table, homes_tmp_table)

            realty_schema = schema_utils.get_schema_from_proto(
                TRealty,
                key_columns=['UserId', 'UserIdType'],
            )
            yt_helpers.create_empty_table(
                yt_client=yt_client,
                path=config.DataTable,
                schema=realty_schema,
                additional_attributes={config.DataOutputAttributeName: date},
                force=True,
            )

            import_params = {
                'crypta_identifier_udf_url': config.CryptaIdentifierUdfUrl,
                'house_prices_stat_table': config.HousePricesStatTable,
                'parsed_user_homes_table': homes_tmp_table,
                'user_home_prices_table': config.DataTable,
            }
            yql_helpers.run_query(
                query_template='/query/realty_import.yql',
                yt_config=config.Yt,
                query_params=import_params,
                logger=logger,
                tx=tx,
            )

            stats_params = {
                'input_table': config.DataTable,
                'output_table': config.StatsTable,
                'date': date,
            }
            yql_helpers.run_query(
                query_template='/query/calc_stats.yql',
                yt_config=config.Yt,
                query_params=stats_params,
                logger=logger,
                tx=tx,
            )


def run(config, logger):
    """Entry point: refresh the data table when newer homework data exists.

    The latest entry of the homework state directory is compared with the
    value stored in the ``config.DataOutputAttributeName`` attribute of
    ``config.DataTable`` ('1970-01-01' is passed as the fallback value). The
    update runs only when the stored value compares lower.

    Args:
        config: job configuration object.
        logger: logger used for progress messages and passed to the update.
    """
    yt_client = yt_helpers.get_yt_client(config.Yt.Proxy, config.Yt.Pool)

    latest_homework_date = get_latest_homework_date(yt_client, config)
    last_update_date = yt_client.get_attribute(
        config.DataTable,
        config.DataOutputAttributeName,
        '1970-01-01',
    )

    logger.info("Latest homework date: %s", latest_homework_date)
    logger.info("Last update date: %s", last_update_date)

    is_outdated = last_update_date < latest_homework_date
    if not is_outdated:
        logger.info("No need to update the data")
        return

    update_data(yt_client, latest_homework_date, config, logger)
