from datetime import timedelta
import json
import os

from crypta.lib.python import time_utils
from crypta.lib.python.yql import yql_helpers
from crypta.lib.python.yt import (
    schema_utils,
    yt_helpers,
)
from crypta.profile.lib import date_helpers


def get_latest_orgvisits_export_date(yt_client, config):
    """Return the greatest entry name listed under ``config.OrgvisitsExportDir``.

    The entries are compared with the builtin ``max``; they are presumably
    date-named tables, so the greatest name is the latest date (confirm
    against the directory layout).

    Raises:
        ValueError: if the directory listing is empty.
    """
    export_entries = yt_client.list(config.OrgvisitsExportDir)
    latest_entry = max(export_entries)
    return latest_entry


def get_latest_geocube_date(yt_client, config):
    """Return the greatest entry name listed under ``config.GeocubeDir``.

    The entries are compared with the builtin ``max``; they are presumably
    date-named nodes, so the greatest name is the latest date (confirm
    against the directory layout).

    Raises:
        ValueError: if the directory listing is empty.
    """
    geocube_entries = yt_client.list(config.GeocubeDir)
    latest_entry = max(geocube_entries)
    return latest_entry


def get_latest_aggregated_orgvisits_date(yt_client, config):
    """Return the greatest entry name under ``config.AggregatedOrgvisitsDir``.

    Any exception raised while reading or listing the directory, or while
    taking the maximum (for example, an empty listing), is swallowed and the
    sentinel ``'1970-01-01'`` is returned instead.
    """
    try:
        latest_entry = max(yt_client.list(config.AggregatedOrgvisitsDir))
    except Exception:
        # Deliberate best-effort fallback: treat any failure as "nothing aggregated yet".
        latest_entry = '1970-01-01'
    return latest_entry


class UserOrgvisitsToBigbMapper(object):
    """Callable mapper that wraps each input row into a JSON-encoded command.

    For every input row (which must provide ``crypta_id`` and ``permalinks``)
    exactly one output row is yielded; it has a single ``value`` field holding
    the JSON string of an ``add`` command carrying the row's permalinks.
    """

    def __init__(self, timestamp, keyword_id):
        """Store the values that are stamped into every produced command.

        Args:
            timestamp: value written both as the message ``timestamp`` and as
                the item's ``update_time``.
            keyword_id: value written as the item's ``keyword_id``.
        """
        self.timestamp = timestamp
        self.keyword_id = keyword_id

    def __call__(self, row):
        """Yield one ``{'value': <json string>}`` row built from ``row``."""
        # Keys are inserted in the same order as the serialized output expects.
        item = {
            'keyword_id': self.keyword_id,
            'update_time': self.timestamp,
            'uint_values': row['permalinks'],
        }
        add_command = {'add': {'items': [item]}}
        message = {
            'crypta_id': row['crypta_id'],
            'timestamp': self.timestamp,
            'commands': [add_command],
        }
        yield {'value': json.dumps(message)}


def update_data(yt_client, date, config, logger):
    """Build the aggregated orgvisits table for ``date`` and its derived table.

    All steps run inside a ``yt_client.Transaction()`` context:

    1. Run the ``/query/aggregate_orgvisits.yql`` template (the transaction is
       passed to it explicitly), writing to ``<AggregatedOrgvisitsDir>/<date>``.
    2. Recreate ``<BigbOrgvisitsDir>/<date>`` as an empty table with a single
       string column ``value``.
    3. Map the aggregated table into that table with
       ``UserOrgvisitsToBigbMapper``.
    4. Set a TTL of ``config.OutputTablesTtlDays`` days on both tables.

    Args:
        yt_client: YT client used for listing, table creation, the map
            operation and TTL updates.
        date: name of the output tables; also passed to the query as
            ``last_date``.
        config: task configuration object providing the attributes read below.
        logger: logger forwarded to the YQL helper.
    """
    with yt_client.Transaction() as transaction:
        aggregated_path = os.path.join(config.AggregatedOrgvisitsDir, date)

        # Parameter names are dictated by the query template; keep them verbatim.
        aggregation_params = {
            'users_by_permalink_threshold': config.UsersByPermalinkThreshold,
            'bigb_orvisits_limit': config.BigbOrgvisitsLimit,
            'default_org_weight': config.DefaultOrgWeight,
            'first_date': date_helpers.get_date_from_past(date, months=config.MonthsBackToAggregateOrgvisits),
            'last_date': date,
            'good_gdu_types': config.GoodGduTypes,
            'geocube_table_names': config.GeocubeTableNames,
            'geocube_dir': config.GeocubeDir,
            'orgvisits_export_dir': config.OrgvisitsExportDir,
            'company_pretty_format_table': config.OrgInfoTable,
            'businesses_rubrics_table': config.BusinessesRubricsTable,
            # Greatest absolute path in the weights directory.
            'orgs_weights_table': max(yt_client.list(config.OrgsWeightsDir, absolute=True)),
            'matching_puid_table': config.MatchingPuidTable,
            'matching_yandexuid_tables': config.MatchingYandexuidTables,
            'matching_devid_tables': config.MatchingDevidTables,
            'cryptaid_userdata_table': config.CryptaidUserdataTable,
            'output_table': aggregated_path,
        }
        yql_helpers.run_query(
            query_template='/query/aggregate_orgvisits.yql',
            yt_config=config.Yt,
            query_params=aggregation_params,
            logger=logger,
            tx=transaction,
        )

        bigb_path = os.path.join(config.BigbOrgvisitsDir, date)
        bigb_schema = schema_utils.get_strict_schema([
            {'name': 'value', 'type': 'string'},
        ])
        yt_helpers.create_empty_table(
            yt_client=yt_client,
            path=bigb_path,
            schema=bigb_schema,
            force=True,
        )

        # The current time is taken only after the aggregation has finished.
        to_bigb_mapper = UserOrgvisitsToBigbMapper(
            timestamp=time_utils.get_current_time(),
            keyword_id=config.BigbKeywordId,
        )
        yt_client.run_map(to_bigb_mapper, aggregated_path, bigb_path)

        output_ttl = timedelta(days=config.OutputTablesTtlDays)
        for output_path in (aggregated_path, bigb_path):
            yt_helpers.set_ttl(table=output_path, ttl_timedelta=output_ttl, yt_client=yt_client)


def run(config, logger):
    """Entry point: rebuild the aggregated data when both sources are newer.

    Creates a YT client from ``config.Yt.Proxy`` and ``config.Yt.Pool``, looks
    up the latest orgvisits export, geocube and aggregated entries, logs them,
    and calls ``update_data`` for the latest export entry only when the
    aggregated entry is strictly older than both of the others.

    Args:
        config: task configuration object.
        logger: logger used for progress messages and forwarded to
            ``update_data``.
    """
    yt_client = yt_helpers.get_yt_client(config.Yt.Proxy, config.Yt.Pool)

    export_date = get_latest_orgvisits_export_date(yt_client, config)
    geocube_date = get_latest_geocube_date(yt_client, config)
    aggregated_date = get_latest_aggregated_orgvisits_date(yt_client, config)

    report = (
        ('Latest orgvisits export date: %s', export_date),
        ('Latest geocube date: %s', geocube_date),
        ('Latest aggregated orgvisits date: %s', aggregated_date),
    )
    for message, value in report:
        logger.info(message, value)

    is_outdated = aggregated_date < export_date and aggregated_date < geocube_date
    if not is_outdated:
        logger.info('No need to update the data')
        return

    update_data(yt_client, export_date, config, logger)
