import json
import logging
import time


from yp.common import YpTimestampOutOfRangeError, YpNoSuchTransactionError
from yt.wrapper.errors import YtResponseError


def _drop_duplicate_records(records):
    """Return ``records`` without duplicates, keeping the last occurrence of each.

    Two records are considered duplicates when their ``type``, ``data`` and
    ``class`` values are all equal. The relative order of the surviving
    records is preserved.
    """
    seen = set()
    survivors = []
    # Walk backwards so that the LAST occurrence of every record is the one kept.
    for record in reversed(records):
        key = (record['type'], record['data'], record['class'])
        if key not in seen:
            seen.add(key)
            survivors.append(record)
    survivors.reverse()
    return survivors


def handle_duplicate_records(yp_client, remove=False, remove_limit=None, delay=0, logger=None):
    """Find (and optionally remove) duplicate records inside dns_record_sets.

    Record sets holding at least two records are selected page by page. For
    every record set that contains duplicates an update request is prepared
    that rewrites ``/spec/records`` with the deduplicated list.

    Args:
        yp_client: client object providing ``generate_timestamp``,
            ``select_objects``, ``start_transaction``, ``update_objects`` and
            ``commit_transaction``.
        remove: when true the prepared updates are applied in one transaction
            per selected page; when false nothing is written (dry run) and
            only the logs and counters are produced.
        remove_limit: maximum number of record sets to process. A falsy value
            (``None`` or ``0``) means no limit. The limit applies both to the
            record sets actually updated and to the returned counters.
        delay: seconds to sleep between pages.
        logger: object with ``debug`` / ``info`` / ``warning`` methods;
            defaults to the ``logging`` module.

    Returns:
        Tuple ``(total_number_of_duplicates, total_number_of_updated_record_sets)``
        covering the record sets that were processed (respecting
        ``remove_limit``).
    """
    if logger is None:
        logger = logging

    continuation_token = None
    limit = 2000
    batch_size = 500
    timestamp = yp_client.generate_timestamp()
    last_seen_id = ''

    total_number_of_duplicates = 0
    total_number_of_updated_record_sets = 0

    while True:
        # The id filter makes it possible to resume from the same position
        # after the timestamp and continuation token are reset on failure.
        objects = yp_client.select_objects('dns_record_set', selectors=['/meta/id', '/spec'],
                                           filter='[/meta/id]>"{}" and [/spec/records/1] != #'.format(last_seen_id),
                                           timestamp=timestamp, limit=limit,
                                           options={'continuation_token': continuation_token},
                                           enable_structured_response=True)
        continuation_token = objects['continuation_token']
        results = objects['results']

        logger.debug('Selected {} objects with timestamp {} and continuation token {}'.format(
            len(results), timestamp, continuation_token
        ))

        update_reqs = []
        duplicates_per_req = []

        for record_set in results:
            record_set_id = record_set[0]['value']
            records = record_set[1]['value']['records']
            edited_records = _drop_duplicate_records(records)

            if len(edited_records) == len(records):
                continue

            logger.info('Candidate for deduplication: {}\nbefore: {}\n->\nafter: {}'.format(
                record_set_id,
                json.dumps(records, indent=2),
                json.dumps(edited_records, indent=2)
            ))
            update_reqs.append({
                'object_type': 'dns_record_set',
                'object_id': record_set_id,
                'set_updates': [
                    {
                        'path': '/spec/records',
                        'value': edited_records,
                    }
                ]
            })
            duplicates_per_req.append(len(records) - len(edited_records))

        if update_reqs:
            if remove_limit:
                # Never touch more record sets than the caller allowed.
                remaining = max(0, remove_limit - total_number_of_updated_record_sets)
                update_reqs = update_reqs[:remaining]
                duplicates_per_req = duplicates_per_req[:remaining]

            number_of_record_sets_to_update = len(update_reqs)
            number_of_duplicates = sum(duplicates_per_req)

            if remove and update_reqs:
                try:
                    transaction_id = yp_client.start_transaction(start_timestamp=timestamp)

                    for start in range(0, len(update_reqs), batch_size):
                        yp_client.update_objects(update_reqs[start:start + batch_size],
                                                 transaction_id=transaction_id)

                    yp_client.commit_transaction(transaction_id)
                except (YpTimestampOutOfRangeError, YpNoSuchTransactionError, YtResponseError) as error:
                    # Retry the same page with a fresh snapshot; last_seen_id
                    # is intentionally left untouched.
                    logger.warning('Failed to update dns_record_sets, retrying with a fresh timestamp: {}'.format(error))
                    continuation_token = None
                    timestamp = yp_client.generate_timestamp()
                    continue

                logger.debug('Update following dns_record_sets: {}'.format(json.dumps(update_reqs, indent=2)))

            logger.info('Number of dns_record_sets with duplicates in batch: {}'.format(number_of_record_sets_to_update))
            logger.info('Number of duplicates in batch: {}'.format(number_of_duplicates))

            total_number_of_duplicates += number_of_duplicates
            total_number_of_updated_record_sets += number_of_record_sets_to_update

        if len(results) < limit or (remove_limit and total_number_of_updated_record_sets >= remove_limit):
            break

        last_seen_id = results[-1][0]['value']

        time.sleep(delay)

    logger.info('Total number of dns_record_sets with duplicates: {}'.format(total_number_of_updated_record_sets))
    logger.info('Total number of duplicates: {}'.format(total_number_of_duplicates))

    return total_number_of_duplicates, total_number_of_updated_record_sets
