# -*- coding: utf-8 -*-

import logging

from collections import defaultdict
import os
import re
import sys

from django.core.management.base import BaseCommand
import yt.wrapper as yt

from smarttv.alice.tv_proxy.proxy.indexer import index, IndexingError
from smarttv.alice.tv_proxy.proxy.index_messages import DeleteMessage

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Configure the YT client to use the "arnold" proxy. This runs at import time,
# so merely importing this module changes the global yt configuration.
yt.config.set_proxy("arnold")

# Patterns used to pull the `ts` (digits) and `device_id` (word characters)
# values out of a document URL.
# NOTE(review): neither pattern is anchored to a parameter boundary, so a
# longer parameter name ending in "ts=" or "device_id=" would match as well.
ts_re = re.compile(r'ts=(\d+)')
device_id_re = re.compile(r'device_id=(\w+)')


def get_latest_full_dump_table(tables_paths):
    """Pick the most recent full-dump table among the given paths.

    A path counts as a full dump when its basename has the form
    ``full.<digits>``; the digits are compared as an integer timestamp.

    :param tables_paths: iterable of table paths.
    :return: the path with the largest timestamp (the first one seen wins a
        tie), or ``None`` when no path looks like a full dump.
    """
    candidates = []
    for path in tables_paths:
        matched = re.match(r'^full\.(\d+)$', os.path.basename(path))
        if matched is not None:
            candidates.append((int(matched.group(1)), path))

    if not candidates:
        return None

    # max() returns the first maximal item, so the earliest path wins a tie.
    _, newest_path = max(candidates, key=lambda pair: pair[0])
    return newest_path


def get_parameter_from_url(url, parameter_re):
    """Extract a parameter value from ``url`` using a compiled pattern.

    :param url: string to search.
    :param parameter_re: compiled regular expression whose first capturing
        group holds the value.
    :return: the text of group 1 of the first match found anywhere in
        ``url``, or ``None`` when the pattern does not match.
    """
    found = parameter_re.search(url)
    return found.group(1) if found is not None else None


def get_outdated(table):
    """Collect URLs of documents superseded by a newer one for the same device.

    Each row's ``'subkey'`` value is treated as a URL from which ``device_id``
    and ``ts`` are extracted. Rows missing either value are skipped and only
    counted; the counts are logged at INFO level before returning.

    :param table: iterable of rows supporting ``row['subkey']``.
    :return: list of URLs (in input order) whose timestamp is strictly lower
        than the highest timestamp seen for their device.
    """
    missing_device_id = 0
    missing_ts = 0
    newest_by_device = {}
    entries = []  # (url, timestamp, device_id) for every fully parsed row

    for record in table:
        url = record['subkey']
        device_id = get_parameter_from_url(url, device_id_re)
        raw_ts = get_parameter_from_url(url, ts_re)

        if device_id is None:
            missing_device_id += 1
        if raw_ts is None:
            missing_ts += 1
        if device_id is None or raw_ts is None:
            continue

        ts = int(raw_ts)
        entries.append((url, ts, device_id))
        newest_by_device[device_id] = max(newest_by_device.get(device_id, 0), ts)

    # Second pass: the per-device maximum is only known once all rows are read.
    outdated = [
        url
        for url, ts, device_id in entries
        if ts < newest_by_device[device_id]
    ]

    logger.info('Rows without ts: %s', missing_ts)
    logger.info('Rows without device_id: %s', missing_device_id)
    return outdated


class Command(BaseCommand):
    """Management command that removes superseded documents from the index.

    It locates the latest ``full.<timestamp>`` table in the ferryman ytpull
    directory, determines which document URLs have a newer counterpart for
    the same device, and sends a delete message for each of them.
    """

    help = 'Deletes duplicated documents with outdated timestamps from index'

    def handle(self, *args, **options):
        """Run the cleanup.

        Exits the process with status 1 (``SystemExit``) when no full-dump
        table is found. An ``IndexingError`` raised while deleting is logged
        with its traceback and otherwise swallowed.
        """
        logger.info('Starting...')

        directory_with_tables_path = '//home/saas/ferryman-stable/smart_tv_channels/ytpull'
        tables_paths = yt.search(
            directory_with_tables_path,
            node_type=['table']
        )
        latest_full_dump_table_path = get_latest_full_dump_table(tables_paths)

        if not latest_full_dump_table_path:
            logger.warning('Table with full dump not found')
            # sys.exit instead of the builtin exit(): the latter is injected by
            # the `site` module for interactive use and may be missing
            # (e.g. `python -S`, embedded interpreters).
            sys.exit(1)

        full_table = yt.read_table(latest_full_dump_table_path)

        outdated_rows = get_outdated(full_table)

        try:
            messages = [
                DeleteMessage(row) for row in outdated_rows
            ]
            index(messages)
        except IndexingError:
            # logger.exception keeps the same message at ERROR level but also
            # records the traceback, so the failure cause is not lost.
            logger.exception('Failed to delete')
