import datetime
import logging
import os
import pytz
import re

from django.utils import timezone

import yt.wrapper as yt

from cars.core.util import make_yt_client
from cars.settings import REFUEL as settings
from .mapper import fuel_level_extractor_mapper


LOGGER = logging.getLogger(__name__)


def localized_datetime_to_timestamp(dt):
    """Convert a timezone-aware datetime to whole seconds since the Unix epoch.

    Args:
        dt: a timezone-aware ``datetime.datetime`` (any tzinfo implementation,
            including pytz-localized values).

    Returns:
        int: seconds elapsed since 1970-01-01T00:00:00 UTC. The fractional
        part is dropped by ``int()``, i.e. truncated toward zero.

    Raises:
        TypeError: if ``dt`` cannot be subtracted from an aware datetime,
            which is the case for naive datetimes and non-datetime values.
    """
    # The standard-library UTC tzinfo is sufficient here: subtracting two aware
    # datetimes compares them in UTC regardless of their tzinfo classes.
    epoch_start = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    try:
        elapsed = dt - epoch_start
    except TypeError as exc:
        # Keep the exception type but say which function and value were at
        # fault instead of the bare "can't subtract offset-naive ..." message.
        raise TypeError(
            'localized_datetime_to_timestamp() requires a timezone-aware '
            'datetime.datetime, got {!r}'.format(dt)
        ) from exc
    return int(elapsed.total_seconds())


def moscow_date_string_to_last_timestamp(datestring):
    """Return the Unix timestamp of 23:59:59 Moscow time on the given day.

    Args:
        datestring: a date formatted as ``YYYY-MM-DD``.

    Returns:
        int: seconds since the Unix epoch for the last whole second of that
        calendar day in the ``Europe/Moscow`` timezone.

    Raises:
        ValueError: if ``datestring`` does not match ``%Y-%m-%d``.
    """
    # strptime with a date-only format yields midnight of that day; moving the
    # clock fields to 23:59:59 gives the last whole second of the day.
    midnight = datetime.datetime.strptime(datestring, '%Y-%m-%d')
    moscow_tz = pytz.timezone('Europe/Moscow')
    end_of_day = midnight.replace(hour=23, minute=59, second=59)
    return localized_datetime_to_timestamp(moscow_tz.localize(end_of_day))


def moscow_datetime_string_to_timestamp(datestring):
    """Return the Unix timestamp for a Moscow-time datetime string.

    Args:
        datestring: a datetime formatted as ``YYYY-MM-DDTHH:MM:SS``; it is
            interpreted as wall-clock time in ``Europe/Moscow``.

    Returns:
        int: seconds since the Unix epoch.

    Raises:
        ValueError: if ``datestring`` does not match ``%Y-%m-%dT%H:%M:%S``.
    """
    moscow_tz = pytz.timezone('Europe/Moscow')
    naive_dt = datetime.datetime.strptime(datestring, '%Y-%m-%dT%H:%M:%S')
    aware_dt = moscow_tz.localize(naive_dt)
    return localized_datetime_to_timestamp(aware_dt)


class FuelLevelExtractor:
    """Runs the fuel-level mapper over YT source tables and maintains the results.

    Two kinds of source tables are handled:

    * tables in ``source_dir``, mirrored one-to-one (same table name) into
      ``target_dir`` by :meth:`extract`;
    * tables in ``source_stream_dir``, whose names are Moscow-time datetimes
      (``YYYY-MM-DDTHH:MM:SS``), merged into a single ``current_stream`` table
      inside ``target_dir`` by :meth:`extract_fuel_level_current_stream`.
    """

    # Added (in seconds) to a stream table's name-derived timestamp when
    # deciding whether the table may still contain unseen records.
    # NOTE(review): presumably the time span covered by one stream table - confirm.
    STREAM_LOG_PERIOD = 300

    # Name pattern of per-day tables, e.g. ``2019-05-31``.
    _DAY_TABLE_RE = re.compile(r'^\d{4}-\d{2}-\d{2}$')

    # Name of the merged stream table inside the target directory.
    _CURRENT_STREAM_TABLE_NAME = 'current_stream'

    def __init__(self, *, yt_client, source_dir, source_stream_dir, target_dir):
        """Store the YT client and the directories to work with.

        Args:
            yt_client: YT client used for every table operation.
            source_dir: directory with the source tables read by ``extract``.
            source_stream_dir: directory with the source stream tables.
            target_dir: directory that receives all extracted tables.
        """
        self._yt_client = yt_client
        self._source_dir = source_dir
        self._source_stream_dir = source_stream_dir
        self._target_dir = target_dir

    @classmethod
    def from_settings(cls):
        """Build an extractor from the ``refuel_extraction`` project settings."""
        extraction_settings = settings['refuel_extraction']
        return cls(
            yt_client=make_yt_client('data'),
            source_dir=extraction_settings['yt_source_dir'],
            source_stream_dir=extraction_settings['yt_source_stream_dir'],
            target_dir=extraction_settings['yt_target_dir'],
        )

    def extract(self, table_name):
        """Map one source table into the target directory and sort the result.

        Args:
            table_name: name of the table inside ``source_dir``; the output
                table gets the same name inside ``target_dir``.
        """
        LOGGER.info('extracting fuel information from table %s', table_name)
        source_path = os.path.join(self._source_dir, table_name)
        target_path = os.path.join(self._target_dir, table_name)
        self._yt_client.run_map(fuel_level_extractor_mapper, source_path, target_path)
        self._yt_client.run_sort(target_path, sort_by='timestamp')

    def get_existing_tables(self):
        """Return names of the tables in ``target_dir`` that contain rows.

        Empty tables are reported as missing, so callers treat them as not yet
        extracted.
        """
        return [
            name for name in self._yt_client.list(self._target_dir)
            if self._yt_client.row_count(os.path.join(self._target_dir, name))
        ]

    def extract_all_missed(self, need_tables=None):
        """Extract every needed table that has no non-empty counterpart yet.

        Args:
            need_tables: iterable of table names to ensure; when ``None``,
                every table listed in ``source_dir`` is used.
        """
        if need_tables is None:
            need_tables = set(self._yt_client.list(self._source_dir))
        existing_tables = self.get_existing_tables()
        # Sorted so that both the log line and the processing order are
        # deterministic between runs.
        missed_tables = sorted(set(need_tables) - set(existing_tables))
        LOGGER.debug('exctracting missed tables: %s',
                     ', '.join(missed_tables))
        for table_name in missed_tables:
            self.extract(table_name)

    def delete_tables_in_dir(self, target_dir, do_not_delete_tables=None):
        """Remove day-named tables from ``target_dir`` except the protected ones.

        Only tables whose name looks like ``YYYY-MM-DD`` are candidates, so
        other tables in the directory are never touched.

        Args:
            target_dir: directory to clean up.
            do_not_delete_tables: iterable of table names to keep; ``None``
                means nothing is protected.
        """
        if do_not_delete_tables is None:
            do_not_delete_tables = []
        candidates = set(self._yt_client.list(target_dir)) - set(do_not_delete_tables)
        tables_to_delete = sorted(
            name for name in candidates if self._DAY_TABLE_RE.match(name)
        )
        for table_name in tables_to_delete:
            table_path = os.path.join(target_dir, table_name)
            LOGGER.info('removing table %s', table_path)
            self._yt_client.remove(table_path)

    def extract_fuel_level_for_days(self, days=30):
        """Ensure day tables exist for the previous ``days`` days and drop older ones.

        Args:
            days: number of past days (not counting today) to keep extracted.
        """
        # NOTE(review): day names are derived from ``timezone.now()`` while
        # extract_fuel_level_current_stream interprets day-table names as
        # Moscow dates - confirm that both sides agree on the timezone.
        now = timezone.now()
        need_tables = [
            (now - datetime.timedelta(days=offset)).strftime('%Y-%m-%d')
            for offset in range(1, days + 1)
        ]
        self.extract_all_missed(need_tables)

        self.delete_tables_in_dir(
            self._target_dir,
            do_not_delete_tables=need_tables
        )

    def extract_fuel_level_current_stream(self):
        """Refresh the ``current_stream`` table from the source stream tables.

        Steps:

        1. Find the newest moment already covered, using the day tables and
           the existing ``current_stream`` table.
        2. Drop ``current_stream`` records that the day tables already cover.
        3. Map every source stream table that may contain newer records into a
           temporary table in ``target_dir``.
        4. Concatenate ``current_stream`` with the temporary tables, sort by
           ``timestamp``, replace ``current_stream`` and remove the
           temporary tables.
        """
        current_stream_path = os.path.join(
            self._target_dir, self._CURRENT_STREAM_TABLE_NAME
        )
        tmp_current_stream_path = current_stream_path + '_tmp'

        existing_tables = self.get_existing_tables()
        current_stream_exists = self._CURRENT_STREAM_TABLE_NAME in existing_tables

        # Last second covered by the newest day table; 0 when there are none.
        last_day_timestamp = max(
            (
                moscow_date_string_to_last_timestamp(name)
                for name in existing_tables
                if self._DAY_TABLE_RE.match(name)
            ),
            default=0,
        )

        if current_stream_exists:
            current_stream = list(self._yt_client.read_table(current_stream_path))
            # get_existing_tables only reports non-empty tables, so the last
            # row exists; the table is kept sorted by timestamp, which makes
            # the last row the newest one.
            last_timestamp = max(
                last_day_timestamp,
                current_stream[-1]['timestamp']
            )
        else:
            current_stream = []
            last_timestamp = last_day_timestamp

        # Remove old records that are already covered by the daily target tables.
        records_to_keep = [
            rec for rec in current_stream
            if rec['timestamp'] > last_day_timestamp
        ]
        if len(records_to_keep) != len(current_stream):
            self._yt_client.write_table(tmp_current_stream_path, records_to_keep)
            self._yt_client.move(
                tmp_current_stream_path,
                current_stream_path,
                force=True
            )

        # Source stream tables that may hold records newer than last_timestamp.
        fresh_table_names = [
            name for name in self._yt_client.list(self._source_stream_dir)
            if (moscow_datetime_string_to_timestamp(name) + self.STREAM_LOG_PERIOD
                > last_timestamp)
        ]

        LOGGER.info('getting refuel current stream logs from tables: %s', fresh_table_names)
        if not fresh_table_names:
            return

        extracted_paths = self._extract_stream_tables(fresh_table_names)

        paths_to_merge = (
            ([current_stream_path] if current_stream_exists else [])
            + extracted_paths
        )
        if not paths_to_merge:
            # Neither an existing stream table nor any new output: there is
            # nothing to concatenate, so do not call concatenate with an
            # empty source list.
            return

        self._yt_client.concatenate(paths_to_merge, tmp_current_stream_path)
        self._yt_client.run_sort(tmp_current_stream_path, sort_by='timestamp')
        self._yt_client.move(
            tmp_current_stream_path,
            current_stream_path,
            force=True
        )
        for path in extracted_paths:
            self._yt_client.remove(path)

    def _extract_stream_tables(self, table_names):
        """Map the given stream tables into ``target_dir`` concurrently.

        Args:
            table_names: names of tables inside ``source_stream_dir``.

        Returns:
            list: paths of the temporary target tables that were scheduled by
            this call and are present in ``target_dir`` once all operations
            have finished.
        """
        scheduled_paths = []
        tracker = yt.OperationsTracker()
        for name in table_names:
            source_path = os.path.join(self._source_stream_dir, name)
            target_path = os.path.join(self._target_dir, name)
            # NOTE(review): a target table that already has rows is skipped
            # and therefore neither merged nor removed by this run - confirm
            # that this is the intended handling of leftovers.
            if (self._yt_client.exists(target_path)
                    and self._yt_client.row_count(target_path)):
                continue

            scheduled_paths.append(target_path)
            tracker.add(
                self._yt_client.run_map(
                    fuel_level_extractor_mapper,
                    source_path,
                    target_path,
                    sync=False,
                )
            )
        tracker.wait_all()

        present_paths = {
            os.path.join(self._target_dir, name)
            for name in self._yt_client.list(self._target_dir)
        }
        return [path for path in scheduled_paths if path in present_paths]
