import logging

import numpy as np
import os
import yenv
import yt.wrapper as yt
from flask import current_app as app

from jafar.datasets.filters import (
    fix_invalid_timestamps, filter_item_min_install_count,
    filter_duplicates, filter_invalid_items
)
from jafar.utils.io import get_cluster
from jafar.utils.structarrays import DataFrame

logger = logging.getLogger(__name__)


def get_file_name(country):
    """
    Builds the TSV file name for a country.
    The whole name is lower-cased, e.g. ``RU`` -> ``ru.tsv``.
    :param country: country identifier (formatted with ``str.format``)
    :return: str file name
    """
    file_name = '{0}.tsv'.format(country)
    return file_name.lower()


def get_table_name(table, country):
    """
    Builds a per-country table name of the form ``<table>_<country>``.
    Note that the whole result is lower-cased, the ``table`` part included.
    :param table: table name or path prefix
    :param country: country identifier
    :return: str table name
    """
    combined = '{0}_{1}'.format(table, country)
    return combined.lower()


class BaseDatasetProcessor(object):
    """
    Base class for processors of data on YT.

    Subclasses configure the processor through the class attributes below;
    ``source`` is mandatory (checked in ``__init__``).
    """
    source = None  # processor identifier; also names the local and YT directories
    interactions_mapper = None  # callable invoked with the country to build the mapper
    yt_table_source = None  # YPath url of table to process
    yt_table_result = None  # YPath url of table with result (assigned in __init__)
    interactions_schema = None  # schema passed when the result table is written
    interactions_dtype = None  # structured dtype used when the result is downloaded

    # Applied to the mapped stream, in this order, by ``update_interactions``.
    filters = (
        filter_duplicates, fix_invalid_timestamps,
        filter_invalid_items,
        filter_item_min_install_count
    )

    def __init__(self):
        """
        Prepares the local dataset directory and configures the YT client.

        Reads ``DATASET_PATH``, ``YT_PATH_JAFAR`` and ``YT_CONFIG`` from the
        config of the current Flask application.
        """
        assert self.source is not None, "Dataset source could not be None"
        self.path = os.path.join(app.config['DATASET_PATH'], self.source)
        if not os.path.exists(self.path):
            os.makedirs(self.path)

        self.yt_table_result = yt.ypath_join(app.config['YT_PATH_JAFAR'], self.source)

        # NOTE(review): this is a module-level yt.wrapper call, so it presumably
        # changes configuration shared by every processor - confirm.
        yt.update_config(app.config['YT_CONFIG'])

    def get_data(self, country):
        """
        Returns the interactions dataset of a country.

        The table name is built with ``get_table_name`` from
        ``yt_table_result`` and the country. Loading is delegated to
        ``_download_table``, which reuses a local copy when one exists for
        the table's current modification time and reads from YT otherwise.
        :param country: str of country to read
        :return: DataFrame converted to ``interactions_dtype``
        """
        return self._download_table(get_table_name(self.yt_table_result, country),
                                    self.interactions_dtype)

    def _download_table(self, table_name, dtype):
        """
        Loads a YT table as a DataFrame, using a local TSV copy if present.

        The local file lives in ``<self.path>/<last component of table_name>/``
        and is named after the table's ``modification_time`` attribute, so a
        copy is only reused while the table is unchanged. A new copy is
        written only when ``yenv.type`` is ``'development'``.
        :param table_name: YPath of the table to read
        :param dtype: structured dtype (anything ``np.dtype`` accepts)
            listing the columns to read
        :return: DataFrame
        """
        directory = os.path.join(self.path,
                                 table_name.split('/')[-1])
        path = os.path.join(directory,
                            yt.get_attribute(table_name,
                                             attribute='modification_time'))

        record_dtype = np.dtype(dtype)
        # Columns with a unicode dtype (kind 'U') need explicit decoding.
        unicoded = [name for name in record_dtype.names
                    if record_dtype.fields[name][0].kind == 'U']

        if os.path.exists(path):
            logger.debug('Reading dataset from filesystem: %s', path)
            df = DataFrame.from_tsv(path)
            # NOTE(review): assumes from_tsv yields UTF-8 byte strings
            # (Python 2 ``str``) for these columns - confirm.
            for column in unicoded:
                df[column] = [item.decode("utf-8") for item in df[column]]
            return df.astype(dtype)

        # Column selector: request only the columns named in the dtype.
        table_name += '{%s}' % ','.join(record_dtype.names)

        def prepare_tuples(row):
            # A unicode column missing from the row becomes an empty string.
            for column in unicoded:
                row[column] = row.get(column, '').decode('utf-8')
            return tuple(row[column] for column in record_dtype.names)

        rows = [prepare_tuples(row) for row in yt.read_table(table_name)]
        result = DataFrame(np.array(rows, record_dtype))
        logger.info('Read table %s: %s records', table_name, len(result))
        if yenv.type == 'development':
            # Caching is best-effort: the data is already in memory, so no
            # filesystem problem (directory creation, cleanup or write) may
            # fail the download itself.
            try:
                if not os.path.exists(directory):  # prepare the dirs
                    os.mkdir(directory)
                for stale in os.listdir(directory):  # cleanup old tables
                    os.remove(os.path.join(directory, stale))
                result.to_tsv(path)  # trying to save to filesystem
            except Exception as e:
                # ``exception`` keeps the traceback next to the message.
                logger.exception('Failed to write table to filesystem: %s', e)
                if os.path.exists(path):  # clean potentially corrupted file
                    os.remove(path)

        return result

    def update_interactions(self, country):
        """
        Updates user-item interaction dataframes (this can
        be app installs, app launches or other forms of
        interaction).

        Builds a cluster job that reads ``yt_table_source``, maps it with
        ``interactions_mapper(country)``, applies every entry of ``filters``
        in order and writes the stream to the per-country result table with
        ``interactions_schema``; the job is then run.
        :param country: str of country to process
        """
        logger.info("Updating %s interactions for country %s", self.source, country)
        cluster = get_cluster()
        job = cluster.job()
        stream = job.table(
            self.yt_table_source
        ).map(
            self.interactions_mapper(country),
            intensity='cpu'
        )
        for _filter in self.filters:
            stream = stream.call(_filter, job=job, country=country)

        stream.put(get_table_name(self.yt_table_result, country), schema=self.interactions_schema)
        job.run()

    def update_many(self, countries):
        """
        Runs ``update`` for every country, one after another.
        :param countries: iterable of country identifiers
        """
        for country in countries:
            self.update(country)

    def update(self, country):
        """
        Prepare data on YT for a single country.
        :param country: str of country to process
        :return: None
        """
        self.update_interactions(country)
