import logging
import os
from collections import defaultdict

import pandas as pd
import yenv
import yt.wrapper as yt
from flask import current_app as app
from nile.api.v1 import clusters
from nile.files import DevelopPackage, Package

from jafar.storages.memmap.storage import parse_version_info, get_current_version_info

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def cleanup_files_local(folder_path, limit):
    """
    If `folder_path` contains more entries than `limit`,
    removes the extra ones starting from (supposedly)
    the oldest. Entry names are sorted lexicographically
    and the first ones in that order are removed, so
    names are assumed to be sortable by time/relevance.

    :param folder_path: directory whose contents are trimmed
    :param limit: maximum number of entries to keep; with a
        non-positive value every entry is removed
    :raises OSError: if the directory cannot be listed or an
        entry cannot be removed (`os.remove` does not delete
        directories)
    """
    logger.info('Cleaning up %s', folder_path)
    files = os.listdir(folder_path)
    # Slice by the number of extra entries rather than `[:-limit]`:
    # with `limit == 0` the latter is `[:0]` and would remove nothing.
    extra_count = len(files) - limit
    if extra_count > 0:
        for file_name in sorted(files)[:extra_count]:
            logger.info("Cleaning up obsolete file: %s", file_name)
            os.remove(os.path.join(folder_path, file_name))
    logger.info("Done")


def get_cluster(packages=None, backend='yt'):
    """
    Build a cluster object configured from the Flask app config.

    The environment always carries the `jafar_yt` package (a
    `DevelopPackage` under `BASE_DIR` when `yenv.type` is
    'development', otherwise a `Package` located via
    `NILE_WHEELS_DIR`), a fixed list of dependency packages and
    any extra `packages` supplied by the caller.

    :param packages: optional iterable of additional packages,
        appended after the built-in ones
    :param backend: 'yql' selects `clusters.yql.YQLProduction`,
        any other value selects `clusters.yt.YT`
    :return: result of calling `.env(...)` on the configured cluster
    """
    if yenv.type == 'development':
        jafar_package = DevelopPackage(os.path.join(app.config['BASE_DIR'], 'jafar_yt'))
    else:
        jafar_package = Package('jafar_yt', path=app.config['NILE_WHEELS_DIR'])
    cluster_packages = [jafar_package]

    # Third-party packages that have to be available on the cluster side.
    for dependency_name in (
        'Pillow',
        'beautifulsoup4',
        'colorwiz',
        'numpy',
        'pymongo',
        'yandex_http_geobase',
        'PyStemmer',
        'langdetect',
    ):
        cluster_packages.append(
            Package(dependency_name, path=app.config['NILE_WHEELS_DIR'])
        )

    cluster_packages.extend(packages or [])

    if backend == 'yql':
        cluster_class = clusters.yql.YQLProduction
    else:
        cluster_class = clusters.yt.YT

    connection = cluster_class(
        proxy=app.config['YT_PROXY'],
        token=app.config['YT_TOKEN'],
        pool=app.config['YT_POOL']
    )
    return connection.env(
        packages=cluster_packages,
        templates={'tmp_root': app.config['YT_PATH_TMP']}
    )


def read_yt_table_as_dataframe(table):
    """
    Reads a YT table (in 'json' format) into a pandas DataFrame.

    Rows may carry different sets of keys: every key seen in any
    row becomes a column, and rows that lack a key get `None`
    in that column.

    :param table: table argument passed as is to `yt.read_table`
    :return: `pd.DataFrame` with one row per table row; empty
        if the table has no rows
    """
    columns = defaultdict(list)
    rows_count = 0
    for index, row in enumerate(yt.read_table(table, format='json')):
        # `items()` exists on both Python 2 and Python 3 dicts.
        for name, value in row.items():
            column = columns[name]
            # Back-fill the rows in which this column was absent. Padding
            # by the column's actual length also covers a column that first
            # shows up after row 0 (tracking "last seen index" with a default
            # of 0 left such columns one cell short).
            column.extend([None] * (index - len(column)))
            column.append(value)
        rows_count = index + 1

    # Columns missing from the trailing rows are padded to full length.
    for column in columns.values():
        column.extend([None] * (rows_count - len(column)))

    return pd.DataFrame(columns)


def qloud_snapshot_is_ready():
    """
    Tells whether the pre-packaged snapshot is the one the
    storage currently uses.

    Returns True only when the deployment type is Qloud, a
    pre-packaged snapshot path is configured, and the version
    parsed from that path's resolved base name equals the
    current version info. In every other case (including a
    parse error or a file system error) returns False.
    """
    config = app.config
    if config['DEPLOYMENT_TYPE'] != config['QLOUD']:
        return False

    snapshot_path = config['QLOUD_PREPACKAGED_SNAPSHOT_PATH']
    if not snapshot_path:
        return False

    try:
        # Resolve symlinks first so the version is parsed from
        # the real snapshot name.
        snapshot_name = os.path.basename(os.path.realpath(snapshot_path))
        packaged_version = parse_version_info(snapshot_name)
        return packaged_version == get_current_version_info()
    except (ValueError, OSError, IOError):
        # could be parse error, file missing, or permission denied
        return False


def get_apps_table():
    """
    Returns the last entry that `yt.list` reports (with
    `absolute=True`) under `YT_PATH_PARSED_ITEMS`.

    NOTE(review): presumably this is meant to be the most recent
    table; that relies on the order `yt.list` returns - confirm.
    """
    parsed_items_path = app.config['YT_PATH_PARSED_ITEMS']
    tables = yt.list(parsed_items_path, absolute=True)
    return tables[-1]
