import abc
import os
import re
from yt.wrapper import ypath_join
from datacloud.config.yt import MODELS_FOLDER, DATACLOUD_FOLDER
from datacloud.dev_utils.logging.logger import get_basic_logger
from datacloud.dev_utils.yt import yt_utils, yt_files
from datacloud.dev_utils.time.patterns import RE_DAILY_LOG_FORMAT
from datacloud.dev_utils.s3.lib import s3_utils
from datacloud.dev_utils.crypta import crypta_snapshot
from datacloud.batch.lib.storage import (
    S3Storage, YtTableStorage, retrieve_tmp_file, move)
from datacloud.batch.lib.tables import(
    batch_status_table, batch_description_table)
from datacloud.batch.lib.tables.batch_description_table import (
    StorageType, TaskType, SubscriptionType)
from datacloud.batch.lib.tables.batch_status_table import Status
from datacloud.batch.lib import subscription
from datacloud.batch.lib import validator
from datacloud.batch.lib import mr


# Module-level logger obtained from the project's logging helper, keyed by
# this module's name.
logger = get_basic_logger(__name__)
# Root YT folder of the batch pipeline; the per-task raw/clear/output folders
# used by the helpers below are built underneath it.
YT_BATCH_FOLDER = ypath_join(DATACLOUD_FOLDER, 'batch')

# Abstract base class usable from both Python 2 and Python 3: the class is
# created by calling the ``abc.ABCMeta`` metaclass directly, which avoids the
# version-specific ``metaclass=`` / ``__metaclass__`` syntax.
ABC = abc.ABCMeta('ABC', (object,), {'__slots__': ()})


class TaskInfo(object):
    """Plain record describing a single item of a batch task.

    The constructor only stores its arguments as same-named attributes:

    * ``task_id`` / ``key`` -- identify the task and the item inside it.
    * ``partner_id``, ``task_type``, ``storage_type``, ``subscription_type``,
      ``storage_root`` -- settings of the task itself.
    * ``task_data`` -- the ``data`` payload of the task description.
    * ``log_data`` -- the ``data`` payload of the item's status record.
    """

    def __init__(self, task_id, key, partner_id, task_type, storage_type,
                 subscription_type, storage_root, task_data, log_data):
        # Identity of the item.
        self.task_id, self.key = task_id, key
        # Settings of the task.
        self.partner_id, self.task_type = partner_id, task_type
        self.storage_type, self.subscription_type = (
            storage_type, subscription_type)
        self.storage_root = storage_root
        # Free-form payloads.
        self.task_data, self.log_data = task_data, log_data


# # # Checkers
class BaseNewTaskChecker(ABC):
    """Interface of objects that look for new work items of a task."""

    @abc.abstractmethod
    def check(self, descr):
        """Report the new items of the task described by ``descr``.

        Concrete checkers must override this method. The caller iterates
        over the result and unpacks every element as a
        ``(task_id, key, data)`` triple.
        """


class BatchChecker(BaseNewTaskChecker):
    """Checker that reports input files which are not registered yet."""

    def __init__(self, yt_client):
        self._yt_client = yt_client

    def check(self, descr):
        """Yield one ``(task_id, input_name, data)`` triple per new input.

        ``descr`` is the task description; its ``task_id``, ``storage``,
        ``partner_id`` and ``path`` entries are read. Input names already
        present in the batch status table are handed to
        ``subscription.NewFileTrigger`` together with the storage and the
        ``<path>/input`` location, and every name the trigger produces is
        yielded with ``{'input_name': <name>}`` as its data.
        """
        task_id = descr['task_id']
        storage = _get_storage(descr['storage'], descr['partner_id'])
        known_inputs = batch_status_table.BatchStatusTable(
            yt_client=self._yt_client).get_existing_input_names(task_id)
        input_dir = os.path.join(descr['path'], 'input')
        find_new = subscription.NewFileTrigger()
        for fresh_name in find_new(storage, input_dir, known_inputs):
            yield task_id, fresh_name, {'input_name': fresh_name}
# # # End checkers


# # # Jobs
class BaseJob(ABC):
    """Interface of a job: prepare the input first, then do the work.

    Both methods must be overridden. Their return value is stored by the
    caller as the new status of the task item.
    """

    @abc.abstractmethod
    def validate_and_prepare(self, task):
        """Check the input of ``task`` and make it ready for ``perform``."""

    @abc.abstractmethod
    def perform(self, task, date_str=None):
        """Execute the job for ``task``; ``date_str`` is optional."""


class ScoringJob(BaseJob):
    """Job that turns an uploaded ``id_value`` file into a table of scores."""

    def __init__(self, yt_client):
        self._yt_client = yt_client

    def validate_and_prepare(self, task):
        """Download the task input, validate it and upload it to YT.

        The input is looked up at ``<storage_root>/input/<input_name>`` in
        the task's storage, fetched into a temporary local file, validated
        with ``_validate_file`` and uploaded with ``_upload_on_yt``. The
        temporary file is removed afterwards, whether or not the step
        succeeded.

        Returns:
            ``Status.SCHEDULED`` once the input has been uploaded.

        Raises:
            RuntimeError: the input does not exist in the storage, or it
                failed validation. Failing loudly is deliberate for now;
                the intended final behaviour is to return ``Status.LOST``
                and ``Status.INVALID`` respectively.
        """
        storage = _get_storage(task.storage_type, task.partner_id)
        input_storage_path = os.path.join(
            task.storage_root, 'input', task.log_data['input_name'])

        logger.info('Storage path is: {}'.format(input_storage_path))

        if not storage.exists(input_storage_path):
            # TODO: return Status.LOST instead of raising.
            raise RuntimeError(
                'NOT NOW, not exists: {}'.format(input_storage_path))

        local_path = retrieve_tmp_file(storage, input_storage_path)
        try:
            validation_status, _problems = _validate_file(local_path)
            if validation_status is False:
                # TODO: return Status.INVALID instead of raising.
                raise RuntimeError(
                    'NOT NOW, failed validation: {}'.format(
                        input_storage_path))
            _upload_on_yt(self._yt_client, task.task_id, task.key, local_path)
        finally:
            # Never leave the downloaded copy behind, even on failure.
            if os.path.exists(local_path):
                os.remove(local_path)
        return Status.SCHEDULED

    def perform(self, task, date_str=None):
        """Compute scores for the prepared input table of ``task``.

        The score table is ``MODELS_FOLDER/<score_partner_id>/<score_name>/
        <date_str>``. When ``date_str`` is not given, the last path
        component of the table returned by ``yt_utils.get_last_table`` for
        the score folder is used. The result is written to a table named
        after the task key in the task's output folder.

        Returns:
            ``Status.WAIT_UPLOAD``.
        """
        yt = self._yt_client
        partner_id = task.task_data['score_partner_id']
        score_name = task.task_data['score_name']
        score_folder = yt_utils.ypath_join(MODELS_FOLDER, partner_id, score_name)
        if not date_str:
            last_table = yt_utils.get_last_table(score_folder, yt)
            date_str = last_table.split('/')[-1]

        basename = _get_basename(task.key)
        input_table = yt_utils.ypath_join(_get_clear_yt_folder(task.task_id),
                                          basename)
        output_folder = _get_output_yt_folder(task.task_id)
        out_table = yt_utils.ypath_join(output_folder, basename)
        yt_utils.create_folders([output_folder], yt)

        snapshot = crypta_snapshot.get_snapshot(yt, date_str)
        score_table = yt_utils.ypath_join(score_folder, date_str)
        mr.id_value_to_score(yt, input_table, score_table, snapshot, out_table)
        return Status.WAIT_UPLOAD
# # # End Jobs


# # # Start uploaders
class BaseUploader(ABC):
    """Interface of objects that deliver the result of a finished task."""

    @abc.abstractmethod
    def upload(self, task):
        """Deliver the result of ``task``; must be overridden.

        The return value is stored by the caller as the new status of the
        task item.
        """


class BatchUploader(BaseUploader):
    """Uploader that moves the YT result table into the task's storage."""

    def __init__(self, yt_client):
        self._yt_client = yt_client

    def upload(self, task):
        """Move the output table of ``task`` to its result location.

        The source is the table named after the task key in the task's YT
        output folder; the destination is
        ``<storage_root>/result/<input_name>`` in the task's storage.

        Returns:
            ``Status.DONE``.

        Raises:
            AssertionError: the output table does not exist.
        """
        basename = _get_basename(task.key)
        table = yt_utils.ypath_join(_get_output_yt_folder(task.task_id), basename)
        # Raised explicitly (not via ``assert``) so the check survives ``-O``.
        if not yt_utils.check_table_exists(table, self._yt_client):
            raise AssertionError(
                'Output table {} does not exist'.format(table))
        input_name = task.log_data['input_name']
        result_storage_path = os.path.join(task.storage_root, 'result', input_name)
        move(YtTableStorage(self._yt_client), table,
             _get_storage(task.storage_type, task.partner_id),
             result_storage_path)
        return Status.DONE
# # # End uploaders


def worker_factory(yt_client, task_type, storage_type, subscription_type):
    """Assemble a worker from a checker, a job and an uploader.

    Args:
        yt_client: YT client handed to every component.
        task_type: ``TaskType`` member; selects the job.
        storage_type: ``StorageType`` member; only type-checked here.
        subscription_type: ``SubscriptionType`` member; selects the checker
            and the uploader.

    Returns:
        An object exposing ``check_new``, ``validate_and_prepare``, ``do``
        and ``upload_result`` that delegate to its ``checker``, ``job`` and
        ``uploader`` attributes.

    Raises:
        AssertionError: an argument is not a member of the expected enum.
        NotImplementedError: the subscription type or the task type has no
            implementation yet.
    """
    assert isinstance(task_type, TaskType)
    assert isinstance(storage_type, StorageType)
    assert isinstance(subscription_type, SubscriptionType)

    class Worker(object):
        """Thin facade delegating each pipeline step to one component."""

        def check_new(self, descr):
            for item in self.checker.check(descr):
                yield item

        def validate_and_prepare(self, task):
            return self.job.validate_and_prepare(task)

        def do(self, task):
            return self.job.perform(task)

        def upload_result(self, task):
            return self.uploader.upload(task)

    worker = Worker()
    if subscription_type is SubscriptionType.BATCH:
        worker.checker = BatchChecker(yt_client)
        worker.uploader = BatchUploader(yt_client)
    else:
        # A string cannot be raised; use a real exception so the message
        # actually reaches the caller.
        raise NotImplementedError(
            'SubscriptionType {} is not implemented'.format(subscription_type))

    if task_type is TaskType.SCORING:
        worker.job = ScoringJob(yt_client)
    else:
        raise NotImplementedError(
            'TaskType {} is not implemented'.format(task_type))

    return worker


class BatchProcessor(object):
    """Drives task items through the pipeline, one step per call.

    Each ``step_*`` method performs a single transition and records the
    resulting status in the batch status table.
    """

    def __init__(self, yt_client):
        self._yt_client = yt_client
        self._status_table = batch_status_table.BatchStatusTable(
            yt_client=yt_client)
        self._description_table = batch_description_table.BatchDescriptionTable(
            yt_client=yt_client)

    def _set_status(self, task_id, input_name, status):
        """Log and store ``status`` for the given task item."""
        assert isinstance(status, batch_status_table.Status)
        message = 'Set [{}] {} {}'.format(status, task_id, input_name)
        logger.info(message)
        self._status_table.update_status(task_id, input_name, status)

    def _get_task_info(self, task_id, key):
        """Build a ``TaskInfo`` from the description and the status record."""
        descr = self._get_description(task_id)
        status_row = self._status_table.get(task_id, key)
        root = descr['path']
        return TaskInfo(
            task_id=task_id,
            key=key,
            partner_id=descr['partner_id'],
            task_type=descr['task_type'],
            storage_type=descr['storage'],
            subscription_type=descr['subscription_type'],
            storage_root=root,
            task_data=descr['data'],
            log_data=status_row['data'],
        )

    def _get_description(self, task_id):
        """Return the description of ``task_id`` from the description table."""
        table = self._description_table
        return table.get_description(task_id)

    def _get_worker(self, task_id):
        """Return the worker for ``task_id`` (currently always the same kind)."""
        # TODO: Get this from task_id
        return worker_factory(
            self._yt_client,
            TaskType.SCORING,
            StorageType.S3,
            SubscriptionType.BATCH,
        )

    def step_find_new(self, task_id):
        """Check the storage and register every new item of the task."""
        logger.info('Check new {}'.format(task_id))
        descr = self._get_description(task_id)
        worker = self._get_worker(task_id)
        for found_task_id, found_key, found_data in worker.check_new(descr):
            self._status_table.add(found_task_id, found_key, found_data)

    def step_prepare(self, task_id, key):
        """Validate and prepare one item, then store the returned status."""
        logger.info('Step NEW {} {}'.format(task_id, key))
        task = self._get_task_info(task_id, key)
        worker = self._get_worker(task_id)
        new_status = worker.validate_and_prepare(task)
        self._set_status(task_id, key, new_status)

    def step_do_work(self, task_id, key):
        """Run the job for one item, then store the returned status."""
        logger.info('step SCHEDULED {} {}'.format(task_id, key))
        task = self._get_task_info(task_id, key)
        worker = self._get_worker(task_id)
        new_status = worker.do(task)
        self._set_status(task_id, key, new_status)

    def step_upload(self, task_id, key):
        """Upload the result of one item, then store the returned status."""
        logger.info('step UPLOAD {} {}'.format(task_id, key))
        task = self._get_task_info(task_id, key)
        worker = self._get_worker(task_id)
        new_status = worker.upload_result(task)
        self._set_status(task_id, key, new_status)


def _get_raw_yt_folder(task_id):
    """Return ``<YT_BATCH_FOLDER>/<task_id>/input/raw`` for the task."""
    raw_subfolder = 'input/raw'
    return yt_utils.ypath_join(YT_BATCH_FOLDER, task_id, raw_subfolder)


def _get_clear_yt_folder(task_id):
    """Return ``<YT_BATCH_FOLDER>/<task_id>/input/clear`` for the task."""
    clear_subfolder = 'input/clear'
    return yt_utils.ypath_join(YT_BATCH_FOLDER, task_id, clear_subfolder)


def _get_output_yt_folder(task_id):
    """Return ``<YT_BATCH_FOLDER>/<task_id>/output`` for the task."""
    output_subfolder = 'output'
    return yt_utils.ypath_join(YT_BATCH_FOLDER, task_id, output_subfolder)


def _get_basename(name):
    return name.split('/')[-1].split('.')[0]


# TODO: Create Validator object and move to strategy
def _validate_filename(filename):
    """Check that ``filename`` looks like ``<date-prefixed name>.csv``.

    The name is accepted when it consists of exactly two dot-separated
    parts, the extension is ``csv``, the first 10 characters of the name
    match ``RE_DAILY_LOG_FORMAT`` and the whole filename is at most 50
    characters long. The first failed check is logged as a warning.

    Returns:
        ``True`` when every check passes, ``False`` otherwise.
    """
    max_filename_length = 50
    parts = filename.split('.')
    if len(parts) != 2:
        # ``Logger.warn`` is a deprecated alias; ``warning`` is the
        # supported spelling on both Python 2 and Python 3.
        logger.warning('Wrong filename {}'.format(filename))
        return False
    name, extension = parts
    if extension != 'csv':
        logger.warning('Wrong extention {}'.format(filename))
        return False
    if not re.match(RE_DAILY_LOG_FORMAT, name[:10]):
        logger.warning('Wrong date-prefix in filename {}'.format(filename))
        return False
    if len(filename) > max_filename_length:
        logger.warning('Too long filename {}'.format(filename))
        return False
    return True


def _validate_file(filename):
    """Validate the rows of a local file with ``validator.validate``.

    The file is rejected when it is empty, when ``1`` is among the broken
    rows reported by the validator, or when the share of broken rows
    exceeds 10% of the lines in the file.

    Args:
        filename: path of the local file to check.

    Returns:
        ``(True, None)`` for an accepted file, otherwise
        ``(False, problems)`` with the problems reported by the validator.
    """
    logger.info('Start validate: {}'.format(filename))
    max_fail_rate = 0.1

    # First pass: count lines. The validator consumes its own handle below.
    with open(filename, 'r') as fh:
        total_lines = sum(1 for _ in fh)

    with open(filename, 'r') as fh:
        broken_rows, problems = validator.validate(fh)

    if total_lines == 0:
        # Nothing to process, and the rate below would divide by zero.
        logger.warning('Empty file: {}'.format(filename))
        return False, problems

    # float() forces true division on Python 2, where int / int truncates
    # and the rate would otherwise always come out as 0.
    fail_rate = float(len(broken_rows)) / total_lines
    logger.info('Fail rate: {}'.format(fail_rate))
    logger.info('Done')

    # NOTE(review): presumably row numbers are 1-based, so this rejects a
    # file whose first line is broken -- confirm against validator.validate.
    if 1 in broken_rows:
        return False, problems
    if fail_rate > max_fail_rate:
        return False, problems
    return True, None


def _upload_on_yt(yt_client, task_id, input_name, local_path):
    """Put a local input file on YT both as a raw file and as a table.

    Inside a single ``yt_client.Transaction()`` block the raw and clear
    folders of the task are created, the file is uploaded as-is under the
    raw folder, and it is stored through ``YtTableStorage`` under the clear
    folder with three string columns: ``external_id``, ``id_value`` and
    ``id_type``. Both targets are named after the basename of
    ``input_name``.
    """
    table_name = _get_basename(input_name)
    raw_dir = _get_raw_yt_folder(task_id)
    clear_dir = _get_clear_yt_folder(task_id)
    raw_target = yt_utils.ypath_join(raw_dir, table_name)
    clear_target = yt_utils.ypath_join(clear_dir, table_name)

    string_columns = ('external_id', 'id_value', 'id_type')
    clear_schema = [
        {'name': column, 'type': 'string'} for column in string_columns]

    with yt_client.Transaction():
        yt_utils.create_folders([raw_dir, clear_dir], yt_client)
        yt_files.upload_file(yt_client, local_path, raw_target)
        table_storage = YtTableStorage(yt_client, schema=clear_schema)
        table_storage.store(clear_target, local_path)


def _get_storage(storage_type, partner_id):
    if storage_type == 's3':
        bucket = 'r9-sample'  # TODO: Compute bucket from partner_id
        return S3Storage(s3_utils.get_client(), bucket)
    else:
        raise 'Only s3 storage is supported'
