import math
from datetime import timedelta
import logging
from multiprocessing.dummy import Pool as ThreadPool

import json

from sandbox.sandboxsdk import environments
from sandbox import sdk2
from datetime import datetime

from sandbox.projects.cloud.analytics.common.utils import ISO_FORMAT
from sandbox.projects.cloud.analytics.common.analytics_task import AnalyticsTask
from sandbox.projects.cloud.analytics.common.resource import MRTableResource


class ThreadErrors(Exception):
    """Aggregate exception carrying a collection of underlying errors.

    The wrapped errors are kept on the ``errors`` attribute exactly as
    passed in; the string form lists them one per line.
    """

    def __init__(self, errors):
        # Keep a reference to the caller's collection (no copy is made).
        self.errors = errors

    def __str__(self):
        """Return ``str()`` of every collected error joined by newlines."""
        rendered = map(str, self.errors)
        return '\n'.join(rendered)


class CloudIamCloudFoldersToYT(AnalyticsTask):
    """ Task to import data from IAM folders to YT for cloud analytics"""
    # Overview of on_execute:
    #   1. read (cloud_id, cloud_status) rows from `cloud_creators_table`;
    #   2. fetch the folders of every cloud from the IAM API in a thread pool;
    #   3. write the folders to `<cloud_folders_dst_yt_prefix>/<timestamp>`;
    #   4. publish an MRTableResource describing the written table;
    #   5. remove tables under the prefix older than CLEANUP_INTERVAL_DAYS.

    # Tables under the destination prefix whose creation_time is more than
    # this many days before the task start are removed at the end of the run.
    CLEANUP_INTERVAL_DAYS = 3

    class Requirements(AnalyticsTask.Requirements):
        # TODO(syndicut): Use prebuilt wheels here
        environments = (
            environments.PipEnvironment('yandex-yt'),
            environments.PipEnvironment('yandex-yt-yson-bindings-skynet'),
            environments.PipEnvironment('requests'),
        )

    class Parameters(AnalyticsTask.Parameters):
        dst_cluster = sdk2.parameters.String(
            'Destination YT cluster',
            default='hahn',
            required=True
        )
        cloud_folders_dst_yt_prefix = sdk2.parameters.String(
            'Destination YT prefix for CloudFolders tables',
            default='//home/cloud_analytics/import/iam/cloud_folders/1h',
            required=True
        )
        cloud_creators_table = sdk2.parameters.String(
            'Cloud creators table to get cloud ids from',
            required=True
        )
        iam_api_endpoint = sdk2.parameters.String(
            'IAM api endpoint',
            default='https://identity.private-api.cloud.yandex.net:14336/v1',
            required=True
        )
        yt_token_name = sdk2.parameters.String(
            'YT Token secret name',
            default='robot-clanalytics-yt-yt-token',
            required=True
        )
        # Used both as the thread pool size and to split clouds into chunks.
        threads_count = sdk2.parameters.Integer(
            'Threads Count',
            default=10,
            required=True
        )

    def on_execute(self):
        """Snapshot the IAM folders of all known clouds into a new YT table.

        Reads ``cloud_id``/``cloud_status`` rows from the cloud creators
        table, downloads the folders of each cloud through the IAM API using
        ``threads_count`` worker threads, writes the collected rows to
        ``<cloud_folders_dst_yt_prefix>/<now formatted with ISO_FORMAT>``,
        registers an ``MRTableResource`` pointing at that table and finally
        removes tables under the prefix that are older than
        ``CLEANUP_INTERVAL_DAYS`` days.

        Raises:
            ThreadErrors: if any worker failed to download folders. In that
                case no destination table is created and no resource is
                published.
        """
        logging.getLogger().setLevel(logging.INFO)
        # Imported lazily: these modules are only needed while the task runs.
        import yt.wrapper as yt
        from sandbox.projects.cloud.analytics.common.iam.client import IamAPI
        from sandbox.projects.cloud.analytics.common.iam_to_yt import cloud_folders_schema

        yt.config['token'] = sdk2.Vault.data(
            self.owner,
            self.Parameters.yt_token_name
        )
        yt.config['proxy']['url'] = self.Parameters.dst_cluster

        iam_api = IamAPI(endpoint=self.Parameters.iam_api_endpoint)
        # `now` both names the destination table and anchors the cleanup age.
        now = datetime.utcnow()
        cloud_folders_table = '/'.join([self.Parameters.cloud_folders_dst_yt_prefix, now.strftime(ISO_FORMAT)])
        # Shared accumulators appended to by the worker threads.
        folders = list()
        thread_errors = list()

        def folders_download_thread(clouds_chunk):
            """Fetch folders for every cloud of the chunk into `folders`.

            On the first failure the error is logged and recorded in
            `thread_errors`, and the rest of the chunk is skipped.
            """
            logging.info("Started handling chunk")
            for cloud in clouds_chunk:
                try:
                    raw_folders = iam_api.cloud_folders(cloud['cloud_id'])
                    for raw_folder in raw_folders:
                        folders.append(
                            {
                                'cloud_id': raw_folder['cloudId'],
                                'folder_id': raw_folder['id'],
                                'folder_created_at': raw_folder['createdAt'],
                                'folder_name': raw_folder['name'],
                                'cloud_status': cloud['cloud_status']
                            }
                        )
                except Exception as e:
                    # Keep the traceback and the offending row in the task
                    # log; the aggregated ThreadErrors only carries str(e).
                    logging.exception("Failed to fetch folders for cloud row %r", cloud)
                    thread_errors.append(e)
                    return
            logging.info("Stopped handling chunk")

        def add_resource():
            """Publish an MRTableResource describing the written table."""
            resource = MRTableResource(
                task=sdk2.task.Task.current,
                description="MR Table",
                path="mr_table.json"
            )
            mr_table = {
                "cluster": sdk2.task.Task.current.Parameters.dst_cluster,
                "table": cloud_folders_table
            }
            mr_table = unicode(json.dumps(mr_table))
            data = sdk2.ResourceData(resource)
            data.path.write_text(mr_table)
            # Mark the same ResourceData object that was written as ready.
            data.ready()

        result = yt.read_table(yt.TablePath(self.Parameters.cloud_creators_table, columns=["cloud_id", "cloud_status"]))
        result = list(result)

        num_clouds = len(result)
        logging.info("num clouds: {}".format(num_clouds))
        if not num_clouds:
            logging.warning("Cloud creators table is empty, resulting table will have no rows")

        # Guard against a non-positive parameter value (division by zero).
        threads_count = max(1, int(self.Parameters.threads_count))

        # At least 1: range() rejects a zero step when there are no clouds.
        chunk_length = max(1, int(math.ceil(num_clouds / float(threads_count))))
        logging.info("chunk length: {}".format(chunk_length))

        chunks = [result[x:x + chunk_length] for x in range(0, num_clouds, chunk_length)]

        # Size the pool from the task parameter so that every chunk gets its
        # own worker instead of depending on the host CPU count.
        pool = ThreadPool(threads_count)
        try:
            pool.map(folders_download_thread, chunks)
        finally:
            pool.close()
            pool.join()

        # Fail before touching YT: a partial snapshot must never be written.
        if thread_errors:
            raise ThreadErrors(errors=thread_errors)

        yt.create_table(
            cloud_folders_table,
            attributes={"schema": cloud_folders_schema}
        )
        yt.write_table(cloud_folders_table, folders)

        add_resource()

        logging.info('Cleanup tables older than {} days'.format(self.CLEANUP_INTERVAL_DAYS))
        dst_prefixes_to_clean = [
            self.Parameters.cloud_folders_dst_yt_prefix
        ]
        max_age = timedelta(days=self.CLEANUP_INTERVAL_DAYS)
        for dst_prefix in dst_prefixes_to_clean:
            for table in yt.list(dst_prefix, attributes=["creation_time"]):
                table_ts = datetime.strptime(table.attributes["creation_time"], "%Y-%m-%dT%H:%M:%S.%fZ")
                if now - table_ts > max_age:
                    logging.info('Remove {}'.format(table))
                    yt.remove('/'.join([dst_prefix, table]))
