from sandbox.sandboxsdk import environments
from sandbox import sdk2


class LinkInfo(object):
    """Describe a YT link node: where the link lives and what it points to."""

    def __init__(self, link_path, target_path):
        # link_path: path of the link node itself.
        # target_path: path the link points to.
        self.link_path = link_path
        self.target_path = target_path

    def __repr__(self):
        # Readable representation so instances are useful in logs and tracebacks.
        return "{}(link_path={!r}, target_path={!r})".format(
            type(self).__name__, self.link_path, self.target_path
        )


class LayersReplicatorTask(sdk2.Task):
    """Mirror files and links of YT directories from a meta cluster to target clusters.

    For every configured directory and every target cluster the task:

    * copies files that exist on the meta cluster but not on the target
      (via Transfer Manager);
    * removes files that exist on the target but not on the meta cluster;
    * re-creates the meta cluster's links on the target when the link
      target exists there.

    Files are matched by path only; their contents are not compared.
    """

    class Parameters(sdk2.Task.Parameters):
        # Cluster that is treated as the source of truth.
        meta_cluster = sdk2.parameters.String(
            'Meta YT cluster',
            default="hume",
            required=True,
        )
        # Name of the Vault entry that holds the YT OAuth token.
        yt_token_vault_name = sdk2.parameters.String(
            'YT token vault name',
            required=True,
        )
        # Directories whose direct children (files and links) are replicated.
        yt_directories = sdk2.parameters.List(
            'YT path layer locations',
            sdk2.parameters.String,
            default=['//home/arapova/geobase_layers', '//home/arapova/porto_layers'],
            required=True,
        )
        # Clusters that receive the replicated data.
        target_clusters = sdk2.parameters.List(
            'YT target clusters for layers data replication',
            sdk2.parameters.String,
            default=['freud'],
            required=True,
        )

    class Requirements(sdk2.Task.Requirements):
        environments = (
            environments.PipEnvironment("yandex-yt"),
            environments.PipEnvironment("yandex-yt-yson-bindings-skynet"),
            environments.PipEnvironment('yandex-yt-transfer-manager-client'),
        )

    @staticmethod
    def _get_yt_client(cluster, token):
        """Return a ``YtClient`` for ``cluster`` authenticated with ``token``."""
        # Imported lazily; presumably the package only becomes importable once
        # the pip environments from Requirements are prepared -- TODO confirm.
        from yt.wrapper import YtClient
        return YtClient(cluster, token=token)

    @staticmethod
    def get_all_data_from_cluster_from_directory(client, directory):
        """List the direct children of ``directory`` and split them by node type.

        :param client: YT client used for the listing.
        :param directory: YT path of the directory to list.
        :return: tuple ``(all_files, all_links)`` where ``all_files`` is a list
            of full paths of ``file`` nodes and ``all_links`` is a list of
            ``LinkInfo`` objects for ``link`` nodes. Nodes of any other type
            are ignored.
        """
        all_files = []
        all_links = []
        for node in client.list(directory, attributes=['type', 'target_path']):
            node_path = "{}/{}".format(directory, node)
            node_type = node.attributes['type']
            if node_type == 'file':
                all_files.append(node_path)
            elif node_type == 'link':
                all_links.append(LinkInfo(node_path, node.attributes['target_path']))
        return all_files, all_links

    @staticmethod
    def update_link_in_cluster(target_client, all_links):
        """Create or overwrite every link whose target exists on the target cluster.

        Links whose target path is missing on the target cluster are skipped.

        :param target_client: YT client of the cluster to update.
        :param all_links: iterable of ``LinkInfo`` objects to apply.
        """
        for link_info in all_links:
            if not target_client.exists(link_info.target_path):
                continue
            LayersReplicatorTask.update_link(target_client, link_info.link_path, link_info.target_path)

    @staticmethod
    def update_link(client, link_path, target_path):
        """Point ``link_path`` at ``target_path``, replacing an existing link node."""
        client.link(target_path, link_path, force=True)

    def _replicate_directory(self, tm, cluster, client, directory, meta_files, meta_links):
        """Bring ``directory`` on one target cluster in line with the meta cluster.

        :param tm: Transfer Manager client used to copy files.
        :param cluster: name of the target cluster.
        :param client: YT client of the target cluster.
        :param directory: YT path of the directory being replicated.
        :param meta_files: full paths of the files present on the meta cluster.
        :param meta_links: ``LinkInfo`` objects for the links on the meta cluster.
        """
        if not client.exists(directory):
            # recursive=True also creates missing parent nodes, so a directory
            # nested under a not-yet-existing path does not fail the task.
            client.mkdir(directory, recursive=True)

        target_files, _ = self.get_all_data_from_cluster_from_directory(client, directory)

        # Sets give O(1) membership checks; the lists keep the listing order
        # for the actual copy/remove operations.
        target_file_set = set(target_files)
        meta_file_set = set(meta_files)

        for path in meta_files:
            if path in target_file_set:
                continue
            tm.add_tasks(
                source_cluster=self.Parameters.meta_cluster,
                source_pattern=path,
                destination_cluster=cluster,
                destination_pattern=path,
                include_files=True,
                sync=True,
                params={"yt_tmp_dir": "//porto_layers"}
            )

        for path in target_files:
            if path not in meta_file_set:
                client.remove(path)

        # Links are applied after the copy step so that freshly transferred
        # files can already serve as link targets.
        self.update_link_in_cluster(client, meta_links)

    def on_execute(self):
        """Replicate every configured directory to every target cluster."""
        oauth_token = sdk2.Vault.data(self.Parameters.yt_token_vault_name)

        from yt.transfer_manager.client import TransferManager
        tm = TransferManager(token=oauth_token)

        meta_cluster_client = self._get_yt_client(self.Parameters.meta_cluster, oauth_token)
        # One client per target cluster, created once and reused for all directories.
        target_clusters_clients = {
            cluster: self._get_yt_client(cluster, oauth_token)
            for cluster in self.Parameters.target_clusters
        }

        for directory in self.Parameters.yt_directories:
            meta_files, meta_links = self.get_all_data_from_cluster_from_directory(
                meta_cluster_client,
                directory,
            )

            for cluster in self.Parameters.target_clusters:
                self._replicate_directory(
                    tm,
                    cluster,
                    target_clusters_clients[cluster],
                    directory,
                    meta_files,
                    meta_links,
                )
