from __future__ import absolute_import

import json
import logging
import os
from sandbox import sdk2

from datetime import datetime
from sandbox.projects.collections.mixins import (
    ExtractDumpMixin,
    YasmReportable,
    MongoDBMixin,
    FetchDumpMixin,
    _wait_all,
)
from sandbox.projects.collections import resources
from sandbox.projects.collections.CollectionsHitmanLauncher import HitmanProcessLaunchTask
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.sdk2 import parameters
from sandbox.sdk2.helpers import subprocess
from sandbox.common.types import task as sandbox_task
from sandbox.common.types import notification as sandbox_notification


# Names of the mongo databases this task can dump.
PODBORKI = 'podborki'
PDB_CG_DATA = 'pdb-cg-data'
PDB_ADMIN = 'pdbadmin'
PDB_ADMIN_TEST = 'pdbadmintest'

# Values of the `env` task parameter.
TEST = 'test'
PROD = 'prod'

# Collections whose YT tables are sorted by the `id` column after upload.
# NOTE: every item must be followed by a comma; adjacent string literals are
# silently concatenated by Python ('card_like' 'channel' -> 'card_likechannel'),
# which would drop both collections from the set.
SORTED_BY_ID = {
    'board',
    'card',
    'card_comment',
    'card_like',
    'channel',
    'channel_container',
    'competition',
    'complaint',
    'event',
    'recipe',
    'subscription',
    'user',
    'user_feed',
    'vertical',
    'user_container',
}

# collection name -> sort columns, for collections that are sorted by something
# other than `id`. Currently empty.
CUSTOM_SORT_ORDER = {}

# Collections dumped for both admin databases.
TEST_COLLECTIONS = [
    'action', 'admin', 'banned_md5', 'board', 'card', 'card_comment', 'fotki_content_index', 'hidden_boards',
    'hidden_cards', 'pool', 'state', 'wizard_request'
]

# database name -> collections to dump when the `components` parameter is empty.
DEFAULT_COLLECTIONS = {
    PODBORKI: [
        'board', 'card', 'card_comment', 'card_like', 'channel', 'channel_container', 'competition', 'event', 'recipe',
        'subscription', 'user', 'vertical', 'krypta_interest', 'user_container', 'object_answer', 'ballot', 'poll',
        'vote'
    ],
    PDB_CG_DATA: ['features-images', 'hidden_rec_board', 'hidden_rec_channel', 'promoted_channels'],
    PDB_ADMIN_TEST: TEST_COLLECTIONS,
    PDB_ADMIN: TEST_COLLECTIONS,
}

# Collections whose mongo-based table name gets SUFFIX_OLD appended
# (see get_collection_dump_path).
OLD_COLLECTIONS = {'card_like', 'user_container'}

# Root YT directory with dumps converted from YDB.
YDB_CONVERTED_DUMPS_BASEDIR = '//home/collections-backups/ydb-converted'

# env -> mongo database name -> relative path of the matching YDB dump
# under YDB_CONVERTED_DUMPS_BASEDIR.
YDB_COLLECTIONS = {
    PROD: {
        PODBORKI: 'ru/collections/production/collections',
    },
    TEST: {
        PODBORKI: 'ru-prestable/collections/testing/collections',
    },
}

# Despite the name, this is appended (joined with '_') to the table path when
# a YDB table would otherwise overwrite an existing mongo table.
FROM_YDB_TABLE_PREFIX = '__from_ydb'
SUFFIX_OLD = '_old'


class CopyCollectionsMongodbDumpToYt(sdk2.Task, ExtractDumpMixin, YasmReportable, MongoDBMixin, FetchDumpMixin):
    """ Create collections backup from encrypted dump of mongodb. """

    class Requirements(sdk2.Task.Requirements):
        # Pip packages providing the `yt` modules that the methods of this task
        # import lazily (yt.wrapper, yt.yson, yt.transfer_manager.client).
        # NOTE(review): presumably these are installed before on_execute runs,
        # which is why `yt` is never imported at module level — confirm.
        environments = (
            PipEnvironment("yandex-yt-yson-bindings-skynet"),
            PipEnvironment("yandex-yt-transfer-manager-client"),
            PipEnvironment('yandex-yt', version='0.9.35'),
        )

    class Parameters(sdk2.Task.Parameters):
        # Resource holding the (encrypted) mongo dump to convert.
        mongo_dump_id = parameters.Resource(
            'Resource with mongo dump',
            resource_type=resources.CollectionsMongodbDump,
        )
        # Optional mapping whose keys have the form "<database>.<collection>";
        # when non-empty it overrides DEFAULT_COLLECTIONS (see _dbs_with_collections).
        # The values are not read in this file.
        components = parameters.Dict(
            'rb torrents',
        )
        # Resource with the mongodb distribution; `bin/bsondump` and
        # `bin/mongoexport` are run from it.
        mongo_id = parameters.Resource(
            'Resource with mongodb',
            resource_type=resources.PdbMongodb,
            required=True,
        )
        # Not read in this file; presumably used by the dump-extraction mixin — TODO confirm.
        gpg_key_owner = parameters.String(
            'Gpg key owner',
            required=True,
        )
        # Resource directory containing the `convert_dump` binary.
        dump_converter = parameters.Resource(
            'Resource with dump converter',
            resource_type=resources.CollectionsDumpConverter,
            required=True,
        )
        # Root YT directory; dumps are written to <base>/<db_name>/<iso timestamp>.
        yt_base_directory = parameters.String(
            'Yt base directory',
            required=True,
        )
        # YT cluster the dump is written to first.
        yt_proxy = parameters.String(
            'Yt proxy',
            required=True,
            default='arnold',
        )
        # Extra clusters the PODBORKI dump is copied to, mapped to the number
        # of most recent states to keep on that cluster.
        additional_yt_destinations = parameters.Dict(
            'additional Yt destinations (cluster: states to keep)',
            default={'hahn': 2},
        )
        # Database used when `components` is empty.
        # NOTE(review): this is the only parameter using `value=` instead of
        # `default=` — confirm that is intended.
        database_name = parameters.String(
            'Database to dump',
            value=PODBORKI,
            choices=[
                ('Data', PODBORKI),
                ('CG Data', PDB_CG_DATA),
                ('Admin test', PDB_ADMIN_TEST),
                ('Admin', PDB_ADMIN),
            ],
        )
        # Not read in this file; presumably used by the YasmReportable mixin — TODO confirm.
        monitoring_server_host = parameters.String(
            'Monitoring server',
            default='monit.n.yandex-team.ru',
        )
        # Used in the lag signal name and to choose which index builders to launch.
        service_name = parameters.String(
            'Service name',
            required=False,
            default='',
        )
        # Compared against PROD in on_save and used as the key into YDB_COLLECTIONS.
        env = parameters.String(
            'Environment that launched this task (test, prod)',
            required=False,
            # for backward compatibility
            default=None,
        )
        # 'bson' dumps are read with bsondump; 'files' dumps are served by a
        # local mongod and read with mongoexport.
        with parameters.RadioGroup('Dump type') as dump_type:
            dump_type.values.bson = dump_type.Value('bson', default=True)
            dump_type.values.files = dump_type.Value('files')
        # Unix timestamp used to name the dump directory; when falsy the
        # current local time is used instead (see get_dump_yt_path).
        timestamp = parameters.Float(
            'Dump creation timestamp',
        )

    def on_save(self):
        """
        Subscribe the dumps mailing list to the production PODBORKI dump.

        Nothing is changed for any other database / environment combination.
        """
        # analytics team use only //home/collections-backups/ydb/eu/collections/production/collections
        if self.Parameters.database_name != PODBORKI or self.Parameters.env != PROD:
            return

        watched_statuses = [
            sandbox_task.Status.FAILURE,
            sandbox_task.Status.SUCCESS,
            sandbox_task.Status.Group.BREAK,
        ]
        email_notification = sdk2.Notification(
            watched_statuses,
            ['collections-dumps@yandex-team.ru'],
            sandbox_notification.Transport.EMAIL,
        )
        # NOTE(review): if on_save runs more than once for the same task, each
        # run appends another copy of this notification — confirm.
        self.Parameters.notifications += [email_notification]

    @property
    def _database(self):
        """Value of the `database_name` task parameter."""
        return self.Parameters.database_name

    @property
    def _service(self):
        """Value of the `service_name` task parameter."""
        return self.Parameters.service_name

    def dir_table(self, path):
        """
        Split a YT path using `yt.wrapper.ypath_split`.

        :param path: YT path to a node
        :return: result of `ypath_split`; the caller in this file unpacks it
            as a pair and uses the second item as the collection (table) name
        """
        # `yt` is imported inside the method, like everywhere else in this task.
        from yt.wrapper import ypath_split
        return ypath_split(path)

    def _dbs_with_collections(self):
        if self.Parameters.components:
            mapping = {}
            for key in self.Parameters.components.iterkeys():
                db, collection = key.split('.', 1)
                if db in mapping:
                    mapping[db].append(collection)
                else:
                    mapping[db] = [collection]
            return mapping
        else:
            return {
                self._database: DEFAULT_COLLECTIONS[self._database]
            }

    def _get_type(self, db_name, _type):
        """
        Choose the value passed to `convert_dump --type` for a collection.

        :param db_name: mongo database name
        :param _type: collection name
        :return: the collection name when the collection is in the typed list
            for its database, otherwise "_raw"
        :raises Exception: for a database this task does not know about
        """
        if db_name == PODBORKI:
            typed_collections = (
                'board', 'card', 'card_like', 'channel', 'channel_container',
                'event', 'recipe', 'subscription', 'user',
                'user_feed', 'vertical',
            )
            return _type if _type in typed_collections else "_raw"

        if db_name == PDB_CG_DATA:
            return "_raw"

        if db_name in (PDB_ADMIN, PDB_ADMIN_TEST):
            return 'pool' if _type == 'pool' else "_raw"

        raise Exception("Unknown database")

    def get_dump_yt_path(self, db_name):
        """
        Build the YT directory for this dump: <yt_base_directory>/<db_name>/<iso time>.

        The time component comes from the `timestamp` parameter when it is
        set, otherwise from the current moment.

        :param db_name: mongo database name
        :return: YT path of the dump directory
        """
        from yt.wrapper import ypath_join

        # Local (naive) time is kept for backward compatibility; an explicit
        # timezone would be preferable.
        if self.Parameters.timestamp:
            moment = datetime.fromtimestamp(self.Parameters.timestamp)
        else:
            moment = datetime.now()  # kept as is for backward compatibility
        state_name = moment.isoformat()

        return ypath_join(self.Parameters.yt_base_directory, db_name, state_name)

    def get_ydb_dump_yt_path(self, db_name):
        """
        Build the path to the latest dump converted from YDB.

        :param db_name: path component under YDB_CONVERTED_DUMPS_BASEDIR; the
            caller in this file passes a value taken from YDB_COLLECTIONS
        :return: <YDB_CONVERTED_DUMPS_BASEDIR>/<db_name>/latest
        """
        from yt.wrapper import ypath_join

        path_parts = (YDB_CONVERTED_DUMPS_BASEDIR, db_name, 'latest')
        return ypath_join(*path_parts)

    def latest_dump_yt_path(self, db_name):
        """
        Build the path of the `latest` node for a database.

        :param db_name: mongo database name
        :return: <yt_base_directory>/<db_name>/latest
        """
        from yt.wrapper import ypath_join

        path_parts = (self.Parameters.yt_base_directory, db_name, 'latest')
        return ypath_join(*path_parts)

    def get_collection_dump_path(self, base_path, _type, consider_migration=True):
        """
        Build the YT table path for one collection of a dump.

        :param base_path: YT directory of the dump
        :param _type: collection name
        :param consider_migration: when true, collections listed in
            OLD_COLLECTIONS get SUFFIX_OLD appended to the table name
        :return: YT path of the collection table
        """
        from yt.wrapper import ypath_join

        table_path = ypath_join(base_path, _type)
        if not consider_migration or _type not in OLD_COLLECTIONS:
            return table_path
        return ''.join((table_path, SUFFIX_OLD))

    def find_tables(self, client, path):
        """
        :param client: YtClient
        :param path:
        :return: path (it can be symlink), real path to table
        """
        if not client.exists(path):
            logging.info('Path %s does not exist', path)
            return
        tables = client.search(
            root=path,
            node_type=['table'],
            attributes=['path'],
        )
        for table in tables:
            path = str(table)
            # path and real path
            yield (path, table.attributes['path'])

    def _launch_index_builder(self):
        """
        Enqueue one HitmanProcessLaunchTask subtask per process configured
        for the current `service_name`. Unknown services launch nothing.
        """
        processes_by_service = {
            'pdb_mongo_testing': [
                'collections-ferryman-test',
                'collections_standalone_test2',
                'collections-suggest-test5',
            ],
            'pdb_mongodb_data_production': [
                'collections-ferryman-prod',
                'collections_standalone_prod2',
                'collections-suggest-prod',
                'unified-data-source-prod',
            ],
        }
        for process_id in processes_by_service.get(self.Parameters.service_name, []):
            launcher = HitmanProcessLaunchTask(self, description='launch index builder')
            launcher.Parameters.process_id = process_id
            launcher.save().enqueue()

    def on_enqueue(self):
        """Run the base `sdk2.Task.on_enqueue` hook; nothing task-specific is added."""
        # Called explicitly on sdk2.Task rather than through super().
        sdk2.Task.on_enqueue(self)

    def on_execute(self):
        """
        Convert the mongo dump into YT tables and publish the result.

        Steps, in order:
          1. fetch and extract the dump (mixin helpers), resolve the mongo and
             converter resource paths;
          2. inside a single YT transaction: upload every collection through
             the `export | convert_dump | yt write` pipeline, sort the tables
             that have a configured sort order, copy tables of the matching
             YDB-converted dump next to them and update the `latest` link;
          3. after the transaction: report lag, launch index builders and copy
             the PODBORKI dump to the additional YT clusters.
        """
        self.task_dir = os.getcwd()

        self._extract_dump(self._fetch_dump())

        mongo_path = str(
            sdk2.ResourceData(
                sdk2.Resource[self.Parameters.mongo_id]
            ).path
        )
        converter_path = str(
            sdk2.ResourceData(
                sdk2.Resource[self.Parameters.dump_converter]
            ).path
        )

        db_dump_path = self._get_db_dump_path()

        # The same vault token is used for every cluster, whatever `yt_proxy` is.
        yt_token = sdk2.Vault.data('YASAP', 'yt-hahn-token')
        yt_proxy = self.Parameters.yt_proxy

        # Environment for the `yt` CLI writer subprocesses.
        environment = os.environ.copy()
        environment['YT_TOKEN'] = yt_token
        environment['YT_PROXY'] = yt_proxy
        environment['YT_LOG_LEVEL'] = 'DEBUG'

        # `yt` modules are imported here rather than at module level, like in
        # every other method of this task.
        from yt.wrapper import YtClient
        from yt.wrapper import mkdir, run_sort, Transaction
        import yt.yson as yson

        _yt = YtClient(
            proxy=yt_proxy,
            token=yt_token,
        )

        # `mongo_port` is bound only for 'files' dumps; it is read below only
        # in the non-'bson' branch.
        if self.Parameters.dump_type == 'files':
            mongo_port = self._start_mongo(mongo_path, db_dump_path, cache_size=self._recommended_cache_size()/6)

        dbs_with_collections = self._dbs_with_collections()
        # Computed once per database: get_dump_yt_path may embed the current
        # time, so calling it again could give a different path.
        dump_yt_paths = {db_name: self.get_dump_yt_path(db_name) for db_name in dbs_with_collections.iterkeys()}

        with Transaction(client=_yt) as tx:
            with sdk2.helpers.ProcessLog(
                self,
                logging.getLogger(
                    'yt_dump'
                ),
            ) as pl:
                writers = []
                for db_name, types in dbs_with_collections.iteritems():
                    dump_yt_path = dump_yt_paths[db_name]
                    mkdir(dump_yt_path, recursive=True, client=_yt)
                    for _type in types:
                        # Ask the converter for the table schema (JSON on stdout).
                        schema = subprocess.Popen(
                            [
                                os.path.join(converter_path, "convert_dump"),
                                '--type',
                                self._get_type(db_name, _type),
                                '--schema',
                            ],
                            stdout=subprocess.PIPE,
                            stderr=pl.stderr,
                        )

                        # `err` is unused: stderr goes to the process log, not a pipe.
                        # NOTE(review): the exit code of this process is not checked.
                        out, err = schema.communicate()
                        schema = json.loads(out)
                        schema = yson.YsonList(schema)
                        schema.attributes["strict"] = False

                        # First pipeline stage: dump the collection to stdout.
                        if self.Parameters.dump_type == 'bson':
                            export = subprocess.Popen(
                                [
                                    os.path.join(mongo_path, "bin", "bsondump"),
                                    os.path.join(db_dump_path, db_name, "{}.bson".format(_type)),
                                ],
                                stdout=subprocess.PIPE,
                                stderr=pl.stderr,
                            )
                        else:
                            export = subprocess.Popen(
                                [
                                    os.path.join(mongo_path, "bin", "mongoexport"),
                                    "--port", str(mongo_port),
                                    "--db", db_name,
                                    "--collection", _type,
                                ],
                                stdout=subprocess.PIPE,
                                stderr=pl.stderr,
                            )

                        # Second stage: convert the exported documents.
                        converter = subprocess.Popen(
                            [
                                os.path.join(converter_path, "convert_dump"),
                                '--type',
                                self._get_type(db_name, _type),
                            ],
                            stdin=export.stdout,
                            stdout=subprocess.PIPE,
                            stderr=pl.stderr,
                        )
                        # Third stage: write the converted rows into the YT table
                        # under the transaction opened above.
                        writer = subprocess.Popen(
                            [
                                "yt",
                                "--tx",
                                tx.transaction_id,
                                "write",
                                "--table",
                                "<schema={schema}>{path}".format(
                                    schema=yson.dumps(schema),
                                    path=self.get_collection_dump_path(dump_yt_path, _type),
                                ),
                                "--format",
                                "<encode_utf8=%false>json",
                                "--config",
                                "{write_parallel={enable=%true;unordered=%true;}}",
                                "--table-writer",
                                # 17 MiB row weight limit
                                "{{max_row_weight={};}}".format(17*1024*1024),
                            ],
                            stdin=converter.stdout,
                            stdout=pl.stdout,
                            stderr=pl.stderr,
                            env=environment,
                        )
                        writers.append(writer)
                # All pipelines run concurrently; only the writers are waited on.
                # NOTE(review): `export` and `converter` processes are neither
                # waited on nor checked for a non-zero exit code here, and the
                # parent's copies of their stdout pipes are not closed — confirm
                # that a failed export cannot produce a silently truncated table.
                _wait_all(writers)

            # Start a sort for every collection that has a configured sort order.
            # NOTE(review): presumably these run under the surrounding
            # transaction because they are issued inside the `with` block — confirm.
            sorters = []
            for db_name, types in dbs_with_collections.iteritems():
                dump_yt_path = dump_yt_paths[db_name]
                for _type in types:
                    if _type in SORTED_BY_ID:
                        sort_by = 'id'
                    elif _type in CUSTOM_SORT_ORDER:
                        sort_by = CUSTOM_SORT_ORDER[_type]
                    else:
                        continue
                    sorter = run_sort(
                        self.get_collection_dump_path(dump_yt_path, _type),
                        sort_by=sort_by,
                        client=_yt,
                        sync=False,
                    )
                    sorters.append(sorter)

            _wait_all(sorters)

            # make symlinks to every converted ydb dump from base mongo dump paths for backward compatibility
            # NOTE(review): the code below copies tables (`_yt.copy`) rather
            # than creating links.
            for db_name, dump_yt_path in dump_yt_paths.items():
                logging.info('Start work with mongo %s %s', db_name, dump_yt_path)
                # Only env/database pairs listed in YDB_COLLECTIONS have a YDB counterpart.
                ydb_base = YDB_COLLECTIONS.get(self.Parameters.env, {}).get(db_name)
                if ydb_base is None:
                    logging.info('Mongo table %s doesn\'t match to ydb', db_name)
                    continue
                logging.info('Found ydb db %s for mongo %s', ydb_base, db_name)
                ydb_dump_path = self.get_ydb_dump_yt_path(ydb_base)
                logging.info('Found ydb path %s', ydb_dump_path)
                for _, ydb_collection_path in self.find_tables(client=_yt, path=ydb_dump_path):
                    logging.info('Found real yt table for ydb dump %s', ydb_collection_path)
                    _, collection = self.dir_table(ydb_collection_path)
                    logging.info('Found ydb collection %s', collection)
                    # copy ydb dump to original collection path
                    path = self.get_collection_dump_path(dump_yt_path, collection, consider_migration=False)
                    # don't rewrite existed mongo tables
                    # it should copy to original path after disabling mongo imports
                    if _yt.exists(path):
                        path = '_'.join([path, FROM_YDB_TABLE_PREFIX])
                    _yt.copy(source_path=ydb_collection_path, destination_path=path)

            for db_name in dbs_with_collections.iterkeys():
                self._ensure_latest_link(_yt, db_name, dump_yt_paths[db_name])

        # Everything below runs after the `with Transaction` block has exited.
        for db_name in dbs_with_collections.iterkeys():
            self._report_lag(
                "collections_{}_copy_{}_to_yt".format(
                    self._service, db_name,
                ),
            )

        self._launch_index_builder()

        # Only the PODBORKI dump is replicated to the additional clusters.
        if PODBORKI in dbs_with_collections:
            db_name = PODBORKI
            dump_yt_path = dump_yt_paths[db_name]
            for dest_proxy, states_count in self.Parameters.additional_yt_destinations.iteritems():
                self._copy_to_other_cluster(db_name, yt_token, yt_proxy, dest_proxy, dump_yt_path, states_count)

    def _copy_to_other_cluster(self, db_name, yt_token, src_cluster, dst_cluster, dump_yt_path, states_count):
        """
        Copy a dump directory to another YT cluster and rotate old states there.

        :param db_name: mongo database name, used to locate the `latest` link
        :param yt_token: token used for both the transfer and the destination client
        :param src_cluster: cluster the dump was written to
        :param dst_cluster: cluster to copy the dump to
        :param dump_yt_path: dump directory; the same path is used on both clusters
        :param states_count: how many of the newest states to keep on the
            destination (converted with int())
        """
        from yt.transfer_manager.client import TransferManager
        from yt.wrapper import YtClient, ypath_dirname, ypath_join

        TransferManager(token=yt_token).add_tasks(
            source_cluster=src_cluster,
            source_pattern=dump_yt_path,
            destination_cluster=dst_cluster,
            destination_pattern=dump_yt_path,
            sync=True,
        )

        dst_client = YtClient(proxy=dst_cluster, token=yt_token)
        self._ensure_latest_link(dst_client, db_name, dump_yt_path)

        # remove old states
        states_dir = ypath_dirname(dump_yt_path)
        states = dst_client.list(states_dir, absolute=False)
        # `latest` is a link, not a state, so it never takes part in rotation.
        if 'latest' in states:
            states.remove('latest')

        # State names are ISO timestamps, so descending order puts the newest first.
        newest_first = sorted(states, reverse=True)
        for stale_state in newest_first[int(states_count):]:
            dst_client.remove(ypath_join(states_dir, stale_state), recursive=True)

    def _create_link(self, yt_client, target_path, link_path):
        """
        Create (or overwrite, since force=True) a YT link.

        :param yt_client: YtClient of the cluster to create the link on
        :param target_path: path the link points to
        :param link_path: path of the link itself
        """
        from yt.wrapper import link as make_link

        logging.info('Make link from %s to %s', target_path, link_path)
        make_link(
            target_path=target_path,
            link_path=link_path,
            client=yt_client,
            recursive=True,
            force=True,
        )

    def _ensure_latest_link(self, _yt, db_name, dump_yt_path):
        """
        Point the `latest` link of a database at this dump unless the link
        already refers to a dump with a greater path.

        :param _yt: YtClient of the cluster to update
        :param db_name: mongo database name
        :param dump_yt_path: YT directory of the dump just written
        :raises YtHttpResponseError: any YT error other than a resolve error
            while reading the current link
        """
        from yt.wrapper import YtHttpResponseError, ypath_join

        link_path = self.latest_dump_yt_path(db_name)
        try:
            current_target = _yt.get(ypath_join(link_path, '@path'))
        except YtHttpResponseError as error:
            # A resolve error is treated as "no link yet" and falls through
            # to creation; anything else is propagated.
            if not error.is_resolve_error():
                raise
        else:
            # Paths end with an ISO timestamp, so string comparison orders them by time.
            if dump_yt_path < current_target:
                return  # not latest
        self._create_link(yt_client=_yt, target_path=dump_yt_path, link_path=link_path)


# Module-level alias of the task class.
# NOTE(review): presumably read by the Sandbox task loader to discover the task — confirm.
__TASK__ = CopyCollectionsMongodbDumpToYt
