import os
import json
import logging
import pymongo
import gzip
import msgpack

from sandbox.sandboxsdk.svn import Arcadia
from sandbox.sandboxsdk.errors import SandboxTaskUnknownError
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.parameters import SandboxStringParameter
from sandbox.projects import resource_types


class GencfgHostsDataUrl(SandboxStringParameter):
    """
        String parameter holding the location of the gencfg hosts data file.

        The default value is an ``arcadia:`` URL; the task passes the value
        to ``Arcadia.export``.
    """
    default_value = 'arcadia:/arc/trunk/data/gencfg_db/hardware_data/hosts_data'
    description = 'hosts data url'
    name = 'hosts_data_url'


class MongoUri(SandboxStringParameter):
    """
        String parameter holding the MongoDB address used by the task.

        The default value is a comma-separated list of host names; the task
        passes the value unchanged to ``get_mongo_collection``.
    """
    default_value = 'iva1-0934.search.yandex.net,iva1-0951.search.yandex.net,myt0-4012.search.yandex.net,sas1-1330.search.yandex.net,sas1-3002.search.yandex.net'
    description = 'mongo uri'
    name = 'mongo_uri'


class UpdateHostsData(SandboxTask):
    """
        Update hosts data.

        Exports the hosts data file from Arcadia, upserts every host record
        into the MongoDB collection returned by ``get_mongo_collection`` and
        writes the same records into a gzip-compressed msgpack file that is
        registered as a ``GENCFG_HOSTS_DATA`` resource.
    """
    type = 'UPDATE_HOSTS_DATA'

    input_parameters = [
        GencfgHostsDataUrl,
        MongoUri
    ]

    def on_execute(self):
        """
            Run the task: download, upsert into MongoDB, dump and publish.

            Reads the export URL and the Mongo address from the task context
            (``hosts_data_url`` and ``mongo_uri`` parameters).

            Raises:
                SandboxTaskUnknownError: if the export did not leave a local
                    ``hosts_data`` file behind.
        """
        Arcadia.export(self.ctx.get(GencfgHostsDataUrl.name), 'hosts_data')
        if not os.path.exists('hosts_data'):
            raise SandboxTaskUnknownError('Failed to download hosts_data from svn')

        # Use a context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        with open('hosts_data') as hosts_file:
            hosts = json.load(hosts_file)
        logging.info('Total %s hosts', len(hosts))

        coll = get_mongo_collection(self.ctx.get(MongoUri.name))
        topology_path = self.path('topology.msgpack.gz')
        with gzip.open(topology_path, 'wb') as stream:
            for i, host in enumerate(hosts):
                # Progress marker: log every 1000th record.
                if i % 1000 == 0:
                    logging.info('%s %s', i, host)

                # Records with an empty/falsy 'invnum' are keyed by host name.
                invnum = host['invnum'] or host['name']
                coll.update(
                    {'invnum': invnum},
                    host,
                    upsert=True,
                )

                # Records are appended back to back as individual msgpack
                # objects inside one gzip stream.
                stream.write(msgpack.packb(host))

        self.create_resource(
            description='gencfg hosts data',
            resource_path=topology_path,
            resource_type=resource_types.GENCFG_HOSTS_DATA,
            arch='any'
        )


def get_mongo_collection(uri):
    """
        Return the ``hosts`` collection of the ``topology`` database.

        Opens a ``pymongo.MongoReplicaSetClient`` for ``uri`` against the
        ``heartbeat_mongodb_c`` replica set, with majority write concern
        and secondary-preferred reads.
    """
    client_options = dict(
        connectTimeoutMS=500,
        replicaSet='heartbeat_mongodb_c',
        w='majority',
        wtimeout=5000,
        read_preference=pymongo.ReadPreference.SECONDARY_PREFERRED,
    )
    client = pymongo.MongoReplicaSetClient(uri, **client_options)
    database = client['topology']
    return database['hosts']


# Module-level alias for the task class defined in this file.
# NOTE(review): presumably the name the Sandbox task loader looks up - confirm.
__Task__ = UpdateHostsData
