# -*- coding: utf-8 -*-

import logging
import re

from sandbox.sandboxsdk import parameters

from sandbox.projects import resource_types
from sandbox.projects.common.search.settings import INDEX_MAIN
from sandbox.projects.common.search.settings import VideoSettings
from sandbox.projects.common import BaseLoadMediaBasesearchResources as blr
from sandbox.projects.GetVideoSearchDatabase import GetTier0Parameter

from sandbox.sdk2.helpers import subprocess as sp
import urllib2
import httplib

# CGI fragments used to classify basesearch queries. They are spliced into the
# larger ``queries_filter`` expressions below with str.format, so they must not
# contain literal braces.
_RELATED = r'(&|^)relev[^&]*(=|;)related(%|=|&|$|;)'
_VTOP = r'(&|^)relev[^&]*(=|;)vtop(&|$|;)'
_SEARCH = r'(&|^)haha=da(&|$)'
_FACTORS = r'(&|^)allfctrs=da(&|$)'
_VIDEOHUB = r'(&|^)relev[^&]*(=|;)vhub_request(&|$|;|=|%)'
_MODELSHUB = r'(&|^)fetch_doc_data=da(&|$)'
# TODO: after the modelshub release switch to
#       _MODELSHUB = '(&|^)relev[^&]*(=|;)mhub_request(&|$|;|=|%)'

# Source URLs of base shards as they appear in a meta search config; group 1
# captures the shard number. The dots are escaped so that they only match the
# literal separators instead of an arbitrary character.
_META_SOURCE_DEPLOY_VLA_RE = re.compile(
    r'sd://vla@vla-video-search\.vla-video-platinum-base\.(\d+)/yandsearch'
)
_META_SOURCE_DEPLOY_TIER0_VLA_RE = re.compile(
    r'sd://vla@vla-video-search\.vla-video-tier0-base\.(\d+)/yandsearch'
)
# Shard number searched for in meta configs and, zero-padded to three digits,
# in base shard names.
shard_num = 9


def _filter_meta_sources(meta_host, meta_port, base_regexp):
    """Tell whether the meta instance at meta_host:meta_port lists shard ``shard_num``.

    The instance config is downloaded from ``/yandsearch?info=getconfig`` and the
    first capture group of every ``base_regexp`` match is read as an integer.

    :param meta_host: host name of the meta search instance
    :param meta_port: port of the meta search instance
    :param base_regexp: compiled pattern whose group 1 captures a shard number
    :return: True if ``shard_num`` is among the captured numbers; False otherwise,
        including when the config could not be retrieved
    """
    logging.info('Retrieving config for {}:{}'.format(meta_host, meta_port))
    meta_url = 'http://{}:{}/yandsearch?info=getconfig'.format(meta_host, meta_port)
    try:
        response = urllib2.urlopen(meta_url)
        try:
            data = response.read()
        finally:
            # Always release the connection; one is opened per probed instance.
            response.close()
    except (IOError, httplib.HTTPException) as e:
        # Best effort: an unreachable instance is simply treated as not matching.
        logging.info('Retrieving config for {}:{} failed, {}'.format(meta_host, meta_port, e))
        return False

    return any(int(m.group(1)) == shard_num for m in base_regexp.finditer(data))


class MinSerpSearchQueriesParameter(parameters.SandboxIntegerParameter):
    """Integer task parameter: minimum number of serp search queries.

    VideoLoadBasesearchResources._get_min_queries reports this value under the
    ``serp_search_queries_attributes`` key.
    """

    name = 'min_serp_search_queries'
    # User-facing label; the original text had the typo "seaerch".
    description = 'Minimum queries (serp search)'
    default_value = 100000


class SerpSearchQueriesAttrsParameter(parameters.SandboxStringParameter):
    """String task parameter with attributes for the serp search query set.

    Besides the usual parameter fields the class carries ``queries_*``
    attributes describing the query set itself; presumably they are read by
    blr.BaseLoadMediaBasesearchResources (that code is not in this file).
    """

    # Query-set definition.
    queries_description = 'only serp search queries'
    queries_file = 'serp_search_queries'
    # A query containing the _SEARCH marker in which _RELATED starts neither in
    # the text before the marker nor in the text after it.
    # NOTE(review): both "+" quantifiers demand at least one character on each
    # side of the marker, so the ^ and $ alternatives of _SEARCH never apply here.
    queries_filter = re.compile(r"^(?:(?!{0}).)+{1}(?:(?!{0}).)+$".format(_RELATED, _SEARCH))

    # Sandbox parameter definition.
    default_value = blr._DEFAULT_ATTRIBUTES
    description = 'Set attrs to serp search queries (e.g. attr1=v1, attr2=v2)'
    name = 'serp_search_queries_attributes'


class MinSerpFactorsQueriesParameter(parameters.SandboxIntegerParameter):
    """Integer task parameter: minimum number of serp factors queries.

    VideoLoadBasesearchResources._get_min_queries reports this value under the
    ``serp_factors_queries_attributes`` key.
    """

    default_value = 100000
    description = 'Minimum factors queries (serp)'
    name = 'min_serp_factors_queries'


class SerpFactorsQueriesAttrsParameter(parameters.SandboxStringParameter):
    """String task parameter with attributes for the serp factors query set.

    Besides the usual parameter fields the class carries ``queries_*``
    attributes describing the query set itself; presumably they are read by
    blr.BaseLoadMediaBasesearchResources (that code is not in this file).
    """

    # Query-set definition.
    queries_description = 'only serp factors queries'
    queries_file = 'serp_factors_queries'
    # A query containing the _FACTORS marker in which _RELATED starts neither in
    # the text before the marker nor in the text after it.
    # NOTE(review): both "+" quantifiers demand at least one character on each
    # side of the marker, so the ^ and $ alternatives of _FACTORS never apply here.
    queries_filter = re.compile(r"^(?:(?!{0}).)+{1}(?:(?!{0}).)+$".format(_RELATED, _FACTORS))

    # Sandbox parameter definition.
    default_value = blr._DEFAULT_ATTRIBUTES
    description = 'Set attrs to serp factors queries'
    name = 'serp_factors_queries_attributes'


class MinRelatedSearchQueriesParameter(parameters.SandboxIntegerParameter):
    """Integer task parameter: minimum number of related search queries.

    VideoLoadBasesearchResources._get_min_queries reports this value under the
    ``related_search_queries_attributes`` key.
    """

    default_value = 10000
    description = 'Minimum search queries (related)'
    name = 'min_related_search_queries'


class RelatedSearchQueriesAttrsParameter(parameters.SandboxStringParameter):
    """String task parameter with attributes for the related search query set.

    Besides the usual parameter fields the class carries ``queries_*``
    attributes describing the query set itself; presumably they are read by
    blr.BaseLoadMediaBasesearchResources (that code is not in this file).
    """

    # Query-set definition.
    queries_description = 'only related search queries'
    queries_file = 'related_search_queries'
    # A query holding both the _RELATED flag and the _SEARCH marker, in either order.
    # NOTE(review): each fragment consumes its neighbouring "&", so a query where
    # the two fragments share a single "&" separator is not matched.
    queries_filter = re.compile(r"{0}.*{1}|{1}.*{0}".format(_RELATED, _SEARCH))

    # Sandbox parameter definition.
    default_value = blr._DEFAULT_ATTRIBUTES
    description = 'Set attrs to related search queries (e.g. attr1=v1, attr2=v2)'
    name = 'related_search_queries_attributes'


class MinRelatedFactorsQueriesParameter(parameters.SandboxIntegerParameter):
    """Integer task parameter: minimum number of related factors queries.

    VideoLoadBasesearchResources._get_min_queries reports this value under the
    ``related_factors_queries_attributes`` key.
    """

    default_value = 10000
    description = 'Minimum factors queries (related)'
    name = 'min_related_factors_queries'


class RelatedFactorsQueriesAttrsParameter(parameters.SandboxStringParameter):
    """String task parameter with attributes for the related factors query set.

    Besides the usual parameter fields the class carries ``queries_*``
    attributes describing the query set itself; presumably they are read by
    blr.BaseLoadMediaBasesearchResources (that code is not in this file).
    """

    # Query-set definition.
    queries_description = 'only related factors queries'
    queries_file = 'related_factors_queries'
    # A query holding both the _RELATED flag and the _FACTORS marker, in either order.
    # NOTE(review): each fragment consumes its neighbouring "&", so a query where
    # the two fragments share a single "&" separator is not matched.
    queries_filter = re.compile(r"{0}.*{1}|{1}.*{0}".format(_RELATED, _FACTORS))

    # Sandbox parameter definition.
    default_value = blr._DEFAULT_ATTRIBUTES
    description = 'Set attrs to related factors queries'
    name = 'related_factors_queries_attributes'


class MinVideotopQueriesParameter(parameters.SandboxIntegerParameter):
    """Integer task parameter: minimum number of videotop queries.

    VideoLoadBasesearchResources._get_min_queries reports this value under the
    ``videotop_queries_attributes`` key.
    """

    default_value = 0
    description = 'Minimum queries (videotop)'
    name = 'min_videotop_queries'


class VideotopQueriesAttrsParameter(parameters.SandboxStringParameter):
    """String task parameter with attributes for the videotop query set.

    Besides the usual parameter fields the class carries ``queries_*``
    attributes describing the query set itself; presumably they are read by
    blr.BaseLoadMediaBasesearchResources (that code is not in this file).
    """

    # Query-set definition.
    queries_description = 'only videotop queries'
    queries_file = 'videotop_queries'
    # A query holding both the _VTOP flag and the _SEARCH marker, in either order.
    # NOTE(review): each fragment consumes its neighbouring "&", so a query where
    # the two fragments share a single "&" separator is not matched.
    queries_filter = re.compile(r"{0}.*{1}|{1}.*{0}".format(_VTOP, _SEARCH))

    # Sandbox parameter definition.
    default_value = blr._DEFAULT_ATTRIBUTES
    description = 'Set attrs to videotop queries (e.g. attr1=v1, attr2=v2)'
    name = 'videotop_queries_attributes'


class MinVideohubQueriesParameter(parameters.SandboxIntegerParameter):
    """Integer task parameter: minimum number of videohub queries.

    VideoLoadBasesearchResources._get_min_queries reports this value under the
    ``videohub_queries_attributes`` key.
    """

    default_value = 0
    description = 'Minimum queries (videohub)'
    name = 'min_videohub_queries'


class VideohubQueriesAttrsParameter(parameters.SandboxStringParameter):
    """String task parameter with attributes for the videohub query set.

    Besides the usual parameter fields the class carries ``queries_*``
    attributes describing the query set itself; presumably they are read by
    blr.BaseLoadMediaBasesearchResources (that code is not in this file).
    """

    # Query-set definition.
    queries_description = 'only videohub queries'
    queries_file = 'videohub_queries'
    # A query holding both the _VIDEOHUB flag and the _SEARCH marker, in either order.
    # NOTE(review): each fragment consumes its neighbouring "&", so a query where
    # the two fragments share a single "&" separator is not matched.
    queries_filter = re.compile(r"{0}.*{1}|{1}.*{0}".format(_VIDEOHUB, _SEARCH))

    # Sandbox parameter definition.
    default_value = blr._DEFAULT_ATTRIBUTES
    description = 'Set attrs to videohub queries (e.g. attr1=v1, attr2=v2)'
    name = 'videohub_queries_attributes'


class MinModelshubQueriesParameter(parameters.SandboxIntegerParameter):
    """Integer task parameter: minimum number of modelshub queries.

    VideoLoadBasesearchResources._get_min_queries reports this value under the
    ``modelshub_queries_attributes`` key.
    """

    default_value = 2000
    description = 'Minimum queries (modelshub)'
    name = 'min_modelshub_queries'


class ModelshubQueriesAttrsParameter(parameters.SandboxStringParameter):
    """String task parameter with attributes for the modelshub query set.

    Besides the usual parameter fields the class carries ``queries_*``
    attributes describing the query set itself; presumably they are read by
    blr.BaseLoadMediaBasesearchResources (that code is not in this file).
    """

    # Query-set definition.
    queries_description = 'only modelshub queries'
    queries_file = 'modelshub_queries'
    # The filter is the _MODELSHUB fragment on its own, with nothing added.
    queries_filter = re.compile(_MODELSHUB)

    # Sandbox parameter definition.
    default_value = blr._DEFAULT_ATTRIBUTES
    description = 'Set attrs to modelshub queries (e.g. attr1=v1, attr2=v2)'
    name = 'modelshub_queries_attributes'


class VideoLoadBasesearchResources(blr.BaseLoadMediaBasesearchResources):
    """Load basesearch resources for video search.

    Specialises ``blr.BaseLoadMediaBasesearchResources`` with the video query
    classes (serp, related, videohub, videotop, modelshub) and with discovery of
    the VLA base and meta instances that serve shard ``shard_num``.
    """

    type = 'VIDEO_LOAD_BASESEARCH_RESOURCES'

    common_parameters = (
        GetTier0Parameter,
        MinSerpSearchQueriesParameter,
        MinRelatedSearchQueriesParameter,
        MinSerpFactorsQueriesParameter,
        MinRelatedFactorsQueriesParameter,
        MinVideohubQueriesParameter,
        MinVideotopQueriesParameter,
        MinModelshubQueriesParameter
    ) + blr.BaseLoadMediaBasesearchResources.common_parameters

    queries_parameters = (
        SerpSearchQueriesAttrsParameter,
        SerpFactorsQueriesAttrsParameter,
        RelatedSearchQueriesAttrsParameter,
        RelatedFactorsQueriesAttrsParameter,
        VideohubQueriesAttrsParameter,
        VideotopQueriesAttrsParameter,
        ModelshubQueriesAttrsParameter
    ) + blr.BaseLoadMediaBasesearchResources.queries_parameters

    input_parameters = common_parameters + queries_parameters

    def _get_min_queries(self):
        """Map every query-attrs parameter name to its configured minimum.

        :return: dict of ``<attrs parameter name> -> <value of the matching
            Min*QueriesParameter in self.ctx>``; entries supplied by the base
            class are merged in last and win on key collisions
        """
        min_queries = {
            SerpSearchQueriesAttrsParameter.name: self.ctx[MinSerpSearchQueriesParameter.name],
            SerpFactorsQueriesAttrsParameter.name: self.ctx[MinSerpFactorsQueriesParameter.name],
            RelatedSearchQueriesAttrsParameter.name: self.ctx[MinRelatedSearchQueriesParameter.name],
            RelatedFactorsQueriesAttrsParameter.name: self.ctx[MinRelatedFactorsQueriesParameter.name],
            VideohubQueriesAttrsParameter.name: self.ctx[MinVideohubQueriesParameter.name],
            VideotopQueriesAttrsParameter.name: self.ctx[MinVideotopQueriesParameter.name],
            ModelshubQueriesAttrsParameter.name: self.ctx[MinModelshubQueriesParameter.name],
        }
        min_queries.update(blr.BaseLoadMediaBasesearchResources._get_min_queries(self))
        return min_queries

    def _get_plan_resource_type(self):
        """Return the resource type used for plans (``BASESEARCH_PLAN``)."""
        return resource_types.BASESEARCH_PLAN

    def _get_basesearch_tag(self, index_type, region):
        """Return the basesearch tag VideoSettings gives for index_type and region."""
        return VideoSettings.basesearch_tag(index_type, region, VideoSettings.get_nanny_token(self))

    def _get_base_instances(self, index_type, region):
        """List the base instances that serve shard ``shard_num``.

        Hosts of the tier0 or platinum base group (chosen by GetTier0Parameter)
        are enumerated with ``sky list``. Each host is asked for
        ``/yandsearch?ms=proto``, the reply is parsed as ``meta_pb2.TReport`` and
        the host is kept when its ``Shard`` property starts with the expected
        shard name prefix. Hosts that cannot be fetched are logged and skipped.

        :param index_type: not used by this implementation
        :param region: not used by this implementation
        :return: list of dicts with key ``shard`` and, when the report carries a
            ``HostName`` property, ``host`` and ``port``; an empty list when
            ``sky list`` fails or prints no hosts
        """
        is_tier0 = self.ctx[GetTier0Parameter.name]
        shard = str(shard_num).zfill(3)
        shard_prefix = ("vidsidx" if is_tier0 else "vidsidxpt") + "-" + shard
        command = "sky list Y@vla-video-search.vla-video-" + ("tier0" if is_tier0 else "platinum") + "-base"

        proc = sp.Popen(('bash', '-c', command), stdout=sp.PIPE, stderr=sp.PIPE)
        out, err = proc.communicate()
        if proc.wait() != 0:
            logging.info('Failed to get hosts for vla-video_shard_-base, {}'.format(err))
            return []

        # Drop blank lines (the output normally ends with a newline); a blank
        # host used to produce a pointless request to "http:///yandsearch".
        hosts = [line.strip() for line in out.splitlines() if line.strip()]
        if not hosts:
            return []

        # Imported lazily, only once there is something to parse.
        from sandbox.projects.common.base_search_quality.tree import meta_pb2

        instances = []
        for pod_host in hosts:
            base_url = 'http://{}/yandsearch?ms=proto'.format(pod_host)
            try:
                response = urllib2.urlopen(base_url)
                try:
                    data = response.read()
                finally:
                    # Always release the connection; one is opened per host.
                    response.close()
            except (IOError, httplib.HTTPException) as e:
                logging.info('Retrieving data for {} failed, {}'.format(pod_host, e))
                continue

            report = meta_pb2.TReport()
            report.ParseFromString(data)
            instance = {}
            for prop in report.SearcherProp:
                if prop.Key == "HostName":
                    instance["host"], instance["port"] = prop.Value.split(":")
                elif prop.Key == "Shard" and prop.Value.startswith(shard_prefix):
                    instance["shard"] = shard
            if "shard" in instance:
                logging.info("Adding instance {}".format(instance))
                instances.append(instance)

        return instances

    def _get_meta_type_and_instances(self, index_type, region):
        """Pick the middle search type and the meta instances that know the shard.

        :param index_type: ``INDEX_MAIN`` selects the INT type and service, any
            other value selects MMETA
        :param region: not used by this implementation
        :return: ``(middle_type, meta_instances)`` where ``meta_instances`` is a
            list of ``(hostname, port, set())`` tuples for the instances accepted
            by ``_filter_meta_sources``
        """
        if index_type == INDEX_MAIN:
            middle_type = VideoSettings.INT_ID
            service_name = VideoSettings.INT_NANNY_SERVICE_NAME
        else:
            middle_type = VideoSettings.MMETA_ID
            service_name = VideoSettings.MMETA_NANNY_SERVICE_NAME_VLA

        if self.ctx[GetTier0Parameter.name]:
            base_regexp = _META_SOURCE_DEPLOY_TIER0_VLA_RE
        else:
            base_regexp = _META_SOURCE_DEPLOY_VLA_RE

        nanny_client = VideoSettings.get_nanny(VideoSettings.get_nanny_token(self))
        instances = nanny_client.get_service_current_instances(service_name)['result']

        # NOTE(review): the config is fetched via ``container_hostname`` while
        # ``hostname`` is what gets returned - presumably intentional; confirm.
        meta_instances = [
            (meta['hostname'], meta['port'], set())
            for meta in instances
            if _filter_meta_sources(meta['container_hostname'], meta['port'], base_regexp)
        ]
        return middle_type, meta_instances

    def _filter_shard_metas(self, meta_instances, shard_hosts):
        """Convert meta instance tuples to ``{"host", "port"}`` dicts.

        No filtering is done here because ``_get_meta_type_and_instances`` has
        already kept only the relevant metas.

        :param meta_instances: iterable of tuples whose first two items are host and port
        :param shard_hosts: not used by this implementation
        :return: list of dicts with keys ``host`` and ``port``
        """
        return [
            {"host": meta[0], "port": meta[1]}
            for meta in meta_instances
        ]
