import json
import logging
import math
import threading
import time
import urllib2

from collections import defaultdict

from sandbox.projects.release_machine.tasks.ScrapeRequests2 import static as st

# Key in a per-source settings dict: maximum allowed absolute difference
# between the unanswered ratios reported by the two compared responses.
MAX_DELTA = "max_delta"
# Key in a per-source settings dict: the responses are rejected when the
# larger of the two unanswered ratios reaches this value.
MAX_UNANSWER = "max_unanswer"
# Unanswered ratios below this value are treated as zero.
EPS = 1e-6


class TAskThread(threading.Thread):
    """Fetch one query from two beta endpoints and keep a comparable pair of responses.

    The thread issues the same query to every prepared request, optionally
    checks per-source "unanswered" statistics found in the JSON responses and
    stores the accepted pair in ``ResponseBeta1`` / ``ResponseBeta2``.  Both
    attributes stay empty strings when no acceptable pair was obtained.
    """

    # Top-level key of the nested response layout (see GetUnansweredUpperNode).
    FirstKey = "tmpl_data"

    def __init__(self, query, region, uid, urls, collections, tries, headers=None, jsonToStandardFormatFlags=None):
        """Prepare one request per (url, header) pair.

        :param query: query text; quoted and substituted into ``{text}`` of every url template.
        :param region: value substituted into ``{lr}`` of every url template.
        :param uid: optional yandexuid; when truthy it is added to the url and the Cookie header.
        :param urls: url templates, one per beta.
        :param collections: stored as-is in ``self.Collections``.
        :param tries: number of outer attempts made by ``run``.
        :param headers: one header dict per url (defaults to two empty dicts).
            The dicts are copied, so the caller's objects are never modified.
        :param jsonToStandardFormatFlags: per-url flags telling ``run`` to convert
            the response with ``JsonToStandardFormat`` (defaults to ``[False, False]``).
        """
        threading.Thread.__init__(self)
        self.Query = query
        self.Region = region
        self.Uid = uid

        # Fresh objects per instance: mutable defaults in the signature would be
        # shared between all threads.
        if headers is None:
            headers = [{}, {}]
        if jsonToStandardFormatFlags is None:
            jsonToStandardFormatFlags = [False, False]

        self.Requests = []
        for (url, header) in zip(urls, headers):
            # Personalize() writes the Cookie into the dict it receives; work on a
            # copy so the cookie of one thread cannot leak into another one.
            header = dict(header)
            final_url = self.Personalize(url.format(text=urllib2.quote(query), lr=region), header)
            self.Requests.append(urllib2.Request(final_url, headers=header))

        self.Collections = collections
        self.JsonToStandardFormatFlags = jsonToStandardFormatFlags

        self.Tries = tries
        self.ResponseBeta1 = self.ResponseBeta2 = ""
        self.SourcesTries = 0
        self.Sources = defaultdict(dict)
        self.SuccessOnlySources = []
        self.TryTillFullSuccess = True
        # Statistics of the best response pair seen so far (see CheckUnansweredSources).
        self.BestResponse = []

    def Personalize(self, url, headers):
        """Return ``url`` with the ``yandexuid`` parameter appended.

        When ``self.Uid`` is falsy the url is returned untouched and ``headers``
        is not modified.  Otherwise the same parameter is also stored as the
        ``Cookie`` entry of ``headers`` (the dict is updated in place).
        """
        if not self.Uid:
            return url

        param = 'yandexuid={}'.format(self.Uid)
        headers.update({'Cookie': param})

        # No extra separator is needed when the url already ends with '?' or '&'.
        separator = '' if url.endswith(('?', '&')) else '&'
        return url + separator + param

    def GetQueryInfo(self):
        """Return a short description of the query for log messages."""
        return "query '%s', region %s, uid %s" % (self.Query, self.Region, self.Uid)

    def SetUnansweredSources(self, sources, tries, threshold, successOnlySources, tryTillFullSuccess):
        """Configure the unanswered-sources check.

        :param sources: comma separated source descriptions.  Each item is either
            ``NAME`` or ``NAME:key=value|key=value``; values are parsed as floats.
            Sources without an explicit ``max_delta`` get ``threshold``.
        :param tries: number of inner attempts made by ``run`` for every outer try.
        :param threshold: default ``max_delta`` value.
        :param successOnlySources: comma separated names of sources that must have
            a zero unanswered ratio in both responses.
        :param tryTillFullSuccess: when false, a pair that passes all limits is
            accepted even if some sources are partially unanswered.
        """
        self.TryTillFullSuccess = tryTillFullSuccess
        self.SuccessOnlySources = [item.strip() for item in successOnlySources.split(",") if item.strip()]
        self.SourcesTries = tries
        self.BestResponse = []

        for item in sources.split(","):
            item = item.strip()
            if not item:
                continue
            if ":" not in item:
                self.Sources[item][MAX_DELTA] = threshold
                continue

            name, params = item.rsplit(":", 1)
            # Whitespace before the colon is not a part of the source name.
            name = name.strip()
            settings = self.Sources[name]
            for param in params.strip().split("|"):
                # Silently skip anything that is not a single key=value pair.
                if param.count("=") != 1:
                    continue
                key, value = param.split("=")
                settings[key.strip()] = float(value)
            settings.setdefault(MAX_DELTA, threshold)

    @staticmethod
    def GetUnansweredUpperNode(response):
        """Return the UPPER properties node of a parsed response.

        Two layouts are supported: a flat ``"search_props.UPPER.0.properties"``
        key, or the nested path ``FirstKey -> search_props -> UPPER -> 0 -> properties``.
        Lookup errors of the nested path propagate to the caller.
        """
        flat_key = "search_props.UPPER.0.properties"
        if flat_key in response:
            return response[flat_key]
        return response[TAskThread.FirstKey]["search_props"]["UPPER"][0]["properties"]

    @staticmethod
    def HasUnansweredUpper(responses, collections):
        """Return True when any response has no usable UPPER properties node.

        A response counts as unanswered when it cannot be parsed, the node cannot
        be located, or the node is empty.  ``collections`` is not used.
        """
        for raw in responses:
            try:
                node = TAskThread.GetUnansweredUpperNode(json.loads(raw))
                if len(node) == 0:
                    return True
            except Exception as e:
                logging.info("Cannot load json data: %s" % e)
                return True
        return False

    def GetSourcesUnansweredRatio(self, response, firstKey, sources):
        """Return ``{source: ratio}`` for the given sources of a parsed response.

        The ``Unanswer_<source>`` property is expected to look like ``"A/B"``;
        the ratio is ``B / A``.  Sources without such a property are logged and
        omitted from the result.  ``firstKey`` is not used.

        :raises Exception: when ``A <= 0``, ``B < 0`` or ``B > A``.
        """
        result = {}
        for source in sources:
            try:
                data = self.GetUnansweredUpperNode(response)["Unanswer_" + source]
            except Exception as e:
                logging.info("Cannot get statistics about unanswered metas for %s source, %s: %s" % (source.encode("utf-8"), self.GetQueryInfo(), e))
                continue
            data = data.split("/")
            total = float(data[0])
            unanswered = float(data[1])
            if total <= 0.0 or unanswered < 0.0 or unanswered > total:
                raise Exception("Unable to calculate unanswered ratio for %s source. Ratio is %s/%s." % (source, data[1], data[0]))
            result[source] = unanswered / total
        return result

    def CheckUnansweredSources(self, responses):
        """Validate a pair of raw responses against the configured source limits.

        Returns True when the pair was accepted and stored in ``ResponseBeta1`` /
        ``ResponseBeta2``.  Returns False when the pair was rejected, or when it
        passed the limits but is only remembered as the best candidate so far
        (``TryTillFullSuccess`` mode).
        """
        if len(responses) != 2:  # Always should be checked two betas
            return False

        stat = []
        statSuccessOnly = []
        for raw in responses:
            response = json.loads(raw)
            stat.append(self.GetSourcesUnansweredRatio(response, self.FirstKey, self.Sources.keys()))
            statSuccessOnly.append(self.GetSourcesUnansweredRatio(response, self.FirstKey, self.SuccessOnlySources))
        logging.info("Statistics about unanswered sources for %s: %s and %s for success only" % (self.GetQueryInfo(), stat, statSuccessOnly))

        # Success-only sources must be fully answered in both responses.
        for source in self.SuccessOnlySources:
            if max(statSuccessOnly[0].get(source, 0), statSuccessOnly[1].get(source, 0)) >= EPS:
                logging.info("Non zero unanswered state for source %s, skip %s" % (source.encode("utf-8"), self.GetQueryInfo()))
                return False

        allSucceeded = True
        for source in self.Sources.keys():
            limits = self.Sources[source]
            first = stat[0].get(source, 0)
            second = stat[1].get(source, 0)
            worst = max(first, second)
            if math.fabs(first - second) >= limits[MAX_DELTA]:
                logging.info("Bad unanswered state for source %s, skip %s" % (source.encode("utf-8"), self.GetQueryInfo()))
                return False
            if MAX_UNANSWER in limits and limits[MAX_UNANSWER] <= worst:
                logging.info("Too big unanswered ratio for source %s, skip %s" % (source.encode("utf-8"), self.GetQueryInfo()))
                return False
            allSucceeded = allSucceeded and worst < EPS

        if allSucceeded:
            logging.info("There are no unanswered sources for %s, save responses" % self.GetQueryInfo())
            self.ResponseBeta1, self.ResponseBeta2 = responses
            return True

        if not self.TryTillFullSuccess:
            logging.info("There are some unanswered sources for %s, but not much, save responses" % self.GetQueryInfo())
            self.ResponseBeta1, self.ResponseBeta2 = responses
            return True

        # The pair is within limits but not perfect: remember it if it is not
        # worse than the best one seen so far and let the caller retry.
        if not self.BestResponse:
            logging.info("Update data for %s: new responses with same unanswered states" % self.GetQueryInfo())
            self.BestResponse = stat
            self.ResponseBeta1, self.ResponseBeta2 = responses
            return False

        for source in self.Sources.keys():
            current = min(stat[0].get(source, 0), stat[1].get(source, 0))
            previous = min(self.BestResponse[0].get(source, 0), self.BestResponse[1].get(source, 0))
            if current > previous:
                logging.info("Previous response was better for source %s, ignore this one for %s" % (source.encode("utf-8"), self.GetQueryInfo()))
                return False

        logging.info("Update data for %s: this responses is better than previous" % self.GetQueryInfo())
        self.BestResponse = stat
        self.ResponseBeta1, self.ResponseBeta2 = responses
        return False

    def run(self):
        """Download the responses, retrying until an acceptable pair is found.

        Makes up to ``Tries`` outer attempts; each of them makes up to
        ``max(1, SourcesTries)`` inner attempts.  Any exception aborts the
        current outer attempt and is logged.  The result is left in
        ``ResponseBeta1`` / ``ResponseBeta2``.
        """
        for tries in range(self.Tries):
            try:
                for sourcesTries in range(max(1, self.SourcesTries)):
                    logging.info("Start getting response for %s, try #%d, source try #%d" % (self.GetQueryInfo(), tries, sourcesTries))
                    responses = []
                    for (request, jsonToStandardFormat) in zip(self.Requests, self.JsonToStandardFormatFlags):
                        connection = urllib2.urlopen(request, timeout=60)
                        try:
                            responseJsonDump = connection.read()
                        finally:
                            # Release the socket even when reading fails.
                            connection.close()
                        if jsonToStandardFormat:
                            responseJsonDump = st.JsonFormatter.JsonToStandardFormat(responseJsonDump)
                        responses.append(st.JsonFormatter.CheckJson(responseJsonDump))
                    logging.info("End getting response for %s, try #%d, source try #%d" % (self.GetQueryInfo(), tries, sourcesTries))

                    check = False
                    if self.HasUnansweredUpper(responses, self.Collections):
                        logging.info("Unanswered UPPER source for %s" % self.GetQueryInfo())
                    elif self.Sources or self.SuccessOnlySources:
                        check = self.CheckUnansweredSources(responses)
                    else:
                        # Nothing to verify: any pair with an answered UPPER is fine.
                        check = True
                        self.ResponseBeta1, self.ResponseBeta2 = responses
                    if check:
                        return
                    time.sleep(5)  # Timeout between tries for unanswered sources mode
                # A best-effort pair may have been stored by CheckUnansweredSources.
                if len(self.ResponseBeta1):
                    return
            except Exception as e:
                logging.info("Error has been occurred while trying to get responses for %s: %s" % (self.GetQueryInfo(), e))
                time.sleep(10)  # Timeout between bad requests
        logging.info("Cannot get responses for %s, it would be skipped" % self.GetQueryInfo())