# -*- coding: utf-8 -*-

import logging
import time
import json
import Queue
import requests
import re


from multiprocessing import Queue as MtpQueue
from multiprocessing import Process


import sandbox.sdk2 as sdk2
from sandbox.sdk2 import svn
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.sandboxsdk.channel import channel
from sandbox.projects.common.search.requester import Requester
from sandbox.projects.common import solomon
import sandbox.common.errors as errors


class GeosearchCacheUpdater(sdk2.Task):
    """
        Обновление кэша организаций с низким RPS(12)
    """
    class Parameters(sdk2.Parameters):
        # Task inputs and outputs, declared group by group.

        # Prefix of the metric names built in save_stat().
        project_name = sdk2.parameters.String('название проекта', default="realty", default_value="realty")
        with sdk2.parameters.Group("Параметры запросов") as requests_params:
            # Chooses the source of ids in get_permalinks(): a file in Arcadia (True) or a YT table (False).
            from_arcadia = sdk2.parameters.Bool('Таблица в Аркадии или из YT', default_value=False, default=False)
            with from_arcadia.value[True]:
                # Arcadia URL of the file with ids, one id per line.
                request_pool_url = sdk2.parameters.ArcadiaUrl('url корзины запросов(arcadia:/arc/trunk/arcadia/path_in_arcadia)', default_value="arcadia:/arc/trunk/arcadia/junk/gav1995/pools/org_relev_result_ids.csv")
            with from_arcadia.value[False]:
                # Directory listed in YT; among the entries matching the `yt_file` regex
                # the last one in sorted order is read.
                yt_dir = sdk2.parameters.String('YT_dir', default_value='//home/geosearch-prod/snippets')
                yt_file = sdk2.parameters.String('file expr', default_value=r'geosearch-snippets-realty-\d+-finished')
                with sdk2.parameters.Output:
                    # Set by get_permalinks() to the path of the table it picked.
                    yt_table = sdk2.parameters.String("yt table")

            # Base URL and CGI suffix that create_requests() combines into every request URL.
            balanser = sdk2.parameters.String('url балансера', default='http://addrs.yandex.ru:17140', default_value='http://addrs.yandex.ru:17140')
            cgi_params = sdk2.parameters.String('cgi  параметры', default='lang=ru&ms=json&origin=wizgeo-common-new_tech&rearr=scheme_Local/Geo/DisableSnippetsSourceForBusinessV=1&snippets=afisha_json/1.x,afisha_json_geozen/1.x,businessimages/1.x,businessrating/2.x,categories_inside/1.x,chat_status/1.x,chat_xml/1.x,companies_inside/1.x,companies_inside_light/1.x,drug_info/1.x,edadeal/1.x,entity/1.x,events/1.x,exchange/1.x,experimental/1.x,fuel/1.x,links/1.x,masstransit/1.x,matchedobjects/1.x,matching_inner_orgs/1.x,offers_new_auto/1.x,offers_used_auto/1.x,org_wizard_factors/1.x,panoramas/1.x,phones/1.x,photos/1.x,photos/2.x,realty/1.x,related_places/1.x,route_distances/1.x,router/1.x,sprav_ugc/1.x,subtitle/1.x,toponym_afisha_cinema/1.x,toponym_afisha_events/1.x,user_oid_matching/1.x,visits_histogram/1.x,yabilet_reference/1.x,yandex_travel/1.x,chains/1.x&mslevel=1')

        # Settings handed to MyRequester.init() in on_execute().
        with sdk2.parameters.Group("Параметры обработчика запросов") as requester:
            # Upper bound on requests enqueued per second.
            rps = sdk2.parameters.Integer('rps', default=14, default_value=14)
            # Number of worker processes started by MyRequester.send_requests().
            process_count = sdk2.parameters.Integer('количество процессов', default=5, default_value=5)
            # Forwarded to the workers with every request (as `re_try_limit`).
            retry_limit = sdk2.parameters.Integer('retry limit', default=3, default_value=3)
            # Forwarded to the workers with every request; presumably seconds — TODO confirm.
            request_timeout = sdk2.parameters.Float('request_timeout', default=0.15, default_value=0.15)

        # Settings used by save_to_saas() and wait_ferryman_batch().
        with sdk2.parameters.Group("Параметры Ferryman"):
            ferryman_url = sdk2.parameters.Url("url", default="http://geo-cache.ferryman.n.yandex-team.ru", default_value="http://geo-cache.ferryman.n.yandex-team.ru")
            # YT table the documents are written to and that is then submitted to Ferryman.
            ferryman_yt_table = sdk2.parameters.String("yt-table", default="//home/geosearch/gav1995/afisha/realty_for_ferryman", default_value="//home/geosearch/gav1995/afisha/realty_for_ferryman")
            # Sent as 'Namespace' and 'Delta' in the add-full-tables request.
            ferryman_namespace = sdk2.parameters.String("namespace", default="1", default_value="1")
            ferryman_delta = sdk2.parameters.Bool("delta", default=False, default_value=False)
            with sdk2.parameters.Output:
                    # Set by save_to_saas() to the number of one-second waits spent polling the batch status.
                    time = sdk2.parameters.Integer("cooking time")

        # Labels attached to the metrics in push_results().
        with sdk2.parameters.Group("solomon parameters"):
            solomon_project = sdk2.parameters.String('solomon project', default="geo_cache_updater", default_value="geo_cache_updater")
            solomon_cluster = sdk2.parameters.String('solomon cluster', default="stat", default_value="stat")
            solomon_service = sdk2.parameters.String('solomon service', default="stat", default_value="stat")


    class Requirements(sdk2.Task.Requirements):
        # Pip packages installed for the task; 'yandex-yt' presumably provides
        # the `yt.wrapper` module imported inside the task methods — TODO confirm.
        environments = [PipEnvironment('yandex-yt')]

    def on_execute(self):
        """Run one full cache refresh.

        Steps: read the ids, obtain a TVM ticket, send one request per id
        through MyRequester, upload the collected documents via
        save_to_saas() and push the request/response statistics.

        On any failure the error is logged with its traceback, zeroed
        statistics are pushed and the original exception is re-raised.
        """
        try:
            self.ids = self.get_permalinks()
            ticket = self.get_ticket()
            reqs = self.create_requests(self.ids, ticket)
            requester = MyRequester()
            requester.init(
                reqs,
                self.Parameters.rps,
                self.Parameters.process_count,
                self.Parameters.request_timeout,
                self.Parameters.retry_limit,
                self.ids,
            )
            requester.send_requests()
            self.save_to_saas(requester.to_yt)
            self.save_stat(self.ids, requester.result)
        except Exception as ex:
            logging.critical('________Error:%s', ex, exc_info=True)
            self.save_stat({}, {})
            # A bare `raise` keeps the original traceback; `raise ex` would
            # restart it at this line under Python 2.
            raise

    def get_permalinks(self):
        """Return the list of ids to request.

        With `from_arcadia` set, the parent directory of `request_pool_url`
        is checked out from Arcadia and the file is read line by line
        (trailing whitespace stripped, order and duplicates preserved).

        Otherwise the last table (in sorted order) under `yt_dir` whose name
        matches the `yt_file` regex is read from YT; its non-null `permalink`
        values are de-duplicated and returned sorted. An empty table is
        re-read once a second, up to 200 times.

        Raises:
            errors.TaskFailure: no table matches `yt_file`, or the table is
                still empty after all retries.
        """
        if self.Parameters.from_arcadia:
            # rpartition gives ('', '', url) for a URL without '/', which is
            # the same split the join/split pair used to produce.
            data_dir_path, _, filename = self.Parameters.request_pool_url.rpartition('/')
            data_path = svn.Arcadia.checkout(data_dir_path, "data")
            with open('{}/{}'.format(data_path, filename), 'r') as pool_file:
                return [line.rstrip() for line in pool_file]

        import yt.wrapper as yt
        yt.config["proxy"]["url"] = "hahn"
        yt.config["token"] = sdk2.Vault.data('gav1995', 'YT_token')
        tables_json = yt.list(self.Parameters.yt_dir, format='json')
        tables = sorted(
            name for name in json.loads(tables_json)
            if re.match(self.Parameters.yt_file, name)
        )
        if not tables:
            raise errors.TaskFailure('No tables matching {!r} found in {}'.format(
                self.Parameters.yt_file, self.Parameters.yt_dir))
        table_path = self.Parameters.yt_dir + '/' + tables[-1]
        logging.info('table is ' + table_path)
        self.Parameters.yt_table = table_path

        def read_permalinks():
            # Set of the non-null permalinks currently stored in the table.
            rows = yt.read_table(yt.TablePath(table_path, columns=["permalink"]), format='json')
            return {row['permalink'] for row in rows if row['permalink'] is not None}

        max_retries = 200
        retries = 0
        perms = read_permalinks()
        while not perms and retries < max_retries:
            logging.info('table is empty!!')
            time.sleep(1)
            retries += 1
            perms = read_permalinks()

        # Decide on the data itself rather than on the retry counter, so a
        # table that gets filled on the very last retry is still accepted.
        if not perms:
            raise errors.TaskFailure('Table is empty!!')
        perms = sorted(perms)
        logging.info('table has read!!\n' + str(perms) + '\n')
        return perms

    def get_ticket(self):
        """Request a TVM service ticket and return it as a string.

        The TVM public keys are downloaded, a ServiceContext is built from
        the client id, the vault secret and the keys, and a signed
        `client_credentials` request is posted to the TVM API.

        Raises:
            errors.TaskError: the TVM answer holds no ticket for the
                destination client.
        """
        from ticket_parser2.api.v1 import ServiceContext

        secret = sdk2.Vault.data('gav1995', 'GEOWIZARD_TVM')
        tvm_api_url = 'tvm-api.yandex.net'
        ts = int(time.time())
        src = 2009743
        dst = 2001886

        # Getting TVM keys
        tvm_keys = requests.get(
            'https://{tvm_api_url}/2/keys?lib_version=2.3.0'.format(tvm_api_url=tvm_api_url)
        ).content

        # Create context
        service_context = ServiceContext(src, secret, tvm_keys)

        # Getting tickets
        ticket_response = requests.post(
            'https://%s/2/ticket/' % tvm_api_url,
            data={
                'grant_type': 'client_credentials',
                'src': src,
                'dst': dst,
                'ts': ts,
                'sign': service_context.sign(ts, dst)
            }
        ).json()

        try:
            ticket = ticket_response[str(dst)]['ticket']
        except (KeyError, TypeError):
            # This branch is reached only when no ticket was found for `dst`,
            # so the payload is included to show what TVM answered instead.
            raise errors.TaskError(
                'TVM returned no ticket for dst {}: {}'.format(dst, ticket_response))

        # The ticket is a credential, so only the fact of receiving it is logged.
        logging.info('ticket for dst %s received', dst)
        return ticket

    def create_requests(self, ids, ticket):
        """Lazily yield one request description per id.

        Each item is a list `[url, {}, headers]`: the URL asks the balancer
        for a single id and tags the request with the task id, and the
        headers carry the TVM service ticket. The same headers dict object
        is shared by all yielded items.
        """
        tvm_headers = {'X-Ya-Service-Ticket': ticket}
        url_template = '{balanser}/yandsearch?relev_result_ids=b:{ids}&reqinfo=sandboxtaskid={taskid}&{cgi_params}'
        for permalink in ids:
            url = url_template.format(
                balanser=self.Parameters.balanser,
                ids=permalink,
                taskid=channel.task.id,
                cgi_params=self.Parameters.cgi_params,
            )
            yield [url, {}, tvm_headers]

    def save_stat(self, reqs, res):
        """Build the request/response counters and push them via push_results().

        The metric names are prefixed with the `project_name` parameter.
        The success rate is responses divided by requests, or 0 when there
        were no requests.
        """
        prefix = self.Parameters.project_name
        request_count = len(reqs)
        response_count = len(res)
        if request_count:
            success_rate = float(response_count) / request_count
        else:
            success_rate = 0
        self.push_results({
            prefix + '_request_count': request_count,
            prefix + '_response_count': response_count,
            prefix + '_success_rate': success_rate,
        })

    def save_to_saas(self, data):
        """Write the documents to YT and hand the table over to Ferryman.

        Every item of `data` becomes a row `{"JsonMessage": item}` in the
        `ferryman_yt_table` table. The table is then submitted through
        Ferryman's `add-full-tables` handler, repeating once a second until
        the answer has status 202, and the returned batch is polled once a
        second until wait_ferryman_batch() reports it is finished. The
        number of polls is stored in the `time` output parameter and the
        batch id in `self.batch`.

        Raises:
            errors.TaskError: the accepted answer carries no batch id.
        """
        import yt.wrapper as yt

        logging.info("saving to saas " + str(len(data)) + " docs")
        data_to_yt = [{"JsonMessage": value} for value in data]
        yt.config["proxy"]["url"] = "hahn"
        yt.config["token"] = sdk2.Vault.data('gav1995', 'YT_token')
        yt.write_table(yt.TablePath(self.Parameters.ferryman_yt_table), data_to_yt, format='json')
        logging.info("save data to " + self.Parameters.ferryman_yt_table)

        # Microseconds since the epoch.
        timestamp = int(time.time() * 10 ** 6)
        common_json = {
            'Path': self.Parameters.ferryman_yt_table,
            'Namespace': self.Parameters.ferryman_namespace,
            'Timestamp': timestamp,
            'Delta': self.Parameters.ferryman_delta,
            'Cluster': 'hahn'
        }
        query = {
            'tables': json.dumps([common_json]),
        }
        add_tables_url = '{}/add-full-tables'.format(self.Parameters.ferryman_url)
        response = requests.get(add_tables_url, query)
        logging.info(query['tables'])
        logging.info(response.url)
        while response.status_code != 202:
            logging.info('status_code = ' + str(response.status_code))
            # Pause between attempts so a refusing Ferryman is not hit in a
            # tight busy loop.
            time.sleep(1)
            response = requests.get(add_tables_url, query)
        logging.info(response.text)
        try:
            self.batch = response.json()['batch']
        except (ValueError, KeyError, TypeError):
            # ValueError: body is not JSON; KeyError/TypeError: no 'batch' in it.
            raise errors.TaskError('Request to Ferryman is failed with code ' + str(response.status_code))

        cooking_time = 0
        while self.wait_ferryman_batch():
            cooking_time += 1
            time.sleep(1)
        self.Parameters.time = cooking_time
        logging.info("ferryman was cooked!")

    def wait_ferryman_batch(self):
        """Poll Ferryman once for the status of `self.batch`.

        Returns:
            True while the batch is still being processed (status 'queue',
            'processing', 'transfer' or 'final'); False once it is
            'searchable' with no invalid input.

        Raises:
            errors.TaskError: the answer is not valid JSON.
            errors.TaskFailure: the batch ended with status 'error', became
                'searchable' with a non-empty 'invalid_input', or the answer
                has a missing or unknown status.
        """
        response = requests.get('{}/get-batch-status?batch={}'.format(self.Parameters.ferryman_url, self.batch))
        logging.info(response.text)
        try:
            response_json = response.json()
        except ValueError:
            raise errors.TaskError('Get batch status is failed. response = ' + str(response))

        # A non-dict answer or one without 'status' falls through to the
        # "can't parse" failure below instead of an opaque KeyError/TypeError.
        status = response_json.get('status') if isinstance(response_json, dict) else None

        if status in ('queue', 'processing', 'transfer', 'final'):
            return True
        if status == 'searchable':
            if response_json.get('invalid_input', []):
                raise errors.TaskFailure('Error occurred in batch processing')
            return False
        if status == 'error':
            raise errors.TaskFailure('Error occurred in batch processing')
        raise errors.TaskFailure('Can\'t parse answer')



    def push_results(self, stats):
        """Send sensor values to solomon.

        `stats` is passed to solomon.create_sensors() and uploaded with
        labels taken from the solomon_* task parameters. The upload is
        best-effort: a failure is logged with its traceback and not
        propagated.
        """
        logging.info("stats is " + str(stats))
        sensors = solomon.create_sensors(stats)
        common_labels = {
            "host": "solomon-push",
            "project": self.Parameters.solomon_project,
            "cluster": self.Parameters.solomon_cluster,
            "service": self.Parameters.solomon_service
        }
        try:
            solomon.upload_to_solomon(common_labels, sensors)
        except Exception:
            # `except Exception` lets KeyboardInterrupt/SystemExit through;
            # logging.exception records why the push failed.
            logging.exception("can't push data solomon retry count lately")


class MyRequester(Requester):

    def init(self, requests_iterator, rps=10, process_count=5, request_timeout=0.15, retry_limit=0, ids=None):
        """Store the settings and reset the per-run state.

        Args:
            requests_iterator: iterable of requests consumed by send_requests().
            rps: upper bound on requests enqueued per second.
            process_count: number of worker processes to start.
            request_timeout: forwarded to the workers with every request.
            retry_limit: forwarded to the workers as `re_try_limit`.
            ids: sequence indexed by request number, used as the key of
                `self.result` and as the document url; defaults to an
                empty list.
        """
        self.requests_iterator = requests_iterator
        self.process_count = process_count
        self.request_timeout = request_timeout
        self.error = None
        self.responses_counter = 0
        self.rps = rps
        self.result = dict()
        # A fresh list per call avoids sharing one mutable default between instances.
        self.ids = [] if ids is None else ids
        self.re_try_limit = retry_limit
        self.to_yt = []

    def send_requests(self):
        """
            Create a pool of processes that send the requests and hand back the responses
            (the request data must already be available as the iterator self.requests_iterator).

            Requests are enqueued at no more than self.rps per second; responses are
            collected through handle_workers_result() along the way. A Requester.WorkerError
            is logged, its text is stored in self.error (if none is stored yet) and the
            workers are finished via finish_workers(True).
        """
        # The worker pool is driven through queues.
        # Each item put on the request queue is a "packet":
        # {
        #   'nreq': request number,
        #   'req': the request,
        #   'ignore_incomplete_read': True/False,
        #   're_try_limit': retry limit,
        #   'request_timeout': request timeout
        # }
        self.qreqs = MtpQueue(1024)
        self.qresps = MtpQueue(1024)  # delivers (request_number, success_flag, error_or_response)
        self.workers = []

        for wid in range(0, self.process_count):
            proc = Process(target=self.requester_worker, args=(self.qreqs, self.qresps, wid))
            self.workers.append(proc)
            proc.start()
        nreq = 0
        # Requests that were enqueued and not yet answered successfully
        # (entries are removed in on_response()).
        self.in_fly = {}
        try:
            for req in self.requests_iterator:
                # Start of this request's 1/rps time slot.
                time_put = time.time()
                # NOTE(review): if self.workers is empty this loop body never runs
                # and the request is skipped without being enqueued — confirm intended.
                while self.workers:
                    try:
                        # logging.debug('request '+str(nreq)+' = '+str(self.ids[nreq]) + 'sending')
                        self.qreqs.put_nowait(dict(
                            nreq=nreq,
                            req=req,
                            ignore_incomplete_read=self.ignore_incomplete_read,
                            re_try_limit=self.re_try_limit,
                            request_timeout=self.request_timeout,
                        ))
                        self.in_fly[nreq] = req
                        nreq += 1
                        if nreq % 100 == 0:
                            logging.info('Enqueued %s requests...', nreq)
                            self.handle_workers_result()
                        break
                    except Queue.Full:
                        # The request queue is full: process pending responses, then retry the put.
                        self.handle_workers_result()
                # Sleep for whatever is left of the 1/rps slot to keep the enqueue rate at rps.
                to_sleep = 1.0/self.rps - time.time() + time_put
                if to_sleep > 0:
                    time.sleep(to_sleep)
            logging.debug('Total enqueued requests amount: %s', nreq)
            self.finish_workers()
        except Requester.WorkerError as we:
            logging.error('Worker error at req[%s]=%s : %s', we.nreq, self.ids[we.nreq], we)
            if not self.error:
                self.error = str(we)
            # NOTE(review): the meaning of the True argument is defined by the base class — confirm.
            self.finish_workers(True)
        self.qreqs = None
        self.qresps = None

    def on_response(self, nreq, response):
        """
            Handle one successful worker response.

            The body is parsed as JSON, queued for upload with post_to_key_value(),
            stored in self.result under the id of request `nreq`, and the request
            is dropped from self.in_fly. Any exception along the way is logged and
            the response is passed to on_fail_request().
        """
        try:
            logging.debug('recieve %s response size = %s', nreq, len(response))
            parsed = json.loads(response)
            self.post_to_key_value(nreq, response)
            permalink = self.ids[nreq]
            self.result[permalink] = parsed
            self.responses_counter += 1
            self.in_fly.pop(nreq)
        except Exception as ex:
            message = "Error in response " + str(nreq) + ':' + str(ex)
            logging.info(message)
            self.on_fail_request(nreq, response)

    def handle_workers_result(self, timeout=1):
        """
            Process the results the workers have put on the response queue.

            Items are taken while the queue reports itself non-empty; each get
            waits up to `timeout`. A successful result goes to on_response();
            the first failed one goes to on_fail_request() and ends the call.
            If a get times out, refresh_workers() is called.
        """
        try:
            while not self.qresps.empty():
                request_no, succeeded, payload = self.qresps.get(True, timeout)
                if succeeded:
                    self.on_response(request_no, payload)
                    continue
                self.on_fail_request(request_no, payload)
                return
        except Queue.Empty:
            self.refresh_workers()

    def post_to_key_value(self, nreq, response):
        """Append a serialized "modify" message for response `nreq` to self.to_yt.

        The document url is the id of the request; its data is a JSON string
        holding the raw response text and the current time.
        """
        document = {}
        document['url'] = str(self.ids[nreq])
        value = json.dumps({'response':str(response),'time':time.ctime()})
        document['data'] = {'type':'#p', 'value': value}
        message = {"action": "modify", "prefix": 0, "docs": [document]}
        self.to_yt.append(json.dumps(message))

    def raise_on_fail_request(self, nreq, err):
        """Do nothing for a failed request.

        NOTE(review): presumably overrides a Requester hook that raises on
        failure, so a single failed request does not abort the run — confirm
        against the base class.
        """
        return None

