# -*- coding: utf-8 -*-

import logging
import random
import urllib
import urllib2

from sandbox import sdk2
import sandbox.projects.common.noapacheupper.request as na_req
from sandbox.projects.common.noapacheupper.request import Request as noapache_request
from sandbox.projects.common.search.requester import write_binary_data
from sandbox.projects import resource_types
from sandbox.sandboxsdk import parameters as sp
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.errors import SandboxTaskFailureError
from sandbox.sandboxsdk.task import SandboxTask


class Params:
    """Input parameter declarations for the BUILD_NOAPACHEUPPER_REQUESTS task."""

    # Base url of the report that receives the text queries; the task appends
    # '&search=remote&text=...&lr=...' (plus optional extras) to this value.
    class ReportUrl(sp.SandboxStringParameter):
        name = "report_url"
        description = "Use this upper/report for 'perekoldovka' requests"
        default_value = "http://hamster.yandex.ru/yandsearch?"

    # Base url of RTMR used to fetch candidate yandexuid / loginhash values.
    # An empty value disables adding these values to the queries.
    class RtmrUrl(sp.SandboxStringParameter):
        name = "rtmr_url"
        description = "Use rtmr for get cgi param yandexuid= possible values (not use param if url empty)"
        default_value = "http://rtmr.search.yandex.net:8080/yandsearch?"

    # Resource with user queries, one per line: '<text>' or '<text><TAB><region-id>'.
    class TextQueries(sp.ResourceSelector):
        name = "text_queries_resource_id"
        description = "Users text queries (optionaly can have region-id after <TAB>)"
        resource_type = resource_types.USERS_QUERIES
        required = True

    # Raw cgi string appended to every report query (the task prepends '&' when missing).
    class AdditionalCgiParams(sp.SandboxStringParameter):
        name = "additional_cgi_params"
        description = "Additional cgi params for report (string for trivial append to query for report)"
        default_value = ""

    # Upper bound on the number of successfully collected requests; 0 disables the bound.
    class RequestsLimit(sp.SandboxIntegerParameter):
        name = "requests_limit"
        description = "Requests limit (0 == no limit)"
        default_value = 0

    # All parameters in declaration order; assigned to the task's input_parameters.
    lst = (
        ReportUrl,
        RtmrUrl,
        TextQueries,
        AdditionalCgiParams,
        RequestsLimit,
    )


class BuildNoapacheupperRequests(SandboxTask, object):
    """
        Build requests for shooting at noapacheupper: plain text user queries are sent to report
        (with cgi parameters search=remote&srcrwr=UPPER... that redirect the requests to our noapacheupper)
        and the 'perekoldovka'-transformed requests are picked from the noapacheupper log
        (ContextCreated/PostBody records).

        Output: resources with the user queries that were actually used
        (not every query can be transformed, report does not forward some of them to noapache)
        and with the cgi parameters of the requests to noapacheupper (one request per line).
    """

    type = "BUILD_NOAPACHEUPPER_REQUESTS"

    input_parameters = Params.lst

    # Context keys holding the ids of the output resources, and the output file names.
    requests_res_id = "cgi_requests_res_id"
    requests_file = "requests.txt"
    used_user_queries_res_id = "used_users_queries_res_id"
    used_user_queries_file = "used_users_queries.txt"

    def __init__(self, task_id=0):
        super(BuildNoapacheupperRequests, self).__init__(task_id)

    def on_enqueue(self):
        """Register both output resources and remember their ids in the task context."""
        super(BuildNoapacheupperRequests, self).on_enqueue()
        channel.task = self
        self.ctx[self.requests_res_id] = self.create_resource(
            self.descr,
            self.requests_file,
            resource_types.BINARY_REQUESTS_FOR_NOAPACHEUPPER,
            attributes={
                "ttl": 90,
            },
            arch="any",
        ).id
        self.ctx[self.used_user_queries_res_id] = self.create_resource(
            self.descr,
            self.used_user_queries_file,
            resource_types.USERS_QUERIES,
            attributes={
                "ttl": 90,
            },
            arch="any",
        ).id

    def on_execute(self):
        """
            Ask the queries from the input file via the given url (report), appending
            '&text=<text>&lr=<region>' to the url (when the queries file has no
            tab-separated region for a query, region 213 is used).

            Every collected request with a non-empty global_ctx/qtree is written to the
            requests resource and its source line to the used-queries resource; other
            queries are skipped with a log record.
        """

        report_url = self.ctx[Params.ReportUrl.name]
        users_queries_filename = self.sync_resource(self.ctx[Params.TextQueries.name])
        noapache_requests_filename = channel.sandbox.get_resource(self.ctx[self.requests_res_id]).path
        used_user_queries_filename = channel.sandbox.get_resource(self.ctx[self.used_user_queries_res_id]).path
        additional_cgi_params = self.ctx[Params.AdditionalCgiParams.name]
        # The extra cgi string is appended verbatim, so make sure it starts with a separator.
        if additional_cgi_params and not additional_cgi_params.startswith('&'):
            additional_cgi_params = '&' + additional_cgi_params

        yandexuids = []
        loginhash = []
        if self.ctx[Params.RtmrUrl.name]:
            rtmr_common_url = self.ctx[Params.RtmrUrl.name] + "view=plain&table=active_keys&key="
            yandexuids.extend(_read_uids(rtmr_common_url + "user_actions_json"))
            # yandexuids.extend(_read_uids(rtmr_common_url + "watchlog_actions_json"))
            yandexuids.extend(_read_uids(rtmr_common_url + "spylog_actions_json"))
            loginhash.extend(_read_uids(rtmr_common_url + "user_actions_json.logins"))

        cnt = 0  # number of processed input queries
        limit = self.ctx[Params.RequestsLimit.name]
        used_cnt = 0  # number of queries that produced a stored request
        # The requests file receives data via write_binary_data, hence binary mode.
        with open(users_queries_filename) as f, \
                open(noapache_requests_filename, 'wb') as fw, \
                open(used_user_queries_filename, 'w') as uuq:
            for line in f:
                if cnt % 100 == 0:
                    self.current_action("ask queries via report (processed {} queries)".format(cnt))
                line = line.rstrip('\n')
                tk = line.split('\t')
                text = urllib.quote(tk[0], '')
                lr = tk[1] if len(tk) >= 2 else "213"
                qurl = "{}&search=remote&text={}&lr={}{}".format(report_url, text, lr, additional_cgi_params)
                if yandexuids:
                    qurl += "&yandexuid={}".format(random.choice(yandexuids))
                if loginhash and ((cnt % 4) == 0):  # apply to 25% requests
                    qurl += "&rearr=loginhash=" + random.choice(loginhash)

                try:
                    req = na_req.collect_request(qurl)
                    qtree = noapache_request(req).global_ctx.get('qtree')
                    if not qtree or not qtree[0]:  # SEARCH-1070
                        logging.info("skip[{}]: query not used (not has/empty global_ctx/qtree param required for SEARCH-1070)".format(cnt))
                    else:
                        write_binary_data(fw, req)
                        uuq.write(line + '\n')
                        used_cnt += 1
                except na_req.CollectRequestNotFound:
                    logging.info("skip[{}]: query not used (not found req body in eventlog)".format(cnt))

                if cnt % 100 == 0:
                    self.current_action("continue collect requests via report (processed {} queries)".format(cnt))
                cnt += 1
                # The limit counts stored requests, not processed queries.
                if limit and used_cnt >= limit:
                    break

        # Store the id of the plain-text queries resource in the attributes of the
        # transformed requests resource (plain text is friendlier to show in diff tasks).
        channel.sandbox.set_resource_attribute(
            self.ctx[self.requests_res_id],
            Params.TextQueries.name,
            self.ctx[self.used_user_queries_res_id]
        )


def _read_uids(rtmr_url, maxrecords=1000):
    """
    Fetch identifiers from an RTMR plain-text view.

    The response is parsed as newline-separated rows of tab-separated fields;
    the second field of every row that has at least two fields is collected.

    :param rtmr_url: RTMR url; '&maxrecords=<maxrecords>' is appended to it
    :param maxrecords: number of records to request and to expect back
    :return: list with exactly `maxrecords` collected values
    :raises SandboxTaskFailureError: when the number of parsed values differs from `maxrecords`
    """
    url = rtmr_url + '&maxrecords=' + str(maxrecords)
    response = urllib2.urlopen(url, timeout=10)
    try:
        body = response.read()
    finally:
        # Release the connection explicitly instead of relying on garbage collection.
        response.close()

    rows = (line.split('\t') for line in body.split('\n'))
    uids = [fields[1] for fields in rows if len(fields) >= 2]
    logging.info('read {} uids from {}'.format(len(uids), url))
    if len(uids) != maxrecords:
        raise SandboxTaskFailureError('fail getting {} records from RTMR: {} (receive only {})'.format(maxrecords, url, len(uids)))
    return uids


# Module-level alias of the task class.
# NOTE(review): presumably the name the Sandbox task loader looks up in this module — confirm.
__Task__ = BuildNoapacheupperRequests


def create_task_BuildNoapacheupperRequests(
    report_url,
    text_queries_resource_id,
    descr,
    rtmr_url=Params.RtmrUrl.default_value,
    additional_cgi_params=None,
    requests_limit=0,
    current_sdk2_task=None,
):
    """
    Create a BUILD_NOAPACHEUPPER_REQUESTS task as a child of the current task.

    :param report_url: value for the 'report_url' parameter
    :param text_queries_resource_id: value for the 'text_queries_resource_id' parameter
    :param descr: description of the created task
    :param rtmr_url: value for the 'rtmr_url' parameter
    :param additional_cgi_params: value for 'additional_cgi_params'; not passed when falsy
    :param requests_limit: value for the 'requests_limit' parameter
    :param current_sdk2_task: when given, the child is created through sdk2 with this
        task as the first constructor argument and then enqueued; otherwise it is
        created with channel.task.create_subtask
    :return: result of `enqueue()` in the sdk2 case, of `create_subtask()` otherwise
    """
    task_type = BuildNoapacheupperRequests.type

    child_params = {
        Params.ReportUrl.name: report_url,
        Params.RtmrUrl.name: rtmr_url,
        Params.TextQueries.name: text_queries_resource_id,
        Params.RequestsLimit.name: requests_limit
    }
    # Leave the key out entirely when no extra cgi params were given.
    if additional_cgi_params:
        child_params[Params.AdditionalCgiParams.name] = additional_cgi_params

    logging.info("create {}({})".format(task_type, str(child_params)))

    if not current_sdk2_task:
        return channel.task.create_subtask(
            task_type=task_type,
            description=descr,
            input_parameters=child_params,
            arch="linux",
        )

    child = sdk2.Task[task_type](
        current_sdk2_task, description=descr, arch="linux", **child_params
    )
    return child.enqueue()
