# coding: utf-8

import time
import json
import requests
import urlparse
import re
import copy

from sandbox import sdk2
from sandbox.projects.turbo import resources
from sandbox.projects.turbo import component
from sandbox.projects.common.search import bugbanner2
from sandbox.common.types.client import Tag
import logging
from sandbox.sandboxsdk import paths
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import utils
from sandbox.projects.websearch.begemot.tasks import BegemotCreateResponsesDiff
from sandbox.sdk2.helpers import subprocess as sp
from sandbox import common
from multiprocessing import Process, JoinableQueue, Queue
from collections import namedtuple


# One HTTP request handed to a worker process: the full URL, the platform
# ('desktop' selects the desktop User-Agent, anything else the touch one),
# the request kind ('ham' or 'beta') and the sequence number of the test URL.
RequestItem = namedtuple('RequestItem', 'url platform type number')

# If more than this many URLs are still unanswered after all retries, the task fails.
MAX_UNANSWER = 30
# Base number of retry rounds; the retry loop in on_execute iterates RETRIES_COUNT + 1 times.
RETRIES_COUNT = 5

# User-Agent header sent for every non-desktop request.
TOUCH_USER_AGENT = (
    'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) '
    'Version/11.0 Mobile/15A372 Safari/604.1'
)

# User-Agent header sent when the request platform is 'desktop'.
DESKTOP_USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/72.0.3626.109 YaBrowser/19.3.0.2489 Yowser/2.5 Safari/537.36'
)


def normalize_host(host):
    # type: (str) -> str
    """Return *host* lower-cased, without a leading 'www.' and then a leading 'm.'.

    The prefixes are checked once each and in that order, so 'www.m.x' becomes
    'x' while 'm.www.x' only loses the 'm.' part.
    """
    result = host.lower()
    if result.startswith('www.'):
        result = result[len('www.'):]
    if result.startswith('m.'):
        result = result[len('m.'):]
    return result


def get_block_type(block):
    """Return the value stored under 'block', or under 'content_type' when 'block' is absent.

    A 'block' key that is present wins even if its value is None.
    """
    fallback_type = block.get('content_type')
    return block.get('block', fallback_type)


def iterate_json(doc, handler):
    """Walk a JSON-like structure depth-first, calling *handler* for every child.

    For each entry of a dict (or element of a list) the handler is invoked as
    ``handler(child, container, key=key_or_index)`` and only then is the child
    itself traversed. Values that are neither dict nor list are leaves.
    """
    if isinstance(doc, dict):
        entries = doc.items()
    elif isinstance(doc, list):
        entries = enumerate(doc)
    else:
        return

    for child_key, child in entries:
        handler(child, doc, key=child_key)
        iterate_json(child, handler)


# Matches a 'reqid=<value>' fragment, where the value runs up to the next '|' separator.
PORTAL_PLACEMENT_RE = re.compile(u'reqid=[^|]+')
# Matches a 'sign=<lowercase hex>:<digits>' fragment.
URL_SIGN_RE = re.compile(u'sign=[a-f0-9]+\\:\d+')


def cleanup_document(doc):
    """Canonize volatile parts of a response document in place.

    The document is walked with iterate_json and the following rewrites are applied:

    * a unicode value stored under the dict key 'url' gets every
      'sign=<hex>:<digits>' fragment replaced with 'sign=XXX';
    * a dict whose block type is 'advert' and whose 'type' is 'Yandex' gets
      'extParams.portal_placement' rewritten: 'reqid=...' fragments become
      'reqid=dummy' and surrounding '|' characters are stripped (the key is
      created with an empty string when it was missing);
    * a dict whose block type is 'categories' and that has a non-empty 'items'
      list gets the list sorted by item 'url' and every item's 'count' replaced
      with a fixed placeholder.

    :param doc: JSON-like document (dicts / lists / scalars); mutated in place.
    """
    def handler(value, parent, key):
        if isinstance(value, unicode) and key == 'url':
            if isinstance(parent, dict):
                parent[key] = URL_SIGN_RE.sub(repl='sign=XXX', string=value)
        elif not isinstance(value, dict):
            return
        elif get_block_type(value) == 'advert' and value.get('type') == 'Yandex':
            ep = value.get('extParams', {})
            pp = ep.get('portal_placement', '')
            if pp:
                pp = PORTAL_PLACEMENT_RE.sub(repl='reqid=dummy', string=pp)
            pp = pp.strip('|')
            ep['portal_placement'] = pp
            value['extParams'] = ep
        elif get_block_type(value) == 'categories' and value.get('items'):
            # The sort key tolerates null items, so the loop below must too.
            value['items'].sort(key=lambda cat: (cat or {}).get('url'))
            for item in value['items']:
                # Skip null / non-dict entries instead of failing on `'count' in None`.
                if isinstance(item, dict) and 'count' in item:
                    item['count'] = 'dummy_canonized_diff'

    iterate_json(doc, handler)


def is_phony_turbo_key(turbo_key):
    # type: (str) -> bool
    """Tell whether *turbo_key* is a URL of one of the hard-coded Yandex services.

    Returns True when the normalized host starts with 'news.yandex.' or
    'tv.yandex.', or when it starts with 'yandex.' and the path starts with one
    of '/pogoda', '/hava', '/weather'. A key without a network location is
    never phony.
    """
    parts = urlparse.urlparse(turbo_key)
    if not parts.netloc:
        return False

    host = normalize_host(parts.netloc)
    if host.startswith(('news.yandex.', 'tv.yandex.')):
        return True

    weather_prefixes = ('/pogoda', '/hava', '/weather')
    return host.startswith('yandex.') and parts.path.startswith(weather_prefixes)


def get_turbo_key(url):
    """Return the value of the single 'text' query parameter of *url*.

    None is returned when the URL has no query string, when 'text' is missing,
    or when 'text' occurs more than once.
    """
    query_string = urlparse.urlparse(url).query
    if query_string:
        text_values = urlparse.parse_qs(query_string).get('text')
        if text_values and len(text_values) == 1:
            return text_values[0]
    return None


class CompareTurboResponses(bugbanner2.BugBannerTask):

    class Requirements(sdk2.Task.Requirements):

        # Client tag expression: GENERIC and Linux-group clients, excluding clients tagged LXC.
        client_tags = Tag.GENERIC & Tag.Group.LINUX & ~Tag.LXC

    class Parameters(sdk2.Parameters):
        # Base address of the reference installation; URL paths from the queries file are appended to it.
        hamster = sdk2.parameters.String("Hamster address: ", default="https://hamster.yandex.ru", required=True)
        # Extra cgi string appended to every hamster request.
        hamster_cgi = sdk2.parameters.String("Additional cgi for hamster", default="")
        # Base address of the installation under test. NOTE: the default equals the hamster address.
        beta = sdk2.parameters.String("Beta address: ", default="https://hamster.yandex.ru", required=True)
        # Extra URL path that is compared after all URLs from the queries file.
        https_redirect_url_path = sdk2.parameters.String("https redirect url path: ", default="/turbo?text=about", required=True)
        # Extra cgi string appended to every beta request (init_turbo may prepend srcrwr rules to it).
        beta_cgi = sdk2.parameters.String("Additional cgi for beta", default="")
        # JSON resource with the URLs to compare; when empty, on_execute looks up a TURBO_QUERIES resource itself.
        queries = sdk2.parameters.Resource(
            "Queries",
            resource_type=resources.TURBO_QUERIES,
        )
        # The three packages below are checked by init_turbo in this order
        # (merger, ddm, init); only the first non-empty one is used.
        merger_package = sdk2.parameters.Resource(
            "Package with turbo_merger for local launch (leave empty if you don't want local turbo)",
            resource_type=resources.TURBO_MERGER_PACKAGE
        )
        ddm_package = sdk2.parameters.Resource(
            "Package with turbo_ddm for local launch (leave empty if you don't want local turbo)",
            resource_type=resources.TURBO_DDM_PACKAGE
        )
        init_package = sdk2.parameters.Resource(
            "Package with turbo_init for local launch (leave empty if you don't want local turbo)",
            resource_type=resources.TURBO_INIT_PACKAGE
        )
        # NOTE(review): not read anywhere in the visible part of this file.
        debug = sdk2.parameters.Bool("debug", default=False)

    def on_execute(self):
        """Compare hamster and beta responses for every test URL and publish the diffs.

        Steps:

        1. Prepare the 'diffs' folder and the turbo component (see init_turbo).
        2. Run check_diffs over all URLs from the queries resource plus the
           https redirect URL, then retry the unanswered / noisy ones.
        3. Write the URLs that never got an answer to 'diffs/unanswers.txt'.
        4. Mark the TURBO_DIFF resource ready.

        The task is failed through eh.check_failed when no queries resource can
        be found, when more than MAX_UNANSWER URLs stay unanswered, or when any
        diff was recorded in self.Context.has_diff.
        """
        self.add_bugbanner(bugbanner2.Banners.Turbo)

        paths.make_folder('diffs', delete_content=True)

        hamster_cgi = self.Parameters.hamster_cgi
        beta_cgi = self.Parameters.beta_cgi

        fqdn = common.config.Registry().this.fqdn
        port = utils.gimme_port()

        turbo, beta_cgi = self.init_turbo(port, fqdn, beta_cgi)

        diff_res = resources.TURBO_DIFF(
            self,
            'diffs',
            'diffs',
        )

        queries = self.Parameters.queries
        if not queries:
            queries = sdk2.Resource.find(
                resources.TURBO_QUERIES
            ).first()
        if not queries:
            # Fail with a clear message instead of an AttributeError on `queries.id` below.
            eh.check_failed("No TURBO_QUERIES resource found")

        logging.debug("Queries resource id: %s", queries.id)

        beta_cgi, hamster_cgi = beta_cgi.strip(), hamster_cgi.strip()

        with turbo:
            def test_urls_generator():
                """Yield (hamster_item, beta_item) dict pairs for every URL to compare."""
                hamster = self.Parameters.hamster.strip()
                beta = self.Parameters.beta.strip()

                with open(str(sdk2.ResourceData(queries).path), 'r') as f:
                    urls = json.load(f)
                    for url_item in urls:
                        path = url_item['url_path']
                        platform = url_item.get('platform', 'touch')
                        yield (
                            {'url': hamster + path, 'platform': platform},
                            {'url': beta + path, 'platform': platform},
                        )

                # The redirect check carries no 'platform' key, so workers use the touch User-Agent.
                https_redirect_path = self.Parameters.https_redirect_url_path.strip()
                yield {'url': hamster + https_redirect_path}, {'url': beta + https_redirect_path}

            remainder = self.check_diffs(enumerate(test_urls_generator()), beta_cgi, hamster_cgi, True)
            for i in xrange(RETRIES_COUNT + 1):
                if not remainder:
                    # Nothing left to retry: do not spawn worker processes for an empty round.
                    break
                logging.debug('%s retry of %s urls', i + 1, len(remainder))
                # On the very last round rechecking is disabled, so remaining diffs are reported.
                remainder = self.check_diffs(remainder, beta_cgi, hamster_cgi, i != RETRIES_COUNT)

            logging.debug(len(remainder))
        with open("diffs/unanswers.txt", "w") as fff:
            # Each remainder entry holds the request dicts (url / platform), not bare URLs.
            for num, (hamster_item, beta_item) in remainder:
                fff.write("{}: '{}' '{}'\r\n".format(num, hamster_item, beta_item))
        if len(remainder) > MAX_UNANSWER:
            eh.check_failed("Too many unanswers: {}".format(len(remainder)))
        if not self.Context.has_diff:
            with open("diffs/no_diff.txt", "w") as fff:
                fff.write("There is no diff")
        sdk2.ResourceData(diff_res).ready()
        if self.Context.has_diff:
            eh.check_failed("We have diffs")

    @staticmethod
    def get_resp(que, ans):
        """Worker loop: fetch every RequestItem taken from *que* and report to *ans*.

        For each item one GET request is issued with the User-Agent matching the
        item's platform ('desktop' -> desktop UA, anything else -> touch UA).
        On success the tuple ``(response, platform, request_time)`` is put to
        *ans*; on any failure None is put instead, so the consumer always gets
        exactly one answer per request. The loop never returns; the caller is
        expected to terminate the process.

        :param que: joinable queue of RequestItem objects.
        :param ans: queue receiving the result for every request.
        """
        while True:
            req = que.get()  # RequestItem
            try:
                request_time = int(time.time())
                user_agent = DESKTOP_USER_AGENT if req.platform == 'desktop' else TOUCH_USER_AGENT
                response = requests.get(
                    req.url,
                    timeout=(0.7, 0.7),  # (connect, read) timeouts in seconds
                    verify=False,
                    headers={'User-Agent': user_agent},
                )
                ans.put((response, req.platform, request_time))
            except Exception as exc:
                # Best effort: the request is retried by the caller, but keep the
                # reason so that timeouts can be told apart from other failures.
                logging.debug(
                    "No response for req_num: %s, url: '%s', error: %r",
                    req.number, req.url, exc,
                )
                ans.put(None)
            que.task_done()

    def canonize_result(self, response_json):
        """Return a deep copy of *response_json* with volatile document data canonized.

        For every entry of the copy's 'docs' the 'css' and
        'aab_cookie_of_the_day' keys are dropped and cleanup_document is
        applied. A falsy input is only copied and returned. The argument itself
        is never modified.
        """
        canonized = copy.deepcopy(response_json)
        if canonized:
            for document in canonized.get('docs', {}):
                for volatile_key in ('css', 'aab_cookie_of_the_day'):
                    document.pop(volatile_key, None)
                cleanup_document(document)
        else:
            logging.debug('No document content')
        return canonized

    def check_diffs(self, reqs, beta_cgi, hamster_cgi, recheck_possible):
        """Fetch and compare hamster / beta responses for the given requests.

        Two worker processes (see get_resp) are started, one per side. For each
        request both URLs are fetched, the 'app_host.result' parts of the JSON
        answers are canonized and compared. When a diff is found, the raw
        responses are stored under 'diffs/' and self.Context.has_diff is set.

        :param reqs: iterable of ``(num, (hamster_item, beta_item))`` where both
            items are dicts with a 'url' key and an optional 'platform' key.
        :param beta_cgi: cgi string appended to every beta URL.
        :param hamster_cgi: cgi string appended to every hamster URL.
        :param recheck_possible: when true, a differing pair whose hamster URL
            has a single 'text' parameter is postponed instead of reported.
        :return: list of ``(num, (hamster_item, beta_item))`` entries that must
            be checked again (no answer, unparsable answer, or postponed diff).
        """
        queue_ham, queue_beta = JoinableQueue(), JoinableQueue()

        results_queue_ham = Queue()
        results_queue_beta = Queue()

        p1 = Process(target=self.get_resp, args=(queue_ham, results_queue_ham))
        p2 = Process(target=self.get_resp, args=(queue_beta, results_queue_beta))
        p1.start()
        p2.start()
        to_check_after = []

        try:
            for num, (hamster_item, beta_item) in reqs:
                logging.debug('req num: {}'.format(num))

                hamster_url = hamster_item['url']
                beta_url = beta_item['url']

                req_to_ham = '{}{}&no-tests=da&srcrwr=TURBO_MERGER:::50000&srcrwr=SAAS%3ASAAS_ANSWERS&export=json'.format(
                    hamster_url,
                    hamster_cgi,
                )
                req_to_beta = '{}{}&no-tests=da&srcrwr=SAAS%3ASAAS_ANSWERS&export=json'.format(
                    beta_url,
                    beta_cgi,
                )
                queue_ham.put_nowait(RequestItem(url=req_to_ham, platform=hamster_item.get('platform'), type='ham', number=num))
                queue_beta.put_nowait(RequestItem(url=req_to_beta, platform=beta_item.get('platform'), type='beta', number=num))
                # Wait until both workers have processed their request before reading the answers.
                queue_ham.join()
                queue_beta.join()
                ham_ans = results_queue_ham.get()
                beta_ans = results_queue_beta.get()
                if not ham_ans or not beta_ans:
                    to_check_after.append((num, (hamster_item, beta_item)))
                    continue

                # Answers are (response, platform, request_time) tuples.
                resp_ham = ham_ans[0]
                time_ham = ham_ans[2]
                resp_beta = beta_ans[0]
                time_beta = beta_ans[2]

                try:
                    beta_body = resp_beta.json()['app_host']['result']
                    hamster_body = resp_ham.json()['app_host']['result']

                    beta_canonized = self.canonize_result(beta_body)
                    hamster_canonized = self.canonize_result(hamster_body)

                except Exception as exc:
                    logging.debug("cannnot get json answer: {}".format(exc))
                    logging.debug('Request to beta: "{0}", request to hamster: "{1}"'.format(req_to_beta, req_to_ham))
                    to_check_after.append((num, (hamster_item, beta_item)))
                    continue

                # Some urls are noisy because of dynamically changing state
                # NOTE(review): every URL with a single 'text' parameter is rechecked here;
                # is_phony_turbo_key() is not consulted - confirm this is intended.
                if recheck_possible and beta_canonized != hamster_canonized:
                    turbo_key = get_turbo_key(hamster_url)
                    if turbo_key:
                        logging.debug("Phony url type, try again, num: {}".format(num))
                        to_check_after.append((num, (hamster_item, beta_item)))
                        continue

                diff = BegemotCreateResponsesDiff.write_aggregated_diff(
                    '{}_req'.format(num), hamster_canonized, beta_canonized
                )

                if diff:
                    with open("diffs/{}_{}_resp".format(num, "hamster"), "w") as fff:
                        fff.write(resp_ham.request.url + '\n')
                        fff.write('Timestamp: {0}\n'.format(time_ham))
                        fff.write(resp_ham.content)
                    with open("diffs/{}_{}_resp".format(num, "beta"), "w") as fff:
                        fff.write(resp_beta.request.url + '\n')
                        fff.write('Timestamp: {0}\n'.format(time_beta))
                        fff.write(resp_beta.content)

                    self.Context.has_diff = True
        finally:
            # The workers loop forever; stop them even when the loop above raised,
            # otherwise two non-daemon processes would be leaked.
            p1.terminate()
            p2.terminate()
        return to_check_after

    def unpack(self, package_to_unpack, folder):
        """Extract the tar archive of resource *package_to_unpack* into *folder*.

        The folder is (re)created with its content deleted before extraction.
        tar output goes to the 'tar_actions' process log.

        :raises Exception: when tar exits with a non-zero code.
        """
        archive_path = str(sdk2.ResourceData(package_to_unpack).path)
        paths.make_folder(folder, delete_content=True)
        with sdk2.helpers.ProcessLog(self, logger='tar_actions') as process_log:
            tar_process = sp.Popen(
                ["tar", "-xvf", archive_path, "-C", folder],
                shell=False,
                stdout=process_log.stdout,
                stderr=sp.STDOUT,
            )
            if tar_process.wait() != 0:
                raise Exception('Failed to unpack actions repos')

    def init_turbo(self, port, fqdn, beta_cgi):
        """Create the turbo component to launch and the beta cgi that points at it.

        The first non-empty package parameter (merger, then ddm, then init)
        decides which component is unpacked into 'turbo_res' and configured;
        matching srcrwr rules addressing ``fqdn:port`` are put in front of
        *beta_cgi*. Without any package a TurboComponent created with
        ``fake_init=True`` is returned and *beta_cgi* is left untouched.

        :param port: port for the local component and the srcrwr rules.
        :param fqdn: host name used in the srcrwr rules.
        :param beta_cgi: user supplied cgi for beta requests.
        :return: ``(turbo_component, beta_cgi)`` tuple.
        """
        params = self.Parameters

        if not (params.merger_package or params.ddm_package or params.init_package):
            return component.TurboComponent(fake_init=True), beta_cgi

        def prepare(package, binary_path):
            """Unpack *package* and register the binary / config resources."""
            self.unpack(package, "turbo_res")
            executable = resources.TURBO_EXECUTABLE(self, "turbo_binary", binary_path)
            config = resources.TURBO_CONFIG(self, "turbo_config", "turbo_res/config.json")
            return executable, config

        srcrwr_timeout = 50000  # timeout value placed into the srcrwr rules

        if params.merger_package:
            turbo_bin, turbo_conf = prepare(params.merger_package, "turbo_res/bin/turbo_merger")
            turbo = component.TurboComponent(
                self,
                binary=turbo_bin,
                port=port,
                config=turbo_conf,
                comp_type=component.TURBO_MERGER,
                site_names_file="turbo_res/site_names.json",
            )
            merger_template = "&srcrwr=TURBO_MERGER:{a_fqdn}:{a_port}:{a_timeout}&srcrwr=TURBO_POST_PROCESSOR:{a_fqdn}:{a_port}:{a_timeout}&{a_beta_cgi}"
            new_beta_cgi = merger_template.format(
                a_fqdn=fqdn,
                a_port=port,
                a_timeout=srcrwr_timeout,
                a_beta_cgi=beta_cgi,
            )
        elif params.ddm_package:
            turbo_bin, turbo_conf = prepare(params.ddm_package, "turbo_res/bin/media_mass_api_walker")

            # Optional headers are stored in the vault as a JSON string.
            tv_headers_secret = sdk2.Vault.data("yandex_tv_program_headers_for_turbo")
            tv_headers = None
            if tv_headers_secret:
                logging.debug("Obtained tv headers secret")
                tv_headers = json.loads(tv_headers_secret)

            turbo = component.TurboComponent(
                self,
                binary=turbo_bin,
                port=port,
                config=turbo_conf,
                threads=16,
                comp_type=component.TURBO_DDM,
                tv_headers=tv_headers,
            )
            ddm_template = (
                '&srcrwr=DYNAMIC_DATA_MINER:{address}:{port}:{timeout}'
                '&srcrwr=DYNAMIC_DATA_MINER_NEWS:{address}:{port}:{timeout}'
                '&srcrwr=DYNAMIC_DATA_MINER_OTHER:{address}:{port}:{timeout}'
                '&srcrwr=TURBO_MERGER:::50000'
                '&{cgi}'
            )
            new_beta_cgi = ddm_template.format(
                address=fqdn,
                port=port,
                timeout=srcrwr_timeout,
                cgi=beta_cgi,
            )
        else:
            # Only init_package can be set at this point.
            turbo_bin, turbo_conf = prepare(params.init_package, "turbo_res/bin/turbo_init")
            turbo = component.TurboComponent(
                self,
                binary=turbo_bin,
                port=port,
                config=turbo_conf,
                threads=16,
                comp_type=component.TURBO_INIT
            )
            new_beta_cgi = "&srcrwr=TURBO_INIT_SERVICE:{}:{}:{}&srcrwr=TURBO_MERGER:::50000&{}".format(
                fqdn,
                port,
                srcrwr_timeout,
                beta_cgi,
            )

        logging.debug("new beta_cgi :{}".format(new_beta_cgi))
        return turbo, new_beta_cgi
