# -*- coding: utf-8 -*-

import re
import sys
import six
import json
import logging
import time
import socket
import urllib
import urllib2
import collections

from sandbox.common.types.client import Tag
from sandbox.sandboxsdk import environments
from sandbox.sandboxsdk import parameters as params
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk.svn import Arcadia
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.projects.common import error_handlers as eh
from sandbox.projects.common import utils
from sandbox.projects.common import decorators
from sandbox.projects.common import string

# User-Agent header value sent with every request made by Bisect._read()
_CURL_UA = 'curl/7.22.0 (x86_64-pc-linux-gnu) libcurl/7.22.0 OpenSSL/1.0.1 zlib/1.2.3.4 libidn/1.23 librtmp/2.3'


class Params:
    # Input parameters of the bisect task.
    # `lst` is assigned to Bisect.input_parameters; the values are read from the task
    # context by parameter `name`.

    class Request(params.SandboxStringParameter):
        # Request url whose response documents are compared between revisions
        # (read in Bisect.bisect()).
        name = "request"
        description = "Sample request"
        default_value = "http://hamster.yandex.ru"
        required = True

    class Position(params.SandboxIntegerParameter):
        # 1-based position of the changed document; Bisect converts it to a 0-based
        # index by subtracting 1 before indexing the document list.
        name = "doc_position"
        description = "Doc position in response where doc changed (starts from 1)"
        default_value = 1
        required = True

    class UserQuery(params.SandboxStringParameter):
        # Optional query text; when set, Bisect.bisect() strips `&text=...` from the
        # request url and appends the url-encoded query instead.
        name = "user_query"
        description = "User query (will override &text=... cgi parameter if specified)"
        default_value = None
        required = False

    # Parameter classes exposed to the task, in declaration order
    lst = (
        Request,
        Position,
        UserQuery,
    )


def get_build_info(testenv_db, testenv_test, rev_min, rev_max):
    """
        Request test results from testenv and keep only rows for revisions
        in the closed range [rev_min, rev_max].

        :param testenv_db: value substituted into the `database` cgi parameter
        :param testenv_test: value substituted into the `test_name` cgi parameter
        :param rev_min: lowest revision to keep (inclusive)
        :param rev_max: highest revision to keep (inclusive)
        :return: Build tasks ordered by revision (testenv_test) info
        from given testenv_db (list of rows taken from the `rows` field of the json response)
    """
    # The handler filter uses strict comparisons (lt / gt), hence the +1 / -1 shift
    # to make the requested range inclusive.
    revision_filter = (
        '[{{'
        '"type": "numeric", '
        '"comparison": "lt", '
        '"value": {}, '
        '"field": "revision" '
        '}},'
        '{{"type": "numeric", '
        '"comparison": "gt", '
        '"value": {}, '
        '"field": "revision" '
        '}}]'
    ).format(rev_max + 1, rev_min - 1)
    url = (
        'https://testenv.yandex-team.ru/handlers/grids/testResults?database={}&filter='.format(testenv_db) +
        urllib.quote(revision_filter) +
        '&hide_filtered=true&hide_not_checked=true&limit=10000&start=0&test_name={}'.format(testenv_test)
    )
    logging.debug('get_build_info: %s', url)
    u = urllib2.urlopen(url)
    try:
        # close the connection even when reading the response fails
        data = u.read()
    finally:
        u.close()
    j = json.loads(data)
    # strip excess revisions
    bi = [row for row in j['rows'] if rev_min <= row['revision'] <= rev_max]
    bi.sort(key=lambda r: r['revision'])
    return bi


def branch_last_rev_first_minor_last_minor(arcadia_branch_path, post_fork_commit_limit=100, ver_minor=1):
    """
        Locate revisions around the fork point of the given branch using its svn log.

        @param: arcadia_branch_path: branch path passed to Arcadia.log
        @param: post_fork_commit_limit: limit depth branch history investigation
        @param: ver_minor: how many log entries after the pre-fork one to step
            (1 means the fork commit itself)
        @return: tuple of three items:
            * last commit (revision number, converted to int) to trunk before given branch forking
            * revision of the fork commit (the log entry right after the pre-fork one), as stored in the log entry
            * revision of the log entry `ver_minor` positions after the pre-fork one, as stored in the log entry
        @raise Exception: when the pre-fork revision is not present in the inspected part
            of the log, or when the branch has fewer than `ver_minor` revisions
    """
    fork_rev = Arcadia.log(
        arcadia_branch_path,
        revision_from=None,
        stop_on_copy=True,
    )[-1]
    logging.debug('Fork revision: %s', fork_rev)
    svnlog = Arcadia.log(
        arcadia_branch_path,
        revision_from=None,
        limit=post_fork_commit_limit,
    )
    # svnlog = [r32, r31, r30, ..., r2,   r1,  pre-fork, ...]
    #           tag  tag  tag       tag  fork
    fork_idx = None
    for i, rev in enumerate(svnlog):
        if rev == fork_rev:
            fork_idx = i
            break

    pre_fork_idx = None if fork_idx is None else fork_idx + 1
    if pre_fork_idx is None or pre_fork_idx >= len(svnlog):
        raise Exception(
            'Cannot get last trunk revision before branch fork {}:\n{}'.format(arcadia_branch_path, svnlog)
        )

    pre_fork_rev = svnlog[pre_fork_idx]
    logging.debug('Pre-fork revision: %s', pre_fork_rev)
    if ver_minor > pre_fork_idx:
        # A negative index would silently wrap around to the tail of the log
        # and return an unrelated (old) revision.
        raise Exception(
            'Branch {} has only {} revision(s) in log, cannot get revision for minor version {}'.format(
                arcadia_branch_path, pre_fork_idx, ver_minor
            )
        )
    # if ver_minor is 5: svnlog[pre_fork_idx - 1] is r1, svnlog[pre_fork_idx - ver_minor] is r5
    return (
        int(pre_fork_rev['revision']),
        svnlog[pre_fork_idx - 1]['revision'],
        svnlog[pre_fork_idx - ver_minor]['revision'],
    )


def format_row(r):
    """
        Short one-line description of a build row: its revision, task id and task status.
    """
    revision, task_id, task_status = (r[key] for key in ('revision', 'task_id', 'task_status'))
    return 'rev={}, task={}/{}'.format(revision, task_id, task_status)


class BisectInfo:
    # Keys of the ctx['bisect_info'] dict (see Bisect.set_bisect_info()).
    # The same strings are shown as row titles in the "Params" column of Bisect.footer,
    # so they are human-readable.
    version = 'Version'
    revisions = 'This version contain revisions'
    # '<rev1>:<rev2>' pair of the last comparison where a diff was found (set in Bisect._diff_rev())
    current_revisions = 'Narrow search diff to revisions'
    # documents at the inspected position for the older / newer compared revision
    old_doc = 'Old doc'
    new_doc = 'New doc'
    # final outcome of the bisect
    result = 'Result'
    src_acc = '100% inaccessible sources'


# Bisect info keys in the order they are listed in Bisect.footer
_BISECT_INFO_LIST = (
    BisectInfo.version,
    BisectInfo.revisions,
    BisectInfo.current_revisions,
    BisectInfo.old_doc,
    BisectInfo.new_doc,
    BisectInfo.result,
    BisectInfo.src_acc,
)


class Bisect(SandboxTask, object):
    """
        Base class with bisect search algo (search revision - which cause diff on given url-request).
        Inheritor MUST implement get_docs() method.

        State used by the algorithm:
          * self.cache -- {revision: {requrl: docs}}, results of get_docs() collected during bisect();
            copied to ctx['cache'] when bisect() finishes (this copy is rendered by footer)
          * ctx['bisect_info'] -- {BisectInfo.<key>: value}, rendered by footer
    """
    execution_space = 80000
    environment = (
        environments.SvnEnvironment(),
    )
    input_parameters = Params.lst

    def __init__(self, task_id=0):
        super(Bisect, self).__init__(task_id)
        self.local_host = None
        self.doc_attrs = []
        self.cache = collections.defaultdict(dict)

    # clients in LXC containers don't have host names, only v6 address
    # see SEARCH-3333 for details why we should ignore them
    client_tags = Tag.GENERIC & Tag.Group.LINUX & ~Tag.LXC

    def on_enqueue(self):
        """
            Create necessary resource(s) and save them in context before
            executing the task.
            @return: None
        """
        super(Bisect, self).on_enqueue()
        channel.task = self
        self.ctx['bisect_info'] = {}
        self.ctx['cache'] = {}

    def bisect_info(self):
        """
            @return: dict with bisect process description stored in the task context
        """
        return self.ctx['bisect_info']

    def set_bisect_info(self, pname, pval):
        """
            Store one item of bisect process description in the task context.
            @param pname: item key (one of BisectInfo values)
            @param pval: item value
        """
        self.ctx['bisect_info'][pname] = pval

    @property
    def footer(self):
        """
            Build task footer: a table with bisect info items (in _BISECT_INFO_LIST order,
            empty values are skipped) and, when ctx['cache'] is not empty, a table with
            documents fetched for every revision (the document at the inspected position is bold).
            @return: list of footer sections
        """
        body = {
            "c1": [],
            "c2": [],
        }
        for pname in _BISECT_INFO_LIST:
            pval = self.bisect_info().get(pname)
            if pval:
                body["c1"].append(pname)
                body["c2"].append(str(pval))
        result = [
            {
                "helperName": '',
                "content": {
                    "<h3>Bisect process description</h3>": {
                        "header": [
                            {"key": "c1", "title": "Params"},
                            {"key": "c2", "title": "Values"},
                        ],
                        "body": body,
                    }
                },
            },
        ]
        position = self.ctx[Params.Position.name] - 1
        cache = self.ctx.get('cache')
        if cache:
            # (column key, docs) pairs ordered by cache key.
            # A cache entry can be an empty dict: _diff_rev() touches self.cache[revision]
            # before get_docs() is called, so the entry stays empty when get_docs() raises.
            # Such entries have nothing to show and are skipped.
            runs = []
            for revision in sorted(cache):
                docs_by_url = cache[revision]
                if not docs_by_url:
                    continue
                # docs of the first stored request url
                runs.append(("r{}".format(revision), next(iter(docs_by_url.values()))))
            result.append(
                {
                    "helperName": '',
                    "content": {
                        "<h3>Runs</h3>": {
                            "header": [
                                {
                                    "key": column,
                                    "title": column,
                                } for column, _ in runs
                            ],
                            "body": {
                                column: [
                                    "<b>{}</b>".format(item) if position == i else item
                                    for i, item in enumerate(docs)
                                ] for column, docs in runs
                            }
                        }
                    }
                }
            )
        return result

    def get_docs(self, row, requrl):
        """
            Get documents for the given build and request (MUST BE implemented in inheritor).
            @param row: build info row (item of build_info list passed to bisect())
            @param requrl: request url
            @return: sequence of documents; it is indexed by 0-based position and passed to len()
        """
        raise NotImplementedError('MUST BE implemented in inheritor')

    def bisect(self, build_info):
        """
            main method for call from inheritor

            @param build_info: list of build rows; every row is read by keys
                'revision' and 'task_status' (and 'task_id' for logging)
        """
        self.cache = collections.defaultdict(dict)  # {rev: {requrl: docs1}}
        # FIXME(mvel): use sandbox.projects.common.network::get_my_ipv6()
        self.local_host = socket.gethostname()
        logging.info('Use localhost address for redirect requests: %s', self.local_host)

        self.doc_attrs = []  # TODO: calc diff by doc attrs (_SerpData, etc)

        url = utils.get_or_default(self.ctx, Params.Request)
        user_query = utils.get_or_default(self.ctx, Params.UserQuery)
        if user_query:
            # replace &text=... cgi parameter with the user query
            url = re.sub(r'&text=[^&]+', '', url) + '&' + urllib.urlencode({'text': string.all_to_str(user_query)})

        try:
            self._bisect_bi(url, url_position_list=[self.ctx[Params.Position.name] - 1], build_info=build_info)
        finally:
            # keep collected docs in the context even on failure (they are shown in footer)
            logging.info("***************************************** Cache: %s", self.cache)
            self.ctx['cache'] = dict(self.cache)

    def _bisect_bi(self, requrl, url_position_list, build_info):
        """
            Select builds with SUCCESS status, skip revisions which fail to give docs
            on both ends of the range and run bisect when the ends differ.

            @param requrl: request url
            @param url_position_list: 0-based document positions to compare
            @param build_info: list of build rows
        """
        good_rev = [rev for rev in build_info if rev['task_status'] == 'SUCCESS']
        logging.debug("==== GOOD REVISIONS: ====")
        for rev in good_rev:
            logging.debug("%s\n", rev)
        logging.debug("==== (END OF 'GOOD REVISIONS') ====")

        if not good_rev:
            # nothing to compare (indexing an empty list below would raise IndexError)
            result_descr = 'no successful builds to compare'
            logging.debug("RESULT: %s", result_descr)
            self.set_bisect_info(BisectInfo.result, result_descr)
            return

        min_rev_idx = 0
        max_rev_idx = len(good_rev) - 1
        # move right bound to the left while it points to a revision with errors
        while min_rev_idx < max_rev_idx:
            if not self._does_have_erros(requrl, good_rev[max_rev_idx]):
                break
            max_rev_idx -= 1
        # move left bound to the right while it points to a revision with errors
        while min_rev_idx < max_rev_idx:
            if not self._does_have_erros(requrl, good_rev[min_rev_idx]):
                break
            min_rev_idx += 1
        if not self._diff_rev(requrl, good_rev[min_rev_idx], good_rev[max_rev_idx], url_position_list):
            logging.debug("RESULT: NO DIFF")
            self.set_bisect_info(BisectInfo.result, 'no diff')
        else:
            # Search inside the narrowed range: revisions skipped above are cached with
            # error placeholders instead of docs and must not be used as range bounds.
            self._bisect(requrl, good_rev, min_rev_idx, max_rev_idx, url_position_list)

    def _diff_rev(self, requrl, rev1, rev2, url_posistion_list):
        """
            Compare documents of two revisions at given positions
            (docs are taken from self.cache or fetched with get_docs() and cached).

            @param requrl: request url
            @param rev1: build row of the older revision
            @param rev2: build row of the newer revision
            @param url_posistion_list: 0-based document positions to compare
            @return: True if documents differ at some position, False otherwise
        """
        cmp_descr = '==== compare revision {} vs {} ===='.format(rev1['revision'], rev2['revision'])
        with self.current_action(cmp_descr):
            if not self.cache[rev1['revision']].get(requrl):
                self.cache[rev1['revision']][requrl] = self.get_docs(rev1, requrl)
            docs1 = self.cache[rev1['revision']].get(requrl)

            if not self.cache[rev2['revision']].get(requrl):
                self.cache[rev2['revision']][requrl] = self.get_docs(rev2, requrl)
            docs2 = self.cache[rev2['revision']].get(requrl)

            for un in url_posistion_list:
                # position beyond the end of the response is treated as a special document
                doc1 = docs1[un] if un < len(docs1) else '[document not exists]'
                doc2 = docs2[un] if un < len(docs2) else '[document not exists]'
                if doc1 != doc2:
                    self.set_bisect_info(
                        BisectInfo.current_revisions,
                        '{}:{}'.format(rev1['revision'], rev2['revision'])
                    )
                    # TODO: docs list?
                    self.set_bisect_info(BisectInfo.old_doc, doc1)
                    self.set_bisect_info(BisectInfo.new_doc, doc2)
                    diff_descr = 'DIFF urls for position {}\n{} <-> {}\n{} <-> {}'.format(
                        un + 1, doc1, doc2, format_row(rev1), format_row(rev2)
                    )
                    logging.debug(diff_descr)
                    return True
                else:
                    # remember docs of the first comparison even if they are equal
                    if BisectInfo.old_doc not in self.ctx['bisect_info']:
                        self.set_bisect_info(BisectInfo.old_doc, doc1)
                        self.set_bisect_info(BisectInfo.new_doc, doc2)
                logging.debug("Equal urls in pos[{}]: {}".format(un + 1, doc1))
        logging.debug('---- revision {} and {} SIMILAR ----'.format(rev1['revision'], rev2['revision']))
        return False

    def _does_have_erros(self, requrl, rev):
        """
            Check that docs for the revision can be obtained.
            Docs are fetched with get_docs() only when they are not cached yet;
            on failure a list of 30 "<Error!>" placeholders is cached instead of docs.

            NOTE: when anything is already cached for the revision (including placeholders
            stored by a previous failed check) nothing is fetched and False is returned.

            @param requrl: request url
            @param rev: build row
            @return: True if get_docs() raised an exception, False otherwise
        """
        cmp_descr = '==== Checking revision {} for errors ===='.format(rev['revision'])
        with self.current_action(cmp_descr):
            if not self.cache[rev['revision']].get(requrl):
                try:
                    self.cache[rev['revision']][requrl] = self.get_docs(rev, requrl)
                except Exception as e:
                    eh.log_exception('==== Revision {} got error ===='.format(rev['revision']), e)
                    self.cache[rev['revision']][requrl] = ["<Error!>"] * 30
                    return True

        logging.info('==== Revision {} got no errors ===='.format(rev['revision']))
        return False

    def _bisect(self, requrl, rows, rev_min_idx, rev_max_idx, url_position_list):
        """
            Recursive binary search of the revision which caused the diff
            between rows[rev_min_idx] and rows[rev_max_idx].

            @param requrl: request url
            @param rows: list of build rows ordered by revision
            @param rev_min_idx: index of the left bound of the search range
            @param rev_max_idx: index of the right bound of the search range
            @param url_position_list: 0-based document positions to compare
            @return: found revision (rows[rev_max_idx]['revision'] of the range narrowed
                to two neighbours) or None for an empty range
            @raise Exception: when the middle revision has errors and the diff can not be
                attributed to a range on one side of the broken revisions
        """
        logging.info("search diff inside revisions [{}(idx={})..{}(idx={})]".format(
            rows[rev_min_idx]['revision'], rev_min_idx, rows[rev_max_idx]['revision'], rev_max_idx))
        if rev_min_idx == rev_max_idx:
            result_descr = "EMPTY search range: {}".format(rev_max_idx)
            logging.debug(result_descr)
            self.set_bisect_info(BisectInfo.result, result_descr)
            return None
        # if _diff_rev(good_rev[rev_min_idx], good_rev[rev_max_idx], [1]):
        if (rev_max_idx - rev_min_idx) == 1:
            diff_rev = rows[rev_max_idx]['revision']
            result_descr = 'found diff revision: {}'.format(diff_rev)
            logging.debug(result_descr)
            self.set_bisect_info(
                BisectInfo.result,
                (
                    '<a href="https://a.yandex-team.ru/arc/commit/{}"> diff revision r{}</a>'.format(
                        diff_rev, diff_rev
                    )
                )
            )
            # TODO: check if has NOT_SUCCESS revisions in range
            return diff_rev
        # explicit floor division: the result is used as a list index
        rev_middle_idx = (rev_min_idx + rev_max_idx) // 2

        def print_diff(r1, r2):
            logging.debug('+++ revisions {} and {} are different +++'.format(r1, r2))

        if self._does_have_erros(requrl, rows[rev_middle_idx]):
            # middle revision is broken: look for the nearest usable revisions on both sides
            l_rev = rev_middle_idx - 1
            r_rev = rev_middle_idx + 1
            while l_rev > rev_min_idx:
                if not self._does_have_erros(requrl, rows[l_rev]):
                    break
                l_rev -= 1
            while r_rev < rev_max_idx:
                if not self._does_have_erros(requrl, rows[r_rev]):
                    break
                r_rev += 1
            # Now it looks like this:
            # [rev_min_idx,        ...,     l_rev, ..., rev_middle_idx, ...,  r_rev,       ...,      rev_max_idx]
            # [    Good,    Hopefully Good,  Good,  Bad,      Bad,      Bad,   Good, Hopefully Good,     Good   ]
            if not self._diff_rev(requrl, rows[rev_min_idx], rows[l_rev], url_position_list):
                if not self._diff_rev(requrl, rows[rev_min_idx], rows[r_rev], url_position_list):
                    return self._bisect(requrl, rows, r_rev, rev_max_idx, url_position_list)
                if not self._diff_rev(requrl, rows[r_rev], rows[rev_max_idx], url_position_list):
                    # TODO better handling
                    raise Exception("Looks like diff happened while component was broken. Read the logs.")
            if not self._diff_rev(requrl, rows[l_rev], rows[rev_max_idx], url_position_list):
                if not self._diff_rev(requrl, rows[rev_min_idx], rows[r_rev], url_position_list):
                    raise Exception("WTF flaps detected. Read the logs.")  # TODO better handling
                if not self._diff_rev(requrl, rows[r_rev], rows[rev_max_idx], url_position_list):
                    return self._bisect(requrl, rows, rev_min_idx, l_rev, url_position_list)
            raise Exception("Everything is really messed-up if we here. Read the logs.")  # TODO better handling

        if self._diff_rev(requrl, rows[rev_min_idx], rows[rev_middle_idx], url_position_list):
            print_diff(rows[rev_min_idx]['revision'], rows[rev_middle_idx]['revision'])
            return self._bisect(requrl, rows, rev_min_idx, rev_middle_idx, url_position_list)
        else:
            print_diff(rows[rev_middle_idx]['revision'], rows[rev_max_idx]['revision'])
            return self._bisect(requrl, rows, rev_middle_idx, rev_max_idx, url_position_list)

    @staticmethod
    @decorators.retries(6, delay=10, backoff=2)
    def _read(url):
        """
            Request url with _CURL_UA as User-Agent (wrapped with decorators.retries).
            :return: response body
        """
        if sys.version_info[0] == 2:
            if isinstance(url, six.text_type):
                url = url.encode('utf-8')
        req = urllib2.Request(
            url,
            headers={
                'User-Agent': _CURL_UA,
            },
        )
        logging.debug("Requesting: %s", url)
        u = urllib2.urlopen(req)
        try:
            # close the connection even when reading the response fails
            return u.read()
        finally:
            u.close()

    @staticmethod
    def read(url):
        """
            Get data from url (with few attempts) - helper for inheritors
            :return: response from asked url
        """
        # pause before every request
        time.sleep(3)
        return Bisect._read(url)