# coding: utf-8

import base64
import datetime
import json
import os
import sys
import tarfile
import tempfile
import time
import urllib

import sandbox.common.types.client as ctc

from sandbox.projects import resource_types
from sandbox.projects.common.arcadia import sdk as arcadiasdk
from sandbox.projects.common.nanny import nanny
from sandbox.sandboxsdk.task import SandboxTask
from sandbox.sandboxsdk.parameters import SandboxSvnUrlParameter, SandboxBoolParameter, SandboxStringParameter
from sandbox.sandboxsdk.process import run_process
from sandbox.sandboxsdk.svn import Arcadia
from sandbox.sandboxsdk.errors import SandboxTaskFailureError, SandboxSvnError
from sandbox.sandboxsdk.sandboxapi import RESOURCE_READY
from sandbox.sandboxsdk.channel import channel
from sandbox.sandboxsdk import paths

from sandbox import sdk2
from sandbox.sdk2 import yav


DEFAULT_SVN_REPO = 'svn+ssh://arcadia.yandex.ru/arc/trunk'
DATA_DIR = 'data'
BAD_USER_AGENTS = 'bad_user_agents.lst'
L_KEYS_MIN_SIZE = 1000


class AntirobotScriptsSvnUrlParameter(SandboxSvnUrlParameter):
    # Task input: SVN URL of the antirobot/scripts directory; the default
    # points into DEFAULT_SVN_REPO.
    name = 'antirobot_scripts_svn_url'
    default_value = "%s/arcadia/antirobot/scripts/" % (DEFAULT_SVN_REPO,)
    description = 'SVN full path to antirobot/scripts (e.g. %s)' % (default_value,)


class BadUserAgentsDirSvnUrlParameter(SandboxSvnUrlParameter):
    # Task input: SVN URL of the directory that holds the BAD_USER_AGENTS file.
    name = 'bad_user_agents_dir_svn_url'
    default_value = "%s/data/quality/" % (DEFAULT_SVN_REPO,)
    description = 'SVN full path to dir contains %s (e.g. %s)' % (
        BAD_USER_AGENTS,
        default_value,
    )


class GeoData6BinXurmaHttpUrlParameter(SandboxStringParameter):
    # Task input: HTTP URL from which geodata6-xurma.bin is downloaded.
    name = 'geodata6_bin_xurma_http_url'
    default_value = 'https://proxy.sandbox.yandex-team.ru/last/GEODATA6BIN_XURMA_STABLE'
    description = 'geodata6-xurma.bin source (e.g. %s)' % (default_value,)


class LKeysHttpUrlParameter(SandboxStringParameter):
    # Task input: HTTP URL from which L-cookie-keys.txt is downloaded.
    name = 'l_keys_http_url'
    default_value = 'http://blackbox.yandex.net/lrandoms.txt'
    description = 'L-cookie-keys.txt source (e.g. %s)' % (default_value,)


class CheckForEmptyParameter(SandboxBoolParameter):
    # Task input: when set, the task fails if a collected file in DATA_DIR is
    # empty. Enabled by default.
    name = 'check_for_empty_files'
    default_value = True
    description = 'All files in %s/ must by not empty' % (DATA_DIR,)


class Yt2ProtoResourceIdParameter(SandboxStringParameter):
    # Task input: id of the Sandbox resource that contains the yt2proto binary.
    description = 'antirobot/tools/yt2proto sandbox resource id'
    name = 'yt2proto_resource_id'


class HypocrisyToolsResourceIdParameter(SandboxStringParameter):
    # Task input: id of the Sandbox resource that contains the hypocrisy tools.
    description = 'antirobot/lib/hypocrisy/tools sandbox resource id'
    name = 'hypocrisy_tools_resource_id'


class CollectAntirobotData(nanny.ReleaseToNannyTask, SandboxTask):
    """
        Download the antirobot data, pack it into a tar archive and save it as a resource
    """

    # Sandbox task type identifier; also passed to Nanny as task_type on release.
    type = 'COLLECT_ANTIROBOT_DATA'

    # NOTE(review): presumably restricts execution to clients tagged IPV4 — confirm.
    client_tags = ctc.Tag.IPV4


    # Parameter classes exposed as task inputs; their values are read through
    # self.ctx[<parameter name>].
    input_parameters = (
        AntirobotScriptsSvnUrlParameter,
        BadUserAgentsDirSvnUrlParameter,
        GeoData6BinXurmaHttpUrlParameter,
        LKeysHttpUrlParameter,
        CheckForEmptyParameter,
        Yt2ProtoResourceIdParameter,
        HypocrisyToolsResourceIdParameter,
    )

    def on_release(self, additional_parameters):
        """
        Mark the task resources as released and push the new data to Nanny.

        :param additional_parameters: release parameters; only the
            ``release_status`` entry is read here.
        """
        release_status = additional_parameters["release_status"]
        self.mark_released_resources(release_status)

        oauth_token = yav.Secret("sec-01fg1xh9n1r1sf3s25fz3vevsy").data()["nanny-oauth-token"]
        client = nanny.NannyClient(api_url='http://nanny.yandex-team.ru/', oauth_token=oauth_token)

        # Every listed Nanny service gets its sandbox file switched to this
        # task and is redeployed right away.
        target_services = (
            "prod_antirobot_yp_prestable",
            "prod_antirobot_yp_man",
            "prod_antirobot_yp_sas",
            "prod_antirobot_yp_vla",
        )
        this_task_id = str(self.id)
        bump_comment = 'Bump COLLECT_ANTIROBOT_DATA to {}'.format(self.id)
        for service_id in target_services:
            # FIXME: it may be dangerous if some deployment is in progress, because it will be interrupted
            client.update_service_sandbox_file(
                service_id=service_id,
                task_type=self.type,
                task_id=this_task_id,
                deploy=True,
                comment=bump_comment,
            )

    def UpdateKeysFile(self, dataPath):
        """
        Regenerate the antirobot keys file and publish it as a new resource.

        The READY ``ANTIROBOT_KEYS`` resource with the largest timestamp is
        synced and copied to ``<dataPath>/keys``. ``genkeys.py`` (expected in
        the task directory) is then run on that copy, and the result is
        registered as a new ``ANTIROBOT_KEYS`` resource.

        :param dataPath: directory that receives the updated ``keys`` file.
        :raises SandboxTaskFailureError: if no previous ``ANTIROBOT_KEYS``
            resource can be found.
        """
        KEYS_FILE = 'keys'
        resources = channel.sandbox.list_resources('ANTIROBOT_KEYS', status=RESOURCE_READY) or []

        # Pick the most recent resource; on equal timestamps the first one wins.
        latestResource = None
        latest = 0
        for r in resources:
            if r.timestamp > latest:
                latestResource = r
                latest = r.timestamp

        # Raise explicitly instead of asserting: asserts are stripped under -O,
        # and the rest of the method cannot work without a previous resource.
        if latestResource is None:
            raise SandboxTaskFailureError("Can't find previous ANTIROBOT_KEYS resource")

        updatedKeysFile = os.path.join(dataPath, KEYS_FILE)
        self.sync_resource(latestResource.id)
        paths.add_read_permissions_for_path(latestResource.path)
        paths.copy_path(latestResource.path, updatedKeysFile)
        # genkeys.py receives the copy as its only argument, so the copy is
        # made writable first.
        paths.add_write_permissions_for_path(updatedKeysFile)

        # Pass argv as a list so that paths are never re-split on whitespace.
        run_process(['python', self.path('genkeys.py'), updatedKeysFile], log_prefix='genkeys')

        self.remove_resource_files(latestResource.id)

        # The new resource is created from a copy at the relative path 'keys'.
        paths.copy_path(updatedKeysFile, KEYS_FILE)
        self.create_resource(
            'antirobot keys file %s' % str(datetime.date.today()),
            KEYS_FILE,
            resource_types.ANTIROBOT_KEYS,
            attributes={'ttl': 32},
        )

    def update_dictionaries(self, data_path):
        """
        Dump every configured YT dictionary into ``<data_path>/dictionaries``.

        The yt2proto binary is downloaded from the resource given by the
        ``yt2proto_resource_id`` parameter. The list of dictionaries comes from
        ``antirobot/config/global_config.json`` in the mounted arc trunk. For
        each dictionary of type ``yt``, yt2proto is run with options chosen by
        the dictionary's ``key_type``.

        :param data_path: root directory of the collected data.
        """
        # key_type -> (--type, --key-hash, --row-func, --rename-column values)
        conversions = {
            'cityhash64': ('TCityHash64FloatRecord', 'cityhash64', 'none', None),
            'uid': ('TUidRecord', 'none', 'none', ['yuid:Key']),
            'custom_geobase': ('TStringRecord', 'none', 'none', None),
            'mini_geobase': ('TFixed64Record', 'none', 'mini_geobase', None),
            'jws_stats': ('TMarketJwsStatesStats', 'none', 'none', [
                'jws:Key',
                'jws_state_is_default_expired_ratio:JwsStateIsDefaultExpiredRatio',
                'jws_state_is_default_ratio:JwsStateIsDefaultRatio',
                'jws_state_is_invalid_ratio:JwsSateIsInvalidRatio',
                'jws_state_is_susp_expired_ratio:JwsStateIsSuspExpiredRatio',
                'jws_state_is_susp_ratio:JwsStateIsSuspRatio',
                'jws_state_is_valid_expired_ratio:JwsStateIsValidExpiredRatio',
                'jws_state_is_valid_ratio:JwsStateIsValidRatio',
            ]),
            'market_stats': ('TMarketStats', 'cityhash64', 'none', [
                'entity:Key',
                'blocked_cnt_ratio:BlockedCntRatio',
                'catalog_reqs_cnt_ratio:CatalogReqsCntRatio',
                'enemy_cnt_ratio:EnemyCntRatio',
                'enemy_redirects_cnt_ratio:EnemyRedirectsCntRatio',
                'fuid_cnt_ratio:FuidCntRatio',
                'hosting_cnt_ratio:HostingCntRatio',
                'icookie_cnt_ratio:IcookieCntRatio',
                'ipv4_cnt_ratio:Ipv4CntRatio',
                'ipv6_cnt_ratio:Ipv6CntRatio',
                'login_cnt_ratio:LoginCntRatio',
                'mobile_cnt_ratio:MobileCntRatio',
                'other_handles_reqs_cnt_ratio:OtherHandlesReqsCntRatio',
                'product_reqs_cnt_ratio:ProductReqsCntRatio',
                'proxy_cnt_ratio:ProxyCntRatio',
                'referer_is_empty_cnt_ratio:RefererIsEmptyCntRatio',
                'referer_is_not_yandex_cnt_ratio:RefererIsNotYandexCntRatio',
                'referer_is_yandex_cnt_ratio:RefererIsYandexCntRatio',
                'robots_cnt_ratio:RobotsCntRatio',
                'search_reqs_cnt_ratio:SearchReqsCntRatio',
                'spravka_cnt_ratio:SpravkaCntRatio',
                'tor_cnt_ratio:TorCntRatio',
                'vpn_cnt_ratio:VpnCntRatio',
                'yndx_ip_cnt_ratio:YndxIpCntRatio',
            ]),
        }
        # Any key_type not listed above is converted as a plain float record.
        fallback_conversion = ('TFloatRecord', 'none', 'none', None)

        # Download the converter binary and make it executable.
        converter_url = channel.sandbox.get_resource_http_links(self.ctx['yt2proto_resource_id'])[0]
        converter_path = self.path('yt2proto')
        urllib.urlretrieve(converter_url, converter_path)
        os.chmod(converter_path, 0o755)

        output_dir = os.path.join(data_path, 'dictionaries')
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)

        # The config is read from arc trunk; the mount is only needed while
        # the JSON is being parsed.
        with arcadiasdk.mount_arc_path('arcadia-arc:/#trunk') as arc_root:
            config_path = os.path.join(arc_root, 'antirobot/config/global_config.json')
            with open(config_path) as config_file:
                config = json.load(config_file)

        meta_by_name = dict((meta['name'], meta) for meta in config['dictionaries_meta'])

        for dictionary_name in config['dictionaries']:
            meta = meta_by_name[dictionary_name]
            if meta['type'] != 'yt':
                continue

            row_type, key_hash, row_func, renames = conversions.get(meta['key_type'], fallback_conversion)

            command = [
                converter_path,
                '--proxy', meta['proxy'],
                '--type', row_type,
                '--input', meta['path'],
                '--output', os.path.join(output_dir, dictionary_name),
                '--key-hash', key_hash,
                '--row-func', row_func,
            ]
            for rename in renames or ():
                command += ['--rename-column', rename]

            run_process(command, log_prefix='yt2proto')

    def fetch_yasc_key(self, data_path):
        """Write the ``antirobot-yasc-key`` secret value to ``<data_path>/yasc_key``."""
        secret_values = yav.Secret("sec-01f73bsecbwmrrm6tqbr16rpbh").data()
        key_path = os.path.join(data_path, "yasc_key")

        with open(key_path, "w") as key_file:
            key_file.write(secret_values["antirobot-yasc-key"])

    def fetch_spravka_data_key(self, data_path):
        """Write the ``antirobot-spravka-data-key-new`` secret value to ``<data_path>/spravka_data_key.txt``."""
        secret_values = yav.Secret("sec-01f9a0gwwye5nexgxes0f813sh").data()
        key_path = os.path.join(data_path, "spravka_data_key.txt")

        with open(key_path, "w") as spravka_key_file:
            spravka_key_file.write(secret_values["antirobot-spravka-data-key-new"])

    def fetch_market_jws_key(self, data_path):
        """
        Extract the market ``jwt-secret`` and store it base64-encoded.

        The ``all`` entry of the market secret is base64-decoded and read as
        ``name=value`` lines. The value of the first ``jwt-secret`` line is
        base64-encoded and written to ``<data_path>/market_jws_key``.

        :param data_path: root directory of the collected data.
        :raises Exception: if no ``jwt-secret`` line is present.
        """
        encoded_blob = yav.Secret("sec-01ehw6eazgtqx7ytypaye8kmy1").data()["all"]
        decoded_blob = base64.b64decode(encoded_blob)

        for entry in decoded_blob.splitlines():
            name, separator, value = entry.partition("=")
            # Lines without '=' carry no key/value pair and are ignored.
            if separator and name.strip() == "jwt-secret":
                jws_key = value.strip()
                break
        else:
            raise Exception("jwt-secret missing in market keys")

        jws_key_encoded = base64.b64encode(jws_key.strip().encode())

        target_path = os.path.join(data_path, "market_jws_key")
        with open(target_path, "w") as target_file:
            target_file.write(jws_key_encoded)

    def fetch_narwhal_jws_key(self, data_path):
        """Write the ``antirobot_device_validator_signing_key`` secret value to ``<data_path>/narwhal_jws_key``."""
        secret_values = yav.Secret("sec-01f8swwters094deh0ckphy8xa").data()
        signing_key = secret_values["antirobot_device_validator_signing_key"]

        key_path = os.path.join(data_path, "narwhal_jws_key")
        with open(key_path, "w") as key_file:
            key_file.write(signing_key)

    def fetch_autoru_offer_salt(self, data_path):
        """Write the ``salt`` entry of the autoru offer secret to ``<data_path>/autoru_offer_salt.txt``."""
        secret_values = yav.Secret("sec-01fscdrr7dy747pdt42p5tbs7a").data()
        offer_salt = secret_values["salt"]

        salt_path = os.path.join(data_path, "autoru_offer_salt.txt")
        with open(salt_path, "w") as out:
            out.write(offer_salt)

    def fetch_autoru_tamper_salt(self, data_path):
        """Write the ``salt`` entry of the autoru tamper secret to ``<data_path>/autoru_tamper_salt``."""
        secret_values = yav.Secret("sec-01f8pry9x85104485k4tgz298j").data()
        tamper_salt = secret_values["salt"]

        salt_path = os.path.join(data_path, "autoru_tamper_salt")
        with open(salt_path, "w") as out:
            out.write(tamper_salt)

    def update_greed(self, data_path):
        """
        Rebuild the hypocrisy bundle in ``<data_path>/hypocrisy``.

        The descriptor key secret is written to a temporary file. The tools
        (``greed.js``, ``hypocrisy_prepare``, ``hypocrisy_make_bundle``) are
        downloaded from the resource given by the ``hypocrisy_tools_resource_id``
        parameter. ``hypocrisy_make_bundle`` is then run with the existing
        bundle directory as both the previous and the new output.

        :param data_path: root directory of the collected data.
        """
        descriptor_key = yav.Secret("sec-01ff7ze3peagm7zffbxbevq6xm").data()["antirobot_hypocrisy_descriptor_key"]

        # The key file has to exist for the whole duration of the tool run, so
        # everything below happens inside the temp-file context.
        with tempfile.NamedTemporaryFile("w") as key_file:
            key_file.write(descriptor_key)
            key_file.flush()

            tools_url = channel.sandbox.get_resource_http_links(self.ctx["hypocrisy_tools_resource_id"])[0]

            tools_dir = self.path("hypocrisy_tools")
            if not os.path.exists(tools_dir):
                os.mkdir(tools_dir)

            executables = ("hypocrisy_prepare", "hypocrisy_make_bundle")
            all_tools = ("greed.js",) + executables
            tool_paths = dict((tool, os.path.join(tools_dir, tool)) for tool in all_tools)

            for tool in all_tools:
                urllib.urlretrieve(tools_url + "/" + tool, tool_paths[tool])

            for tool in executables:
                os.chmod(tool_paths[tool], 0o755)

            bundle_dir = os.path.join(data_path, "hypocrisy")

            # Report when the previous bundle, if any, was generated.
            previous_info_path = os.path.join(bundle_dir, "bundle.json")
            if os.path.exists(previous_info_path):
                with open(previous_info_path) as previous_info_file:
                    previous_info = json.load(previous_info_file)

                print >>sys.stderr, "Previous greed generation time:", previous_info.get("generation_time")

            obfuscation_overrides = json.dumps({
                "controlFlowFlatteningThreshold": 0.25,
                "deadCodeInjectionThreshold": 0.25,
            })

            make_bundle_cmd = [
                tool_paths["hypocrisy_make_bundle"],
                "--prev-output", bundle_dir,
                "--output", bundle_dir,
                "--size", "3",
                "--greed", tool_paths["greed.js"],
                "--descriptor-key", key_file.name,
                "--contrib-obfuscate",
                "--contrib-obfuscate-override", obfuscation_overrides,
                "--rename-greed", "PGreed",
            ]
            run_process(make_bundle_cmd, log_prefix="hypocrisy_make_bundle")

    def on_execute(self):
        """
        Collect all antirobot data, pack it and publish it as a resource.

        Steps, in order:

        1. export the support lists and helper scripts from SVN;
        2. generate the IP lists with ``genaccessip.py`` into the data dir;
        3. download L-cookie-keys.txt and geodata6-xurma.bin;
        4. optionally check that no collected file is empty and that
           L-cookie-keys.txt has at least ``L_KEYS_MIN_SIZE`` lines;
        5. update keys, dictionaries, secrets and the greed bundle;
        6. write ``timestamp.txt``, pack the data dir into ``data.tar.gz`` and
           register it as an ``ANTIROBOT_DATA`` resource.

        :raises SandboxTaskFailureError: if the emptiness check is enabled and
            a file is empty or L-cookie-keys.txt is too short.
        """
        os.environ['YT_TOKEN'] = yav.Secret("sec-01fg4770qgt52nh5nezt33msbr").data()["hahn"]

        svn_url_info = Arcadia.info(self.ctx['antirobot_scripts_svn_url'])
        revision, tag, branch = self.arcadia_info()
        sub_path = branch if branch is not None else tag

        data_path = self.path(DATA_DIR)
        # The directory already exists when the task is rerun. Any other mkdir
        # failure is a real error and is reported right here instead of
        # surfacing later as an unrelated write failure.
        if not os.path.isdir(data_path):
            os.mkdir(data_path)

        self.current_action('SVN exports')
        exportedLists = [
            ('support/privileged_ips',            'privileged_ips'),
            ('support/whitelist_ips.txt',         'whitelist_ips.txt'),
            ('support/whitelist_ips_all.txt',     'whitelist_ips_all.txt'),
            ('support/whitelist_ips_market.txt',  'whitelist_ips_market.txt'),
            ('support/whitelist_ips_news.txt',    'whitelist_ips_news.txt'),
            ('support/genaccessip.py',            'genaccessip.py'),
            ('support/yandex_ips.txt',            'yandex_ips.txt'),
            ('support/special_ips.txt',           'special_ips.txt'),
            ('support/ua_proxy.txt',              'ua_proxy.txt'),
            ('support/trbosrvnets.txt',           'trbosrvnets.txt'),
            ('genkeys/genkeys.py',                'genkeys.py'),
        ]
        for from_, to in exportedLists:
            Arcadia.export('%s/%s' % (svn_url_info['url'], from_), self.path(to), revision)
        Arcadia.export(
            '%s/%s' % (self.ctx['bad_user_agents_dir_svn_url'], BAD_USER_AGENTS),
            os.path.join(data_path, BAD_USER_AGENTS),
            revision,
        )

        self.current_action('IPs lists generation')
        # (name of the generated file in the data dir, exported source file)
        generatedLists = [
            ('privileged_ips', 'privileged_ips'),
            ('special_ips', 'special_ips.txt'),
            ('trbosrvnets', 'trbosrvnets.txt'),
            ('yandex_ips', 'yandex_ips.txt'),
            ('whitelist_ips', 'whitelist_ips.txt'),
            ('whitelist_ips_market', 'whitelist_ips_market.txt'),
            ('whitelist_ips_news', 'whitelist_ips_news.txt'),
            ('whitelist_ips_all', 'whitelist_ips_all.txt'),
            ('ua_proxy_ips', 'ua_proxy.txt'),
        ]
        for name, realName in generatedLists:
            # argv is passed as a list so that paths are never re-split on
            # whitespace.
            run_process(
                [
                    'python', self.path('genaccessip.py'),
                    '--src-dir', self.path('.'),
                    '--src-file', realName,
                    '--out-file', name + '.new',
                ],
                log_prefix='genaccessip',
            )
            # The list is generated under a temporary name and moved into the
            # data dir only after the generator has finished.
            os.rename(self.path(name + '.new'), os.path.join(data_path, name))

        # (task parameter holding the URL, file name in the data dir)
        fetchedData = [
            ('l_keys_http_url', 'L-cookie-keys.txt'),
            ('geodata6_bin_xurma_http_url', 'geodata6-xurma.bin'),
        ]
        for from_, to in fetchedData:
            self.current_action('Fetching %s' % to)
            urllib.urlretrieve(self.ctx[from_], os.path.join(data_path, to))

        if self.ctx['check_for_empty_files']:
            self.current_action('Checking')
            for file_name in os.listdir(data_path):
                # whitelist_ips_all is the only file exempt from the check.
                if file_name == "whitelist_ips_all":
                    continue
                file_path = os.path.join(data_path, file_name)
                if os.path.isfile(file_path) and os.path.getsize(file_path) == 0:
                    raise SandboxTaskFailureError("Empty file: %s" % file_name)
            # Count lines with the file closed deterministically (the handle
            # used to be leaked).
            with open(os.path.join(data_path, 'L-cookie-keys.txt'), 'r') as l_keys_file:
                l_keys_size = sum(1 for _ in l_keys_file)
            if l_keys_size < L_KEYS_MIN_SIZE:
                raise SandboxTaskFailureError("L-cookie-keys.txt to small: %d lines" % l_keys_size)

        self.current_action('Updating antirobot keys file')
        self.UpdateKeysFile(data_path)

        self.current_action('Updating dictionaries')
        self.update_dictionaries(data_path)

        self.current_action("Updating yet another secret")
        self.fetch_yasc_key(data_path)

        self.current_action("Updating spravka data secret")
        self.fetch_spravka_data_key(data_path)

        self.current_action("Updating market jws key")
        self.fetch_market_jws_key(data_path)

        self.current_action("Updating narwhal jws key")
        self.fetch_narwhal_jws_key(data_path)

        self.current_action("Updating greed.js")
        self.update_greed(data_path)

        self.current_action("Updating autoru offer salt")
        self.fetch_autoru_offer_salt(data_path)

        self.current_action("Updating autoru tamper salt")
        self.fetch_autoru_tamper_salt(data_path)

        # Unix time of the moment the data set was assembled.
        with open(os.path.join(data_path, "timestamp.txt"), "wt") as ts:
            ts.write("%d\n" % int(time.time()))

        self.current_action('Pack to .tgz')
        data_tgz_path = self.path(DATA_DIR + '.tar.gz')
        with tarfile.open(data_tgz_path, 'w:gz') as tar:
            tar.add(data_path, arcname=DATA_DIR)

        self.current_action('Creating resource')
        self.create_resource(
            'antirobot data from %s r%s' % (sub_path, revision),
            data_tgz_path,
            resource_types.ANTIROBOT_DATA,
            arch='any',
        )

        self.current_action('Done!')

    def arcadia_info(self):
        """
        Work out revision, tag and branch from the scripts SVN URL parameter.

        :return: ``(revision, tag, branch)``. The revision comes from the URL,
            or from ``Arcadia.info`` when the URL has none. ``branch`` is
            ``'trunk'`` for trunk URLs and ``None`` for tag URLs.
        :raises SandboxSvnError: if the URL has neither a branch, trunk nor a tag.
        """
        svn_url = self.ctx['antirobot_scripts_svn_url']
        url_parts = Arcadia.parse_url(svn_url)

        revision = url_parts.revision
        if revision is None:
            # No revision pinned in the URL: ask SVN for the entry revision.
            revision = Arcadia.info(svn_url)['entry_revision']

        if url_parts.branch is not None:
            branch = url_parts.branch
        elif url_parts.trunk:
            branch = 'trunk'
        else:
            branch = None
            if url_parts.tag is None:
                raise SandboxSvnError("Couldn't extract branch or tag from url: %s" % svn_url)

        return revision, url_parts.tag, branch


__Task__ = CollectAntirobotData

