

import re
import time
import socket
import random
import traceback
import requests
import tenacity

from app.resps_resolver import RespResolver, NO_RESPS
from app.tvm import get_waffles_tvm_ticket
from app.engines.base_engine import BaseEngine
from app.db.db import new_session
from app.db.models import DebbyProject, PipelineScans, DebbyScanResults
from app.engines.utils import target_generator_helper
from app.utils import is_valid_ipv4_address, is_valid_ipv6_address, is_subnet
from app.settings import MOLLY_OAUTH_TOKEN, QLOUD_OAUTH_TOKEN
from app.qloud_helper import QloudHelper, QLOUD_EXT_BASE_URL, QLOUD_INT_BASE_URL
from app.db.models import DebbyPolicyAdditionalOptions, DebbyPolicy
from app.settings import STATES_DEAD, STATES_ALIVE
from app.db.models import DebbyTask, RetryTargets



class MollyEngine(BaseEngine):

    @staticmethod
    def __init__(self):
        self.cache = list()

    @staticmethod
    def _prepare_profile(policy, vhost):
        raise RuntimeError

    @staticmethod
    def _get_target_list_from_db(prev_scan_id):
        """
            Collect the distinct IPs of the enabled results of a previous scan.

        :param prev_scan_id: id of the scan whose DebbyScanResults rows are read
        :return:
            List of ``ip`` column values (one entry per distinct value)
        """
        session = new_session()
        try:
            rows = (
                session.query(DebbyScanResults.ip)
                .filter(DebbyScanResults.scan_id == prev_scan_id)
                # "== True" is intentional: it builds the SQL comparison expression.
                .filter(DebbyScanResults.enabled == True)  # noqa: E712
                .distinct()
            )
            # Materialize the rows before the session is closed.
            return [ip for (ip, ) in rows]
        finally:
            # Release the session even when the query fails.
            session.close()

    @staticmethod
    def _get_target_list_from_project(project_id):
        """
            Read the comma-separated ``targets`` field of a project.

        :param project_id: id of the DebbyProject row
        :return:
            List of stripped, non-empty target strings
        :raises AttributeError: if no project with this id exists
        """
        session = new_session()
        try:
            project = session.query(DebbyProject).filter(DebbyProject.id == project_id).first()
            raw_targets = project.targets or ''
        finally:
            # Release the session even when the lookup fails.
            session.close()

        # Split on the bare comma so "a,b" and "a, b" are handled alike;
        # surrounding whitespace is stripped and empty entries are dropped.
        return [target.strip() for target in raw_targets.split(',') if target.strip()]

    @staticmethod
    @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=1, max=30),
                    stop=tenacity.stop_after_attempt(10), reraise=True)
    def _get_virtual_hosts(ip):
        """
            Ask the Waffles API for the virtual hosts known for an IP.

            Any exception (network error, timeout, non-JSON body) triggers a
            retry: up to 10 attempts with exponential back-off between 1 and
            30 seconds; the last exception is re-raised.

        :param ip: IP address; also the key looked up in the JSON response
        :return:
            Value stored under ``ip`` in the response, or an empty list when
            the key is absent or null
        """
        waffles_url = 'https://waffles.sec.yandex.net/api/v1/virtual_host/{}'.format(ip)
        tvm_ticket = get_waffles_tvm_ticket()

        # Without a timeout a stalled connection would block forever and the
        # retry policy above would never get a chance to run.
        response = requests.get(waffles_url, headers={'X-Ya-Service-Ticket': tvm_ticket},
                                verify=False, timeout=60)
        vhosts = response.json().get(ip)

        # print('[+] MollyEngine. _get_virtual_hosts. ip: {}. vhost: {}'.format(ip, vhosts))
        return vhosts if vhosts is not None else []

    @staticmethod
    def _get_cgroup(host):
        """
            Pick a group name for a host from the hosts2groups API.

            The response body is read as newline-separated group names. The
            first name containing 'prod' or 'stable' wins; otherwise the first
            non-empty name is used.

        :param host: host name to look up
        :return:
            Chosen group name, or ``host`` itself when the request fails,
            the API answers with an error status, or no group is listed
        """
        try:
            r = requests.get('https://c.yandex-team.ru/api-cached/hosts2groups/%s' % host,
                             verify=False, timeout=60)
        except Exception:
            # Best effort: a failed lookup must not abort target preparation.
            traceback.print_exc()
            return host

        # Any error status (not only 404) carries no usable group list; its
        # body must not be parsed as group names.
        if not r.ok:
            return host

        # Strip first, then drop empties, so whitespace-only lines cannot be picked.
        cgroups = [line.strip() for line in r.text.split('\n')]
        cgroups = [group for group in cgroups if group]
        if not cgroups:
            return host

        for group in cgroups:
            if 'prod' in group or 'stable' in group:
                return group

        return cgroups[0]

    @staticmethod
    def _get_targets(project_id, scan_id=None):
        """
            Get list of targets by Pipeline or project's targets

            When ``scan_id`` is the ``next_scan_id`` of a PipelineScans row,
            the targets are the result IPs of that row's previous scan.
            Otherwise they come from the project's ``targets`` field.

        :param project_id:
        :param scan_id:
        :return:
            List of targets (ip, subnets, macros, domains)
        """

        # print('[+] MollyEngine. _get_targets. scan_id: {}.'.format(scan_id))

        # if in pipeline get from db
        # probably only ip addrs
        target_list = None
        if scan_id:
            session = new_session()
            try:
                scan_pipeline = session.query(PipelineScans) \
                                       .filter(PipelineScans.next_scan_id == scan_id).first()
                # Read the attribute while the session is still open.
                prev_scan_id = scan_pipeline.prev_scan_id if scan_pipeline else None
            finally:
                # Release the session even when the query fails.
                session.close()
            if scan_pipeline:
                target_list = MollyEngine._get_target_list_from_db(prev_scan_id)

        # else get from project targets field
        # ips / subnets / fqdns
        if target_list is None:
            target_list = MollyEngine._get_target_list_from_project(project_id)

        return target_list

    @staticmethod
    def _target_to_domains(target):
        """
            Get domain list for every IP.
            1. Vhosts
            or
            2. FQDN

            A single IP is resolved directly. A subnet is expanded with
            ``target_generator_helper`` and every address is resolved the same
            way; the domains of all addresses are accumulated (first-seen
            order, duplicates dropped). Anything else is treated as a domain
            and returned as is.

        :param target: IP address, subnet, or domain name
        :return:
            List of domains or empty list
        """

        def ip_to_domains(ip):
            # Prefer known virtual hosts; fall back to the reverse-resolved
            # name, which getfqdn returns unchanged when nothing is found.
            vhosts = MollyEngine._get_virtual_hosts(ip)
            if vhosts:
                return vhosts
            fqdn = socket.getfqdn(ip)
            return [fqdn] if fqdn != ip else []

        # ip to domains
        if is_valid_ipv4_address(target) or is_valid_ipv6_address(target):
            return ip_to_domains(target)

        # subnet to domains
        if is_subnet(target):
            hosts = []
            seen = set()
            # NOTE(review): assumes target_generator_helper yields address
            # strings, as required by _get_virtual_hosts - confirm.
            for ip in target_generator_helper([target]):
                for domain in ip_to_domains(ip):
                    if domain not in seen:
                        seen.add(domain)
                        hosts.append(domain)
            return hosts

        # domain to domain
        return [target]

    @staticmethod
    def _get_projects_engine_by_id(project_id):
        """
            Return the ``engine`` field of a project.

        :param project_id: id of the DebbyProject row
        :return:
            Value of the project's ``engine`` attribute
        :raises AttributeError: if no project with this id exists
        """
        session = new_session()
        try:
            project = session.query(DebbyProject).filter(DebbyProject.id == project_id).first()
            # Read the attribute while the session is still open.
            return project.engine
        finally:
            # Release the session even when the lookup fails.
            session.close()

    @staticmethod
    def get_tld_priority(fqdn):
        if not fqdn:
            return 1000

        tld_priodities = {
            "ru": 0,
            "com": 1,
            "io": 101,
            "yandex": 102,
            "st": 103,
            "net": 104,
        }
        tld = fqdn.split(".")[-1]
        priority = tld_priodities.get(tld, 50)
        return priority

    @staticmethod
    def _shuffle_fqnd_list(fqdn_list, project_engine=None):
        """
            Sort fqdns inside tld sequence
        """

        shiffled_fqdn_list = list()
        cur_tld = None
        cur_tld_fqnd_list = list()

        for fqdn in fqdn_list:
            tld = fqdn.split(".")[-1]
            if not cur_tld or cur_tld == tld:
                cur_tld = tld
                cur_tld_fqnd_list.append(fqdn)
            else:
                random.shuffle(cur_tld_fqnd_list)
                shiffled_fqdn_list += cur_tld_fqnd_list
                cur_tld = tld
                cur_tld_fqnd_list = [fqdn]

        random.shuffle(cur_tld_fqnd_list)
        shiffled_fqdn_list += cur_tld_fqnd_list

        return shiffled_fqdn_list


    @staticmethod
    def _prepare_targets(target_list, project_engine=None, scan_profile=None):
        """
            Generator: resolve targets to host names and yield one task
            payload per host accepted by MollyPreparator.

            Empty names, names starting with "*" or "www.", and names
            containing one of the skipped markers are dropped. The rest is
            ordered by TLD priority and shuffled inside each TLD run.

        :param target_list: targets (ip, subnets, domains)
        :param project_engine: stored under "engine" in every payload
        :param scan_profile: passed through to MollyPreparator.prepare
        :return:
            Yields ``([host], payload)`` tuples
        """

        preparator = MollyPreparator()

        resolved = []
        for target in target_list:
            resolved.extend(MollyEngine._target_to_domains(target))

        skipped_markers = ['.ipvsfwmark.', 'mc.yandex.', 'an.yandex.', ".cdn."]

        def is_scannable(name):
            # Same checks, in the same order, as the chained filters they replace.
            if not name:
                return False
            if name[0] == "*":
                return False
            if name.startswith("www."):
                return False
            return not any(marker.lower() in name for marker in skipped_markers)

        candidates = [name for name in resolved if is_scannable(name)]
        candidates.sort(key=MollyEngine.get_tld_priority)
        ordered_hosts = MollyEngine._shuffle_fqnd_list(candidates)

        for host in ordered_hosts:
            entry = {"host": host, "service": MollyEngine._get_cgroup(host)}

            for prepared in preparator.prepare([entry], scan_profile):
                task_payload = {
                    "engine": project_engine,
                    "profile": prepared.get('payload'),
                    "save_to_db": False
                }
                yield ([prepared.get('host')], task_payload)

    @staticmethod
    def new_tasks_payloads_generator(project_id, scan_id=None):
        """
            Generator of task payloads for a scan.

            First yields one ``(targets, payload, None)`` tuple per prepared
            host of the scan's targets. Then polls RetryTargets: every
            ``max(project.retry_period * 60, 30)`` seconds it claims the rows
            of this scan whose ``failed_task_id`` equals ``cur_task_id``
            (by setting ``cur_task_id`` to None) and yields
            ``(targets, payload, retry_target_id)`` for the targets of each
            failed task. Polling stops once nothing was claimed and the scan
            has no task in one of STATES_ALIVE.

        :param project_id: id of the DebbyProject being scanned
        :param scan_id: id of the scan; used for pipeline and retry lookups
        :return:
            Yields ``(target_list, payload, retry_target_id_or_None)``
        """

        # get target list (ip, subnets, domains, Macros)
        initial_targets = MollyEngine._get_targets(project_id, scan_id=scan_id)
        # project engine
        project_engine = MollyEngine._get_projects_engine_by_id(project_id)

        # scan_profile
        s = new_session()
        try:
            project = s.query(DebbyProject).filter(DebbyProject.id == project_id).first()
            policy = s.query(DebbyPolicy).filter(DebbyPolicy.id == project.policy_id).first()
            additional_options = s.query(DebbyPolicyAdditionalOptions) \
                                  .filter(DebbyPolicyAdditionalOptions.policy_id == policy.id).first()
            scan_profile = additional_options.value if additional_options else None
            # Read while the session is open; used by the retry loop below.
            retry_period = project.retry_period
        finally:
            s.close()

        for (targets, payload) in MollyEngine._prepare_targets(initial_targets, project_engine, scan_profile):
            # print("[+] MollyEngine. new_tasks_payloads_generator. target_list: {}. payload: {}".format(targets, payload))
            yield (targets, payload, None)

        # retry logic
        # Never poll more often than every 30 seconds.
        # NOTE(review): retry_period is presumably in minutes - confirm.
        period = max(retry_period * 60, 30)
        while True:
            time.sleep(period)

            claimed = []
            s = new_session()
            try:
                rts = s.query(RetryTargets) \
                       .filter(RetryTargets.scan_id == scan_id) \
                       .filter(RetryTargets.failed_task_id == RetryTargets.cur_task_id).all()
                for rt in rts:
                    claimed.append((rt.id, rt.failed_task_id))
                    rt.cur_task_id = None  # identify that it took into queue
                s.commit()
            finally:
                s.close()
            # print("[new_tasks_payloads_generator]. rts: {}".format(claimed))

            if not claimed:
                s = new_session()
                try:
                    alive_tasks = s.query(DebbyTask) \
                                   .filter(DebbyTask.debbyscan_id == scan_id) \
                                   .filter(DebbyTask.state.in_(STATES_ALIVE)).count()
                finally:
                    s.close()
                if alive_tasks == 0:
                    break

            for (retry_target_id, failed_task_id) in claimed:
                s = new_session()
                try:
                    dt = s.query(DebbyTask).filter(DebbyTask.id == failed_task_id).first()
                    failed_targets = dt.targets if dt else None
                finally:
                    s.close()
                if not dt:
                    # Should never happen; skip this retry target instead of
                    # silently dropping every remaining one.
                    print("[!] MollyEngine. new_tasks_payloads_generator. task {} not found for retry target {}".format(
                        failed_task_id, retry_target_id))
                    continue
                for (targets, payload) in MollyEngine._prepare_targets(failed_targets.split(" "),
                                                                       project_engine, scan_profile):
                    yield (targets, payload, retry_target_id)

        return

    @staticmethod
    def scan_results_to_splunk_events(*args, **kwargs):
        return list()


class MollyPreparator(object):
    def __init__(self):
        """Set up the de-duplication state and the two Qloud helpers."""
        # Exact host names that must not be prepared again.
        self.cache = set()
        # Compiled patterns built from host names that contain a number.
        self.regex_cache = []
        # Splits a name into (prefix, first run of digits, following non-digits).
        self.bonding_regex = re.compile(r'(.*?)(\d+)([^\d]*)')

        # One helper per Qloud installation (external first, then internal).
        self.qh_ext = QloudHelper(QLOUD_OAUTH_TOKEN, QLOUD_EXT_BASE_URL)
        self.qh_int = QloudHelper(QLOUD_OAUTH_TOKEN, QLOUD_INT_BASE_URL)

    def _is_match_bonding_regex(self, string):
        try:
            m = self.bonding_regex.match(string)
            if not m:
                return None
            else:
                return m.groups()
        except Exception:
            traceback.print_exc('[!] MollyPreparator. is_match_bonding_regex. string: {}'.format(string))
            return None

    def _is_in_regex_cache(self, string):
        for r in self.regex_cache:
            try:
                if r.match(string):
                    return True
            except Exception:
                traceback.print_exc('[!] MollyPreparator. is_in_regex_cache. r: {}. string: {}'.format(r, string))

        return False

    def _add_to_regex_cache_if_match_bonding(self, string):
        g = self._is_match_bonding_regex(string)
        if not g:
            return

        new_regex = g[0] + '\\d+' + g[2]
        new_regex = new_regex.replace('.', '\\.')
        self.regex_cache.append(re.compile(new_regex))

        # print('[+] MollyPreparator. add_to_regex_cache_if_match_bonding. string: {}. new_regex: {}'.format(string,
        #                                                                                                    new_regex))
        return

    @staticmethod
    @tenacity.retry(retry=tenacity.retry_if_exception_type(requests.exceptions.ConnectionError),
                    wait=tenacity.wait_fixed(1), stop=tenacity.stop_after_attempt(10), reraise=True)
    def get_molly_target(url):
        """
            Look up the name of the first Molly target registered for a URL.

            Connection errors are retried (up to 10 attempts, 1 second apart)
            and the last one is re-raised.

        :param url: URL sent as the ``url`` query parameter
        :return:
            ``name`` of the first entry of the response's ``targets`` list,
            or '' when the body is not JSON or holds no usable target
        """
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get('https://molly.yandex-team.ru/api/v1.1/targets/', params={'url': url},
                                    headers={'Authorization': 'OAuth {}'.format(MOLLY_OAUTH_TOKEN)},
                                    verify=False, timeout=60)
            json_resp = response.json()
            # print('[+] MollyPreparator. get_molly_target. json_resp: {}'.format(json_resp))
        except ValueError:
            return ''

        # Guard every level of the expected {"targets": [{"name": ...}]} shape
        # so an unexpected body yields '' instead of an AttributeError.
        targets = json_resp.get('targets') if isinstance(json_resp, dict) else None
        if not targets or not isinstance(targets, list):
            return ''

        first_target = targets[0]
        if not isinstance(first_target, dict):
            return ''
        return first_target.get('name', '')

    @staticmethod
    def _get_host_resp(fqdn):
        """
            Resolve the responsibles of a host via RespResolver.

        :param fqdn: host name to resolve
        :return:
            First element of the pair returned by ``RespResolver.get_resps``,
            or ``NO_RESPS`` when resolution raised (the error is printed)
        """
        try:
            resolved, _ignored = RespResolver.get_resps(fqdn)
            return resolved
        except Exception:
            print("[!!!] EXCEPTION MollyPreparator. _get_host_resp. didnt resolve fqdn: {}".format(fqdn))
            traceback.print_exc()
        return NO_RESPS

    def _prepare_one(self, host_service, scan_profile):
        fqdn = host_service.get('host')
        url = 'https://{}/'.format(fqdn)

        if len(fqdn) < 2 or fqdn[0] == '*':
            return None

        # don't scan www.* domains (this is 302 redirect)
        if fqdn[0:4] == 'www.':
            return None

        # already run a scan for this domain.TLD
        if fqdn in self.cache:
            return None

        # -----------------
        WHITELIST_HOSTS = ['.ipvsfwmark.', 'mc.yandex.', 'an.yandex.']
        if any(mk.lower() in fqdn for mk in WHITELIST_HOSTS):
            return None
        # -----------------

        tld = fqdn.split('.')[-1]

        # pick lighter checks for cookie_less domain
        if scan_profile:
            profile = scan_profile
        elif tld in ['net', 'yandex', 'io', 'st']:
            profile = 'Crasher_net'
        else:
            profile = 'Crasher'

        # ----------------------------------------------------------------
        # try to group host by service/target
        # molly_service = self.get_molly_target(url)
        # # print('[+] MollyPreparator. _prepare_one. url: {}. molly_service: {}'.format(url, molly_service))
        # if molly_service:
        #     service = molly_service
        # else:
        #     service = 'CRASHER_' + host_service.get('service')

        if not host_service.get('service'):
            print("[!!!] BUG CATCHER")
            print("[!!!] not host_service.get('service')")
            print("[!!!] host_service: {}".format(host_service))

        service = 'CRASHER_' + host_service.get('service')
        # ----------------------------------------------------------------

        # ----------------------------------------------------------------
        # do not scan some internal resources
        WHITELIST_SERVICES = [
            'cdn-', 'avatars', 'video-storages', '_mulca_', '-upload', 'elliptics-', '_downloader',
            'golem', 'resize', 'yandex_waf', 'geocontext.yandex.ru', '.storage.yandex.net', 'strm-prod'
        ]
        if any(mk.lower() in service for mk in WHITELIST_SERVICES):
            return None
        # ----------------------------------------------------------------

        # -------------------
        # --- TLD BONDING ---
        BONDING_TLDS = ['ru', 'by', 'kz', 'ua', 'ee', 'md', 'az', 'fr', 'lv', 'tj', 'pl', 'fi', 'eu',
                        'kg', 'lt', 'tm', 'uz', 'co.il', 'com.am', 'com.ua', 'com.ge', 'com.tr']

        # if tld != 'net':
        if tld in BONDING_TLDS:
            no_tld_domain = fqdn.rsplit('.', 1)[0]
            for dom in BONDING_TLDS:
                self.cache.add('.'.join([no_tld_domain, dom]))
        else:
            self.cache.add(fqdn)

        # ---------------------
        # --- REGEX BONDING ---
        if tld != "ru" and self._is_in_regex_cache(fqdn):
            return None
        self._add_to_regex_cache_if_match_bonding(fqdn)

        # ------------------

        host = host_service.get('host')
        service = host_service.get('service')
        resps = self._get_host_resp(fqdn)

        # ------------
        # collect urls for qloud targets
        url = None
        try:
            if self.qh_ext.is_qloud_host(fqdn):
                routes = self.qh_ext.find_routes_for_domain(fqdn)
                if routes:
                    url = ["https://{}/".format(fqdn)] + ["https://{}{}".format(fqdn, route) for route in routes]

            if not url and self.qh_int.is_qloud_host(fqdn):
                routes = self.qh_int.find_routes_for_domain(fqdn)
                if routes:
                    url = ["https://{}/".format(fqdn)] + ["https://{}{}".format(fqdn, route) for route in routes]
        except Exception as e:
            print("[!] Exception. MollyPreparator::_prepare_one. qloud_url_collector. {}".format(e))

        if not url:
            url = "https://{}/".format(fqdn)

        # ---------------

        # print("[MOLLY][_prepare_one] host: {}. service: {}. resps: {}".format(host, service, resps))

        return {
            'host': host,
            'service': service,
            'payload': {
                'url': url,
                'service': service,
                'profile': profile,
                'resp': resps
            }
        }

    def prepare(self, host_service_list, scan_profile):

        for host_service in host_service_list:
            host_service_payload = self._prepare_one(host_service, scan_profile)
            if host_service_payload:
                yield host_service_payload
