#!/usr/bin/env python
# -*-coding: utf-8 -*-

import datetime
import time
import urlparse
import requests
import settings
import json
import tenacity
import datetime


class Crasher(object):
    """Client for the "Crasher" bulk-scan workflow of the Molly HTTP API.

    All Molly endpoints are addressed relative to ``settings.MOLLY_PREFIX``
    and authorised with ``settings.AUTH_TOKEN``.  The workflow has two phases:

    * ``pull_state`` reads ``settings.TARGETS_FILE``, filters the targets and
      uploads them to ``/crasher-state/`` in chunks of ``settings.CHUNK_SIZE``;
    * ``do_scan`` downloads those chunks one by one, submits a scan for every
      target in a chunk, deletes the chunk and throttles on the scan queue.

    Every HTTP call is retried on ``requests.exceptions.ConnectionError``.
    """

    # Values of the 'status' field served by the '/crasher/' endpoint.
    SCAN_STATUS_IDLE = 0
    SCAN_STATUS_RUNNING = 1
    SCAN_STATUS_FORCED = 2

    # Endpoint queried by get_host_resp() for the people responsible for a host.
    HOST_RESP_URL = 'http://ro.admin.yandex-team.ru/api/get_host_resp.sbml'

    # Shared retry policy: retry only on connection errors.  No stop or wait
    # strategy is configured, so tenacity's defaults for those apply.
    _retry_on_connection_error = tenacity.retry(
        retry=tenacity.retry_if_exception_type(requests.exceptions.ConnectionError))

    def __init__(self):
        # '<scheme>://<host>' keys that pull_state() must not queue again.
        self.scan_cache = []
        self.known_services = set()
        # Targets collected by pull_state(); each item is a dict with the keys
        # 'url', 'service', 'profile' and 'resp'.
        self.state = []
        # Last chunk listing fetched by get_chunks_uid().
        self.chunks_uid = []

    @staticmethod
    def _auth_headers():
        """Return the Authorization header sent with every Molly request."""
        return {'Authorization': settings.AUTH_TOKEN}

    @staticmethod
    @_retry_on_connection_error
    def get_scan_status():
        """Return the 'status' value reported by '/crasher/' (0 when absent).

        Raises ValueError if the response body is not valid JSON.
        """
        response = requests.get(settings.MOLLY_PREFIX + '/crasher/',
                                headers=Crasher._auth_headers())
        json_resp = response.json()
        # Disabled staleness check, kept for reference:
        # last_updated = datetime.datetime.strptime(json_resp.get('last_updated'), '%Y-%m-%dT%H:%M:%S')
        # if last_updated < (datetime.datetime.now() - datetime.timedelta(days=2)):
        #     return 0
        return json_resp.get('status', 0)

    @staticmethod
    @_retry_on_connection_error
    def mark_crasher_scan_started():
        """PUT status SCAN_STATUS_RUNNING to '/crasher/'; return the response."""
        return requests.put(settings.MOLLY_PREFIX + '/crasher/',
                            data={'status': Crasher.SCAN_STATUS_RUNNING},
                            headers=Crasher._auth_headers())

    @staticmethod
    @_retry_on_connection_error
    def mark_crasher_scan_finished():
        """PUT status SCAN_STATUS_IDLE to '/crasher/'; return the response."""
        return requests.put(settings.MOLLY_PREFIX + '/crasher/',
                            data={'status': Crasher.SCAN_STATUS_IDLE},
                            headers=Crasher._auth_headers())

    @staticmethod
    @_retry_on_connection_error
    def get_molly_target(url):
        """Return the name of the first Molly target registered for ``url``.

        Returns '' when the response is not JSON, or when it carries no
        non-empty 'targets' list.
        """
        try:
            response = requests.get(settings.MOLLY_PREFIX + '/targets/', params={'url': url},
                                    headers=Crasher._auth_headers())
            json_resp = response.json()
        except ValueError:
            return ''
        targets = json_resp.get('targets')
        if not targets or not isinstance(targets, list):
            return ''
        return targets[0].get('name', '')

    @staticmethod
    @_retry_on_connection_error
    def run_molly(url, service, profile, resp, logon=True):
        """Submit a scan request to '/scan/' and return the HTTP response.

        :param url: value sent as 'target_uri'.
        :param service: value sent as 'target'.
        :param profile: value sent as 'profile'.
        :param resp: iterable of strings sent comma-joined as 'users';
            a falsy value is sent as an empty string.
        :param logon: when true, 'auth_profile' is sent as well, set to
            ``settings.DEFAULT_AUTH_PROFILE``.
        """
        payload = {
            'target_uri': url,
            'target': service,
            'profile': profile,
            'users': ','.join(resp or []),
            'is_prod': True,
        }
        if logon:
            payload['auth_profile'] = settings.DEFAULT_AUTH_PROFILE
        return requests.post(settings.MOLLY_PREFIX + '/scan/',
                             headers=Crasher._auth_headers(),
                             data=payload)

    @staticmethod
    @_retry_on_connection_error
    def get_host_resp(fqdn):
        """Return the list of responsible people reported for host ``fqdn``.

        The endpoint answers with a comma-separated string; entries are
        stripped and empty ones dropped.  Returns [] on a non-200 status or an
        empty body.  Always a real list, so the result can be JSON-serialised
        by pull_state() on both Python 2 and Python 3.
        """
        response = requests.get(Crasher.HOST_RESP_URL, params={'hostname': fqdn})
        if response.status_code != 200 or not response.text:
            return []
        stripped = (item.strip() for item in response.text.split(','))
        return [item for item in stripped if item]

    @staticmethod
    def _parse_target_line(line):
        """Parse one '<url> <service>' line of the targets file.

        Returns ``(url, service, parsed_url)``, or None when the line must be
        skipped: wrong number of space-separated fields, unparsable URL, no
        host part, a 'www' host, or a host shorter than three characters.
        """
        try:
            url, service = line.strip('\n').split(' ')
            parsed_url = urlparse.urlparse(url)
        except ValueError:
            return None
        netloc = parsed_url.netloc
        if not netloc:
            return None
        # don't scan www.* domains (this is 302 redirect)
        if netloc.split('.')[0] == 'www':
            return None
        # definitely not a domain
        if len(netloc) < 3:
            return None
        return url, service, parsed_url

    @staticmethod
    @_retry_on_connection_error
    def _post_state_chunk(chunk):
        """Upload one chunk of targets to '/crasher-state/' as JSON."""
        return requests.post(settings.MOLLY_PREFIX + '/crasher-state/',
                             headers=Crasher._auth_headers(),
                             files={'data': json.dumps(chunk)})

    def pull_state(self):
        """Collect scan targets from ``settings.TARGETS_FILE`` and upload them.

        Accepted targets are appended to ``self.state`` and then posted to
        '/crasher-state/' in chunks of ``settings.CHUNK_SIZE``.

        Retrying happens per HTTP call (the lookups and each chunk upload)
        rather than around the whole method: re-running the method after a
        failed upload would re-read the file, duplicate ``self.state`` and
        re-post chunks that were already sent.
        """
        with open(settings.TARGETS_FILE, 'r') as fd:
            for line in fd:
                parsed = self._parse_target_line(line)
                if parsed is None:
                    continue
                url, service, parsed_url = parsed
                netloc = parsed_url.netloc

                # already ran a scan for this domain.TLD
                cache_key = '://'.join([parsed_url.scheme, netloc])
                if cache_key in self.scan_cache:
                    continue

                # pick lighter checks for cookie_less domain
                tld = netloc.split('.')[-1]
                if tld in ('net', 'yandex', 'io', 'st'):
                    profile = 'Crasher_net'
                else:
                    profile = 'Crasher'

                # try to group host by service/target
                service = self.get_molly_target(url) or ('CRASHER_' + service)

                # do not scan some internal resources
                # NOTE(review): only the blacklist entry is lowercased, not
                # ``service`` - confirm whether the match should be
                # case-insensitive.
                if any(mk.lower() in service for mk in settings.BLACKLIST):
                    continue

                # Responsible people are looked up last, so that no HTTP
                # request is spent on a target rejected by the checks above.
                resp = self.get_host_resp(netloc)

                print(url, repr(service), profile, resp)

                self.state.append({'url': url, 'service': service, 'profile': profile, 'resp': resp})

                # Register the same host name under every sticky TLD so that
                # its sibling-TLD variants are skipped by the cache check.
                if tld != 'net':
                    no_tld_domain = '://'.join([parsed_url.scheme, '.'.join(netloc.split('.')[:-1])])
                    for dom in settings.STICKY_TLD:
                        self.scan_cache.append('.'.join([no_tld_domain, dom]))

        for start in range(0, len(self.state), settings.CHUNK_SIZE):
            self._post_state_chunk(self.state[start:start + settings.CHUNK_SIZE])

    @_retry_on_connection_error
    def get_chunks_uid(self):
        """Store the JSON listing served by '/crasher-state/' in ``self.chunks_uid``."""
        response = requests.get(settings.MOLLY_PREFIX + '/crasher-state/',
                                headers=self._auth_headers())
        self.chunks_uid = response.json()

    @staticmethod
    @_retry_on_connection_error
    def _load_state_chunk(uid):
        """Download chunk ``uid`` and return its decoded list of targets.

        Returns None (after printing the error) when the response is not JSON
        or its 'data' field is missing or not valid JSON.
        """
        response = requests.get(settings.MOLLY_PREFIX + '/crasher-state/' + uid + '/',
                                headers=Crasher._auth_headers())
        try:
            return json.loads(response.json().get('data'))
        except (TypeError, ValueError) as e:
            # TypeError: 'data' is absent, so json.loads() received None.
            print("Value error" + str(e))
            return None

    @staticmethod
    @_retry_on_connection_error
    def _delete_state_chunk(uid):
        """Delete chunk ``uid`` from '/crasher-state/'."""
        return requests.delete(settings.MOLLY_PREFIX + '/crasher-state/' + uid + '/',
                               headers=Crasher._auth_headers())

    @staticmethod
    @_retry_on_connection_error
    def _get_queue_length():
        """Return the 'in_progress' value reported by '/scan/queue/'."""
        response = requests.get(settings.MOLLY_PREFIX + '/scan/queue/',
                                headers=Crasher._auth_headers())
        return response.json().get('in_progress')

    @staticmethod
    def _wait_for_queue_capacity():
        """Block until fewer than ``settings.QUEUE_LEN_MAX`` scans are in progress.

        The queue is polled every ``settings.PAUSE_TIME`` seconds.  A missing
        'in_progress' value counts as "no backlog"; a non-JSON answer is
        printed and ends the wait.
        """
        try:
            while True:
                queue_len = Crasher._get_queue_length()
                if queue_len is None or queue_len < settings.QUEUE_LEN_MAX:
                    return
                time.sleep(settings.PAUSE_TIME)
        except ValueError as e:
            print("Value error" + str(e))

    def do_scan(self):
        """Submit scans for every uploaded chunk until none are left.

        For each chunk listed by '/crasher-state/': submit one scan per
        target, delete the chunk, then wait for scan-queue capacity.  A chunk
        that cannot be decoded is deleted without scanning.  The listing is
        re-fetched after each pass and the loop ends once it is empty.

        HTTP calls made here are retried on connection errors, consistent with
        the rest of the class.
        """
        self.get_chunks_uid()
        while self.chunks_uid:
            for entry in self.chunks_uid:
                uid = entry.get('uid')
                targets = self._load_state_chunk(uid)
                if targets is None:
                    # Undecodable chunk: drop it so it cannot block the loop.
                    self._delete_state_chunk(uid)
                    continue

                for target in targets:
                    print(target.get('url'), target.get('service'), target.get('profile'),
                          target.get('resp', []), datetime.datetime.now())
                    self.run_molly(url=target.get('url'), service=target.get('service'),
                                   profile=target.get('profile'),
                                   resp=target.get('resp', []))
                self._delete_state_chunk(uid)

                # Check if number of scans in queue < settings.QUEUE_LEN_MAX
                self._wait_for_queue_capacity()

            self.get_chunks_uid()


def main():
    """Run one pass of the Crasher workflow according to the scan status.

    * SCAN_STATUS_IDLE: nothing to do, exit with code 0.
    * SCAN_STATUS_RUNNING: resume scanning the chunks already uploaded.
    * SCAN_STATUS_FORCED: rebuild and upload the target state, mark the scan
      as started, then scan.

    After scanning, the scan is marked finished.  Any other status value is
    ignored.
    """
    # Local import: ``exit`` is injected by the ``site`` module for interactive
    # use and is not guaranteed to exist (e.g. ``python -S``); sys.exit is.
    import sys

    crasher = Crasher()
    scan_status = crasher.get_scan_status()

    if scan_status == crasher.SCAN_STATUS_IDLE:
        sys.exit(0)
    elif scan_status == crasher.SCAN_STATUS_RUNNING:
        crasher.do_scan()
        crasher.mark_crasher_scan_finished()
    elif scan_status == crasher.SCAN_STATUS_FORCED:
        crasher.pull_state()
        crasher.mark_crasher_scan_started()
        crasher.do_scan()
        crasher.mark_crasher_scan_finished()

# Run the workflow only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
