#!/usr/bin/env python

import ConfigParser
import argparse
import syslog
import socket
import urllib2
import time
from netaddr import IPNetwork, IPAddress
import datetime
import requests
import re

# Name of the section in the INI configuration file that main() reads.
SECTION = 'find-targets'
# Identifier passed to syslog.openlog() for this script's log entries.
TAG = "Crasher"
# IPv4 address -> list of DNS names; replaced in main() with the result of
# parse_dnszone() and read by get_hostnames().
VHOSTS = {}


def message(txt):
    """Print *txt* on stdout, prefixed with the local time as DD.MM.YYYY HH:MM."""
    timestamp = datetime.datetime.now().strftime('%d.%m.%Y %H:%M')
    # join() keeps the original contract: *txt* must already be a string.
    print (' '.join((timestamp, txt)))


def strip_dots(line):
    """Return *line* without any leading or trailing '.' characters."""
    # strip() already trims both ends, so one call is sufficient.
    trimmed = line.strip('.')
    return trimmed


def parse_dnszone(filename):
    """Build a map of IPv4 address -> DNS names from a zone dump file.

    Every line of *filename* is scanned for ``key=value`` pairs (a value may
    be wrapped in double quotes, which are removed).  Only the keys
    ``record_type``, ``name`` and ``record_data`` are used:

    * ``A`` records map ``record_data`` (the address) to ``name``.
    * ``CNAME`` records map ``name`` (the alias) to ``record_data`` (the
      target); after the whole file is read, each alias is added to every
      address its target has an ``A`` record for.

    Lines that lack any of the three keys (blank lines, comments, other
    formats) are skipped instead of raising ``KeyError``.

    :param filename: path of the zone dump to read.
    :returns: dict mapping address string -> list of names, in file order
        for ``A`` records followed by resolved aliases.
    :raises IOError: if the file cannot be opened.
    """
    pair_re = re.compile(r'(\S+)=(".*?"|\S+)')
    a_records = {}
    virtual_hosts = {}
    cnames = {}

    # Text mode keeps the lines as ``str`` so the str regex applies; the
    # context manager closes the file even if parsing raises.
    with open(filename, 'r') as fd:
        for raw_line in fd:
            record = {k: v.strip('"') for k, v in pair_re.findall(raw_line)}
            record_type = record.get('record_type')
            name = record.get('name')
            data = record.get('record_data')
            if record_type is None or name is None or data is None:
                continue

            if record_type == "A":
                virtual_hosts.setdefault(data, []).append(name)
                a_records.setdefault(name, []).append(data)
            elif record_type == "CNAME":
                # A later CNAME for the same alias overrides an earlier one.
                cnames[name] = data

    # Aliases are resolved only after all A records are known, so a CNAME
    # may appear before the A record of its target.
    for alias, target in cnames.items():
        for ip in a_records.get(target, []):
            virtual_hosts.setdefault(ip, []).append(alias)

    return virtual_hosts


def get_hostnames(ipv4):
    """Return the distinct host names known for *ipv4* in ``VHOSTS``.

    Every name has its surrounding dots removed, and each ``*`` in it is
    substituted twice -- once with ``crasher-test`` and once with ``m`` --
    so a wildcard entry contributes two concrete names.  The result is
    de-duplicated; its order is unspecified.  Unknown addresses yield ``[]``.
    """
    wildcard_labels = ('crasher-test', 'm')
    names = set()
    for vhost in VHOSTS.get(ipv4, []):
        trimmed = vhost.rstrip('.')
        for label in wildcard_labels:
            names.add(trimmed.replace('*', label).strip('.'))
    return list(names)


def get_cgroup(host):
    """Return the group name to use as the target for *host*.

    Queries the ``hosts2groups`` HTTP endpoint and treats the response body
    as a newline-separated list of group names.  The first group whose name
    contains ``prod`` or ``stable`` wins; otherwise the first group listed
    is returned.

    *host* itself is returned as a fallback when the request fails, the
    service answers with an error status (4xx/5xx), or no group is listed.

    :param host: host name to look up.
    :returns: a group name, or *host* when no group could be determined.
    """
    try:
        # The timeout keeps one unresponsive lookup from stalling the run.
        r = requests.get('http://c.yandex-team.ru/api-cached/hosts2groups/%s' % host,
                         timeout=10)
    except Exception as e:
        # str(e) rather than e.message: .message may be empty or a
        # non-string object, which would make message() itself raise.
        message(str(e))
        time.sleep(0.1)
        return host

    # An error body (404 as well as other 4xx/5xx) is not a list of groups.
    if r.status_code >= 400:
        return host

    # Strip before filtering so whitespace-only lines cannot become a group.
    cgroups = [g.strip() for g in r.text.split('\n')]
    cgroups = [g for g in cgroups if g]
    if not cgroups:
        return host

    for g in cgroups:
        if 'prod' in g or 'stable' in g:
            return g
    return cgroups[0]


def process_ip_line(line, aggregate=False):
    """Turn one line holding an IPv4 address into a list of scan targets.

    Host names for the address come from ``get_hostnames()``; if none are
    known, a reverse DNS lookup is attempted.  One entry is produced per
    host name, or a single entry for the bare address when no name could be
    found.

    :param line: text line containing an IPv4 address (whitespace ignored).
    :param aggregate: when true, the ``target`` of each entry is the group
        returned by ``get_cgroup()``; otherwise it is the host name itself.
    :returns: list of ``{'url': 'https://<host>/', 'target': <target>}``
        dicts; empty for a blank line or a blacklisted address.
    """
    global BLACKLIST_NETS

    result = []

    ipv4 = line.strip()
    if not ipv4:
        return result

    message('Processing line for %s' % ipv4)
    if BLACKLIST_NETS:
        # Parse the address once rather than once per blacklisted network.
        address = IPAddress(ipv4)
        if any(address in net for net in BLACKLIST_NETS):
            return result

    hostnames = get_hostnames(ipv4)

    if not hostnames:
        try:
            hostnames.append(socket.gethostbyaddr(ipv4)[0])
        except Exception as e:
            # str(e): socket errors carry (errno, text), for which
            # e.message is an empty string on Python 2.
            message(str(e))

    # Each target is a (url_host, target_name) pair.
    if not hostnames:
        # The bare address must be a pair as well; appending the plain
        # string made the indexing below pick its first two characters.
        target_hostnames = [(ipv4, ipv4)]
    elif aggregate:
        target_hostnames = [(host, get_cgroup(host)) for host in hostnames if host]
    else:
        # Without aggregation every host is its own target; previously
        # named hosts were silently dropped in this mode.
        target_hostnames = [(host, host) for host in hostnames if host]

    result.extend({'url': 'https://' + url_host + '/', 'target': target}
                  for url_host, target in target_hostnames)

    return result


def expand_macro(macro, only_v4=True):
    """Expand a firewall macro into a list of addresses.

    Requests ``firewall/expand_macro.sbml`` under ``GOLEM_URL`` and expects
    a reply of the form ``ok <item> or <item> or ...``.  Each item is
    resolved with ``socket.gethostbyname_ex``; items that fail to resolve
    are logged and skipped.  If at least one item resolved, the resolved
    addresses are returned; otherwise the raw items are returned unchanged.

    :param macro: macro name to expand.
    :param only_v4: when true, drop every entry containing ``:``.
    :returns: list of address strings; ``[]`` when the request fails or the
        reply carries no ``ok `` payload.
    """
    global GOLEM_URL
    data = ''
    try:
        req = urllib2.Request(GOLEM_URL + 'firewall/expand_macro.sbml?macro=%s' % macro)
        # Bounded wait so a stalled server cannot hang the whole run.
        f = urllib2.urlopen(req, timeout=30)
        try:
            data = f.read().strip()
        finally:
            f.close()
    except Exception as e:
        message(str(e))

    # partition() never raises: a reply of exactly "ok" (nothing after it)
    # gives an empty marker here instead of an IndexError.
    _, marker, body = data.partition('ok ')
    if not marker:
        return []

    # Split on the stand-alone word "or" only.  Splitting on the bare
    # substring would cut names that merely contain "or" into pieces.
    items = [item.strip() for item in re.split(r'\s+or\s+', body)]
    items = [item for item in items if item]

    resolved = []
    for item in items:
        try:
            hinfo = socket.gethostbyname_ex(item)
        except Exception as e:
            message(str(e))
            continue
        if not hinfo or len(hinfo) < 3:
            continue
        resolved.extend(hinfo[2])

    # NOTE(review): when anything resolves, unresolved items are dropped;
    # this mirrors the previous behaviour -- confirm it is intended.
    result = resolved if resolved else items
    if only_v4:
        result = [x for x in result if ':' not in x]
    return result


def main():
    """Load the configuration and prepare the module-level state.

    Reads the INI file (``/opt/config.ini`` unless ``-c/--config`` is
    given), stores the settings of the ``SECTION`` section in module
    globals, builds ``BLACKLIST_NETS`` from the comma-separated
    ``blacklist`` option and fills ``VHOSTS`` from the DNS zone file.

    Blacklist items starting with ``_`` are expanded via ``expand_macro()``;
    any other item is resolved as a host name.  A host name that fails to
    resolve raises, so a broken blacklist stops the run rather than being
    silently ignored.
    """
    global VHOSTS, TARGET_FILE, ZONE_FILE, GOLEM_URL
    global IP_FILE, NEED_AGGREGATE, BLACKLIST_NETS
    syslog.openlog(TAG)
    syslog.syslog("Beginning regular process of finding targets")

    config_file = '/opt/config.ini'
    parser = argparse.ArgumentParser(
        description='Finder of web servers in YANDEXNETS')
    parser.add_argument('-c', '--config',
                        metavar='FILENAME',
                        dest='config',
                        help='Configuration file')

    args = parser.parse_args()

    if args.config:
        config_file = args.config

    config = ConfigParser.ConfigParser()
    # Close the configuration file as soon as it has been parsed.
    with open(config_file) as config_fd:
        config.readfp(config_fd)

    TARGET_FILE = config.get(SECTION, 'target_file')
    ZONE_FILE = config.get(SECTION, 'zone_file')
    GOLEM_URL = config.get(SECTION, 'golem_api')
    IP_FILE = config.get(SECTION, 'ip_file')
    NEED_AGGREGATE = config.getboolean(SECTION, 'aggregate')
    BLACKLIST = config.get(SECTION, 'blacklist')

    BLACKLIST_NETS = []
    for item in BLACKLIST.split(','):
        # Tolerate "a, b" style lists: without stripping, " _MACRO" would
        # not be recognised as a macro and " host" would fail to resolve.
        item = item.strip()
        if not item:
            continue
        if item.startswith('_'):
            BLACKLIST_NETS += [IPNetwork(net) for net in expand_macro(item)]
        else:
            hinfo = socket.gethostbyname_ex(item)
            if not hinfo or len(hinfo) < 3:
                continue
            BLACKLIST_NETS += [IPNetwork(net) for net in hinfo[2]]

    message('Parsing DNS zone file')

    VHOSTS = parse_dnszone(ZONE_FILE)

    message('Scanning all networks for web servers')


def read_zmap():
    """Convert the address list in ``IP_FILE`` into ``TARGET_FILE``.

    Every line of ``IP_FILE`` is passed to ``process_ip_line()``; each
    resulting entry is printed and written to ``TARGET_FILE`` as
    ``<url> <target>`` on its own line.  ``TARGET_FILE`` is truncated
    first.  Relies on the globals set by ``main()``.
    """
    global IP_FILE, TARGET_FILE, NEED_AGGREGATE

    # Context managers close both files even if processing a line raises.
    with open(IP_FILE, 'r') as zmap_file:
        message('Processing zmap result file %s' % zmap_file.name)
        with open(TARGET_FILE, 'w+') as target_file:
            for line in zmap_file:
                for r in process_ip_line(line, NEED_AGGREGATE):
                    print (r.get('url', ''), r.get('target', ''))
                    target_file.write(r.get('url', '') + ' ' + r.get('target', '') + '\n')

    syslog.syslog("Finishing regular process of finding targets")


# Script entry point: main() must run first because it sets the globals
# (IP_FILE, TARGET_FILE, NEED_AGGREGATE, BLACKLIST_NETS, VHOSTS) that
# read_zmap() and the functions it calls depend on.
if __name__ == '__main__':
    main()
    read_zmap()
