# -*- coding: utf-8 -*-
# pylint: disable=too-many-locals,too-many-statements,too-many-lines
from __future__ import absolute_import, print_function, unicode_literals

import collections
import contextlib
import datetime
import errno
import hashlib
import itertools
import json
import operator
import monotonic
import os
import re
import socket
import tempfile
import time
import xml.etree.ElementTree as ET
from itertools import chain, islice

import tornado.httpclient
from tornado.netutil import errno_from_exception

import logging as logger
from infra.netmon.build_topology.lib.ipnetwork import IPNetwork, NetworkSet
import infra.netmon.build_topology.lib.url as urllib

# Placeholder emitted by the ``to_dict`` serializers for values that are not known.
UNKNOWN = "unknown"

# Network kind labels.
# NOTE(review): not referenced in the visible part of the file; presumably compared
# against ``network_type`` values - confirm.
BACKBONE = "backbone"
FASTBONE = "fastbone"

# BOT exports in JSON format: host records and switch records.
BOT_HOSTS_DATA_URL = "https://bot.yandex-team.ru/api/view.php?name=view_oops_hardware&format=json"
BOT_SWITCHES_URL = "https://bot.yandex-team.ru/api/view.php?name=view_noc_racktables&format=json"
# Host-count thresholds.
# NOTE(review): not used in the visible part of the file; presumably sanity limits
# applied when the topology is assembled - confirm.
MIN_BOT_HOSTS = 50000
MAX_INCOMPLETE_HOSTS = 37000  # 10000 limit + 27000 hosts from MAN
BOT_WALLE_HOST_COUNT_DIFF = 10000

# Racktables exports. The netmap, network list, VRF, macro and MTN exports are read
# line by line as separator-delimited text; the switch export is parsed as XML.
RACKTABLES_NETMAP_IP_URL = "https://ro.racktables.yandex.net/export/netmap/L123r"
RACKTABLES_NETMAP_MAC_URL = "https://ro.racktables.yandex.net/export/netmap/L12.fvt"
RACKTABLES_NETWORKS_URL = "https://ro.racktables.yandex.net/export/networklist-perdc.txt"
RACKTABLES_BACKBONE_VRF_URL = "https://ro.racktables.yandex.net/export/networklist.php?report=vrf-bb"
RACKTABLES_FASTBONE_VRF_URL = "https://ro.racktables.yandex.net/export/networklist.php?report=vrf-fb"
RACKTABLES_SWITCHES_URL = "https://ro.racktables.yandex.net/export/net-layout.xml"
RACKTABLES_MACROS_URL = 'https://ro.racktables.yandex.net/export/nets-by-project.php'
RACKTABLES_MTN_URL = 'https://ro.racktables.yandex.net/export/vm-projects2.txt'

# Wall-E API. In WALLE_HOSTS_URL ``{0}`` is the page size (limit) and ``{1}`` is the cursor.
WALLE_HOSTS_URL = ("https://api.wall-e.yandex-team.ru/v1/hosts"
                   "?fields=inv,name,macs,status,state,location.short_datacenter_name,location.short_queue_name,"
                   "location.rack,location.switch,owners,project&strict=true&limit={0}&cursor={1}"
                   "&resolve_owners=1&include_shadow=1")
WALLE_PROJECT_URL = "https://api.wall-e.yandex-team.ru/v1/projects?fields=id,tags"
WALLE_TREE_URL = "https://api.wall-e.yandex-team.ru/v1/physical-location-tree"

# ABC API. ``{page}`` in ABC_SERVICES_URL is a page-number placeholder; ``{0}`` in
# ABC_MEMBERS_URL is a comma-separated list of service ids.
ABC_SERVICES_URL = ("https://abc-back.yandex-team.ru/api/v4/services/"
                    "?format=json&page_size=1000&page={page}&fields=state,id")
ABC_MEMBERS_URL = ("https://abc-back.yandex-team.ru/api/v4/services/members/"
                   "?service__in={0}&role__scope_in=administration,services_management"
                   "&format=json&page_size=500&fields=person.login,service.id")

# Staff API. Results are sorted by id; ``{min_id}`` is the last id already fetched.
STAFF_PERSONS_URL = ("https://staff-api.yandex-team.ru/v3/persons"
                     "?_fields=id,login,groups.group.url,department_group.ancestors.url,department_group.url"
                     "&_sort=id&_limit=500&_query=id>{min_id}")

# Conductor API. ``{0}`` in CONDUCTOR_HOSTS_URL is a project name.
CONDUCTOR_PROJECTS_URL = "https://c.yandex-team.ru/api-cached/projects?format=json"
CONDUCTOR_HOSTS_URL = "https://c.yandex-team.ru/api-cached/projects2hosts/{0}?format=json"

# Netmon agent report (JSON with an "agents" list).
NETMON_REPORT_URL = "https://netmon.yandex-team.ru/api/v1/agent_report"

# NOC export mapping switch name -> pod info (JSON object).
NOC_EXPORT_PODS_URL = "https://noc-export.yandex.net/rt/l3tor-pod-nums.json"

# Address ranges of the MTN blocks; indexed as [0] (lower bound) and [1] (upper bound)
# by NetworkParser.is_mtn.
MTN_BACKBONE_BLOCK = IPNetwork('2a02:6b8:c00::/40').to_range()
MTN_FASTBONE_BLOCK = IPNetwork('2a02:6b8:fc00::/48').to_range()


# Directory holding cached HTTP responses and the switch that enables the cache
# (both read by _file_based_request).
CACHE_DIR = 'http_cache'
USE_CACHE = True


def chunks(iterable, chunk_size=1000):
    """Split ``iterable`` into chunks containing ``chunk_size`` elements.

    Yields lists; every list has ``chunk_size`` elements except possibly the
    last one, which holds the remainder. Nothing is yielded for an empty input.
    """
    iterator = iter(iterable)
    while True:
        # Exhaustion is handled explicitly: letting StopIteration escape from a
        # generator body is turned into RuntimeError by PEP 479.
        try:
            head = next(iterator)
        except StopIteration:
            return
        yield list(chain([head], islice(iterator, chunk_size - 1)))


class Stopwatch(object):
    """Measures elapsed time using ``monotonic.monotonic()``."""

    def __init__(self):
        # The reference point is the moment of construction.
        self.__start = monotonic.monotonic()

    def get(self, reset=True):
        """Return the time elapsed since the reference point.

        When ``reset`` is true the reference point is moved to "now", so the
        next call measures from this call.
        """
        began = self.__start
        current = monotonic.monotonic()
        if reset:
            self.__start = current
        return current - began


class Interface(object):
    """A network interface: its names, addresses, switch and network attributes."""

    __slots__ = (
        "name", "domain", "fqdn",
        "hwaddr", "switch",
        "ipv4addr", "ipv4addr_timestamp", "ipv6addr", "ipv6addr_timestamp",
        "vlan", "vrf", "network_type", "macro", "project_id",
        "network_timestamp",
    )

    def __init__(self):
        self.name = self.domain = self.fqdn = None
        self.hwaddr = self.switch = None

        self.ipv4addr = self.ipv6addr = None
        self.ipv4addr_timestamp = self.ipv6addr_timestamp = 0

        self.vlan = 0
        self.vrf = self.network_type = self.macro = self.project_id = None
        self.network_timestamp = 0

    def set_fqdn(self, fqdn):
        """Store ``fqdn`` together with its short name and dot-prefixed domain.

        Raises ValueError when ``fqdn`` contains no dot.
        """
        short_name, rest = fqdn.split(".", 1)
        self.name = short_name
        self.domain = "." + rest
        self.fqdn = fqdn

    def update_from(self, entry, switch_name):
        """Merge ``entry`` into this interface.

        An address is replaced when the entry is at least as recent as the stored
        address of the same family. ``switch`` and ``hwaddr`` are filled only while
        still unset. Network attributes are refreshed when the entry is at least as
        recent as the stored ones; falsy vlan/vrf/macro/project_id values never
        overwrite what is already stored.
        """
        stamp = entry.timestamp
        family = entry.ipaddr_family

        if family == socket.AF_INET6 and stamp >= self.ipv6addr_timestamp:
            self.ipv6addr, self.ipv6addr_timestamp = entry.ipaddr, stamp
        elif family == socket.AF_INET and stamp >= self.ipv4addr_timestamp:
            self.ipv4addr, self.ipv4addr_timestamp = entry.ipaddr, stamp

        if self.switch is None:
            self.switch = switch_name
        if self.hwaddr is None:
            self.hwaddr = entry.hwaddr

        if not stamp >= self.network_timestamp:
            return

        self.network_type = entry.network_type
        for field in ("vlan", "vrf", "macro", "project_id"):
            value = getattr(entry, field)
            if value:
                setattr(self, field, value)
        self.network_timestamp = stamp

    def to_dict(self):
        """Serialize to a plain dict; unknown optional values become ``UNKNOWN``."""
        required = (self.name, self.domain, self.fqdn, self.switch)
        filled = (
            all(value is not None for value in required)
            and not (self.ipv6addr is None and self.ipv4addr is None)
        )
        assert filled, "not all fields filled properly: {!r}".format(self)

        result = {
            "switch": self.switch,
            "name": self.name,
            "domain": self.domain,
            "fqdn": self.fqdn,
            "hwaddr": _mac_to_str(self.hwaddr),
            "vlan": self.vlan,
        }
        for key in ("ipv4addr", "ipv6addr", "network_type", "vrf", "macro", "project_id"):
            result[key] = getattr(self, key) or UNKNOWN
        return result

    def __repr__(self):
        template = (
            "<Address(fqdn={!r}, switch={!r}, hwaddr={!r}, ipv4addr={!r}, ipv6addr={!r},"
            " vlan={!r}, vrf={!r}, network_type={!r}, macro={!r})>"
        )
        shown = (
            self.fqdn, self.switch, self.hwaddr, self.ipv4addr, self.ipv6addr,
            self.vlan, self.vrf, self.network_type, self.macro,
        )
        return template.format(*shown)


class Host(object):
    """A host: identity, hardware, physical location and network attributes.

    Hosts compare equal and hash by ``fqdn``.
    """

    # Every attribute name appears exactly once ("hwaddrs" used to be listed twice).
    __slots__ = (
        "name", "domain", "fqdn",
        "hwaddrs", "invnum", "bot_location", "planner_id", "owners",
        "dc", "queue", "pod", "rack", "switch",
        "ipv4addr", "ipv4addr_timestamp", "ipv6addr", "ipv6addr_timestamp",
        "vlan", "vrf", "network_timestamp",
        "walle_project", "walle_tags",
        "interfaces", "children", "netmap_entries",
        "short_names"
    )

    def __init__(self):
        self.name = None
        self.short_names = ()
        self.domain = None
        self.fqdn = None

        self.hwaddrs = ()
        self.invnum = 0
        self.bot_location = ()
        self.planner_id = 0
        self.owners = []

        self.dc = None
        self.queue = None
        self.pod = None
        self.rack = None

        self.switch = None

        self.ipv4addr = None
        self.ipv4addr_timestamp = 0
        self.ipv6addr = None
        self.ipv6addr_timestamp = 0

        self.vlan = 0
        self.vrf = None
        self.network_timestamp = 0

        self.walle_project = None
        self.walle_tags = []

        self.interfaces = []
        self.children = []

        self.netmap_entries = []

    def set_fqdn(self, fqdn):
        """Store ``fqdn`` together with its short name and dot-prefixed domain.

        Raises ValueError when ``fqdn`` contains no dot.
        """
        self.name, domain = fqdn.split(".", 1)
        self.domain = "." + domain
        self.fqdn = fqdn

    def create_with_fqdn(self, fqdn):
        """Return a new Host named ``fqdn`` that inherits this host's location.

        dc, queue, pod, rack, switch, bot_location and owners are copied by
        reference; every other field keeps its default.
        """
        host = Host()
        host.set_fqdn(fqdn)
        host.dc = self.dc
        host.queue = self.queue
        host.pod = self.pod
        host.rack = self.rack
        host.switch = self.switch
        host.bot_location = self.bot_location
        host.owners = self.owners
        return host

    def update_from(self, entry):
        """Merge addresses and vlan/vrf from ``entry``.

        An address is replaced when the entry is at least as recent as the stored
        address of the same family; vlan/vrf are refreshed when the entry is at
        least as recent as the stored network data, and falsy values never
        overwrite what is already stored.
        """
        if entry.ipaddr_family == socket.AF_INET6 and entry.timestamp >= self.ipv6addr_timestamp:
            self.ipv6addr = entry.ipaddr
            self.ipv6addr_timestamp = entry.timestamp
        elif entry.ipaddr_family == socket.AF_INET and entry.timestamp >= self.ipv4addr_timestamp:
            self.ipv4addr = entry.ipaddr
            self.ipv4addr_timestamp = entry.timestamp

        if entry.timestamp >= self.network_timestamp:
            if entry.vlan:
                self.vlan = entry.vlan
            if entry.vrf:
                self.vrf = entry.vrf
            self.network_timestamp = entry.timestamp

    def valid(self):
        """Return True when the name, domain, fqdn, dc, queue and switch are all set."""
        return (
            self.name is not None
            and self.domain is not None
            and self.fqdn is not None
            and self.dc is not None
            and self.queue is not None
            and self.switch is not None
        )

    def to_dict(self):
        """Serialize to a plain dict; unknown optional values become ``UNKNOWN``."""
        assert self.valid(), "not all fields filled properly: {!r}".format(self)
        return {
            "name": self.name,
            "domain": self.domain,
            "fqdn": self.fqdn,
            "hwaddrs": [_mac_to_str(x) for x in self.hwaddrs],
            "invnum": unicode(self.invnum) if self.invnum else "",
            "owners": self.owners,
            "dc": self.dc,
            "queue": self.queue,
            "pod": self.pod or UNKNOWN,
            "rack": self.rack or UNKNOWN,
            "switch": self.switch,
            "ipv4addr": self.ipv4addr or UNKNOWN,
            "ipv6addr": self.ipv6addr or UNKNOWN,
            "vlan": self.vlan,
            "vrf": self.vrf or UNKNOWN,
            "walle_project": self.walle_project or UNKNOWN,
            "walle_tags": self.walle_tags,
            "interfaces": [x.to_dict() for x in self.interfaces],
            "children": self.children,
            "bot_short_names": self.short_names
        }

    def __eq__(self, other):
        # Any object exposing ``fqdn`` is comparable; anything else is "not equal"
        # instead of an AttributeError.
        try:
            return self.fqdn == other.fqdn
        except AttributeError:
            return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``, so define it explicitly
        # to keep ``==`` and ``!=`` consistent.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self.fqdn)

    def __repr__(self):
        return (
            "<Host(fqdn={!r}, invnum={!r}, dc={!r}, queue={!r}, rack={!r}, switch={!r},"
            " hwaddrs={!r} ipv4addr={!r}, ipv6addr={!r}, vlan={!r}, vrf={!r}, interfaces={!r})>"
        ).format(
            self.fqdn, self.invnum, self.dc, self.queue, self.rack, self.switch,
            self.hwaddrs, self.ipv4addr, self.ipv6addr, self.vlan, self.vrf, self.interfaces
        )


def _mac_to_int(s):
    return int(s.replace(".", "").replace(":", ""), 16)


def _mac_to_str(i):
    return unicode(hex(i)[2:].upper())


def _maybe_int(s):
    try:
        return int(s)
    except (ValueError, TypeError):
        return None


def _download_into_file(file_name, url, timeout, retry_attempts=3, retry_timeout=5, **kwargs):
    """Stream the response for ``url`` into ``file_name``, retrying transient failures.

    The file is reopened (and therefore truncated) on every attempt. A failure is
    retried after ``retry_timeout`` seconds when it is an HTTP 429 or 5xx error,
    one of a few connection-level errnos, or tornado's ``_QuietException``; any
    other failure, or a failure on the last attempt, is re-raised. Extra keyword
    arguments are passed on to ``urllib.makeRequest``.
    """
    retriable_errno = (errno.ECONNRESET, errno.ECONNREFUSED, errno.ETIMEDOUT, errno.ENETUNREACH)
    final_attempt = retry_attempts - 1

    for attempt in xrange(retry_attempts):
        try:
            with open(file_name, 'wb') as f:
                urllib.makeRequest(url, timeout=timeout, streaming_callback=f.write, **kwargs)
        except Exception as exc:
            logger.error("Request to %s failed: %s", url, exc)
            if isinstance(exc, tornado.httpclient.HTTPError) and (exc.code == 429 or exc.code >= 500):
                nonfatal = True
            elif errno_from_exception(exc) in retriable_errno:
                nonfatal = True
            else:
                # Tornado sometimes raises _QuietException on ECONNRESET
                nonfatal = type(exc).__name__ == '_QuietException'

            if not nonfatal or attempt >= final_attempt:
                raise
            logger.exception('Caught exception, retrying request')
            time.sleep(retry_timeout)
        else:
            return
    assert False, "Shouldn't get here"


@contextlib.contextmanager
def _file_based_request(url, timeout=300, **kwargs):
    """Context manager yielding a binary file object with the response body for ``url``.

    With ``USE_CACHE`` enabled the body is stored under ``CACHE_DIR`` in a file
    named after the md5 of today's date plus the URL, so a cached file is reused
    for the same URL within the same day. The download goes to a ``.tmp`` file
    that is renamed once complete. Without the cache the body is streamed into a
    temporary file that lives only for the duration of the ``with`` block.
    """
    logger.info("Requesting %s", url)

    if not USE_CACHE:
        with tempfile.TemporaryFile() as stream:
            urllib.makeRequest(url, timeout=timeout, streaming_callback=stream.write, **kwargs)
            stream.seek(0)
            yield stream
        return

    digest = hashlib.md5(datetime.date.today().isoformat() + url).hexdigest()
    cached_path = os.path.join(CACHE_DIR, digest + '.dump')

    if os.path.exists(cached_path):
        logger.info("Using cached file %s", cached_path)
    else:
        partial_path = cached_path + '.tmp'
        _download_into_file(partial_path, url, timeout, **kwargs)
        os.rename(partial_path, cached_path)
        logger.info("Cached into %s", cached_path)

    with open(cached_path, 'rb') as f:
        yield f


def _tools_based_request(url, **kwargs):
    """Yield decoded JSON pages, following each page's ``next`` link.

    Requests are authorized with the OAuth token from the ``STAFF_OAUTH_TOKEN``
    environment variable. Iteration stops after the first page whose ``next``
    value is falsy.
    """
    auth_headers = {"Authorization": "OAuth {}".format(os.environ["STAFF_OAUTH_TOKEN"])}
    next_url = url
    while True:
        with _file_based_request(next_url, headers=auth_headers, **kwargs) as stream:
            page = json.load(stream)
            yield page
            next_url = page["next"]
            if not next_url:
                break


def _staff_based_request(url, **kwargs):
    """Yield rows from a paginated listing, paging by the id of the last row seen.

    ``url`` must contain a ``{min_id}`` placeholder; it starts at -1 and is then
    set to the ``id`` of the last row of each page. Iteration stops at the first
    page with an empty ``result``. Requests are authorized with the OAuth token
    from the ``STAFF_OAUTH_TOKEN`` environment variable.
    """
    auth_headers = {"Authorization": "OAuth {}".format(os.environ["STAFF_OAUTH_TOKEN"])}
    min_id = -1
    while True:
        page_url = url.format(min_id=min_id)
        with _file_based_request(page_url, headers=auth_headers, **kwargs) as stream:
            rows = json.load(stream)['result']
            if not rows:
                break
            for row in rows:
                yield row
            min_id = rows[-1]['id']


def _abc_based_request(url, **kwargs):
    """Yield every row from the ``results`` list of each page fetched for ``url``."""
    pages = _tools_based_request(url, **kwargs)
    for page in pages:
        for item in page["results"]:
            yield item
        if not page["next"]:
            return


def _abc_load_roles():
    """Yield ``(service_id, login)`` pairs for members of live ABC services.

    Services in the ``deleted`` or ``closed`` state are skipped. Members are
    requested in batches of 100 service ids per query.
    """
    # ABC_SERVICES_URL carries a ``{page}`` placeholder; fill it in so the first
    # request does not send a literal "{page}". Later pages come from each reply's
    # ``next`` link.
    services_url = ABC_SERVICES_URL.format(page=1)
    found_service_ids = [
        service["id"]
        for service in _abc_based_request(services_url)
        if service["state"] not in ("deleted", "closed")
    ]

    for chunk in chunks(found_service_ids, 100):
        members_url = ABC_MEMBERS_URL.format(",".join(str(service_id) for service_id in chunk))
        for person in _abc_based_request(members_url):
            yield (person["service"]["id"], person["person"]["login"])


def _staff_load_persons():
    """Yield ``(login, group_url)`` pairs for every person returned by Staff.

    A person's groups are the union of the listed groups that have a ``url``,
    the department group and all of the department's ancestors.
    """
    for person in _staff_based_request(STAFF_PERSONS_URL):
        memberships = set(
            entry["group"]["url"]
            for entry in person.get("groups", [])
            if "url" in entry["group"]  # sometimes there's no url for w/e reason
        )

        department = person["department_group"]
        memberships.add(department["url"])
        memberships.update(ancestor["url"] for ancestor in department["ancestors"])

        login = person["login"]
        for group_url in memberships:
            yield (login, group_url)


def _bot_load_hosts():
    """Yield the rows of the BOT hosts export, one decoded JSON item at a time."""
    with _file_based_request(BOT_HOSTS_DATA_URL) as stream:
        records = json.load(stream)
        for record in records:
            yield record


def _bot_load_switches():
    """Yield the rows of the BOT switches export, one decoded JSON item at a time."""
    with _file_based_request(BOT_SWITCHES_URL) as stream:
        records = json.load(stream)
        for record in records:
            yield record


def _walle_load_hosts(limit=1000):
    """Yield host rows from Wall-E, fetching ``limit`` rows per request.

    The cursor for the next page is one past the largest ``inv`` seen so far.
    Paging stops after the first page that holds fewer than ``limit`` rows.
    """
    cursor = 0
    while True:
        with _file_based_request(WALLE_HOSTS_URL.format(limit, cursor)) as stream:
            result = json.load(stream)
        rows = result["result"]
        for row in rows:
            # Advance to exactly "largest inv + 1". The former
            # ``max(cursor, inv) + 1`` bumped the cursor by an extra step for
            # every row whose inv was below it, which could skip hosts.
            cursor = max(cursor, row["inv"] + 1)
            yield row
        if len(rows) < limit:
            break


def _walle_load_projects():
    """Return the ``result`` value of the Wall-E projects listing."""
    with _file_based_request(WALLE_PROJECT_URL) as stream:
        payload = json.load(stream)
    return payload["result"]


def _walle_load_tree():
    """Return the ``result`` value of the Wall-E physical location tree."""
    with _file_based_request(WALLE_TREE_URL) as stream:
        payload = json.load(stream)
    return payload["result"]


def _racktables_request(url, separator, **kwargs):
    """Yield each non-blank line of the response for ``url`` split by ``separator``.

    Lines are decoded as UTF-8 and stripped of surrounding whitespace before
    splitting. Blank lines are skipped.

    Raises:
        Exception: after iteration, when the response held no non-blank line.
    """
    with _file_based_request(url, **kwargs) as stream:
        is_empty = True
        for raw_line in stream:
            line = raw_line.decode("utf-8").strip()
            if not line:
                # Lines read from a file are never empty strings (they carry at
                # least the newline), so blankness is checked after stripping.
                continue
            is_empty = False
            yield line.split(separator)
        if is_empty:
            raise Exception("racktables return empty response to {}".format(url))


def _racktables_load_networks():
    """Yield ``(network_address, vlan, network_type)`` from the tab-separated network list.

    Every row must have exactly four columns; the third one is ignored.
    """
    for row in _racktables_request(RACKTABLES_NETWORKS_URL, separator="\t"):
        network_address, vlan, _, network_type = row
        yield network_address, vlan, network_type


def _racktables_load_vrfs():
    """Yield the rows of the backbone VRF report followed by the fastbone VRF report."""
    for report_url in (RACKTABLES_BACKBONE_VRF_URL, RACKTABLES_FASTBONE_VRF_URL):
        for row in _racktables_request(report_url, separator="\t"):
            yield row


def _racktables_load_switches():
    """Yield ``(invnum, name)`` for each ``switch`` element of the net-layout XML.

    Elements missing the ``inventory`` or ``name`` attribute are skipped.
    """
    with _file_based_request(RACKTABLES_SWITCHES_URL) as stream:
        layout = ET.parse(stream)
    for element in layout.findall(".//switch"):
        attributes = element.attrib
        if "inventory" not in attributes or "name" not in attributes:
            continue
        yield int(attributes["inventory"]), attributes["name"]


def _racktables_load_macros():
    """Yield ``(second column, first column)`` for each row of the macros export.

    Rows whose first column is the literal ``CIDR`` (the header) are skipped.
    """
    for row in _racktables_request(RACKTABLES_MACROS_URL, separator="\t"):
        first_column = row[0]
        if first_column != "CIDR":
            yield row[1], first_column


def _racktables_load_mtn():
    """Yield ``(macro, project_id)`` from the MTN projects export.

    Every row must have exactly three columns; the last one is ignored.
    """
    for row in _racktables_request(RACKTABLES_MTN_URL, separator="\t"):
        macro, project_id, _ = row
        yield macro, project_id


def _netmap_load_ip():
    """Yield space-separated netmap rows that have five columns and a non-empty last one."""
    for columns in _racktables_request(RACKTABLES_NETMAP_IP_URL, separator=" "):
        if len(columns) == 5 and columns[-1]:
            yield columns


def _netmap_load_mac():
    """Yield ``(hwaddr, switch_name, port, timestamp)`` from the MAC netmap export.

    Every row must have exactly four columns; the second one is ignored. The
    third column is split at its first ``/`` into switch name and port, and the
    fourth is converted to ``int``.
    """
    for hwaddr, _, location, raw_timestamp in _racktables_request(RACKTABLES_NETMAP_MAC_URL, separator=" "):
        switch_name, _, port = location.partition("/")
        yield hwaddr, switch_name, port, int(raw_timestamp)


def _conductor_load_projects():
    """Yield ``(name, abc_service_id)`` for Conductor projects with a truthy ABC service id."""
    with _file_based_request(CONDUCTOR_PROJECTS_URL) as stream:
        for record in json.load(stream):
            service_id = record["abc_service_id"]
            if service_id:
                yield record["name"], service_id


def _conductor_load_hosts():
    """Yield ``(fqdn, planner_id)`` for the hosts of every Conductor project.

    A project whose host listing answers HTTP 404 is skipped; any other HTTP
    error propagates.
    """
    for project_name, planner_id in _conductor_load_projects():
        hosts_url = CONDUCTOR_HOSTS_URL.format(project_name)
        try:
            with _file_based_request(hosts_url) as stream:
                for record in json.load(stream):
                    yield record["fqdn"], planner_id
        except tornado.httpclient.HTTPError as exc:
            if exc.code == 404:
                continue
            raise


def _netmon_load_interfaces():
    """Yield ``(fqdn, address, mac, generated)`` for each reported interface with a MAC.

    ``generated`` is taken from the enclosing agent record and converted to ``int``.
    """
    with _file_based_request(NETMON_REPORT_URL) as stream:
        report = json.load(stream)
        for agent in report["agents"]:
            for iface in agent["interfaces"]:
                if not iface["mac"]:
                    continue
                yield (
                    iface["fqdn"],
                    iface["address"],
                    iface["mac"],
                    int(agent["generated"])
                )


def _noc_export_load_pods():
    """Yield ``(switch, pod_info)`` pairs from the NOC pods export (a JSON object)."""
    with _file_based_request(NOC_EXPORT_PODS_URL) as stream:
        pods_by_switch = json.load(stream)
        for pair in pods_by_switch.iteritems():
            yield pair


class ToolsStorage(object):
    """Lookup of user logins by ABC service id and by Staff group."""

    def __init__(self):
        # ABC roles are loaded first, Staff persons second.
        self._users_by_project = self._collect(_abc_load_roles())
        self._users_by_group = self._collect(
            (group, login) for login, group in _staff_load_persons()
        )

    @staticmethod
    def _collect(pairs):
        """Group ``(key, value)`` pairs into a dict of lists, keeping arrival order."""
        grouped = {}
        for key, value in pairs:
            if key in grouped:
                grouped[key].append(value)
            else:
                grouped[key] = [value]
        return grouped

    def get_users_by_planner_id(self, planner_id):
        """Return the logins recorded for ``planner_id`` (a new empty list if none)."""
        return self._users_by_project.get(planner_id, [])

    def get_users_by_group(self, group):
        """Return the logins recorded for ``group`` (a new empty list if none)."""
        return self._users_by_group.get(group, [])


class BotStorage(object):
    """Hosts and switches loaded from BOT, indexed by MAC and by inventory number."""

    Switch = collections.namedtuple("BotSwitch", ("name", "invnum", "bot_location"))

    def __init__(self):
        self._hosts = tuple(self._parse_hosts())

        # On a duplicate key the later record wins; the clash is only logged.
        by_mac = self._macs_to_hosts = {}
        for host in self._hosts:
            for mac in host.hwaddrs:
                if mac in by_mac:
                    logger.warning("Hosts %r and %r have same mac %r", host, by_mac[mac], _mac_to_str(mac))
                by_mac[mac] = host

        by_invnum = self._invnum_to_switches = {}
        for switch in self._parse_switches():
            if switch.invnum in by_invnum:
                logger.warning("Switches %r and %r have same invnum", switch, by_invnum[switch.invnum])
            by_invnum[switch.invnum] = switch

    @staticmethod
    def _extract(d, fields):
        """Return the truthy values of ``d`` for ``fields``, in field order."""
        return tuple(d[field] for field in fields if d[field])

    @classmethod
    def _extract_macs(cls, d):
        """Return integer MACs from ``MAC1``..``MAC4`` followed by the comma-separated ``ExMACs``."""
        ex_macs_raw = d.get("ExMACs")
        extra = ex_macs_raw.split(",") if ex_macs_raw else ()
        primary = cls._extract(d, ("MAC{}".format(idx) for idx in xrange(1, 5)))
        non_empty_extra = (mac for mac in extra if mac)
        return tuple(_mac_to_int(mac) for mac in itertools.chain(primary, non_empty_extra))

    @classmethod
    def _extract_location(cls, d):
        """Return the truthy ``LocationSegment1``..``LocationSegment5`` values as a tuple."""
        segment_keys = ("LocationSegment{}".format(x) for x in xrange(1, 6))
        return cls._extract(d, segment_keys)

    @classmethod
    def _parse_hosts(cls):
        """Yield a Host per BOT row that is in operation/setup and has a dotted FQDN."""
        for row in _bot_load_hosts():
            status = row["Status"]
            if not (status == "OPERATION" or status.startswith("SETUP_")):
                continue
            fqdn = row["FQDN"]
            if not fqdn or "." not in fqdn:
                continue

            host = Host()
            host.set_fqdn(fqdn)
            host.hwaddrs = cls._extract_macs(row)
            host.invnum = int(row["Inv"])
            host.bot_location = cls._extract_location(row)
            host.planner_id = _maybe_int(row["planner_id"]) if row["planner_id"] else None
            yield host

    @classmethod
    def _parse_switches(cls):
        """Yield a Switch per BOT row; the name is the FQDN's first label or None."""
        for row in _bot_load_switches():
            fqdn = row["FQDN"]
            short_name = fqdn.split(".", 1)[0] if fqdn else None
            yield cls.Switch(
                name=short_name,
                invnum=int(row["InstanceNumber"]),
                bot_location=cls._extract_location(row)
            )

    def find_host_by_mac(self, mac):
        return self._macs_to_hosts.get(mac)

    def find_switch_by_invnum(self, invnum):
        return self._invnum_to_switches.get(invnum)

    def find_all_switches(self):
        return self._invnum_to_switches.itervalues()

    def host_count(self):
        return len(self._hosts)

    def __iter__(self):
        return iter(self._hosts)


class ConductorStorage(object):
    """Maps host FQDNs to the planner id of their Conductor project."""

    def __init__(self):
        # For an FQDN listed more than once the last pair wins.
        self._planner_id_by_fqdn = dict(_conductor_load_hosts())

    def find_planner_id_by_fqdn(self, fqdn):
        """Return the planner id recorded for ``fqdn``, or 0 when there is none."""
        return self._planner_id_by_fqdn.get(fqdn, 0)


class PodStorage(object):
    """Maps switch names to pod labels of the form ``<domain>-<pod>``."""

    def __init__(self):
        labels = {}
        for switch, pod_info in _noc_export_load_pods():
            labels[switch] = "{}-{}".format(pod_info["domain"], pod_info["pod"])
        self._pod_by_switch = labels

    def find_pod_by_switch(self, switch):
        """Return the pod label for ``switch``, or None when it is unknown."""
        return self._pod_by_switch.get(switch)


class WalleStorage(object):
    """Hosts and the physical location tree loaded from Wall-E.

    Hosts are indexed by MAC and by inventory number; the location tree is
    indexed per level by its path tuple.
    """

    WalleHost = collections.namedtuple("WalleHost", (
        "fqdn", "hwaddrs", "invnum",
        "dc", "queue", "rack", "switch",
        "owners", "project", "tags"
    ))

    def __init__(self):
        project_tags = {project_id: tags for project_id, tags in self._parse_projects_tags()}
        self._hosts = tuple(self._parse_hosts(project_tags))
        tree_index, group_names = self._parse_tree()
        self._physical_tree_index = dict(tree_index)
        self._group_names = group_names

        # On a duplicate key the later host wins; the clash is only logged.
        self._macs_to_hosts = {}
        self._invnum_to_hosts = {}
        for host in self._hosts:
            for mac in host.hwaddrs:
                if mac in self._macs_to_hosts:
                    logger.warning("Hosts %r and %r have same mac %r", host, self._macs_to_hosts[mac], _mac_to_str(mac))
                self._macs_to_hosts[mac] = host

            if host.invnum in self._invnum_to_hosts:
                logger.warning("Hosts %r and %r have same invnum", host, self._invnum_to_hosts[host.invnum])
            self._invnum_to_hosts[host.invnum] = host

    @classmethod
    def _parse_projects_tags(cls):
        """Yield ``(project_id, tags)``; projects without tags get an empty list."""
        for row in _walle_load_projects():
            yield row["id"], row.get("tags", [])

    @classmethod
    def _parse_hosts(cls, project_tags):
        """Yield a WalleHost for every row that has a name and a complete location.

        Rows lacking the datacenter, queue or switch are skipped on purpose. A
        host whose project is absent from ``project_tags`` gets an empty tag list
        instead of aborting the whole load with KeyError.
        """
        for row in _walle_load_hosts():
            if "name" in row and "location" in row:
                location = row["location"]
                try:
                    dc = location["short_datacenter_name"]
                    queue = location["short_queue_name"]
                    rack = location.get("rack")
                    switch = location["switch"]
                except KeyError:
                    # Incomplete location: such a host cannot be placed, skip it.
                    continue

                yield cls.WalleHost(
                    fqdn=row["name"],
                    hwaddrs=tuple(_mac_to_int(x) for x in row["macs"]) if "macs" in row else (),
                    invnum=row["inv"],
                    dc=dc,
                    queue=queue,
                    rack=rack,
                    switch=switch,
                    owners=row["owners"],
                    project=row["project"],
                    tags=project_tags.get(row["project"], [])
                )

    @staticmethod
    def _index_tree(components):
        """Index location tree ``components`` by level.

        Returns ``(indexes, group_names)``:

        * ``indexes[level_name][path]`` is the node's ``short_name`` when the key
          is present, otherwise its lower-cased ``name``; ``path`` is the node's
          ``path`` split on ``|``. Levels are country, city, datacenter, queue
          and rack, by depth.
        * ``group_names[path]`` is, for queue-level nodes, the tuple of truthy
          short names from the root down to and including the queue.

        A short name that was already seen anywhere in the tree is reported
        through ``logger.error``.
        """
        indexes = collections.defaultdict(dict)
        group_names = {}
        level_names = ("country", "city", "datacenter", "queue", "rack")
        # Shared by all traverse() calls. It used to be re-created inside every
        # call, which made the uniqueness check below unable to ever fire.
        seen_short_names = collections.defaultdict(set)

        def traverse(component, level=0, parents_short_names=()):
            path = tuple(component["path"].split("|"))
            short_name = component.get("short_name")
            name = short_name if "short_name" in component else component["name"].lower()
            level_name = level_names[level]
            indexes[level_name][path] = name

            if short_name:
                for seen_level_name, names_set in seen_short_names.items():
                    if short_name in names_set:
                        logger.error(
                            "Name %s is not unique, it is in %s at %s and %s",
                            short_name,
                            level_name, '->'.join(parents_short_names), seen_level_name
                        )
                seen_short_names[level_name].add(short_name)
                parents_short_names = parents_short_names + (short_name, )

            for subcomponent in component.get("nodes", ()):
                traverse(subcomponent, level + 1, parents_short_names)

            if level_name == "queue" and parents_short_names:
                group_names[path] = parents_short_names

        for component in components:
            traverse(component)

        return indexes, group_names

    @staticmethod
    def _parse_tree():
        """Load the Wall-E physical location tree and index it (see ``_index_tree``)."""
        return WalleStorage._index_tree(_walle_load_tree())

    def find_host_by_mac(self, mac):
        return self._macs_to_hosts.get(mac)

    def find_host_by_invnum(self, invnum):
        return self._invnum_to_hosts.get(invnum)

    def find_datacenter_name(self, bot_location):
        """Look up the datacenter name by the first three location segments."""
        return self._physical_tree_index["datacenter"].get(bot_location[:3])

    def find_queue_name(self, bot_location):
        """Look up the queue name by the first four location segments."""
        return self._physical_tree_index["queue"].get(bot_location[:4])

    def find_rack_name(self, bot_location):
        """Look up the rack name by the first five location segments."""
        return self._physical_tree_index["rack"].get(bot_location[:5])

    def get_all_names(self, bot_queue_loc):
        """Return the short names leading to the queue at ``bot_queue_loc`` (or ``()``)."""
        return self._group_names.get(bot_queue_loc, ())

    def host_count(self):
        return len(self._hosts)


class NetworkParser(object):
    """Classifies a single network: the macro containing it and whether it lies in MTN."""

    def __init__(self, macros_set, network_address):
        self._macros_set = macros_set
        self._network = IPNetwork(network_address)

    def find_macro(self):
        """Return the name of the macro whose network fully contains this one, else None.

        The candidate is looked up in the macro set by this network's first address.
        """
        network = self._network
        candidate = self._macros_set.get_by_int(network.first)
        if candidate is None:
            return None
        enclosing = candidate.network
        if enclosing.first <= network.first <= network.last <= enclosing.last:
            return candidate.name
        return None

    def is_mtn(self):
        """Return True for an IPv6 network lying entirely inside one of the MTN blocks."""
        network = self._network
        if network.version != 6:
            return False
        for block in (MTN_BACKBONE_BLOCK, MTN_FASTBONE_BLOCK):
            if block[0] <= network.first <= network.last <= block[1]:
                return True
        return False


class RacktablesStorage(object):
    """Networks, VRFs, switches and MTN projects loaded from Racktables."""

    Macro = collections.namedtuple("Macro", ("name", "network"))

    Switch = collections.namedtuple("RacktablesSwitch", ("name", "invnum", "bot_location"))

    Network = collections.namedtuple("RacktablesNetwork", (
        "vlan", "vrf", "network_type", "macro", "mtn"
    ))

    EMPTY_NETWORK = Network(0, None, None, None, False)

    def __init__(self, bot_storage):
        self._vlans, self._vrfs = self._create_indexes()

        switches = {}
        for switch in self._parse_switches(bot_storage):
            switches[switch.name] = switch
        self._switches = switches

        self._mtn_projects = dict(
            (project_id, macro) for macro, project_id in _racktables_load_mtn()
        )

    @classmethod
    def _create_indexes(cls):
        """Build the ``(vlan NetworkSet, vrf NetworkSet)`` pair.

        Macros are loaded first so that every network can be matched against
        them; VRF records carry only the ``vrf`` field.
        """
        macros_set = NetworkSet(
            (network, cls.Macro(name=macro, network=IPNetwork(network)))
            for macro, network in _racktables_load_macros()
        )

        def describe(address, vlan, network_type):
            parser = NetworkParser(macros_set, address)
            return cls.Network(
                vlan=int(vlan),
                vrf=None,
                network_type=network_type,
                macro=parser.find_macro(),
                mtn=parser.is_mtn()
            )

        vlan_list = [
            (address, describe(address, vlan, network_type))
            for address, vlan, network_type in _racktables_load_networks()
        ]
        vrf_list = [
            (address, cls.Network(vlan=0, vrf=vrf, network_type=None, macro=None, mtn=False))
            for address, vrf in _racktables_load_vrfs()
        ]

        return NetworkSet(vlan_list), NetworkSet(vrf_list)

    @classmethod
    def _parse_switches(cls, bot_storage):
        """Yield switches known to Racktables and BOT, then BOT-only named switches.

        Racktables switches without a BOT record are dropped; their location
        always comes from BOT.
        """
        known_invnum = set()
        for invnum, name in _racktables_load_switches():
            known_invnum.add(invnum)
            bot_info = bot_storage.find_switch_by_invnum(invnum)
            if bot_info is None:
                continue
            yield cls.Switch(name=name, invnum=invnum, bot_location=bot_info.bot_location)

        for bot_info in bot_storage.find_all_switches():
            if bot_info.invnum in known_invnum or bot_info.name is None:
                continue
            yield cls.Switch(
                name=bot_info.name,
                invnum=bot_info.invnum,
                bot_location=bot_info.bot_location
            )

    def find_network_by_addr(self, addr):
        """Return a Network combining the vlan record and the vrf record for ``addr``.

        Fields without a matching record keep the ``EMPTY_NETWORK`` values.
        """
        vlan_hit = self._vlans.get(addr)
        vrf_hit = self._vrfs.get(addr)

        base = self.EMPTY_NETWORK if vlan_hit is None else vlan_hit
        vrf = self.EMPTY_NETWORK.vrf if vrf_hit is None else vrf_hit.vrf
        return self.Network(base.vlan, vrf, base.network_type, base.macro, base.mtn)

    def find_switch_by_name(self, name):
        return self._switches.get(name)

    def find_macro_by_project_id(self, project_id):
        return self._mtn_projects.get(project_id)


class NetmonStorage(object):
    """Interfaces loaded via ``_netmon_load_interfaces()``, grouped by hardware address."""

    Interface = collections.namedtuple("Interface", (
        "name", "domain", "fqdn",
        "hwaddr", "ipaddr", "ipaddr_family",
        "network_type", "vlan", "vrf",
        "macro", "project_id", "timestamp"
    ))

    def __init__(self, racktables_storage):
        """Load all netmon rows and enrich them with network data.

        :param racktables_storage: object providing ``find_network_by_addr``
            and ``find_macro_by_project_id``.
        """
        by_hwaddr = self._interfaces_by_hwaddr = {}
        for fqdn, ipaddr, raw_hwaddr, timestamp in _netmon_load_interfaces():
            name, dot, domain = fqdn.partition(".")
            if not dot:
                # a name without a domain part cannot be split; skip the row
                continue

            hwaddr = _mac_to_int(raw_hwaddr)
            same_mac_interfaces = by_hwaddr.setdefault(hwaddr, [])
            network = racktables_storage.find_network_by_addr(ipaddr)

            # the project id is extracted only for addresses in mtn networks
            if network.mtn:
                project_id = IPNetwork.extract_mtn_project(ipaddr)
            else:
                project_id = None

            family = IPNetwork.get_addr_family(ipaddr)

            # a non-empty project id overrides the macro of the network
            if project_id:
                macro = racktables_storage.find_macro_by_project_id(project_id)
            else:
                macro = network.macro

            same_mac_interfaces.append(self.Interface(
                name=name,
                domain=domain,
                fqdn=fqdn,
                hwaddr=hwaddr,
                ipaddr=ipaddr,
                ipaddr_family=family,
                network_type=network.network_type,
                vlan=network.vlan,
                vrf=network.vrf,
                macro=macro,
                project_id=project_id,
                timestamp=timestamp
            ))

    def find_interfaces_by_hwaddr(self, hwaddr):
        """Return the interfaces recorded for ``hwaddr``, or an empty tuple."""
        interfaces_by_hwaddr = self._interfaces_by_hwaddr
        return interfaces_by_hwaddr.get(hwaddr, ())


class NetmapStorage(object):
    """Netmap data indexed by MAC, switch port and fqdn.

    Two sources are combined: ``_netmap_load_mac()`` (MAC -> switch/port) and
    ``_netmap_load_ip()`` (per-address entries). Entries that were already
    attributed to a host can be dropped from iteration with ``mark_as_seen``;
    the lookup indexes keep them.
    """

    HwaddrInfo = collections.namedtuple("HwaddrInfo", (
        "hwaddr",
        "switch_name",
        "port",
        "timestamp"
    ))

    Entry = collections.namedtuple("NetmapEntry", (
        "name", "domain", "fqdn",
        "hwaddr", "switch_name", "port",
        "ipaddr", "ipaddr_family",
        "network_type", "vlan", "vrf",
        "macro", "project_id",
        "identifier", "timestamp"
    ))

    def __init__(self, racktables_storage):
        """Load both netmap sources and build all lookup indexes.

        :param racktables_storage: object providing ``find_network_by_addr``
            and ``find_macro_by_project_id``.
        """
        self._hwaddrs = self._parse_mac()
        self._hwaddrs_by_port = collections.defaultdict(list)
        for hwaddr_info in self._hwaddrs.itervalues():
            self._hwaddrs_by_port[(hwaddr_info.switch_name, hwaddr_info.port)].append(hwaddr_info)

        # entries not yet marked as seen, keyed by their unique identifier
        self._entries = {x.identifier: x for x in self._parse_ip(racktables_storage)}

        self._entries_by_port = collections.defaultdict(list)
        self._entries_by_fqdn = collections.defaultdict(list)
        self._entries_by_mac = collections.defaultdict(list)
        for entry in self._entries.itervalues():
            self._entries_by_port[(entry.switch_name, entry.port)].append(entry)
            self._entries_by_fqdn[entry.fqdn].append(entry)
            self._entries_by_mac[entry.hwaddr].append(entry)

    @classmethod
    def _parse_mac(cls):
        """Return ``{hwaddr: HwaddrInfo}``; a later row for the same MAC wins."""
        hwaddr_map = {}
        for hwaddr, switch_name, port, timestamp in _netmap_load_mac():
            hwaddr = _mac_to_int(hwaddr)
            hwaddr_map[hwaddr] = cls.HwaddrInfo(
                hwaddr=hwaddr,
                switch_name=switch_name,
                port=port,
                timestamp=timestamp
            )
        return hwaddr_map

    def _parse_ip(self, racktables_storage):
        """Yield an ``Entry`` for every usable row of ``_netmap_load_ip()``.

        Row layout as used here: ``parts[0]`` switch, ``parts[1]`` port,
        ``parts[2]`` MAC, ``parts[3]`` IP address, ``parts[4]`` fqdn.
        Rows with an empty host name, without a domain part, or inside
        ``.dhcp.yndx.net`` are skipped. ``identifier`` is the row index.
        """
        for identifier, parts in enumerate(_netmap_load_ip()):
            ipaddr = parts[3]
            fqdn = parts[4]
            network_data = racktables_storage.find_network_by_addr(ipaddr)
            # partition() never raises: an fqdn without a dot yields an empty
            # domain and the row is dropped by the filter below instead of
            # aborting the whole load with ValueError.
            name, _, domain = fqdn.partition(".")
            hwaddr = _mac_to_int(parts[2])
            hwaddr_info = self._hwaddrs.get(hwaddr)
            project_id = IPNetwork.extract_mtn_project(ipaddr) if network_data.mtn else None
            entry = self.Entry(
                name=name,
                domain=domain,
                fqdn=fqdn,
                hwaddr=hwaddr,
                switch_name=parts[0],
                port=parts[1],
                ipaddr=ipaddr,
                ipaddr_family=IPNetwork.get_addr_family(ipaddr),
                network_type=network_data.network_type,
                vlan=network_data.vlan,
                vrf=network_data.vrf,
                macro=racktables_storage.find_macro_by_project_id(project_id) if project_id else network_data.macro,
                project_id=project_id,
                identifier=identifier,
                # timestamp comes from the MAC table; 0 when the MAC is unknown there
                timestamp=hwaddr_info.timestamp if hwaddr_info is not None else 0
            )
            if entry.name and entry.domain and not entry.domain.endswith(".dhcp.yndx.net"):
                yield entry

    def mark_as_seen(self, entry):
        """Remove ``entry`` from the set returned by iteration.

        Raises ``KeyError`` if the entry was already removed.
        """
        self._entries.pop(entry.identifier)

    def find_by_port(self, switch_name, port):
        """Return the list of entries seen on ``(switch_name, port)``."""
        return self._entries_by_port[(switch_name, port)]

    def find_by_fqdn(self, fqdn):
        """Return the list of entries whose fqdn equals ``fqdn``."""
        return self._entries_by_fqdn[fqdn]

    def find_by_mac(self, mac):
        """Return the list of entries whose hardware address equals ``mac``."""
        # Must read the MAC index; the fqdn index is keyed by names and would
        # never contain a hardware address.
        return self._entries_by_mac[mac]

    def switch_by_mac(self, hwaddr):
        """Return the switch name recorded for ``hwaddr`` in the MAC table, or None."""
        hwaddr_info = self._hwaddrs.get(hwaddr)
        return hwaddr_info.switch_name if hwaddr_info is not None else None

    def macs_by_port(self, switch_name, port):
        """Return a generator over hardware addresses seen on ``(switch_name, port)``."""
        return (hwaddr_info.hwaddr for hwaddr_info in self._hwaddrs_by_port[(switch_name, port)])

    def __iter__(self):
        """Iterate over a snapshot of the entries not yet marked as seen.

        The snapshot makes it safe to call ``mark_as_seen`` while iterating.
        """
        return iter(tuple(self._entries.itervalues()))


class InterfaceBuilder(object):
    """Accumulates entries of one host into interfaces keyed by fqdn."""

    def __init__(self, host):
        self._host = host
        self._interface_map = {}

    @staticmethod
    def _has_project_id(obj):
        """Truthy when ``obj.project_id`` is set and is not the string '0'."""
        return obj.project_id and obj.project_id != '0'

    def parse(self, entry, switch_name, update_host=True):
        """Merge ``entry`` into the interface with the same fqdn.

        The interface is created on first use. When ``update_host`` is true
        and the resulting interface is a backbone one, the host itself is
        updated from the entry as well.
        """
        try:
            iface = self._interface_map[entry.fqdn]
        except KeyError:
            iface = Interface()
            self._interface_map[entry.fqdn] = iface
            iface.set_fqdn(entry.fqdn)

        # An interface may have multiple ipv6 addrs with and without project_id.
        # If we've already found an addr with project_id, don't overwrite it with zero.
        keep_existing = (
            self._has_project_id(iface)
            and not self._has_project_id(entry)
            and entry.ipaddr_family == socket.AF_INET6
        )
        if not keep_existing:
            iface.update_from(entry, switch_name)

        if iface.network_type == BACKBONE and update_host:
            self._host.update_from(entry)

    def finish(self):
        """Store the collected interfaces on the host, ordered by (network_type, fqdn)."""
        ordered = list(self._interface_map.itervalues())
        ordered.sort(key=operator.attrgetter("network_type", "fqdn"))
        self._host.interfaces = ordered


class TopologyContext(object):
    """Simply store anything needed to build topology.

    The constructor loads every external storage, joins BOT hosts with
    Wall-E, racktables, netmap and netmon data and leaves the final host
    list in ``cleaned_hosts_data`` (sorted by fqdn).
    """

    def __init__(self):
        """Load all storages and build ``hosts_data`` and ``cleaned_hosts_data``.

        Raises:
            OSError: the cache directory could not be created.
            RuntimeError: BOT has fewer than ``MIN_BOT_HOSTS`` hosts, Wall-E
                has more than ``BOT_WALLE_HOST_COUNT_DIFF`` hosts beyond BOT,
                too many hosts are incomplete, or the final topology has
                fewer than ``MIN_BOT_HOSTS`` hosts.
        """
        timer = Stopwatch()

        if USE_CACHE:
            try:
                os.mkdir(CACHE_DIR)
            except OSError as e:
                # an already existing cache directory is fine
                if e.errno != errno.EEXIST:
                    raise

        # first of all load data from external sources and build needed indexes
        self.bot_storage = BotStorage()
        logger.info("BotStorage created in %.3f seconds", timer.get())

        # check that there are enough hosts
        if self.bot_storage.host_count() < MIN_BOT_HOSTS:
            raise RuntimeError("Something wrong with BOT, only {} hosts found".format(
                self.bot_storage.host_count()
            ))

        self.tools_storage = ToolsStorage()
        logger.info("ToolsStorage created in %.3f seconds", timer.get())

        self.conductor_storage = ConductorStorage()
        logger.info("ConductorStorage created in %.3f seconds", timer.get())

        self.pod_storage = PodStorage()
        logger.info("PodStorage created in %.3f seconds", timer.get())

        self.walle_storage = WalleStorage()
        logger.info("WalleStorage created in %.3f seconds", timer.get())

        # Wall-E is not expected to know noticeably more hosts than BOT
        if self.bot_storage.host_count() + BOT_WALLE_HOST_COUNT_DIFF < self.walle_storage.host_count():
            raise RuntimeError("Something wrong with BOT or Wall-E, {} hosts in BOT, {} hosts in Wall-E".format(
                self.bot_storage.host_count(), self.walle_storage.host_count()
            ))

        self.racktables_storage = RacktablesStorage(self.bot_storage)
        logger.info("RacktablesStorage created in %.3f seconds", timer.get())

        self.netmap_storage = NetmapStorage(self.racktables_storage)
        logger.info("NetmapStorage created in %.3f seconds", timer.get())

        self.netmon_storage = NetmonStorage(self.racktables_storage)
        logger.info("NetmonStorage created in %.3f seconds", timer.get())

        self.hosts_data = list(self._prepare_hosts_data())
        logger.info("Hosts data created in %.3f seconds", timer.get())

        self.cleaned_hosts_data = list(self._classify_netmap())
        self.cleaned_hosts_data.sort(key=operator.attrgetter("fqdn"))

        if not self.cleaned_hosts_data or len(self.cleaned_hosts_data) < MIN_BOT_HOSTS:
            raise RuntimeError("Something wrong with topology, only {} hosts found".format(
                len(self.cleaned_hosts_data)
            ))

        logger.info("Netmap processed in %.3f seconds", timer.get())

    def _detect_switch_by_mac(self, host):
        """Return the name of the first racktables-known switch seeing one of the host's MACs.

        Returns None when no MAC of the host maps to a known switch.
        """
        for hwaddr in host.hwaddrs:
            switch_name = self.netmap_storage.switch_by_mac(hwaddr)
            if switch_name is not None:
                switch_info = self.racktables_storage.find_switch_by_name(switch_name)
                if switch_info is not None:
                    return switch_info.name

    def _prepare_hosts_data(self):
        """Yield BOT hosts enriched with Wall-E, netmap and pod data.

        Hosts that end up without datacenter, queue or switch are logged and
        not yielded.
        """
        for host in self.bot_storage:
            walle_host = self.walle_storage.find_host_by_invnum(host.invnum)
            if walle_host is not None:
                # fill fqdn, macs and location from wall-e
                host.set_fqdn(walle_host.fqdn)
                host.hwaddrs = walle_host.hwaddrs
                host.dc = walle_host.dc
                host.queue = walle_host.queue
                host.rack = walle_host.rack
                host.switch = walle_host.switch
                host.walle_project = walle_host.project
                host.walle_tags = walle_host.tags
            else:
                # fill location from bot
                host.dc = self.walle_storage.find_datacenter_name(host.bot_location)
                host.queue = self.walle_storage.find_queue_name(host.bot_location)
                host.rack = self.walle_storage.find_rack_name(host.bot_location)

            host.short_names = self.walle_storage.get_all_names(host.bot_location[:4])

            self._fix_owners(host, walle_host)

            # also fill ip addresses from netmap
            for entry in self.netmap_storage.find_by_fqdn(host.fqdn):
                if entry.network_type == BACKBONE:
                    host.update_from(entry)

                    if host.switch is None:
                        # Netmap entries keep the switch in `switch_name`;
                        # `port` holds only the port, so it must not be
                        # indexed to obtain the switch.
                        switch_info = self.racktables_storage.find_switch_by_name(entry.switch_name)
                        if switch_info is not None:
                            host.switch = switch_info.name

            if host.switch is None:
                # let's try to detect switch by mac
                host.switch = self._detect_switch_by_mac(host)

            pod = self.pod_storage.find_pod_by_switch(host.switch)
            if pod:
                host.pod = pod

            if host.dc is not None and host.queue is not None and host.switch is not None:
                yield host
            else:
                logger.warning("Host %r has no location or switch", host)

    @staticmethod
    def _append_if_needed(l, value):
        """Append ``value`` to list ``l`` unless already present, keeping ``l`` sorted."""
        if value not in l:
            l.append(value)
            l.sort()

    @staticmethod
    def _remove_fb_as_postfix(entry):
        """Return the fqdn with an ``-fbN`` suffix or ``fbN-`` marker removed from the host name.

        E.g. name ``host-fb`` in domain ``yandex.net`` gives ``host.yandex.net``.
        """
        # `entry.domain` has no leading dot (the fqdn is split on the first
        # "."), so the separator has to be restored explicitly.
        return re.sub(r"(-fb\d*$|fb\d*-)", "", entry.name) + "." + entry.domain

    @staticmethod
    def _remove_fb_as_domain(entry):
        """Return the fqdn with a leading ``fb`` domain label removed.

        E.g. ``host.fb.yandex.net`` gives ``host.yandex.net``.
        """
        # Prepend the dot dropped by the name/domain split so that the
        # pattern can actually match and the result is a well-formed fqdn.
        return entry.name + re.sub(r"^\.fb\.", ".", "." + entry.domain)

    @staticmethod
    def _remove_fb_as_prefix(entry):
        """Return the fqdn with a leading ``fastbone.`` label removed."""
        return re.sub(r"^fastbone\.", "", entry.fqdn)

    def _iter_fqdn_by_mac(self, hwaddr):
        """Return an iterator over distinct fqdns that BOT, Wall-E or netmap associate with ``hwaddr``."""
        # try to find hosts by mac
        possible_fqdns = set()
        bot_host = self.bot_storage.find_host_by_mac(hwaddr)
        if bot_host is not None:
            possible_fqdns.add(bot_host.fqdn)
        walle_host = self.walle_storage.find_host_by_mac(hwaddr)
        if walle_host is not None:
            possible_fqdns.add(walle_host.fqdn)
        possible_fqdns.update(x.fqdn for x in self.netmap_storage.find_by_mac(hwaddr))
        return iter(possible_fqdns)

    def _classify_netmap(self):
        """Attribute netmap entries to hosts and return the list of valid hosts.

        Stages:
          1. entries whose fqdn equals a bare host's fqdn go to that host;
          2. remaining backbone entries become virtual (child) hosts of the
             bare host found through the MACs seen on the same switch port;
          3. remaining non-backbone entries are attached to a host found by
             MAC or by a backbone neighbour on the same port;
          4. entries and netmon data are turned into interfaces.

        Raises:
            RuntimeError: more than ``MAX_INCOMPLETE_HOSTS`` hosts are invalid
                or have no interfaces.
        """
        bare_hosts_map = {host.fqdn: host for host in self.hosts_data}
        virtual_hosts_map = {}

        created = 0
        incomplete = 0

        # let's exclude from netmap all addresses that has same fqdn as bare hosts
        for entry in self.netmap_storage:
            bare_host = bare_hosts_map.get(entry.fqdn)
            if bare_host is not None:
                bare_host.netmap_entries.append(entry)
                self.netmap_storage.mark_as_seen(entry)

        # let's find all virtual hosts
        for entry in self.netmap_storage:
            if entry.network_type != BACKBONE:
                # ignore non backbone entries
                continue

            # bare hosts whose MACs are visible on the port of this entry
            found_host_map = {
                fqdn: bare_hosts_map[fqdn]
                for hwaddr in self.netmap_storage.macs_by_port(entry.switch_name, entry.port)
                for fqdn in self._iter_fqdn_by_mac(hwaddr)
                if fqdn in bare_hosts_map and fqdn != entry.fqdn
            }

            bare_host = None
            if found_host_map:
                # with several candidates an arbitrary one is used
                bare_host = next(found_host_map.itervalues())
                if len(found_host_map) > 1:
                    logger.warning("More than one bare host found for %r: %s", entry, ", ".join(found_host_map))
            else:
                logger.debug("No bare hosts found for %r", entry)

            if bare_host is None:
                # hosts without parents aren't needed in topology
                continue

            # let's create virtual host if needed
            child_host = virtual_hosts_map.get(entry.fqdn)
            if child_host is None:
                child_host = bare_host.create_with_fqdn(entry.fqdn)
                self._fix_owners(child_host)
                virtual_hosts_map[child_host.fqdn] = child_host
                created += 1

            child_host.netmap_entries.append(entry)

            self._append_if_needed(bare_host.children, child_host.fqdn)

            self.netmap_storage.mark_as_seen(entry)

        # try to find some additional interfaces
        cleanup_methods = (self._remove_fb_as_postfix, self._remove_fb_as_domain, self._remove_fb_as_prefix)
        for entry in self.netmap_storage:
            if entry.network_type == BACKBONE:
                # ignore backbone entries
                continue

            # try to find hosts by mac
            possible_fqdns = {
                fqdn for fqdn in self._iter_fqdn_by_mac(entry.hwaddr)
                if fqdn != entry.fqdn
            }

            # try to find host by port
            if not possible_fqdns:
                possible_fqdns.update(
                    neighbor_entry.fqdn
                    for neighbor_entry in self.netmap_storage.find_by_port(entry.switch_name, entry.port)
                    if neighbor_entry.network_type == BACKBONE
                )

            # several candidates for a fastbone name: prefer the one obtained
            # by stripping the fastbone marker from the entry's own fqdn
            if len(possible_fqdns) > 1 and entry.network_type == FASTBONE:
                for method in cleanup_methods:
                    cleaned_fqdn = method(entry)
                    if cleaned_fqdn in possible_fqdns:
                        possible_fqdns = {cleaned_fqdn}
                        break

            if len(possible_fqdns) == 1:
                [fqdn] = possible_fqdns
                some_host = bare_hosts_map.get(fqdn, virtual_hosts_map.get(fqdn))
                if some_host is not None:
                    some_host.netmap_entries.append(entry)
                    self.netmap_storage.mark_as_seen(entry)

        # turn addresses into interfaces
        cleaned_hosts_data = []
        for host in itertools.chain(bare_hosts_map.itervalues(), virtual_hosts_map.itervalues()):
            builder = InterfaceBuilder(host)

            for entry in host.netmap_entries:
                builder.parse(entry, entry.switch_name)

            # let's use netmon to fill interfaces
            for hwaddr in host.hwaddrs:
                for entry in self.netmon_storage.find_interfaces_by_hwaddr(hwaddr):
                    # Don't use interfaces in container vlans to update host info,
                    # just add them to the `host.interfaces` list. Netmon-agent
                    # reports fake fqdns for these interfaces, otherwise we
                    # wouldn't consider them valid.
                    builder.parse(entry, host.switch,
                                  update_host=entry.vlan not in (688, 788))

            builder.finish()

            if host.valid():
                cleaned_hosts_data.append(host)

            if not host.valid() or not host.interfaces:
                incomplete += 1
                logger.warning("Host %r has no location or interfaces", host)

        logger.info(
            "created: %d, incomplete: %d, total bare hosts: %d, found bare hosts: %d, virtual hosts: %d",
            created, incomplete, self.bot_storage.host_count(),
            sum(1 for row in cleaned_hosts_data if row.invnum),
            sum(1 for row in cleaned_hosts_data if not row.invnum))

        # check that there are enough hosts with interfaces
        if incomplete > MAX_INCOMPLETE_HOSTS:
            raise RuntimeError("Something wrong with NOC, {} incomplete hosts found".format(
                incomplete
            ))

        return cleaned_hosts_data

    def _fix_owners(self, host, walle_host=None):
        """Recompute ``host.owners`` (and ``host.planner_id`` when it is empty).

        Owners are the union of the host's current owners, the users of its
        planner id (unless that group has 1000 or more users) and the owners
        of ``walle_host`` if given. Names starting with ``@`` are replaced by
        the users of that group. The result is stored sorted.
        """
        owner_set = set(host.owners)
        if not host.planner_id:
            host.planner_id = self.conductor_storage.find_planner_id_by_fqdn(host.fqdn)
        if host.planner_id:
            abc_users = self.tools_storage.get_users_by_planner_id(host.planner_id)
            # don't add absurdly large groups to prevent topology size from blowing up
            if len(abc_users) < 1000:
                owner_set.update(abc_users)
        if walle_host is not None:
            owner_set.update(walle_host.owners)
        # iterate over a copy: the set is modified while groups are expanded
        for owner in list(owner_set):
            if owner.startswith("@"):
                owner_set.discard(owner)
                owner_set.update(self.tools_storage.get_users_by_group(owner[1:]))
        host.owners = sorted(owner_set)


def build_topology(use_file_cache=True):
    """Build the topology and return it as a list of host dictionaries.

    Sets the module-level ``USE_CACHE`` flag to ``use_file_cache`` before a
    ``TopologyContext`` is created.
    """
    global USE_CACHE
    USE_CACHE = use_file_cache
    context = TopologyContext()
    return [host.to_dict() for host in context.cleaned_hosts_data]
