# -*- coding: utf-8 -*-
import re
from collections import defaultdict

import six


# Raw string literals keep the regex escapes (\W, \d) away from Python's own
# string-escape processing, which warns about unknown escapes such as '\W'.
nameRe = re.compile(r'[^\W\d_]+')  # first run of letters: used to fetch the shard name
idRe = re.compile(r'[\d]+')  # first run of digits: used to fetch the shard id


def splitHostName(host):
    """
    Split a host name into a textual prefix, a trailing number and a domain.

    The domain is everything from the first dot onwards (the dot included),
    or '' when the name has no dot.  The number is the run of digits that
    ends the short host name, returned as a string.  When the short name has
    no trailing digits, or consists of digits only, the whole short name is
    returned as the prefix and the number is the integer -1.

    >>> splitHostName('ws1-111.yandex.ru')
    ('ws1-', '111', '.yandex.ru')
    >>> splitHostName('www.yandex.ru')
    ('www', -1, '.yandex.ru')
    """
    shortName, dot, rest = host.partition('.')
    domain = dot + rest

    # Walk backwards over the trailing digits of the short name.
    cut = len(shortName)
    while cut and shortName[cut - 1].isdigit():
        cut -= 1

    # No trailing digits at all, or nothing but digits: there is no number.
    if cut == 0 or cut == len(shortName):
        return shortName, -1, domain
    return shortName[:cut], shortName[cut:], domain


def splitHostName2(host):
    """
    Split a host name into prefix, number, postfix and domain.

    The domain is everything from the first dot onwards (the dot included),
    or '' when the name has no dot.  Within the short host name the last run
    of digits is the number (returned as a string), the text before it is the
    prefix and the text after it is the postfix.  When the short name has no
    digits, it is returned whole as the prefix, the number is the integer -1
    and the postfix is ''.

    >>> splitHostName2('ws1-111.yandex.ru')
    ('ws1-', '111', '', '.yandex.ru')
    >>> splitHostName2('www.yandex.ru')
    ('www', -1, '', '.yandex.ru')
    >>> splitHostName2('distbuild02dev.search.yandex.net')
    ('distbuild', '02', 'dev', '.search.yandex.net')
    """
    shortName, dot, rest = host.partition('.')
    domain = dot + rest

    # Skip the non-digit tail to find where the last digit run ends.
    numberEnd = len(shortName)
    while numberEnd > 0 and not shortName[numberEnd - 1].isdigit():
        numberEnd -= 1
    if numberEnd == 0:
        return shortName, -1, '', domain

    # Then walk over the digits themselves to find where the run starts.
    numberStart = numberEnd
    while numberStart > 0 and shortName[numberStart - 1].isdigit():
        numberStart -= 1

    return (
        shortName[:numberStart],
        shortName[numberStart:numberEnd],
        shortName[numberEnd:],
        domain,
    )


def _consecutiveRuns(numbered):
    """Split sorted (value, text) pairs into lists whose values increase by exactly 1."""
    runs = []
    for value, text in numbered:
        if runs and runs[-1][-1][0] + 1 == value:
            runs[-1].append((value, text))
        else:
            runs.append([(value, text)])
    return runs


def _braceList(texts):
    """Render one text as is, several texts as a '{a,b,c}' brace list."""
    if len(texts) == 1:
        return texts[0]
    return '{%s}' % ','.join(texts)


def formatHosts(hosts, useGrouping=True, addDomain=None, yrFormat=True, separator=' '):
    """
    Format hosts in YR format.

    Returns the given hosts as a single string.

    With grouping enabled, hosts that share prefix, postfix and domain (as
    split by ``splitHostName2``) are merged: a run of four or more
    consecutive numbers becomes ``prefix{first..last}postfix``, the remaining
    numbers of the group are collected into ``prefix{a,b,c}postfix`` (or a
    plain name when only one is pending).  Grouped entries are sorted; hosts
    without a number are appended after them in input order.  Empty host
    names are skipped.  Duplicate hosts are not collapsed.

    :param iterable hosts:   host names to format
    :param bool useGrouping: whether to group hosts in a shell-compatible way
                             (for example, "ws1-{001..123}").  When disabled,
                             the sorted host names are joined as is and no
                             domain is stripped.
    :param bool addDomain:   whether to print the full domain name.  When the
                             flag is off (``None`` or any other false value),
                             the domains .yandex.ru and .search.yandex.net are
                             cut off.
    :param bool yrFormat:    whether to use YR notation (a leading '+') in host names
    :param str separator:    separator to put between hosts (a space by default)
    :rtype: str
    :return: string in YR format
    """
    marker = '+' if yrFormat else ''
    if not useGrouping:
        return separator.join('%s%s' % (marker, h) for h in sorted(hosts))

    # Any false value means "strip the well-known domains"; previously only
    # None did, so an explicit addDomain=False kept them, contrary to the
    # documented meaning of the flag.
    hiddenDomains = () if addDomain else ('.yandex.ru', '.search.yandex.net')

    groups = {}
    namedHosts = []
    for host in hosts:
        if not host:
            continue
        prefix, number, postfix, domain = splitHostName2(host)
        if number == -1:
            namedHosts.append((prefix, domain))
        else:
            # Keep the original text next to the int value so that zero
            # padding survives into the output.
            groups.setdefault((prefix, postfix, domain), []).append((int(number), number))

    results = []
    for (prefix, postfix, domain), numbered in groups.items():
        head = marker + prefix
        tail = postfix + ('' if domain in hiddenDomains else domain)
        singles = []
        for run in _consecutiveRuns(sorted(numbered)):
            if len(run) > 3:
                # Emit the numbers pending before this range first, so the
                # entries of a group appear in ascending order.
                if singles:
                    results.append(head + _braceList(singles) + tail)
                    singles = []
                results.append('%s{%s..%s}%s' % (head, run[0][1], run[-1][1], tail))
            else:
                singles.extend(text for _, text in run)
        if singles:
            results.append(head + _braceList(singles) + tail)

    results.sort()
    for prefix, domain in namedHosts:
        results.append(marker + prefix + ('' if domain in hiddenDomains else domain))
    return separator.join(results)


# Supported shard layout: <letters><optional digits>-<number>-<stamp>,
# e.g. 'diversity-001-1311783151' or 'primus12-001-1311783151'.
_shardRe = re.compile(r'([^\W\d_]+)(\d*)-(\d+)-([^-]*)$')


def _formatNumberRuns(numbers):
    """Render ints as zero-padded singles ('007') and '{001..003}' ranges of consecutive values."""
    ordered = sorted(numbers)
    pieces = []
    begin = 0
    for pos in range(1, len(ordered) + 1):
        # A run ends at the end of the list or where the sequence has a gap.
        if pos == len(ordered) or ordered[pos] != ordered[pos - 1] + 1:
            first, last = ordered[begin], ordered[pos - 1]
            if first == last:
                pieces.append('{0:03}'.format(first))
            else:
                pieces.append('{{{0:03}..{1:03}}}'.format(first, last))
            begin = pos
    return pieces


def formatShards(shards):
    """
    Format shard names as a compact, space-separated, sorted string.

    Shards named ``<name>-<number>-<stamp>`` (the name being letters
    optionally followed by digits) are grouped by name and stamp; consecutive
    numbers of a group are merged into ``name-{first..last}-stamp``.  Numbers
    are printed zero-padded to at least three digits.  Shards that do not
    follow this layout are passed through unchanged, and do not prevent the
    other shards from being grouped.

    :param iterable shards: shard names
    :rtype: str
    :return: formatted shard names joined with spaces; if formatting fails
             unexpectedly, the sorted shard names joined with spaces
    """
    # Materialize once so the best-effort fallback below can still see the
    # items when a one-shot iterator was passed in.
    shards = list(shards)
    try:
        grouped = defaultdict(set)  # (stamp, name) -> set of shard numbers
        other = set()

        for shard in shards:
            try:
                match = _shardRe.match(shard)
            except TypeError:
                # Not a string at all: treat as an unsupported shard.
                match = None
            if match is None:
                other.add(shard)
                continue
            name, shardId, number, stamp = match.groups()
            grouped[(stamp, name + shardId)].add(int(number))

        strings = set(other)
        # .items() works on both Python 2 and 3 (dict.iteritems is Python 2 only).
        for (stamp, name), numbers in grouped.items():
            for piece in _formatNumberRuns(numbers):
                strings.add('-'.join((name, piece, stamp)))

        return ' '.join(sorted(strings))
    except Exception:
        # Deliberate best effort: fall back to the plain sorted list.
        return ' '.join(sorted(shards))
