# coding: utf-8
from __future__ import unicode_literals, absolute_import, division, print_function

import re

from six import string_types


class TagBuilder(object):
    """
    Build tag for ammo

    A tag is the path of a request line (everything before the first '?')
    where the parts recognized by `part_filters` and `common_handlers`
    are replaced with PART_SUBSTITUTION.
    """

    # Replacement for every erased path part
    PART_SUBSTITUTION = '_'
    # Language codes erased when they are the first non-empty path part
    WELLKNOWN_LANGUAGES = ['en', 'ru', 'uk', 'be', 'tt', 'kk', 'tr', 'uz']

    @classmethod
    def not_method_first_part(cls, part):
        """
        For cases like languages ('ru', 'uk') and versions of interface (v1, v2, v3, ...)
        Other cases of 1-2 symbols likely aren't method name too

        :param part: path part to check
        :return: True if the part is at most 2 symbols long
        """
        return len(part) <= 2

    @classmethod
    def erase_part_by_neighbors(cls, parts, prefix, suffix):
        """
        Replace (in place) every part standing directly between `prefix` and `suffix`.

        Every occurrence of `prefix` is checked, not only the first one, so an
        earlier unrelated occurrence can't hide a later matching one.

        :param parts: list of path parts, modified in place
        :param prefix: part expected right before the erased one
        :param suffix: part expected right after the erased one
        """
        # erased part is at index + 1, suffix at index + 2; shorter lists give an empty range
        for index in range(len(parts) - 2):
            if parts[index] == prefix and parts[index + 2] == suffix:
                parts[index + 1] = cls.PART_SUBSTITUTION

    @classmethod
    def erase_leading_language(cls, parts):
        """
        Replace (in place) the first part with PART_SUBSTITUTION if it is a well-known language.

        A leading empty part (path started with '/') is skipped. Empty list is left untouched.

        :param parts: list of path parts, modified in place
        """
        if not parts:
            return
        first_index = 0 if parts[0] else 1
        if first_index < len(parts) and parts[first_index] in cls.WELLKNOWN_LANGUAGES:
            parts[first_index] = cls.PART_SUBSTITUTION

    def __init__(self):
        """
        Set up default part filters (callables or regexp strings, compiled right away)
        and default handlers applied to the whole list of parts.
        """
        self.part_filters = [
            lambda part: '--' in part,  # directions like omsk--moscow
            r'.*\d{3,}',  # at least 3 numbers like some thread uids
            r'^\d+',  # started with number including only numbers (like some thread uids, different pure ids and codes)
            r'.*%',  # any with %
            r'^[sc]\d+$',  # city/station identity like c5, s1234
        ]
        self.compile_filter_regexp()

        # resolve classmethods through the actual class, so overrides in subclasses are used
        builder_cls = type(self)
        self.common_handlers = [
            # morda_backend methods with slugs
            lambda parts: builder_cls.erase_part_by_neighbors(parts, 'settlement', 'stations'),
            lambda parts: builder_cls.erase_part_by_neighbors(parts, 'settlement', 'transport-popular-directions'),
            builder_cls.erase_leading_language,
        ]

    def compile_filter_regexp(self):
        """
        Replace every string in `part_filters` with a predicate matching a part
        against this string as a regexp (re.match semantics: anchored at the
        beginning of the part). Other items are left as is.
        """
        for index, part_filter in enumerate(self.part_filters):
            if isinstance(part_filter, string_types):
                # bound `match` of the compiled pattern returns match object or None
                self.part_filters[index] = re.compile(part_filter).match

    def apply_filters(self, parts):
        """
        Replace (in place) filtered parts with PART_SUBSTITUTION, then run common handlers.

        Filters are applied only to parts after the first word of the method name;
        handlers get the whole list. Empty list is left untouched.

        :param parts: list of path parts, modified in place
        """
        if not parts:
            return  # nothing to filter

        # determine start index of parts to filter (to skip: /v3/begin, /begin, /ru/begin, it's not identifiers anyway)
        method_begin = 0
        if not parts[method_begin]:
            method_begin += 1  # skip first empty part
        if len(parts) > method_begin and self.not_method_first_part(parts[method_begin]):
            method_begin += 1  # skip first part which is not a begin of the method name
        method_begin += 1  # skip first word of the method name

        for index in range(method_begin, len(parts)):
            part = parts[index]
            # first filter that fires is enough to erase the part
            if any(part_filter(part) for part_filter in self.part_filters):
                parts[index] = self.PART_SUBSTITUTION

        for handler in self.common_handlers:
            handler(parts)

    def build_by_yt_log(self, line):
        """
        Build tag by the request line.

        :param line: request path, optionally followed by '?' and query string
        :return: path without query string and without trailing '/',
                 where erased parts are replaced with PART_SUBSTITUTION
        """
        # query string (everything from the first '?') is not a part of the tag
        raw = line.partition('?')[0]
        parts = raw.split('/')
        self.apply_filters(parts)
        if parts and not parts[-1]:
            parts.pop()  # drop empty part produced by trailing '/'
        return '/'.join(parts)
