#!/usr/bin/python
#coding: utf-8
from __future__ import unicode_literals
import argparse
from pprint import pprint
import re


# Matches one quoted filter term of the form '#<param>,<operation>,<value>'
# (single quotes included). The pattern is written with raw string literals so
# that the regex escape \d reaches the regex engine untouched instead of being
# treated as an (invalid) string escape sequence.
TERM_RE = re.compile(
    r"^'#(uat:.*|ys|yp|ycookie|virus|ua|uat|ts|tld|referer|products|lr|lang"
    r"|feat:\d{1,3}|clid_type|clid|buckets|apps|req:.*)"
    r",(lt|gt|eq|match|ic_match|not_eq|not_match|ic_not_match)"
    r",.*'$"
)

# URL of a geoadmin search endpoint.
# NOTE(review): not referenced by any code visible in this file - confirm it is still needed.
GEO_API = 'https://geoadmin.yandex-team.ru/_search.xml'

# Human-readable (Russian) descriptions of filter parameter names.
# Keys are parameter names without the leading '#'; show(translate=True)
# looks for '#<key>' in the first field of a term and substitutes the
# description. Some keys are prefixes of others (e.g. 'feat' / 'feat:499',
# 'req' / 'req:lang', 'ua' / 'uat', 'clid' / 'clid_type').
PARAMS = {
    'ys': 'ys-кука в json',
    'yp': 'yp-кука в json',
    'ycookie': 'y-кука в виде json',
    'virus': 'флаг virus из bigb',
    'ua': 'user-agent',
    'uat': 'ua-traits',
    'ts': 'текущий таймстемп',
    'tld': 'top level domain',
    'referer': 'где показать',
    'products': 'продукты у юзера',
    'lr': 'регион пользователя',
    'lang' : 'язык',
    'feat': 'фактор',
    'feat:499': 'фактор:YandexuidAge 1 / (1 + yandexuid-age-in-days)',
    'feat:564': 'фактор:UserLastDistrClose нормированная разность Now() - ts последнего закрытия дистрибуции',
    'feat:461': 'фактор:UserDistrElementShows PERSONAL-385',
    'feat:559': 'фактор:CryptaAdhocHasBrowser 217 вероятностные Adhoc-и; 319:0 LAL: браузер (пользователь похож на тех, кто скачал браузер)',
    'feat:524': 'фактор:RelevFeatGc PERSONAL-664',
    'feat:479': 'фактор:RelevFeatIsorg PERSONAL-385',
    'feat:525': 'фактор:RelevFeatIl PERSONAL-664',
    'clid_type': 'тип клида',
    'clid': 'клид',
    'buckets': 'user_buckets',
    'apps': 'приложения из bigb',
    'req': 'cgi параметр',
    'req:aspect_ratio': 'cgi параметр:aspect_ratio',
    'req:app_platform': 'cgi параметр:app_platform',
    'req:lang': 'cgi параметр:lang',
    'req:api_level': 'cgi параметр:api_level',
}
# Human-readable (Russian) descriptions of the comparison operations that may
# appear as the second field of a term; the keys are the same operation names
# that TERM_RE accepts. Used by show(translate=True).
OPERATIONS = {
    'lt': 'меньше',
    'gt': 'больше',
    'eq': 'число равно',
    'match': 'совпадает',
    'ic_match': 'совпадает без учета регистра',
    'not_eq': 'не равно',
    'not_match': 'не совпадает',
    'ic_not_match': 'не совпадает без учета регистра',
}

# One indentation step (four spaces) used by show(); it is also the separator
# placed between the fields of a term when show() translates it.
TAB = "    "

class ClothingBracketsNotFound(Exception):
    """Signals that a string has no bracket closing its opening one.

    Raised by find_in_brackets(); the scanned string is stored in ``value``.
    """

    def __init__(self, value):
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        # Show the repr so that quotes and whitespace in the string stay visible.
        return repr(self.value)

class WrongTerm(Exception):
    """Signals that a term failed validation.

    Raised by validate_term(); the rejected term is stored in ``value``.
    """

    def __init__(self, value):
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        # Show the repr so that quotes and whitespace in the term stay visible.
        return repr(self.value)

def find_in_brackets(s):
    """Return the content of the bracket group that starts the string.

    The string is scanned from its first character while tracking bracket
    depth; as soon as the depth is zero after a character, everything between
    the first character and that position is returned. For a string that does
    not start with "(" the depth is already zero after the first character,
    so the result is an empty string.

    Args:
        s (str): string expected to begin with "(".

    Returns:
        str: text strictly between the first character and the matching ")".

    Raises:
        ClothingBracketsNotFound: if the depth never returns to zero
            (including the case of an empty string).
    """
    depth = 0
    for pos, char in enumerate(s):
        if char == "(":
            depth += 1
        elif char == ")":
            depth -= 1
        if not depth:
            return s[1:pos]
    raise ClothingBracketsNotFound(s)

def parse_child(s, validate=False):
    """Split a simple term into its comma-separated fields.

    Surrounding single quotes are stripped first, then any leading/trailing
    "(" and any leading/trailing ")"; the remainder is split on ",".

    Args:
        s (str): source text of the term.
        validate (bool): if True, the term is checked with validate_term()
            before splitting.

    Returns:
        list: the fields of the term.
    """
    if validate:
        validate_term(s)
    unquoted = s.strip("'")
    without_brackets = unquoted.strip("(").strip(")")
    return without_brackets.split(',')

def pack_obj(s, parsed, sign='&', has_child=False):
    """Build the dict that represents one term of a parsed filter.

    Args:
        s (str): source text of the term.
        parsed (list): fields of a simple term, or a nested list of term
            dicts for a sub-expression.
        sign (str): operator that precedes the term.
        has_child (bool): True when ``parsed`` is a nested list of terms.

    Returns:
        dict: mapping with the keys 'src', 'sign', 'parsed' and 'has_child'.
    """
    node = {}
    node['src'] = s
    node['sign'] = sign
    node['parsed'] = parsed
    node['has_child'] = has_child
    return node

def validate_term(term):
    """Check a term against TERM_RE.

    Args:
        term (str): source text of the term.

    Raises:
        WrongTerm: if the term does not match TERM_RE.
    """
    if TERM_RE.match(term) is None:
        raise WrongTerm(term)

def pack(f):
    """Serialize a parsed filter back into a filter string.

    Every term contributes its sign followed by its body: a sub-expression is
    packed recursively and wrapped in brackets, a simple term has its fields
    joined with "," and wrapped in single quotes.

    Args:
        f (list): list of term dicts.

    Returns:
        str: the packed filter expression.
    """
    chunks = []
    for node in f:
        if node['has_child']:
            chunk = "%s(%s)" % (node['sign'], pack(node['parsed']))
        else:
            chunk = "%s'%s'" % (node['sign'], ",".join(node['parsed']))
        chunks.append(chunk)
    return "".join(chunks)

def _translate_tokens(text, table, prefix=''):
    """Replace every ``prefix + key`` found in *text* with ``table[key]``.

    Keys are tried from longest to shortest (ties broken alphabetically) so
    that a key which is a substring of another one (e.g. 'ua' / 'uat',
    'clid' / 'clid_type', 'eq' / 'not_eq') can never pre-empt the longer,
    more specific key. This keeps the result independent of dict ordering.
    """
    for key in sorted(table, key=lambda name: (-len(name), name)):
        token = prefix + key
        if token in text:
            text = text.replace(token, table[key])
    return text


def show(f, tab=0, translate=False):
    """Return a pretty, indented view of a parsed filter.

    Each term is printed on its own line, preceded by a line with its sign
    when the sign is not empty. Sub-expressions are wrapped in brackets and
    their content is indented one TAB deeper.

    Args:
        f (list): list of term dicts.
        tab (int): current indentation level.
        translate (bool): if True, translate the filter to Russian: the
            parameter name (first field) is replaced using PARAMS, the
            operation (second field) using OPERATIONS, and the fields are
            joined with TAB instead of ",".

    Returns:
        str: the formatted text. The term dicts in ``f`` are not modified.
    """
    indent = TAB * tab
    lines = []
    for t in f:
        if t['sign']:
            lines.append("%s%s\n" % (indent, t['sign']))
        if t['has_child']:
            nested = show(t['parsed'], tab + 1, translate)
            lines.append("%s(\n%s%s)\n" % (indent, nested, indent))
            continue
        if translate:
            # Work on a copy so that the parsed tree stays usable (e.g. for
            # pack()) after it has been shown.
            fields = list(t['parsed'])
            if fields:
                fields[0] = _translate_tokens(fields[0], PARAMS, '#')
            # A term may have fewer than two fields; translate only what exists.
            if len(fields) > 1:
                fields[1] = _translate_tokens(fields[1], OPERATIONS)
            term = TAB.join(fields)
        else:
            term = ",".join(t['parsed'])
        lines.append("%s'%s'\n" % (indent, term))
    return "".join(lines)

def parse(s, validate=False):
    """Return parsed filter string.

    The string is scanned character by character: "(" starts a bracket group,
    "&" and "|" finish the current simple term and become the sign of the
    next one, any other character is added to the current simple term.
    Quotes are not treated specially, so these characters are interpreted
    even when they occur inside a quoted value.

    Args:
        s (str): string with filter expression.
        validate (bool): if True, every simple term is checked with
            validate_term() before it is split into fields.

    Returns:
        List of dicts. Each dict is a term:
        {
            'src': string, source of the term,
            'sign': sign before the term: '' (if no "&" / "|" was seen before
                    it in this expression or sub-expression), '&' or '|',
            'parsed': list of fields for a simple term, or a recursive list
                      of term dicts for a sub-expression,
            'has_child': True if the term is a sub-expression (bracket group
                         containing "&" or "|"), False if it is simple
        }

    Raises:
        ClothingBracketsNotFound: if a bracket group is never closed.
        WrongTerm: if ``validate`` is True and a simple term is malformed.

    For example,
    "'#lang,eq,ru'&(('#referer,match,google.com')|('#referer,match,ru.yandex.mail%'))"
    is parsed into:
        [{'has_child': False,
          'parsed': ['#lang', 'eq', 'ru'],
          'sign': '',
          'src': "'#lang,eq,ru'"},
         {'has_child': True,
          'parsed': [{'has_child': False,
                      'parsed': ['#referer', 'match', 'google.com'],
                      'sign': '',
                      'src': "'#referer,match,google.com'"},
                     {'has_child': False,
                      'parsed': ['#referer', 'match', 'ru.yandex.mail%'],
                      'sign': '|',
                      'src': "'#referer,match,ru.yandex.mail%'"}],
          'sign': '&',
          'src': "('#referer,match,google.com')|('#referer,match,ru.yandex.mail%')"}]
    """
    out = []
    sign = ''
    term = ''
    i = 0
    while i < len(s):
        ch = s[i]
        if ch == '(':
            inner = find_in_brackets(s[i:])
            if '&' in inner or '|' in inner:
                # The group holds several terms: parse it recursively.
                out.append(pack_obj(inner, parse(inner, validate), sign, True))
            else:
                out.append(pack_obj(inner, parse_child(inner, validate), sign))
            # Move to the closing bracket; the common increment below steps
            # past it.
            i += len(inner) + 1
        elif ch in ('&', '|'):
            if term:
                out.append(pack_obj(term, parse_child(term, validate), sign))
            term = ''
            sign = ch
        else:
            term += ch
        i += 1
    # Flush the simple term that runs up to the end of the string.
    if term:
        out.append(pack_obj(term, parse_child(term, validate), sign))
    return out

if __name__ == '__main__':
    #print generate_default_filters(PRODUCTS, clients=['smart_banner'], products=['launcher', 'browser', 'search'])
    f = "(('#uat:DeviceName,eq,iPad'&'#apps,not_match,8'&'#products,not_match,ru.yandex.mobile.search.ipad')|('#uat:OSFamily,eq,Android'&'#apps,not_match,5'&'#apps,not_match,6'&'#products,not_match,com.yandex.browser')|('#uat:OSFamily,eq,iOS'&'#apps,not_match,7'&'#products,not_match,ru.yandex.mobile.search'))"
    parser = argparse.ArgumentParser()
    parser.add_argument('filterstring', default='', nargs='?')
    args = parser.parse_args()
    if not args.filterstring:
        args.filterstring = f
    else:
        args.filterstring = args.filterstring.decode('utf8')
    k = parse(args.filterstring)
    pprint(k)
    print '\n', args.filterstring, '\n'
    print pack(k)
    print '\n', show(k, translate=True)

