import json
from datetime import datetime


# --- Run configuration ------------------------------------------------------
# Input (tab-separated) and output (JSON) files share the same name stem.
_export_stem = 'teaser_service_exp.20160510-1952'
infile = _export_stem + '.txt'
outfile = _export_stem + '.json'

# Values copied into every generated element.
host = 'bannerteaser'
textauthor = 'chikachoff'
fml_weight = 100
bandits_weight = 1

# First banner id; the conversion loop increments it per exported row.
cnt = 999990000

# Column names, in order, joined with tabs.  The script maps input fields
# onto these names instead of using the first line of the input file.
HEADER_STRING = '\t'.join((
    'domain', 'lang', 'geo', 'from', 'till', 'title', 'text', 'color',
    't_color', 'button', 'url', 'bk', 'bk_tag', 'bk_require_flag',
    'counter', 'android', 'iphone', 'service', 'time_from', 'time_till',
    'week_day', 'weight', 'exp', 'delete',
))

def _apps_not_match(*app_ids):
    """Return one ``'#apps,not_match,<id>'`` clause per id, joined by ``&``.

    With no ids the result is the empty string.
    """
    return '&'.join("'#apps,not_match,%d'" % app_id for app_id in app_ids)


def _ua_ic_not_match(*fragments):
    """Return one ``'#ua,ic_not_match,%<fragment>%'`` clause per fragment,
    joined by ``&``."""
    return '&'.join("'#ua,ic_not_match,%" + fragment + "%'"
                    for fragment in fragments)


# Product name -> '#apps' filter clauses appended to that product's filter.
# An empty string means no extra clauses are appended for the product.
# NOTE(review): id 21 appears twice for "music" and "radio"; kept as in the
# original data -- confirm whether a second, different id was intended.
app_map = {
    "taxi": _apps_not_match(3),
    "browser": _apps_not_match(5, 6, 7, 8),
    "music": _apps_not_match(21, 21),
    "radio": _apps_not_match(21, 21),
    "appsearch": _apps_not_match(40, 62),
    "transport": _apps_not_match(28),
    "launcher": "",
    "keyboard": "",
    "market": _apps_not_match(16, 17, 18),
    "autoru": _apps_not_match(43),
    "avia": ""
}

# User-agent fragments shared by the two platform maps below.
_IOS_1_TO_7 = tuple('ios %d.' % major for major in range(1, 8))
_ANDROID_1_TO_3 = ('android 1.', 'android 2.', 'android 3.')

# Platform -> '#ua' clauses appended when the product is "appsearch".
user_agent_appsearch_map = {
    "iphone": _ua_ic_not_match(
        'android', 'yaapp_ios', 'yabrowser', *_IOS_1_TO_7),
    "android": _ua_ic_not_match(
        'yandex search plugin', 'yandexsearch', 'yabrowser',
        *_ANDROID_1_TO_3)
}

# Platform -> '#ua' clauses appended when the product is "browser".
user_agent_browser_map = {
    # iOS versions 6 down to 1, in that order.
    "iphone": _ua_ic_not_match(
        'android', 'yabrowser', 'yaapp_ios', *reversed(_IOS_1_TO_7[:6])),
    "android": _ua_ic_not_match(
        'yabrowser', 'yandex search plugin', 'yandexsearch',
        *(_ANDROID_1_TO_3 + ('android 4.0',)))
}

# Base filter of every element; {lang} and {tld} are filled in per row.
filter_template = "'#lang,eq,{lang}'&'#tld,eq,{tld}'&'#ua,ic_not_match,%windows%'"

# Column names used to label the fields of every input row.
header = HEADER_STRING.strip('\r\n').split('\t')

# Load the whole input file as a list of raw lines (line endings kept).
with open(infile) as source:
    data = list(source)

# Output elements, appended to by the conversion loop.
collection = []

# Extra aux-data merged into rows whose url contains 'appmetrika.yandex'.
_link_parameters = {
    "atom_bannerid": "$banner_id",
    "atom_reqid": "reqid",
    "host": "$host",
    "yandexuid": "uid",
}
link_extensions = {"_link_extensions": {"link": _link_parameters}}

# Convert every input row into one output element appended to `collection`.
#
# Changes compared with the previous version of this loop:
#   * blank / whitespace-only lines are skipped instead of failing with
#     KeyError on the first column lookup;
#   * the current time is taken once, so all rows are checked against the
#     same instant;
#   * the duplicated appsearch/browser branches use one lookup table.

# Product -> {platform -> extra '#ua' clauses}.  Products not listed here
# get no extra user-agent clauses.
ua_maps_by_product = {
    'appsearch': user_agent_appsearch_map,
    'browser': user_agent_browser_map,
}

# Reference time for the expiry check below.
run_started = datetime.today()

# data[0] is skipped; column names come from `header`.
for line in data[1:]:
    if not line.strip():
        # Nothing to export on an empty line (e.g. a trailing newline).
        continue

    collection_element = {}
    line = line.strip('\r\n').split('\t')
    aux = dict(zip(header, line))

    # `till` is parsed as midnight of the given date, so a row is dropped
    # as soon as that midnight lies before the reference time.
    if aux['till'] and \
            datetime.strptime(aux['till'], '%Y-%m-%d') < run_started:
        continue

    internal_url = '/'.join([host, str(cnt)])
    snippet = aux['text']
    title = aux['title']
    product = aux['service']
    if product == 'yabrowser':
        product = 'browser'
    url = '/'.join([host, product])
    lang = aux['lang']
    tld = aux['domain']

    if 'appmetrika.yandex' in aux['url']:
        aux.update(link_extensions)
    aux['banner_id'] = cnt

    # Filters: collect the clauses in their final order and join them once.
    is_iphone = aux['iphone'] == '1'
    is_android = aux['android'] == '1'

    filter_parts = [filter_template.format(lang=lang, tld=tld)]
    # An unknown product raises KeyError here, exactly as before.
    if app_map[product]:
        filter_parts.append(app_map[product])
    # Platform clauses go in front; when both flags are set the android
    # clause ends up before the iphone one.
    if is_iphone:
        filter_parts.insert(0, "'#ua,match,%iphone%'")
    if is_android:
        filter_parts.insert(0, "'#ua,match,%android%'")
    # Product-specific user-agent clauses go last; iphone takes precedence
    # over android when both flags are set.
    ua_map = ua_maps_by_product.get(product)
    if ua_map is not None:
        if is_iphone:
            filter_parts.append(ua_map['iphone'])
        elif is_android:
            filter_parts.append(ua_map['android'])
    collection_element['filter'] = '&'.join(filter_parts)
    # End filters

    # Normalize a space-separated colour list ("a, b" / "a b") to "a,b".
    if ' ' in aux['color']:
        colors = [color.rstrip(',')
                  for color in aux['color'].split(' ') if color]
        aux['color'] = ','.join(colors)

    # Keys are assigned one by one, in the same order as before, so the
    # key layout of the generated JSON does not change.
    collection_element['__product'] = product
    collection_element['__textauthor'] = textauthor
    collection_element['bandits-weight'] = bandits_weight
    collection_element['fml-weight'] = fml_weight
    collection_element['internal-url'] = internal_url
    collection_element['snippet'] = snippet
    collection_element['title'] = title
    collection_element['url'] = url
    collection_element['grouping-key'] = product
    collection_element['aux-data'] = aux

    collection.append(collection_element)
    # Only exported rows consume a banner id.
    cnt += 1

# Serialize the collected elements as indented JSON, keeping non-ASCII
# characters unescaped.
# NOTE: the `encoding` keyword is part of Python 2's json.dumps signature;
# this call is therefore tied to a Python 2 interpreter.
dump_options = {
    'indent': 4,
    'encoding': 'utf-8',
    'ensure_ascii': False,
}
structure = json.dumps(collection, **dump_options)

# Write the serialized document in one go.
with open(outfile, 'w') as out_fp:
    out_fp.write(structure)
