#!/usr/bin/env python3

import re
import sys
import json


def get_report_uuid(node):
    """Return the identifier of a report node.

    The ``'uuid'`` entry is used when it is present and not ``None``;
    otherwise the ``'refers'`` entry is returned.

    Raises:
        KeyError: if ``'uuid'`` is absent (or ``None``) and ``node`` has no
            ``'refers'`` key.
    """
    identifier = node.get('uuid')
    return node['refers'] if identifier is None else identifier


# Keys that fix_antirobot_report looks for inside an antirobot ``report``.
# Deliberately a plain (mutable) set rather than a frozenset.
ANTIROBOT_REPORT_POSSIBLE_CONTENTS = {'shared', 'hasher'}

def fix_antirobot_report(node, path):
    """Replace an antirobot ``report`` wrapper with the entry it wraps.

    Nothing happens unless ``node['report']`` exists, resolves (through
    ``get_report_uuid``) to ``'antirobot'`` and has no ``'stats_eater'`` key.
    Otherwise the report must contain exactly one of the keys listed in
    ``ANTIROBOT_REPORT_POSSIBLE_CONTENTS``; that entry is moved up into
    ``node`` and the ``'report'`` key is removed.

    Args:
        node: dict currently being visited; modified in place.
        path: keys leading to ``node``; only used in the error message.

    Raises:
        ValueError: if the report holds none, or more than one, of the
            expected keys. ``node`` is left untouched in that case.
    """
    report = node.get('report')
    if report is None:
        return
    if get_report_uuid(report) != 'antirobot' or 'stats_eater' in report:
        return

    contents_to_move = ANTIROBOT_REPORT_POSSIBLE_CONTENTS.intersection(report.keys())
    if len(contents_to_move) != 1:
        raise ValueError(
            'antirobot report at path {!r} must contain exactly one of {}, found {}'.format(
                path,
                sorted(ANTIROBOT_REPORT_POSSIBLE_CONTENTS),
                sorted(contents_to_move),
            )
        )

    # Validate first, mutate second: a failure must not leave the node half-edited.
    (key,) = contents_to_move
    del node['report']
    node[key] = report[key]


def fix_pumpkin_name(s):
    """Return ``s`` with every occurrence of ``_last`` replaced by ``_w_pumpkin``."""
    old_marker, new_marker = '_last', '_w_pumpkin'
    return s.replace(old_marker, new_marker)


def fix_pumpkin_names(node, path):
    """Apply ``fix_pumpkin_name`` to the keys and direct string values of ``node``.

    The node is skipped entirely when any component of ``path`` contains
    ``'internal_slb_'``. Non-string values are left as they are. ``node`` is
    modified in place.
    """
    for segment in path:
        if 'internal_slb_' in segment:
            return

    # Iterate over a snapshot of the keys: the dict is re-keyed inside the loop.
    for old_key in tuple(node):
        new_key = fix_pumpkin_name(old_key)
        if new_key != old_key:
            node[new_key] = node.pop(old_key)
        value = node[new_key]
        if isinstance(value, str):
            node[new_key] = fix_pumpkin_name(value)


# Networks that fix_trusted_networks removes from a ``source_mask`` list.
UNTRUSTED_NETWORKS = {
    '130.193.33.248/29',
    '130.193.37.0/24',
    '130.193.38.0/23',
    '130.193.62.0/26',
    '130.193.63.0/24',
    '199.21.99.0/24',
}

def fix_trusted_networks(node, path):
    """Drop ``UNTRUSTED_NETWORKS`` entries from a ``trusted_networks`` node.

    Only acts when the last component of ``path`` is ``'trusted_networks'``.
    ``node['match_source_ip']['source_mask']`` is treated as a comma-separated
    list; entries equal to a member of ``UNTRUSTED_NETWORKS`` are removed and
    the remaining ones are joined back with commas, in their original order.

    Raises:
        KeyError: if the matched node lacks ``'match_source_ip'`` or
            ``'source_mask'``.
    """
    if not path:
        return
    if path[-1] != 'trusted_networks':
        return

    matcher = node['match_source_ip']
    kept = [
        net
        for net in matcher['source_mask'].split(',')
        if net not in UNTRUSTED_NETWORKS
    ]
    matcher['source_mask'] = ','.join(kept)


# Maps a ``pattern`` string as it currently appears in a config (key) to the
# string that should replace it (value). fix_patterns looks keys up by exact
# string equality, so the keys must match the configs character for character.
UPDATED_PATTERNS = {
    # The next three replacements drop the trailing `$`; the third one also
    # drops the `jsonproxy` alternative.
    "/(padsearch|search/pad)(/)?$": "/(padsearch|search/pad)(/)?",
    "/(yandsearch|search)(/)?$": "/(yandsearch|search)(/)?",
    "/(touchsearch|search/touch|jsonproxy)(/)?$": "/(touchsearch|search/touch)(/)?",
    # Replacement drops `suggest-internal` and groups the three *.cgi
    # alternatives under a leading `/` (from a manual read -- verify before
    # relying on this summary).
    "/(suggest|suggest-delete-text|suggest-dict-info|suggest-endings|suggest-experiments|suggest-explain|suggest-fact|suggest-fact-two|suggest-filters|suggest-first-char|suggest-ie|suggest-kinopoiskbro|suggest-mobile|suggest-news-by|suggest-news-com|suggest-news-kz|suggest-news-ru|suggest-news-sources|suggest-news-ua|suggest-opera|suggest-quality-test|suggest-relq-serp|suggest-rich|suggest-search-lib-examples|suggest-sl|suggest-trendy-queries|suggest-internal)(/.*)?|suggest-ff.cgi|suggest-sd.cgi|suggest-ya.cgi": "/(suggest|suggest-delete-text|suggest-dict-info|suggest-endings|suggest-experiments|suggest-explain|suggest-fact|suggest-fact-two|suggest-filters|suggest-first-char|suggest-ie|suggest-kinopoiskbro|suggest-mobile|suggest-news-by|suggest-news-com|suggest-news-kz|suggest-news-ru|suggest-news-sources|suggest-news-ua|suggest-opera|suggest-quality-test|suggest-relq-serp|suggest-rich|suggest-search-lib-examples|suggest-sl|suggest-trendy-queries)(/.*)?|/(suggest-ff.cgi|suggest-sd.cgi|suggest-ya.cgi)",
    # Replacement drops `favicon\.ico` from the first group and `wy` from the
    # second group (from a manual read -- verify before relying on this summary).
    "/(my-cookie\\.js|wsave\\.html|form\\.html|mailinfo\\.js|dropcounters\\.js|all\\.xml|original\\.xml|services\\.xml|hello\\.html|hellom\\.html|hellot\\.html|hellotel\\.html|ok\\.html|index\\.html|index\\.htm|google([a-zA-Z0-9]+)\\.html|application\\.xml|black\\.html|white\\.html|empty\\.html|crossdomain\\.xml|i-social__closer\\.html|login-status\\.html|mda\\.html|mdaxframeproxy\\.html|favicon\\.ico|xframeproxy\\.html|apple-touch-icon\\.png|embeded\\.min\\.js|htc|HTC|mdaiframe\\.html)(.*)?|/|/(wsave|autopattern|all|myclean|screenx|remotes-status|setmy|adddata|wcomplain|route|clean_route|save_route|drop_route|m|d|mdae|gpsave|mgpsave|jcb|gs|bjk|fb|sade|all|themes|skins|rapid|instant|postcard|y|json|data|test|banner|portal|log|black|white|map_router|ua|ru|kz|by|safari|ios7se|wy|inline|jsb|i|dform|chrome|\\.well-known|1tv|matchtv|ntv|5tv|ru24|vesti|rentv|tnt|m24|a2|sovsekretno|izvestia|echotv|probusiness|uspeh|globalstar|tochkatv|hardlife|oceantv|gamanoid|hitv|rutv|topspb|tdk|oirtv|rt|rtdoc|hdmedia|wfc|sibir|ntvpravo|ntvserial|ntvstyle|ctc)(/.*)?": 
"/(my-cookie\\.js|wsave\\.html|form\\.html|mailinfo\\.js|dropcounters\\.js|all\\.xml|original\\.xml|services\\.xml|hello\\.html|hellom\\.html|hellot\\.html|hellotel\\.html|ok\\.html|index\\.html|index\\.htm|google([a-zA-Z0-9]+)\\.html|application\\.xml|black\\.html|white\\.html|empty\\.html|crossdomain\\.xml|i-social__closer\\.html|login-status\\.html|mda\\.html|mdaxframeproxy\\.html|xframeproxy\\.html|apple-touch-icon\\.png|embeded\\.min\\.js|htc|HTC|mdaiframe\\.html)(.*)?|/|/(wsave|autopattern|all|myclean|screenx|remotes-status|setmy|adddata|wcomplain|route|clean_route|save_route|drop_route|m|d|mdae|gpsave|mgpsave|jcb|gs|bjk|fb|sade|all|themes|skins|rapid|instant|postcard|y|json|data|test|banner|portal|log|black|white|map_router|ua|ru|kz|by|safari|ios7se|inline|jsb|i|dform|chrome|\\.well-known|1tv|matchtv|ntv|5tv|ru24|vesti|rentv|tnt|m24|a2|sovsekretno|izvestia|echotv|probusiness|uspeh|globalstar|tochkatv|hardlife|oceantv|gamanoid|hitv|rutv|topspb|tdk|oirtv|rt|rtdoc|hdmedia|wfc|sibir|ntvpravo|ntvserial|ntvstyle|ctc)(/.*)?",
    # Original author's note: the long patterns above were carefully compared
    # by hand, outside of this file.
}

def fix_patterns(node, _):
    """Swap ``node['pattern']`` for its entry in ``UPDATED_PATTERNS``, if any.

    Nodes without a ``'pattern'`` key, and patterns that are not a key of
    ``UPDATED_PATTERNS``, are left unchanged. The second argument (the path)
    is ignored.
    """
    if 'pattern' not in node:
        return
    replacement = UPDATED_PATTERNS.get(node['pattern'])
    if replacement is None:
        return
    node['pattern'] = replacement


def make_two_weights_file_slashes(node, _):
    """Double the slash after a leading ``./controls/`` in ``node['weights_file']``.

    ``./controls/name`` becomes ``./controls//name``. A value is left unchanged
    when it does not start with ``./controls/``, when nothing follows that
    prefix, or when the prefix is already followed by another slash -- so
    applying the fixer twice gives the same result as applying it once.
    Nodes without a ``'weights_file'`` key are ignored, as is the second
    argument (the path).
    """
    if 'weights_file' in node:
        # The leading dot is escaped so that only a literal "./controls/"
        # prefix matches (an unescaped "." would accept any first character).
        node['weights_file'] = re.sub(
            r'^(\./controls/)([^/].*)$', r'\1/\2', node['weights_file']
        )


def unshare_trusted_networks_suggest_images_hasher(node, path):
    """Inline the ``'shared'`` wrapper of a node under ``suggest-images``.

    Acts only when ``path`` has at least three components, its third-from-last
    component is ``'suggest-images'`` and ``'trusted_networks'`` occurs
    somewhere in it. In that case the ``'shared'`` entry is removed from
    ``node`` and, unless it is missing or ``None``, its items are merged into
    ``node`` in place.
    """
    if len(path) < 3:
        return
    if path[-3] != 'suggest-images' or 'trusted_networks' not in path:
        return

    inlined = node.pop('shared', None)
    if inlined is None:
        return
    node.update(inlined)


def traverse(node, f, path=None):
    """Walk a JSON-like tree, calling ``f(node, path)`` on every dict.

    A dict is passed to ``f`` before its values are visited; each value is
    visited with ``path`` extended by its key. Lists are walked element by
    element; anything else is ignored.

    Args:
        node: dict, list or scalar to walk.
        f: callable taking ``(dict_node, path)``.
        path: list of keys leading to ``node``; defaults to an empty list.
    """
    current_path = [] if path is None else path

    if isinstance(node, dict):
        f(node, current_path)
        for key, child in node.items():
            traverse(child, f, current_path + [key])
    elif isinstance(node, list):
        # NOTE(review): the path is not forwarded here, so dicts nested inside
        # a list are visited with an empty path -- confirm this is intended.
        for element in node:
            traverse(element, f)


def fix(node, path):
    """Run every fixer, in this fixed order, on one dict node of the tree."""
    fix_antirobot_report(node, path)
    fix_pumpkin_names(node, path)
    fix_trusted_networks(node, path)
    fix_patterns(node, path)
    make_two_weights_file_slashes(node, path)
    unshare_trusted_networks_suggest_images_hasher(node, path)


# Rewrite each file named on the command line in place.
for fname in sys.argv[1:]:
    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with open(fname, encoding='utf-8') as f:
        j = json.load(f)
    traverse(j, fix)
    # Serialize before reopening the file for writing, so that a failure
    # cannot leave a truncated file behind.
    text = json.dumps(j, sort_keys=True, indent=2)
    with open(fname, 'w', encoding='utf-8') as f:
        f.write(text)

