#!/usr/bin/env python3
"""
Import an event from Blueprint, matching its fields to existing groups on a best-effort basis.
"""

import argparse

import gzip
import json
from tempfile import NamedTemporaryFile
from pathlib import Path
from json.decoder import JSONDecodeError

import build


class BlueprintImportError(Exception):
    """Raised when an event cannot be imported; main() reports it to the user."""


def get_blueprint_schemas():
    """Download the gzipped Blueprint schema config and return it parsed from JSON.

    Returns:
        The decoded JSON document, or None when fetching, decompressing or
        parsing fails (a hint is printed in that case).
    """
    schema_url = 'https://ssr-static-assets.di.xarth.tv/prod-schema-configs.json.gz'
    session = build.create_midway_auth_session()
    try:
        response = session.get(schema_url)
        payload = gzip.decompress(response.content)
        schemas = json.loads(payload)
    except OSError as err:
        # gzip reports a payload that is not valid gzip data as an OSError
        failure = f'blueprint schema file invalid, are you sure you\'ve run \'mwinit --aea\'?\n{err}'
    except JSONDecodeError as err:
        failure = f'JSONDecode error, please reach out to #spade-schema-registry for help\n{err}'
    except Exception as err:
        failure = f'Unknown error, please reach out to #spade-schema-registry for help\n{err}'
    else:
        return schemas
    print(failure)
    return None


def prompt(msg):
    """Ask a yes/no question on the console and return True only for 'y' or 'yes'.

    The answer is compared case-insensitively and with surrounding whitespace
    removed, so a stray space after 'y' is not mistaken for a refusal.
    Anything else, including an empty answer, counts as "no".
    """
    answer = input(msg + ' (y/N): ').strip().lower()
    return answer in ('y', 'yes')


# Registry metadata fetched once, at import time, from the project's build module.
# make_field indexes _deprecated with the keys 'fields', 'transforms' and 'types'.
_deprecated = build.get_deprecated()
# NOTE(review): _types is not referenced anywhere in the visible part of this file.
_types = build.list_types()
# make_field keeps a column's transformer as a transform when its name is in here.
_transforms = build.list_transforms()
# Maps a column's 'Transformer' value to the type definition copied into the new field.
t_to_type = {
    'f@timestamp@unix':     {'name': 'timestamp', 'timezone': 'America/Los_Angeles'},
    'f@timestamp@unix-utc': {'name': 'timestamp', 'timezone': 'UTC'},
    'bigint': {'name': 'long'},
    'bool': {'name': 'bool'},
    'float': {'name': 'float'},
}


def parse_length_from_col_options(options):
    """Return the integer length from a column option string such as '(255) distkey'.

    The number sits between the first '(' and the last ')'; a distkey or
    sortkey keyword may follow the closing parenthesis.
    """
    up_to_close = options.rsplit(')', 1)[0]
    digits = up_to_close.split('(', 1)[1]
    return int(digits)


def non_source_equal(a, b):
    """Return True when a and b are equal, ignoring any 'source' key.

    Values that compare equal outright are equal. Otherwise, when both are
    dicts (including dict subclasses such as OrderedDict), they are compared
    again with their top-level 'source' entries left out. Every other
    combination is unequal.
    """
    if a == b:
        return True
    # isinstance rather than an exact type match, so mapping subclasses
    # also get the source-insensitive comparison.
    if isinstance(a, dict) and isinstance(b, dict):
        return {k: v for k, v in a.items() if k != 'source'} == \
            {k: v for k, v in b.items() if k != 'source'}
    return False


def check_field(col, field):
    """Warn where an existing field differs from what the column would generate.

    Args:
        col: column definition; must carry 'OutboundName'.
        field: the already-defined field with that name.

    Returns:
        The existing field unchanged, or an empty dict when make_field
        decides the column should not become a field.
    """
    name = col['OutboundName']
    candidate = make_field(col)
    if not candidate:
        # make_field hands back an empty dict for columns that must be skipped
        return {}
    for key in ('type', 'transform', 'internal', 'sensitivity'):
        current = field.get(key)
        proposed = candidate.get(key)
        if non_source_equal(current, proposed):
            continue
        if key == 'internal' and current:
            # an 'internal' value already set on the existing field is not reported
            continue
        print(f'Field {name} uses different value for {key}: '
              f'{current} != {proposed}'
              ' You need to accept the original settings or manually change them. You will '
              'need to work with Data Platform via support ticket to accomplish this.')
    return field


def make_field(col):
    """Build a new field definition from a Blueprint column.

    Args:
        col: column definition with 'OutboundName' and 'Transformer'.
            'InboundName' is read for transforms, 'ColumnCreationOptions'
            for varchar columns, and 'SensitivityType' is optional.
            The dict is not modified.

    Returns:
        The new field dict, or an empty dict when the field or its transform
        is deprecated and must not be imported.

    Raises:
        BlueprintImportError: the transformer is unknown, or it is deprecated
            and this script has no migration rule for it.
    """
    name = col['OutboundName']
    if name in _deprecated['fields']:
        print(f'Field {name} is deprecated and will not be imported. If this is a problem ' +
              'talk with Data Platform about it.')
        return {}
    transform = col['Transformer']
    # A missing, null or empty SensitivityType all fall back to 'none'.
    sensitivity = (col.get('SensitivityType') or 'none').lower()
    new_field = {'description': 'Fill this in', 'sensitivity': sensitivity}
    if transform in t_to_type:
        # copy so the shared lookup table is never aliased into a field
        new_field['type'] = dict(t_to_type[transform])
    elif transform in _transforms:
        new_field['transform'] = {'name': transform, 'source': col['InboundName']}
    elif transform == 'varchar':
        length = parse_length_from_col_options(col['ColumnCreationOptions'])
        new_field['type'] = {'name': 'string', 'length': length}
    elif transform in _deprecated['transforms'] or transform in _deprecated['types']:
        if transform in ('int', 'userIDWithMapping'):
            print(f'Field {name} uses deprecated int transform. You will need to work with Data ' +
                  'Platform via a support ticket to migrate it prior to landing this change.')
            # Retry as bigint on a copy, leaving the caller's column untouched.
            return make_field({**col, 'Transformer': 'bigint'})
        if transform == 'ipAsn':
            print(f'Field {name} uses deprecated {transform} transform and will not be imported. ' +
                  'If this is a problem talk with Data Platform about it.')
            return {}
        # No migration rule: fail loudly instead of emitting a field that has
        # neither a type nor a transform.
        raise BlueprintImportError(
            f'Field {name} uses deprecated {transform} transform, which cannot be imported '
            'automatically. Talk with Data Platform about it.')
    else:
        raise BlueprintImportError(f'Unknown error creating field {name}. Please report this bug.')
    return new_field


def check_and_create_fields(columns, fields):
    """Match columns against existing fields and draft definitions for the rest.

    Args:
        columns: column definitions, each carrying 'OutboundName'.
        fields: mapping of already-defined field names to their definitions.

    Returns:
        A pair (names, created): the set of field names the event will use,
        and a dict of newly drafted definitions keyed by field name.
    """
    kept_names = set()
    created = {}
    for col in columns:
        name = col['OutboundName']
        already_defined = name in fields
        if already_defined:
            result = check_field(col, fields[name])
        else:
            result = make_field(col)
        if not result:
            # an empty result means the column is skipped (e.g. deprecated)
            continue
        kept_names.add(name)
        if not already_defined:
            created[name] = result
    return kept_names, created


def find_groups(fields, groups):
    """Interactively offer existing groups that overlap the event's fields.

    Fields listed by the 'all_events_common_fields' group are dropped up
    front and that group is never offered. The remaining groups are offered
    one at a time, the one matching the most outstanding fields first, until
    none matches any outstanding field.

    Neither argument is modified.

    Args:
        fields: iterable of field names used by the event.
        groups: mapping of group name to group definition.

    Returns:
        A pair (group_names, field_names): the sorted names of the accepted
        groups and the sorted field names not covered by an accepted group.
    """
    print('\nAttempting to find group matches for you.  Feel free to decline all of these.')
    common_name = 'all_events_common_fields'
    common_fields = groups.get(common_name, {}).get('fields', ())
    # Work on copies so the caller's set and dict stay intact.
    remaining = set(fields) - set(common_fields)
    candidates = {k: v for k, v in groups.items() if k != common_name}
    fields_per_group = {k: set(build.expand_group(v, candidates)) for k, v in candidates.items()}
    new_groups = []
    while fields_per_group:
        # max() keeps the first group among ties, like a stable descending sort
        best = max(fields_per_group, key=lambda k: len(fields_per_group[k] & remaining))
        if not fields_per_group[best] & remaining:
            break
        # NOTE(review): candidates are ranked by their expanded fields, but the
        # prompt and the removal below use only the group's direct 'fields' -
        # confirm this is intended.
        group_fields = set(candidates[best]['fields'])
        new_fields = group_fields - remaining
        existing = group_fields & remaining
        if prompt(f'Do you want to use {best}? It uses {existing} and adds {new_fields}'):
            remaining -= existing
            new_groups.append(best)
        # each group is offered at most once, accepted or not
        del fields_per_group[best]
    return sorted(new_groups), sorted(remaining)


def get_source_overrides(columns, existing, created):
    """List the name/source overrides the event needs for renamed columns.

    Args:
        columns: column definitions with 'OutboundName' and 'InboundName'.
        existing: mapping of already-defined field names to definitions.
        created: mapping of newly drafted field names to definitions.

    Returns:
        A list of {'name': ..., 'source': ...} dicts, one per column whose
        inbound name differs from its outbound name and is not already
        covered by the field definition itself.
    """
    overrides = []
    for col in columns:
        name, source = col['OutboundName'], col['InboundName']
        if name not in existing and name not in created:
            # the column was dropped (deprecated), so nothing to override
            continue
        if name == source:
            continue
        if created.get(name, {}).get('transform') is not None:
            # a freshly drafted transform already records this source
            continue
        if name in existing:
            definition = existing[name]
            if 'transform' in definition:
                spec = definition['transform']
            else:
                spec = definition.get('type', {})
            if spec.get('source') == source:
                # the existing definition already reads from this source
                continue
        overrides.append({'name': name, 'source': source})
    return overrides


def generate_event(event, owner):
    """Given an event name, download blueprint definition and generate the necessary files.

    Args:
        event: name of the Blueprint event to import.
        owner: owner group recorded for the event and any newly created fields.

    Raises:
        BlueprintImportError: the event is already imported, the Blueprint
            schemas could not be loaded, or the event does not appear exactly
            once in them.
    """
    print(f'This script is attempting to import {event} for {owner}.  It will just make files ' +
          'locally which you will use as a seed for fully defining your event.  In many cases you' +
          ' will be importing existing fields, so please ensure their definitions meet your needs.')
    print('Once the import is finished, you will need to edit any new files to include useful ' +
          'descriptions and other pertinent information.  If you hit issues or need help, please ' +
          'ask in #spade-schema-registry.\n')
    existing_events = build.objects_by_name(build.get_events().values())
    if event in existing_events:
        raise BlueprintImportError(f'{event} already imported')
    schemas = get_blueprint_schemas()
    if schemas is None:
        # get_blueprint_schemas has already printed the cause; stop cleanly
        # instead of failing with a TypeError while iterating None.
        raise BlueprintImportError('could not load the blueprint schemas')
    maybe_def = [x for x in schemas if x['EventName'] == event]
    if len(maybe_def) != 1:
        raise BlueprintImportError(f'{event} not found in blueprint')
    bp_def = maybe_def[0]
    existing_fields = build.objects_by_name(build.get_fields().values())
    fields, fields_to_create = check_and_create_fields(bp_def['Columns'], existing_fields)
    existing_groups = build.objects_by_name(build.get_groups().values())
    groups, fields = find_groups(fields, existing_groups)
    # now output the objects to get created
    for key, value in fields_to_create.items():
        build.write_object('fields', {key: value})
    obj = {event: {'fields': fields, 'groups': groups, 'description': 'Fill this in'}}
    overrides = get_source_overrides(bp_def['Columns'], existing_fields, fields_to_create)
    if overrides:
        obj[event]['overrides'] = overrides
    build.write_object('events', obj)
    build.update_codeowners(owner, [event], sorted(fields_to_create.keys()), [])


def main():
    """Parse the EVENT and OWNER command-line arguments and run the import.

    A BlueprintImportError is reported on standard output rather than
    propagated.
    """
    parser = argparse.ArgumentParser(description='Import a Blueprint schema.')
    positional = (
        ('event', 'EVENT', 'the event to import'),
        ('owner', 'OWNER', 'the owner group for the event'),
    )
    for dest, metavar, help_text in positional:
        parser.add_argument(dest, metavar=metavar, type=str, help=help_text)
    cli = parser.parse_args()
    try:
        generate_event(cli.event, cli.owner)
    except BlueprintImportError as err:
        print(f'Import failed: {err}')


# Run the import only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
