#!/usr/bin/env python
"""
This is a script for publishing events to Blueprint
"""


import subprocess
import json
import time
import sys
import argparse
import gzip
import logging

import boto3
from botocore.exceptions import ClientError

import build


# Configure the root logger at import time: timestamped, level-padded
# messages at INFO and above.
logging.basicConfig(
    format='%(asctime)s %(levelname)-8s %(message)s',
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S')


# Upper bound, in seconds, on how long publish_events keeps polling for the
# changeset result before reporting failure.
CHANGE_WAIT_SECONDS = 300


def get_sensitivity_in_bp_format(sensitivity):
    """Translate an SSR sensitivity label into the spelling Blueprint uses.

    Falsy input (``None``, ``''``) and the label ``'none'`` both yield
    ``None``. A non-empty label that is not in the table raises ``KeyError``.
    """
    if sensitivity:
        ssr_to_blueprint = dict(
            userid='userID',
            otherid='otherID',
            sessionid='sessionID',
            ip='ip',
            none=None,
        )
        return ssr_to_blueprint[sensitivity]
    return None


def convert_field_to_desired_state(field_name, field_info):
    """Build the Blueprint column description for a single field.

    The inbound name defaults to ``field_name`` and is replaced by the type's
    ``source`` when one is given. A ``transform`` entry takes precedence over
    the type: its ``name`` becomes the transformer and its ``source`` the
    inbound name. Otherwise the transformer is derived from the type name;
    a type name that is not handled here leaves the transformer as ``None``.

    Returns a dict with the keys ``inboundName``, ``outboundName``,
    ``transformer``, ``columnCreationOptions`` and ``sensitivityType``.
    """
    source_name = field_name
    if 'type' in field_info and 'source' in field_info['type']:
        source_name = field_info['type']['source']

    bp_transformer = None
    creation_options = ''
    raw_sensitivity = field_info.get('sensitivity')

    if 'transform' in field_info:
        transform = field_info['transform']
        bp_transformer = transform['name']
        source_name = transform['source']
    else:
        type_info = field_info['type']
        type_name = type_info['name']
        if type_name == 'string':
            bp_transformer = 'varchar'
            creation_options = f'({type_info["length"]})'
        elif type_name == 'enum':
            bp_transformer = 'varchar'
            # An enum column only has to be as wide as its longest value.
            widest_value = max(len(candidate) for candidate in type_info['values'])
            creation_options = f'({widest_value})'
        elif type_name == 'float':
            bp_transformer = 'float'
        elif type_name == 'long':
            bp_transformer = 'bigint'
        elif type_name == 'timestamp':
            is_utc = type_info['timezone'] == 'UTC'
            bp_transformer = 'f@timestamp@unix-utc' if is_utc else 'f@timestamp@unix'
        elif type_name == 'bool':
            bp_transformer = 'bool'

    return {
        'inboundName': source_name,
        'outboundName': field_name,
        'transformer': bp_transformer,
        'columnCreationOptions': creation_options,
        'sensitivityType': get_sensitivity_in_bp_format(raw_sensitivity),
    }


def get_event_owner(event_name):
    """Look up the owner of an event in the repository's CODEOWNERS file.

    Only lines that ``build.parse_lines_between`` returns for the
    ``# Event ownership.`` / ``# End Event`` markers and that start with
    ``/events/`` are considered.

    Args:
        event_name: Event name; its definition is expected at a path ending
            in ``/<event_name>.yaml``.

    Returns:
        The first owner listed on the matching line, or ``''`` when no line
        matches.
    """
    codeowners_path = build.project_path('.github') + '/CODEOWNERS'
    with open(codeowners_path) as fp:
        lines = list(fp)
    lines = build.parse_lines_between(lines, '# Event ownership.', '# End Event')

    # Anchor on the path separator so that e.g. "view" does not match
    # "/events/page_view.yaml".
    wanted_suffix = f'/{event_name}.yaml'
    for line in lines:
        if not line.startswith('/events/'):
            continue
        parts = line.split()
        # CODEOWNERS allows several owners per pattern (or, mid-edit, none);
        # skip ownerless entries instead of failing on tuple unpacking.
        if len(parts) < 2:
            continue
        event_path, owner = parts[0], parts[1]
        if event_path.endswith(wanted_suffix):
            return owner
    return ''


def convert_event_to_desired_state(event_name, events, groups, fields):
    """Build the Blueprint end state for one event.

    The event's fields and groups are expanded through
    ``build.expand_event_fields``, the ``date`` field is dropped, the event's
    ``overrides`` (default: none) are applied through
    ``build.apply_overrides``, and every resulting field is converted to a
    column in field-name order.

    Returns a dict with the keys ``eventName``, ``columns`` and ``owner``.
    """
    event = events[event_name]
    own_fields = event.get('fields')
    own_groups = event.get('groups')
    owner = get_event_owner(event_name)

    expanded = build.expand_event_fields(own_groups, own_fields, groups, fields)
    # 'date' is special and is not published as a column; see fields/date.yaml.
    del expanded['date']
    expanded = build.apply_overrides(event.get('overrides', []), expanded)

    columns = [
        convert_field_to_desired_state(column_name, expanded[column_name])
        for column_name in sorted(expanded)
    ]
    return {'eventName': event_name, 'columns': columns, 'owner': owner}


def convert_events_to_desired_state(events, groups, fields, blueprint_ssr_events):
    """Build the full Blueprint changeset for all events.

    Args:
        events: Mapping of event name to event definition.
        groups: Mapping of group name to group definition.
        fields: Mapping of field name to field definition.
        blueprint_ssr_events: Iterable of event names already present in
            Blueprint.

    Returns:
        ``{'eventEndStates': [...]}``. The list holds one converted entry per
        local event, in name order, followed by an entry with an empty
        ``columns`` list for each name in ``blueprint_ssr_events`` that has
        no local definition, also in name order.
    """
    end_states = [
        convert_event_to_desired_state(event_name, events, groups, fields)
        for event_name in sorted(events)
    ]
    # Sort the leftover names: raw set iteration order varies between runs,
    # which would make the published changeset non-reproducible.
    stale_events = set(blueprint_ssr_events) - set(events)
    end_states.extend(
        {'eventName': event_name, 'columns': []}
        for event_name in sorted(stale_events)
    )
    return {'eventEndStates': end_states}


def get_git_revision_short_hash():
    """Return the abbreviated hash of HEAD as reported by ``git rev-parse``.

    Raises ``subprocess.CalledProcessError`` when the git command exits with
    a non-zero status.
    """
    command = ['git', 'rev-parse', '--short', 'HEAD']
    raw_output = subprocess.check_output(command)
    return raw_output.decode('ascii').strip()


def check_publish_success(env, git_rev):
    """Report whether the changeset for ``git_rev`` was applied successfully.

    Reads ``schema_changes/output/<git_rev>-changeset.json`` from the
    ``blueprint-ops-<env>`` bucket and returns ``True`` only when its
    ``success`` value is exactly ``True``. A ``ClientError`` while fetching
    the object (for example because it does not exist yet) yields ``False``.
    """
    bucket = f'blueprint-ops-{env}'
    key = f'schema_changes/output/{git_rev}-changeset.json'
    client = boto3.client('s3')
    try:
        reply = client.get_object(Bucket=bucket, Key=key)
        outcome = json.loads(reply['Body'].read())
        return outcome['success'] is True
    except ClientError:
        return False


def get_blueprint_ssr_owned_events(env, s3_client):
    """Return the names of all Blueprint events owned by SSR.

    Args:
        env: Environment name. Names of the form ``integration-<namespace>``
            read ``<namespace>-schema-configs.json.gz`` from the integration
            bucket; anything else reads the prod config.
        s3_client: Object exposing ``Object(bucket, key)`` whose result has a
            ``get()`` returning a mapping with a readable ``Body``. Despite
            the parameter name, ``main`` passes a boto3 S3 *resource*.

    Returns:
        The set of ``EventName`` values whose ``UserName`` is
        ``'Spade Schema Registry'``; an empty set when the config object does
        not exist.

    Raises:
        ClientError: For any S3 error other than ``NoSuchKey``.
    """
    if env.startswith('integration-'):
        bucket = 'science-blueprint-configs-integration'
        # Take everything after the "integration-" prefix as a string.
        # Slicing the split list and formatting it produced keys such as
        # "['ns']-schema-configs.json.gz".
        namespace = env.split('-', 1)[1]
        key = f'{namespace}-schema-configs.json.gz'
    else:
        bucket = 'science-blueprint-configs'
        key = 'prod-schema-configs.json.gz'

    obj = s3_client.Object(bucket, key)
    try:
        with gzip.GzipFile(fileobj=obj.get()["Body"]) as gzipfile:
            content = gzipfile.read()
    except ClientError as ex:
        if ex.response['Error']['Code'] == 'NoSuchKey':
            logging.info('No object found - returning empty')
            return set()
        raise

    return {
        event['EventName']
        for event in json.loads(content)
        if event['UserName'] == 'Spade Schema Registry'
    }


def publish_events(s3_resource, environment, git_rev, blueprint_state):
    """Upload the changeset for ``environment`` and wait for its result.

    The JSON-serialized ``blueprint_state`` is written to
    ``schema_changes/input/<git_rev>-changeset.json`` in the
    ``blueprint-ops-<environment>`` bucket. The function then polls
    ``check_publish_success`` every 4 seconds for up to
    ``CHANGE_WAIT_SECONDS``.

    Returns:
        ``True`` as soon as a poll reports success, ``False`` when the wait
        window elapses without one.
    """
    changeset = s3_resource.Object(
        f'blueprint-ops-{environment}',
        f'schema_changes/input/{git_rev}-changeset.json')
    changeset.put(Body=json.dumps(blueprint_state))

    # Measure the wait with a monotonic clock so that wall-clock adjustments
    # (NTP steps, manual changes) cannot shorten or extend the timeout.
    deadline = time.monotonic() + CHANGE_WAIT_SECONDS
    while time.monotonic() <= deadline:
        if check_publish_success(environment, git_rev):
            return True
        time.sleep(4)

    return False


def prepare_changes(blueprint_events):
    """Compute the Blueprint changeset and the codegen schema.

    Loads events, groups and fields through ``build``, indexes each by name,
    and returns the tuple ``(blueprint_state, codegen_schema)``.
    """
    events_by_name = build.objects_by_name(build.get_events().values())
    groups_by_name = build.objects_by_name(build.get_groups().values())
    fields_by_name = build.objects_by_name(build.get_fields().values())
    return (
        convert_events_to_desired_state(
            events_by_name, groups_by_name, fields_by_name, blueprint_events),
        build.make_codegen_schema(events_by_name, groups_by_name, fields_by_name),
    )


def main():
    """Publish the event schema for an environment and write the codegen schema.

    Command-line arguments:
        environment: Which environment to run for.
        --local: Skip publishing to Blueprint; only compute the state and
            write the codegen schema.

    Exits with status 1 when publishing does not succeed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('environment', help='Which environment you are running for')
    parser.add_argument('--local', action='store_true', help='Just compute local state')
    args = parser.parse_args()

    s3_resource = boto3.resource('s3')
    # get_git_revision_short_hash already strips surrounding whitespace.
    git_rev = get_git_revision_short_hash()
    logging.info('Publishing events for the environment %s at %s', args.environment, git_rev)

    # NOTE: this S3 read happens even with --local.
    blueprint_events = get_blueprint_ssr_owned_events(args.environment, s3_resource)
    blueprint_state, codegen_schema = prepare_changes(blueprint_events)

    if not args.local:
        published = publish_events(s3_resource, args.environment, git_rev, blueprint_state)
        if not published:
            # A failed publish aborts the run, so report it at ERROR level.
            logging.error('Change ops %s failed to execute', git_rev)
            sys.exit(1)
        logging.info('Change ops %s succesfully executed', git_rev)

    build.write_codegen_schema(codegen_schema, 'prod_schema.yaml.gz')


# Run the publisher only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()
