import datetime
import time
import re
import logging
from zenyatta.aws import boto_client, get_instance
from airflow import DAG, settings
from airflow.models import DagRun, TaskInstance, DagBag
from airflow.operators.python_operator import PythonOperator
from airflow.utils.state import State

# Task defaults handed to the DAG declared at the bottom of this module.
default_args = dict(
    owner='DANG',
    retries=3,
    retry_delay=datetime.timedelta(minutes=5),
    start_date=datetime.datetime(2017, 11, 1),
)

# Identifier shape of an ETL point-in-time-recovery instance:
#   <letters/digits/dashes>-etl-<YYYYMMDD>t<HHMMSS>[<six more digits>]
# Group 1 is the timestamp; group 2 is the optional six-digit suffix.
ident_pattern = re.compile(r'^[a-zA-Z0-9-]+-etl-(\d{8}t\d{6})(\d{6})?$')


def filter_protected_instances(ds, **kwargs):
    """Drop cleanup candidates that a currently RUNNING dag run still needs.

    Pulls the candidate identifiers published by the
    ``collect_cleanup_instances`` task, then walks the task instances of every
    RUNNING dag run.  Each successful ``rds-point-in-time-recovery-for-*``
    task has an XCom value whose ``identifier`` names an RDS instance that
    run depends on; those identifiers are removed from the candidates.

    This mainly protects older dag runs that are still in flight.  Instances
    created moments ago (an ETL spinning up while this DAG starts) are not
    covered here: ``collect_cleanup_instances`` already excludes identifiers
    whose embedded timestamp is less than an hour old.

    :param ds: execution date string supplied by Airflow (unused).
    :param kwargs: Airflow task context; ``kwargs['ti']`` is used for XCom.
    :return: list of identifiers that are still safe to clean up.
    """
    # An absent XCom value is treated as "nothing to clean up".
    candidates = kwargs['ti'].xcom_pull(task_ids='collect_cleanup_instances') or []

    # Collect every identifier a running dag run depends on.
    protected = set()
    for dagrun in DagRun.find(state=State.RUNNING):
        for taskinstance in dagrun.get_task_instances():
            if taskinstance.state != State.SUCCESS:
                continue
            if not taskinstance.task_id.startswith('rds-point-in-time-recovery-for-'):
                continue

            rds_resource = taskinstance.xcom_pull(task_ids=taskinstance.task_id)
            if rds_resource is None:
                # Nothing was published for this task, so there is no
                # identifier to protect; do not crash the whole cleanup.
                logging.warning("No XCom value for task %s; nothing to protect",
                                taskinstance.task_id)
                continue
            protected.add(rds_resource.identifier)

    # Only report a removal when the identifier really was a candidate.
    remaining = []
    for identifier in candidates:
        if identifier in protected:
            logging.info("%s removed from deletion list bc a dag needs it", identifier)
        else:
            remaining.append(identifier)

    return remaining


def collect_cleanup_instances(ds, **kwargs):
    """Return identifiers of stale ETL point-in-time-recovery RDS instances.

    An RDS instance is a cleanup candidate when all of the following hold:

    * it is in the same VPC as the instance returned by ``get_instance``;
    * its identifier matches ``ident_pattern``
      (``<something>-etl-<timestamp>``);
    * the timestamp embedded in the identifier is more than an hour old.

    :param ds: execution date string supplied by Airflow (unused).
    :param kwargs: Airflow task context (unused).
    :return: list of DB instance identifier strings.
    """
    rds_client, _ = boto_client('rds')
    local_instance = get_instance('us-west-2')

    # Computed once so that every instance is judged against the same cutoff.
    # NOTE(review): naive UTC comparison -- assumes the timestamps embedded in
    # identifiers are UTC; confirm against whatever names the instances.
    an_hour_ago = datetime.datetime.utcnow() - datetime.timedelta(hours=1)

    instances = []
    for page in rds_client.get_paginator('describe_db_instances').paginate():
        for instance in page['DBInstances']:
            identifier = instance['DBInstanceIdentifier']
            logging.info("Examining %s", identifier)

            # An entry without a subnet group / VPC id cannot be matched to
            # our VPC, so skip it instead of raising KeyError.
            vpc_id = instance.get('DBSubnetGroup', {}).get('VpcId')
            if vpc_id is None or vpc_id != local_instance.vpc_id:
                logging.info("Wrong VPC")
                continue

            # Identifier is not formatted as an ETL PITR
            match = ident_pattern.match(identifier)
            if match is None:
                logging.info("Wrong name")
                continue

            timestamp_str = match.group(1)
            logging.info("Attempting parse of timestamp %s", timestamp_str)
            try:
                # The regex only guarantees digits; the value may still not be
                # a real date (e.g. month 13), hence the ValueError guard.
                timestamp = datetime.datetime.strptime(timestamp_str, '%Y%m%dt%H%M%S')
            except ValueError as e:
                logging.info("Error parsing %s", e)
                continue

            # Was spun up awhile ago
            if timestamp < an_hour_ago:
                logging.info("Considering %s for deletion.", identifier)
                instances.append(identifier)
            else:
                logging.info("Too recent")

    return instances


def trigger_cleanup(ds, **kwargs):
    """Create one ``cleanup_single_instance`` dag run per instance to delete.

    The identifiers come from the ``filter_protected_instances`` task via
    XCom.  Each dag run receives ``{'identifier': <instance>}`` as its conf
    and a run id derived from the DAG id, the identifier and this task's
    execution date, so an existing run with the same id is not duplicated.

    :param ds: execution date string supplied by Airflow (unused).
    :param kwargs: Airflow task context; uses ``ti`` and ``execution_date``.
    :raises ValueError: if the ``cleanup_single_instance`` DAG cannot be
        loaded from the DAGs folder.
    """
    # An absent XCom value is treated as "nothing to clean up".
    instances = kwargs['ti'].xcom_pull(task_ids='filter_protected_instances') or []
    if not instances:
        logging.info("No instances to clean up")
        return

    execution_date = kwargs['execution_date']
    dag = DagBag(settings.DAGS_FOLDER).get_dag('cleanup_single_instance')
    if dag is None:
        raise ValueError(
            "DAG 'cleanup_single_instance' not found in {}".format(settings.DAGS_FOLDER)
        )

    session = settings.Session()
    try:
        for instance in instances:
            run_id = "report__{dag_id}__{identifier}__{execution_date}".format(
                dag_id=dag.dag_id, identifier=instance, execution_date=execution_date,
            )

            # Same run id means this instance was already handed off for this
            # execution date; do not create a second run.
            if DagRun.find(run_id=run_id):
                logging.info("%s already exists; not triggering again", run_id)
                continue

            logging.info("triggering %s for cleanup", run_id)
            dr = dag.create_dagrun(
                run_id=run_id,
                state=State.RUNNING,
                external_trigger=True,
                conf={'identifier': instance},
            )
            session.add(dr)

        session.commit()
    except Exception:
        # Leave the session clean before the task failure propagates.
        session.rollback()
        raise
    finally:
        # Always hand the connection back, even when a trigger fails.
        session.close()


# Daily pipeline: collect stale ETL PITR instances, drop the ones a running
# dag still needs, then trigger one cleanup dag run per remaining instance.
with DAG('cleanup_instances',
         schedule_interval='@daily',
         max_active_runs=1,
         catchup=False,
         default_args=default_args,
         ) as dag:

    # Step 1: list candidate instances.
    collect_cleanup = PythonOperator(
        python_callable=collect_cleanup_instances,
        task_id='collect_cleanup_instances',
        provide_context=True,
        dag=dag,
    )

    # Step 2: remove candidates that are still in use.
    filter_protected = PythonOperator(
        python_callable=filter_protected_instances,
        task_id='filter_protected_instances',
        provide_context=True,
        dag=dag,
    )

    # Step 3: hand each remaining instance to its own cleanup dag run.
    trigger = PythonOperator(
        python_callable=trigger_cleanup,
        task_id='trigger_cleanup',
        provide_context=True,
        dag=dag,
    )

    collect_cleanup >> filter_protected
    filter_protected >> trigger
