import argparse

import time

from datasource import OMLCodeCov, OMLCapacity, OMLRPS, OMLSkadi, OMLGraphite
from handlers import IngestHandlers, DBHelper, QueryHandlers
from multiprochelper import run_parallel
from orghelper import OrgHelper

# SQL templates for writing one sample into TIME_SERIES_RAW. They are filled in
# with Python %-formatting, so placeholder order matters.

# Repo-scoped sample; BU, team and repo IDs are resolved from their names.
# Placeholders: time-series def ID, ingest batch, value, BU name, team name, repo name.
INSERT_RAW_REPO = """INSERT INTO TIME_SERIES_RAW
(TIME_SERIES_DEF_ID, INGEST_BATCH, VALUE, OML_BU_ID, OML_TEAM_ID, GHE_REPO_ID)
SELECT '%s', '%s', %s, OB.ID, OT.ID, GR.ID 
FROM OML_BU OB 
JOIN OML_TEAM OT ON OB.ID=OT.OML_BU_ID
JOIN GHE_REPO GR ON GR.OML_TEAM_ID=OT.ID
WHERE OB.NAME='%s' AND OT.NAME='%s' AND GR.NAME='%s'"""

# Repo-scoped sample where the BU and team IDs are supplied directly and only the
# repo ID is resolved from its name.
# Placeholders: time-series def ID, ingest batch, value, BU ID, team ID, repo name.
INSERT_RAW_REPO_OML_ID = """INSERT INTO TIME_SERIES_RAW
(TIME_SERIES_DEF_ID, INGEST_BATCH, VALUE, OML_BU_ID, OML_TEAM_ID, GHE_REPO_ID) 
SELECT '%s', '%s', %s, %s, %s, GR.ID
FROM GHE_REPO GR WHERE GR.NAME='%s'"""

# Service-scoped sample; BU, team and service IDs are resolved from their names.
# Placeholders: time-series def ID, ingest batch, value, BU name, team name, service name.
INSERT_RAW_SERVICE = """INSERT INTO TIME_SERIES_RAW
(TIME_SERIES_DEF_ID, INGEST_BATCH, VALUE, OML_BU_ID, OML_TEAM_ID, OML_SERVICE_ID)
SELECT '%s', '%s', %s, OB.ID, OT.ID, OS.ID
FROM OML_BU OB 
JOIN OML_TEAM OT ON OB.ID=OT.OML_BU_ID
JOIN OML_SERVICE OS ON OT.ID=OS.OML_TEAM_ID
WHERE OB.NAME='%s' AND OT.NAME='%s' AND OS.NAME='%s'"""

# Team-scoped sample; BU and team IDs are resolved from their names.
# Placeholders: time-series def ID, ingest batch, value, BU name, team name.
INSERT_RAW_TEAM = """INSERT INTO TIME_SERIES_RAW(
TIME_SERIES_DEF_ID, INGEST_BATCH, VALUE, OML_BU_ID, OML_TEAM_ID)
SELECT '%s', '%s', %s, OB.ID, OT.ID 
FROM OML_BU OB 
JOIN OML_TEAM OT ON OB.ID=OT.OML_BU_ID
WHERE OB.NAME='%s' AND OT.NAME='%s'"""

# BU-scoped sample; the BU ID is resolved from its name.
# Placeholders: time-series def ID, ingest batch, value, BU name.
INSERT_RAW_BU = """INSERT INTO TIME_SERIES_RAW(TIME_SERIES_DEF_ID, INGEST_BATCH, VALUE, OML_BU_ID)
SELECT '%s', '%s', %s, OB.ID FROM OML_BU OB WHERE OB.NAME='%s'"""

# Names of the known metrics.
# NOTE(review): presumably the valid choices for a command-line metric selector - confirm against the caller.
METRICS = ['availability', 'ri', 'bugs', 'codecov', 'autoenv', 'integtest', 'unittest', 'canaries',
           'configmgmt', 'alerts']


def get_oml_level(kpi_id, rollup_id, actual_value):
    """Return the highest OML level whose goal threshold is met by ``actual_value``.

    Queries the GOAL table for the given KPI and rollup and takes the largest
    OML_LEVEL whose MIN_VALUE is less than or equal to ``actual_value``.

    :param kpi_id: ID of the KPI the goal belongs to.
    :param rollup_id: ID of the rollup level the goal applies to.
    :param actual_value: measured value to compare against each goal's MIN_VALUE.
    :return: the matching OML level, or -1 when the level is unknown (no goal matched).
    """
    results = DBHelper.run_query_flat_results("SELECT MAX(OML_LEVEL) FROM GOAL "
                                              "WHERE KPI_ID=%s AND ROLLUP_ID=%s AND %s >= MIN_VALUE"
                                              % (kpi_id, rollup_id, actual_value))
    # MAX() without GROUP BY always produces one row, which is NULL when no goal
    # matched, so "nothing found" may arrive here as a single None rather than as
    # an empty list. Treat both as unknown instead of handing None to the caller.
    if len(results) == 0 or results[0] is None:
        return -1  # -1 indicates unknown oml_level
    return results[0]


def ingest_availability(inherit, override=None):
    """Ingest availability and SLA samples per service, then build the AVAIL-SLA summary.

    Every entry returned by ``IngestHandlers().availability`` is mapped to its OML
    BU / team / service and stored twice in TIME_SERIES_RAW: once under the
    ``AVAIL`` series and once under the ``AVAIL-SLA`` series. Entries whose OML
    service, team or BU cannot be resolved are reported and skipped. Afterwards
    the stored row counts are compared with the number of ingested entries and
    the summary metrics are generated.

    :param inherit: accepted but not used by this function.
    :param override: accepted but not used by this function.
    """
    batch = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    avail_id = DBHelper.run_query("SELECT ID FROM TIME_SERIES_DEF WHERE `KEY`='AVAIL'")[0][0]
    sla_id = DBHelper.run_query("SELECT ID FROM TIME_SERIES_DEF WHERE `KEY`='AVAIL-SLA'")[0][0]
    ingested = 0
    print("Starting Ingest Batch %s..." % batch)
    started_at = time.time()

    _report_start_date, rows = IngestHandlers().availability("bu", '*', '*', "*")
    for row in rows:
        status_bu = row[0].strip()
        status_team = row[1].strip()
        status_service = row[2].strip()
        sla_value = row[3]
        avail_value = row[4]
        # identifies the source entry in the "cannot ingest" messages below
        source_label = (status_bu, status_team, status_service)

        oml_service = OrgHelper.status_to_ref_service(status_service)
        if oml_service is None:
            print("Cannot ingest %s/%s/%s because its OML Service is unknown" % source_label)
            continue

        team_ids = DBHelper.run_query_flat_results(
            "SELECT OML_TEAM_ID FROM STATUS_SERVICE WHERE `NAME`='%s'" % status_service)
        if len(team_ids) == 0:
            print("Cannot ingest %s/%s/%s because its OML Team is unknown" % source_label)
            continue

        oml_team = DBHelper.run_query_flat_results("SELECT NAME FROM OML_TEAM WHERE ID='%s'" % team_ids[0])[0]
        bu_names = DBHelper.run_query_flat_results("SELECT OB.NAME FROM OML_BU OB "
                                                   "JOIN OML_TEAM OT ON OT.OML_BU_ID=OB.ID "
                                                   "WHERE OT.NAME='%s'" % oml_team)
        if len(bu_names) == 0:
            print("Cannot ingest %s/%s/%s because its OML BU is unknown" % source_label)
            continue

        oml_bu = bu_names[0]
        # one row per series: measured availability first, then the SLA target
        for series_id, sample in ((avail_id, avail_value), (sla_id, sla_value)):
            DBHelper.run_query(INSERT_RAW_SERVICE % (series_id, batch, sample, oml_bu, oml_team, oml_service))
        ingested += 1
        print("Ingested data for %s/%s/%s" % (oml_bu, oml_team, oml_service))

    print("Done ingesting in %d seconds" % (time.time() - started_at))

    # validate: each series must hold exactly one row per ingested entry
    checks = (
        (avail_id,
         "Successfully ingested %d entries for AVAIL",
         "WARNING: Ingestion failure for AVAIL. Expected %d. Actual %d."),
        (sla_id,
         "Successfully ingested %d entries for AVAIL-SLA",
         "WARNING: Ingestion failure for AVAIL-SLA. Expected %d. Actual %d."),
    )
    for series_id, success_message, warning_message in checks:
        stored = DBHelper.run_query(
            "SELECT COUNT(*) FROM TIME_SERIES_RAW WHERE TIME_SERIES_DEF_ID=%s AND INGEST_BATCH='%s'"
            % (series_id, batch))[0][0]
        if stored == ingested:
            print(success_message % stored)
        else:
            print(warning_message % (ingested, stored))

    kpi_id = DBHelper.run_query_flat_results("SELECT ID FROM KPI WHERE `KEY`='AVAIL-SLA'")[0]
    print("Generating summary metrics")
    generate_summary_ratio_of_count_of_within_sla_entries_in_series(batch, avail_id, sla_id, kpi_id)


def low_level_created_resolved_jira_query(open_key, resolved_key, handler_function_name):
    """Ingest a created/resolved pair of Jira counts for every Jira team.

    For each Jira team of each reference BU, the ``IngestHandlers`` method named
    ``handler_function_name`` is called and its two results are stored as
    team-level samples under the ``open_key`` and ``resolved_key`` series.
    A failure for one team is printed and does not stop the others. The stored
    row counts are then compared with the number of teams visited.

    :param open_key: TIME_SERIES_DEF key receiving the first ("created") value.
    :param resolved_key: TIME_SERIES_DEF key receiving the second ("resolved") value.
    :param handler_function_name: name of the IngestHandlers method to invoke.
    :return: the ingest batch timestamp string used for all inserted rows.
    """
    batch = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    open_def_id = DBHelper.run_query("SELECT ID FROM TIME_SERIES_DEF WHERE `KEY`='%s'" % open_key)[0][0]
    resolved_def_id = DBHelper.run_query("SELECT ID FROM TIME_SERIES_DEF WHERE `KEY`='%s'" % resolved_key)[0][0]
    teams_visited = 0
    print("Starting Ingest Batch %s..." % batch)
    started_at = time.time()

    for ref_bu in OrgHelper.ref_BUs():
        jira_bu = OrgHelper.jira_BU(ref_bu)
        for jira_team in OrgHelper.jira_teams_in_ref_BU(ref_bu):
            # counted before the attempt, so failed teams show up in the validation below
            teams_visited += 1
            oml_bu, oml_team = OrgHelper().jira_to_ref(jira_team)
            handler = getattr(IngestHandlers(), handler_function_name)
            try:
                created, resolved = handler("team", jira_bu, jira_team, None)
                for def_id, sample in ((open_def_id, created), (resolved_def_id, resolved)):
                    DBHelper.run_query(INSERT_RAW_TEAM % (def_id, batch, sample, oml_bu, oml_team))
                print("Ingested data for %s / %s (%s)" % (oml_bu, oml_team, jira_team))
            except Exception as err:
                print("Exception ingesting data for %s: %s" % (jira_team, err))

    print("Done ingesting in %d seconds" % (time.time() - started_at))

    # validate: each series should hold one row per visited team
    for def_id, series_key in ((open_def_id, open_key), (resolved_def_id, resolved_key)):
        stored = DBHelper.run_query("SELECT COUNT(*) FROM TIME_SERIES_RAW "
                                    "WHERE TIME_SERIES_DEF_ID=%s AND INGEST_BATCH='%s'" % (def_id, batch))[0][0]
        if stored != teams_visited:
            print("WARNING: Ingestion failure for %s. Expected %d. Actual %d." % (series_key, teams_visited, stored))
        else:
            print("Successfully ingested %d entries for %s" % (stored, series_key))

    return batch


def low_level_ingest_repo_based_binary_metric(time_series_def_key, inherit, override_list=None):
    """Ingest a per-repo metric by inheriting prior samples and applying overrides.

    When ``inherit`` is set, the value each repo had in the most recent ingest
    batch of this series is copied into the new batch; repos without a prior
    value get -1. Afterwards every ``<FULL_REPO_NAME>=<value>`` entry in
    ``override_list`` inserts an additional sample for the matching repo.

    :param time_series_def_key: TIME_SERIES_DEF key of the series to ingest.
    :param inherit: when truthy, carry values over from the latest prior batch.
    :param override_list: optional list of ``<FULL_REPO_NAME>=<value>`` strings.
    :return: the ingest batch timestamp string used for all inserted rows.
    """
    # None instead of a mutable [] default in the signature
    overrides = [] if override_list is None else override_list

    batch = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    def_id = DBHelper.run_query("SELECT ID FROM TIME_SERIES_DEF WHERE `KEY`='%s'" % time_series_def_key)[0][0]

    print("Starting Ingest Batch %s..." % batch)
    started_at = time.time()

    # Process inherited values
    if inherit:
        latest_batch_raw = DBHelper.run_query("SELECT MAX(INGEST_BATCH) FROM TIME_SERIES_RAW "
                                              "WHERE TIME_SERIES_DEF_ID=%s" % def_id)[0][0]

        if latest_batch_raw is not None:
            latest_batch = latest_batch_raw.strftime('%Y-%m-%d %H:%M:%S')

            for bu_name in OrgHelper.ref_BUs():
                for team_name in OrgHelper.ref_teams_in_ref_bu(bu_name):
                    for repo_name in OrgHelper.ghe_repos_in_ref_team(team_name):
                        prior_rows = DBHelper.run_query("SELECT VALUE FROM TIME_SERIES_RAW TSR "
                                                        "JOIN GHE_REPO GR ON GR.ID=TSR.GHE_REPO_ID "
                                                        "JOIN OML_TEAM OT ON OT.ID=TSR.OML_TEAM_ID "
                                                        "JOIN OML_BU OB ON OB.ID=TSR.OML_BU_ID "
                                                        "WHERE OB.NAME='%s' AND OT.NAME='%s' AND GR.NAME='%s' "
                                                        "AND TSR.INGEST_BATCH='%s' AND TSR.TIME_SERIES_DEF_ID=%s"
                                                        % (bu_name, team_name, repo_name, latest_batch, def_id))
                        if len(prior_rows) > 0:
                            inherited_value = prior_rows[0][0]
                            DBHelper.run_query(INSERT_RAW_REPO % (def_id, batch, inherited_value, bu_name,
                                                                  team_name, repo_name))
                            print("Ingested inherited data for %s" % repo_name)
                        else:
                            # -1 is stored as the "Unknown" value
                            print("A prior ingestion for %s does NOT exist. Default to Unknown." % repo_name)
                            DBHelper.run_query(INSERT_RAW_REPO % (def_id, batch, -1, bu_name, team_name,
                                                                  repo_name))
        else:
            print("A prior ingestion does NOT exist. NOT inheriting any metric samples.")

    # Process override flags, if any. Individual format is <FULL_REPO_NAME>=<1 or 0>
    for override_entry in overrides:
        target_repo, forced_value = override_entry.split("=")

        for bu_name in OrgHelper.ref_BUs():
            for team_name in OrgHelper.ref_teams_in_ref_bu(bu_name):
                team_repos = OrgHelper.ghe_repos_in_ref_team(team_name)
                if len(team_repos) == 0:
                    continue

                for repo_name in team_repos:
                    if repo_name != target_repo:
                        continue
                    print("Overriding sample value for %s to %s" % (repo_name, forced_value))
                    DBHelper.run_query(INSERT_RAW_REPO % (def_id, batch, forced_value, bu_name, team_name,
                                                          repo_name))

    print("Done ingesting in %d seconds" % (time.time() - started_at))

    return batch


def _store_true_ratio_summary(kpi_id, ingest_time, rollup_id, scope, count_of_true_values,
                              count_of_any_values, with_oml_level=True):
    """Insert one METRICS_SUMMARY row holding a true/total ratio for one rollup scope.

    :param scope: ordered list of (METRICS_SUMMARY column name, ID) pairs that
        identify the rollup entity; empty for the company-wide rollup.
    :param count_of_true_values: numerator, stored as VALUE1.
    :param count_of_any_values: denominator, stored as VALUE2.
    :param with_oml_level: when true, also look up and store OML_LEVEL.
    """
    # An aggregate over zero rows comes back as SUM = NULL / COUNT = 0. There is
    # nothing to summarize then, and dividing would raise.
    if not count_of_any_values:
        return

    computed_value = count_of_true_values / count_of_any_values
    columns = ["KPI_ID", "INGEST_BATCH", "ROLLUP_ID"] + [column for column, _ in scope]
    values = [kpi_id, "'%s'" % ingest_time, rollup_id] + [entity_id for _, entity_id in scope]
    columns += ["VALUE1", "VALUE2", "COMPUTED_VALUE"]
    values += [count_of_true_values, count_of_any_values, computed_value]
    if with_oml_level:
        columns.append("OML_LEVEL")
        values.append(get_oml_level(kpi_id, rollup_id, computed_value))

    DBHelper.run_query("INSERT INTO METRICS_SUMMARY(%s) VALUES(%s)"
                       % (", ".join(columns), ", ".join("%s" % value for value in values)))


def generate_summary_ratio_of_true_vs_non_true_entries_in_single_series(ingest_time, binary_tsr_def_id, kpi_id,
                                                                        generate_service_rollup=False,
                                                                        generate_repo_rollup=False):
    """Summarize a binary series as the ratio of "true" samples to all samples.

    A sample counts as true when its VALUE is greater than 0. For the given
    ingest batch one METRICS_SUMMARY row is written per rollup entity that has
    samples: company-wide ("Twitch"), per BU and per team, each with its OML
    level. Per-service and per-repo rows are optional and are stored without an
    OML level. Entities without samples produce no row.

    :param ingest_time: ingest batch timestamp string to summarize.
    :param binary_tsr_def_id: TIME_SERIES_DEF ID of the binary series.
    :param kpi_id: KPI ID the summary rows are recorded under.
    :param generate_service_rollup: also write per-service rows when true.
    :param generate_repo_rollup: also write per-repo rows when true.
    """
    # Step 1/5: generate summary for rollup level 1 (per company-wide, ie Twitch)
    twitch_rollup_id = DBHelper.get_rollup_id("Twitch")
    data = DBHelper.run_query("SELECT SUM(TSR1.VALUE>0),COUNT(TSR1.VALUE) FROM TIME_SERIES_RAW TSR1 "
                              "WHERE TSR1.INGEST_BATCH='%s' AND TSR1.TIME_SERIES_DEF_ID=%s"
                              % (ingest_time, binary_tsr_def_id))
    # No GROUP BY here, so a row comes back even for an empty batch; the helper
    # skips the (NULL, 0) case.
    if len(data) > 0:
        _store_true_ratio_summary(kpi_id, ingest_time, twitch_rollup_id, [], data[0][0], data[0][1])

    # The BU and team ID lists are the same for every remaining step; read them once.
    bu_ids = DBHelper.run_query_flat_results("SELECT ID FROM OML_BU")

    # Step 2/5: generate summary for rollup level 2 (per BU)
    bu_rollup_id = DBHelper.get_rollup_id("BU")
    for bu_id in bu_ids:
        data = DBHelper.run_query("SELECT TSR1.OML_BU_ID, SUM(TSR1.VALUE>0),COUNT(TSR1.VALUE) "
                                  "FROM TIME_SERIES_RAW TSR1 WHERE TSR1.INGEST_BATCH='%s' AND TSR1.TIME_SERIES_DEF_ID=%s "
                                  "AND TSR1.OML_BU_ID=%s GROUP BY TSR1.OML_BU_ID"
                                  % (ingest_time, binary_tsr_def_id, bu_id))
        if len(data) > 0:
            _store_true_ratio_summary(kpi_id, ingest_time, bu_rollup_id, [("OML_BU_ID", bu_id)],
                                      data[0][1], data[0][2])

    bu_team_pairs = []
    for bu_id in bu_ids:
        for team_id in DBHelper.run_query_flat_results("SELECT ID FROM OML_TEAM WHERE OML_BU_ID=%s" % bu_id):
            bu_team_pairs.append((bu_id, team_id))

    # Step 3/5: generate summary for rollup level 3 (per team)
    team_rollup_id = DBHelper.get_rollup_id("Team")
    for bu_id, team_id in bu_team_pairs:
        data = DBHelper.run_query("SELECT TSR1.OML_BU_ID, TSR1.OML_TEAM_ID, SUM(TSR1.VALUE>0),COUNT(TSR1.VALUE) "
                                  "FROM TIME_SERIES_RAW TSR1 WHERE TSR1.INGEST_BATCH='%s' "
                                  "AND TSR1.TIME_SERIES_DEF_ID=%s AND TSR1.OML_BU_ID=%s AND TSR1.OML_TEAM_ID=%s "
                                  "GROUP BY TSR1.OML_BU_ID, TSR1.OML_TEAM_ID"
                                  % (ingest_time, binary_tsr_def_id, bu_id, team_id))
        if len(data) > 0:
            _store_true_ratio_summary(kpi_id, ingest_time, team_rollup_id,
                                      [("OML_BU_ID", bu_id), ("OML_TEAM_ID", team_id)],
                                      data[0][2], data[0][3])

    # Step 4/5: generate summary per service (skipped by default, unless params override)
    if generate_service_rollup:
        service_rollup_id = DBHelper.get_rollup_id("Service")
        for bu_id, team_id in bu_team_pairs:
            service_ids = DBHelper.run_query_flat_results("SELECT ID FROM OML_SERVICE WHERE OML_TEAM_ID=%s" % team_id)
            for service_id in service_ids:
                data = DBHelper.run_query(
                    "SELECT TSR1.OML_BU_ID, TSR1.OML_TEAM_ID, TSR1.OML_SERVICE_ID, SUM(TSR1.VALUE>0), "
                    "COUNT(TSR1.VALUE) FROM TIME_SERIES_RAW TSR1 WHERE TSR1.INGEST_BATCH='%s' "
                    "AND TSR1.TIME_SERIES_DEF_ID=%s AND TSR1.OML_BU_ID=%s AND TSR1.OML_TEAM_ID=%s "
                    "AND TSR1.OML_SERVICE_ID=%s "
                    "GROUP BY TSR1.OML_BU_ID, TSR1.OML_TEAM_ID, TSR1.OML_SERVICE_ID"
                    % (ingest_time, binary_tsr_def_id, bu_id, team_id, service_id))
                if len(data) > 0:
                    # service rows carry no OML_LEVEL
                    _store_true_ratio_summary(kpi_id, ingest_time, service_rollup_id,
                                              [("OML_BU_ID", bu_id), ("OML_TEAM_ID", team_id),
                                               ("OML_SERVICE_ID", service_id)],
                                              data[0][3], data[0][4], with_oml_level=False)

    # Step 5/5: generate summary per repo (skipped by default, unless params override)
    if generate_repo_rollup:
        repo_rollup_id = DBHelper.get_rollup_id("Repo")
        for bu_id, team_id in bu_team_pairs:
            repo_ids = DBHelper.run_query_flat_results("SELECT ID FROM GHE_REPO WHERE OML_TEAM_ID=%s" % team_id)
            for repo_id in repo_ids:
                data = DBHelper.run_query(
                    "SELECT TSR1.OML_BU_ID, TSR1.OML_TEAM_ID, TSR1.GHE_REPO_ID, SUM(TSR1.VALUE>0), "
                    "COUNT(TSR1.VALUE) FROM TIME_SERIES_RAW TSR1 WHERE TSR1.INGEST_BATCH='%s' "
                    "AND TSR1.TIME_SERIES_DEF_ID=%s AND TSR1.OML_BU_ID=%s AND TSR1.OML_TEAM_ID=%s "
                    "AND TSR1.GHE_REPO_ID=%s "
                    "GROUP BY TSR1.OML_BU_ID, TSR1.OML_TEAM_ID, TSR1.GHE_REPO_ID"
                    % (ingest_time, binary_tsr_def_id, bu_id, team_id, repo_id))
                if len(data) > 0:
                    # repo rows carry no OML_LEVEL
                    _store_true_ratio_summary(kpi_id, ingest_time, repo_rollup_id,
                                              [("OML_BU_ID", bu_id), ("OML_TEAM_ID", team_id),
                                               ("GHE_REPO_ID", repo_id)],
                                              data[0][3], data[0][4], with_oml_level=False)

def generate_summary_ratio_of_count_of_within_sla_entries_in_series(ingest_time, actual_value_tsr_def_id,
                                                                    sla_tsr_def_id, kpi_id):
    """Summarize what fraction of services are within their SLA for one ingest batch.

    Samples of the "actual" series are paired with samples of the SLA series
    that share the same ingest batch, BU, team and service. A pair counts as
    within SLA when the actual VALUE is strictly greater than the SLA VALUE.
    One METRICS_SUMMARY row (count within SLA, total count, ratio, OML level)
    is written company-wide ("Twitch"), per BU and per team. Scopes without
    any paired samples produce no row. No per-service row is generated.

    :param ingest_time: ingest batch timestamp string to summarize.
    :param actual_value_tsr_def_id: TIME_SERIES_DEF ID of the measured values.
    :param sla_tsr_def_id: TIME_SERIES_DEF ID of the SLA targets.
    :param kpi_id: KPI ID the summary rows are recorded under.
    """
    # NOTE(review): a value exactly equal to its SLA is counted as NOT within
    # SLA because the comparison is ">" - confirm this is intended.

    # Step 1/3: generate summary for rollup level 1 (per company-wide, ie Twitch)
    twitch_rollup_id = DBHelper.get_rollup_id("Twitch")
    data = DBHelper.run_query("SELECT SUM(TSR1.VALUE>TSR2.VALUE),COUNT(TSR2.VALUE) FROM TIME_SERIES_RAW TSR1 "
                              "JOIN TIME_SERIES_RAW TSR2 ON (TSR1.INGEST_BATCH=TSR2.INGEST_BATCH "
                              "AND TSR1.OML_BU_ID=TSR2.OML_BU_ID AND TSR1.OML_TEAM_ID=TSR2.OML_TEAM_ID "
                              "AND TSR1.OML_SERVICE_ID=TSR2.OML_SERVICE_ID AND TSR1.TIME_SERIES_DEF_ID=%s "
                              "AND TSR2.TIME_SERIES_DEF_ID=%s AND TSR1.INGEST_BATCH='%s')"
                              % (actual_value_tsr_def_id, sla_tsr_def_id, ingest_time))
    # Without GROUP BY the aggregate returns a row even when nothing was paired:
    # SUM is NULL and COUNT is 0. Requiring a non-zero count avoids dividing
    # None by zero on an empty batch.
    if len(data) > 0 and data[0][1]:
        in_sla_count = data[0][0]
        total_count = data[0][1]
        in_sla_ratio = in_sla_count / total_count
        level = get_oml_level(kpi_id, twitch_rollup_id, in_sla_ratio)

        DBHelper.run_query("INSERT INTO METRICS_SUMMARY(KPI_ID, INGEST_BATCH, ROLLUP_ID, "
                           "VALUE1, VALUE2, COMPUTED_VALUE, OML_LEVEL) VALUES(%s, '%s', %s, %s, %s, %s, %s)"
                           % (kpi_id, ingest_time, twitch_rollup_id, in_sla_count, total_count,
                              in_sla_ratio, level))

    # Step 2/3: generate summary for rollup level 2 (per BU)
    bu_rollup_id = DBHelper.get_rollup_id("BU")
    bu_ids = DBHelper.run_query_flat_results("SELECT ID FROM OML_BU")
    for bu_id in bu_ids:
        data = DBHelper.run_query("SELECT TSR1.OML_BU_ID, SUM(TSR1.VALUE>TSR2.VALUE),COUNT(TSR2.VALUE) "
                                  "FROM TIME_SERIES_RAW TSR1 JOIN TIME_SERIES_RAW TSR2 "
                                  "ON (TSR1.INGEST_BATCH=TSR2.INGEST_BATCH AND TSR1.OML_BU_ID=TSR2.OML_BU_ID "
                                  "AND TSR1.OML_TEAM_ID=TSR2.OML_TEAM_ID AND TSR1.OML_SERVICE_ID=TSR2.OML_SERVICE_ID "
                                  "AND TSR1.TIME_SERIES_DEF_ID=%s AND TSR2.TIME_SERIES_DEF_ID=%s "
                                  "AND TSR1.OML_BU_ID=%s AND TSR1.INGEST_BATCH='%s') GROUP BY TSR1.OML_BU_ID"
                                  % (actual_value_tsr_def_id, sla_tsr_def_id, bu_id, ingest_time))

        # COUNT() ignores NULL values, so a group can still report a zero total
        if len(data) > 0 and data[0][2]:
            in_sla_count = data[0][1]
            total_count = data[0][2]
            in_sla_ratio = in_sla_count / total_count
            level = get_oml_level(kpi_id, bu_rollup_id, in_sla_ratio)

            DBHelper.run_query(
                "INSERT INTO METRICS_SUMMARY(KPI_ID, INGEST_BATCH, ROLLUP_ID, OML_BU_ID, VALUE1, VALUE2, COMPUTED_VALUE, OML_LEVEL) "
                "VALUES(%s, '%s', %s, %s, %s, %s, %s, %s)" % (
                    kpi_id, ingest_time, bu_rollup_id, bu_id, in_sla_count, total_count, in_sla_ratio, level))

    # Step 3/3: generate summary for rollup level 3 (per team)
    team_rollup_id = DBHelper.get_rollup_id("Team")
    for bu_id in bu_ids:
        team_ids = DBHelper.run_query_flat_results("SELECT ID FROM OML_TEAM WHERE OML_BU_ID=%s" % bu_id)
        for team_id in team_ids:
            data = DBHelper.run_query("SELECT TSR1.OML_BU_ID, TSR1.OML_TEAM_ID, SUM(TSR1.VALUE>TSR2.VALUE), "
                                      "COUNT(TSR2.VALUE) FROM TIME_SERIES_RAW TSR1 JOIN TIME_SERIES_RAW TSR2 "
                                      "ON (TSR1.INGEST_BATCH=TSR2.INGEST_BATCH AND TSR1.OML_BU_ID=TSR2.OML_BU_ID "
                                      "AND TSR1.OML_TEAM_ID=TSR2.OML_TEAM_ID "
                                      "AND TSR1.OML_SERVICE_ID=TSR2.OML_SERVICE_ID AND TSR1.TIME_SERIES_DEF_ID=%s "
                                      "AND TSR2.TIME_SERIES_DEF_ID=%s AND TSR1.OML_BU_ID=%s AND TSR1.OML_TEAM_ID=%s "
                                      "AND TSR1.INGEST_BATCH='%s') GROUP BY TSR1.OML_BU_ID, TSR1.OML_TEAM_ID"
                                      % (actual_value_tsr_def_id, sla_tsr_def_id, bu_id, team_id, ingest_time))
            if len(data) > 0 and data[0][3]:
                in_sla_count = data[0][2]
                total_count = data[0][3]
                in_sla_ratio = in_sla_count / total_count
                level = get_oml_level(kpi_id, team_rollup_id, in_sla_ratio)

                DBHelper.run_query("INSERT INTO METRICS_SUMMARY"
                                   "(KPI_ID, INGEST_BATCH, ROLLUP_ID, OML_BU_ID, OML_TEAM_ID, VALUE1, VALUE2, COMPUTED_VALUE, OML_LEVEL) "
                                   "VALUES(%s, '%s', %s, %s, %s, %s, %s, %s, %s)"
                                   % (kpi_id, ingest_time, team_rollup_id, bu_id, team_id, in_sla_count,
                                      total_count, in_sla_ratio, level))


def _store_ratio_of_sums_summary(ingest_time, value1_tsr_def_id, value2_tsr_def_id, kpi_id, rollup_id, scope,
                                 fallback_computed_value_when_denominator_is_zero):
    """Insert one METRICS_SUMMARY row with SUM(series 1) / SUM(series 2) for one rollup scope.

    :param scope: ordered list of (column name, ID) pairs that restrict the
        TIME_SERIES_RAW rows and identify the rollup entity in METRICS_SUMMARY;
        empty for the company-wide rollup.
    """
    scope_filter = "".join(" AND %s=%s" % (column, entity_id) for column, entity_id in scope)

    sums = []
    for tsr_def_id in (value1_tsr_def_id, value2_tsr_def_id):
        result = DBHelper.run_query_flat_results(
            ("SELECT SUM(VALUE) FROM TIME_SERIES_RAW WHERE TIME_SERIES_DEF_ID=%s AND INGEST_BATCH='%s'"
             % (tsr_def_id, ingest_time)) + scope_filter)
        # SUM() over zero rows is NULL. Without samples there is nothing to
        # summarize, and None can neither be compared, divided nor inserted.
        if len(result) == 0 or result[0] is None:
            return
        sums.append(result[0])

    value1, value2 = sums
    if value2 > 0:
        value3 = float(value1 / value2)
    else:
        value3 = fallback_computed_value_when_denominator_is_zero

    level = get_oml_level(kpi_id, rollup_id, value3)
    columns = (["KPI_ID", "INGEST_BATCH", "ROLLUP_ID"] + [column for column, _ in scope]
               + ["VALUE1", "VALUE2", "COMPUTED_VALUE", "OML_LEVEL"])
    values = ([kpi_id, "'%s'" % ingest_time, rollup_id] + [entity_id for _, entity_id in scope]
              + [value1, value2, value3, level])
    DBHelper.run_query("INSERT INTO METRICS_SUMMARY(%s) VALUES(%s)"
                       % (", ".join(columns), ", ".join("%s" % value for value in values)))


def generate_summary_ratio_of_2_independent_data_series(ingest_time, value1_tsr_def_id, value2_tsr_def_id, kpi_id,
                                                        fallback_computed_value_when_denominator_is_zero,
                                                        generate_summary_service_level=False,
                                                        generate_summary_repo_level=False):
    """Summarize two series as the ratio of their sums for one ingest batch.

    For each rollup entity the samples of series 1 and series 2 are summed
    separately and ``SUM(series 1) / SUM(series 2)`` is stored in
    METRICS_SUMMARY together with both sums and the resulting OML level. Rows
    are written company-wide ("Twitch"), per BU and per team, and optionally
    per repo. Entities with no samples in either series produce no row.

    :param ingest_time: ingest batch timestamp string to summarize.
    :param value1_tsr_def_id: TIME_SERIES_DEF ID of the numerator series.
    :param value2_tsr_def_id: TIME_SERIES_DEF ID of the denominator series.
    :param kpi_id: KPI ID the summary rows are recorded under.
    :param fallback_computed_value_when_denominator_is_zero: ratio to store when
        the denominator sum is not greater than zero.
    :param generate_summary_service_level: accepted but currently not used.
    :param generate_summary_repo_level: also write per-repo rows when true.
    """
    fallback = fallback_computed_value_when_denominator_is_zero

    # Step 1/3: generate summary for rollup level 1 (per company-wide, ie Twitch)
    twitch_rollup_id = DBHelper.get_rollup_id("Twitch")
    _store_ratio_of_sums_summary(ingest_time, value1_tsr_def_id, value2_tsr_def_id, kpi_id,
                                 twitch_rollup_id, [], fallback)

    # Step 2/3: generate summary for rollup level 2 (per BU)
    bu_rollup_id = DBHelper.get_rollup_id("BU")
    bu_ids = DBHelper.run_query_flat_results("SELECT ID FROM OML_BU")
    for bu_id in bu_ids:
        _store_ratio_of_sums_summary(ingest_time, value1_tsr_def_id, value2_tsr_def_id, kpi_id,
                                     bu_rollup_id, [("OML_BU_ID", bu_id)], fallback)

    # The (BU, team) pairs are shared by the team and repo rollups; read them once.
    bu_team_pairs = []
    for bu_id in bu_ids:
        for team_id in DBHelper.run_query_flat_results("SELECT ID FROM OML_TEAM WHERE OML_BU_ID=%s" % bu_id):
            bu_team_pairs.append((bu_id, team_id))

    # Step 3/3: generate summary for rollup level 3 (per team)
    team_rollup_id = DBHelper.get_rollup_id("Team")
    for bu_id, team_id in bu_team_pairs:
        _store_ratio_of_sums_summary(ingest_time, value1_tsr_def_id, value2_tsr_def_id, kpi_id,
                                     team_rollup_id, [("OML_BU_ID", bu_id), ("OML_TEAM_ID", team_id)],
                                     fallback)

    # optional: generate summary for rollup level 5 (per repo)
    if generate_summary_repo_level:
        repo_rollup_id = DBHelper.get_rollup_id("Repo")
        for bu_id, team_id in bu_team_pairs:
            repo_ids = DBHelper.run_query_flat_results("SELECT ID FROM GHE_REPO WHERE OML_TEAM_ID=%s" % team_id)
            for repo_id in repo_ids:
                _store_ratio_of_sums_summary(ingest_time, value1_tsr_def_id, value2_tsr_def_id, kpi_id,
                                             repo_rollup_id,
                                             [("OML_BU_ID", bu_id), ("OML_TEAM_ID", team_id),
                                              ("GHE_REPO_ID", repo_id)],
                                             fallback)

def ingest_ri(inherit, override):
    """
    Ingest the 'RI-ALL' / 'RI-CLOSED' series and generate the RI-SLA summary rows.

    :param inherit: accepted so every ingest_* function shares one call signature; not read here
    :param override: accepted so every ingest_* function shares one call signature; not read here
    :return: None
    """
    all_key, closed_key = 'RI-ALL', 'RI-CLOSED'
    batch = low_level_created_resolved_jira_query(all_key, closed_key, 'ri')

    # Look up the identifiers needed to write entries in METRICS_SUMMARY
    all_series_id = DBHelper.get_time_series_def_id(all_key)
    closed_series_id = DBHelper.get_time_series_def_id(closed_key)
    kpi_rows = DBHelper.run_query_flat_results("SELECT ID FROM KPI WHERE `KEY`='RI-SLA'")
    sla_kpi_id = kpi_rows[0]

    # Summary is closed / all. The trailing 1 makes "percent resolved RIs" 100% when the total # of RIs is 0
    print("Generating summary metrics")
    generate_summary_ratio_of_2_independent_data_series(batch, closed_series_id, all_series_id, sla_kpi_id, 1)


def ingest_bugs(inherit, override):
    """
    Ingest the 'S123ALL' / 'S123CLOSED' series and generate the BUGS-SLA summary rows.

    :param inherit: accepted so every ingest_* function shares one call signature; not read here
    :param override: accepted so every ingest_* function shares one call signature; not read here
    :return: None
    """
    all_key, closed_key = 'S123ALL', 'S123CLOSED'
    batch = low_level_created_resolved_jira_query(all_key, closed_key, 'bugs')

    # Look up the identifiers needed to write entries in METRICS_SUMMARY
    all_series_id = DBHelper.get_time_series_def_id(all_key)
    closed_series_id = DBHelper.get_time_series_def_id(closed_key)
    kpi_rows = DBHelper.run_query_flat_results("SELECT ID FROM KPI WHERE `KEY`='BUGS-SLA'")
    sla_kpi_id = kpi_rows[0]

    # Summary is closed / all. The trailing 1 makes "percent bugs resolved" 100% when the # of bugs is 0
    print("Generating summary metrics")
    generate_summary_ratio_of_2_independent_data_series(batch, closed_series_id, all_series_id, sla_kpi_id, 1)


def ingest_codecov(inherit, override):
    """
    Ingest per-repo code coverage line counts and generate the CODECOV summary rows.

    Every repo returned by OrgHelper is queried in parallel through OMLCodeCov.get_repo_codecov. For each
    repo that yields a result, two raw samples are stored in the same ingest batch: CC-LOC-ALL (the 'n'
    field of the result) and CC-LOC-HIT (the 'h' field). The number of rows written is then validated
    against the number of repos ingested, and the hit/all ratio summary is generated.

    Ingestion is best effort per repo: a failure for one repo is reported and the batch continues.

    :param inherit: accepted so every ingest_* function shares one call signature; not read here
    :param override: accepted so every ingest_* function shares one call signature; not read here
    :return: None
    """
    cc = OMLCodeCov()
    loc_all_key = "CC-LOC-ALL"
    loc_hit_key = "CC-LOC-HIT"
    loc_all_id = DBHelper.run_query("SELECT ID FROM TIME_SERIES_DEF WHERE `KEY`='%s'" % loc_all_key)[0][0]
    loc_hit_id = DBHelper.run_query("SELECT ID FROM TIME_SERIES_DEF WHERE `KEY`='%s'" % loc_hit_key)[0][0]

    ingest_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    count = 0
    print("Starting Ingest Batch %s..." % ingest_time)
    repo_list = []
    repo_team_bu = {}  # repo -> (bu, team)
    start = time.time()

    for oml_bu in OrgHelper.ref_BUs():
        for oml_team in OrgHelper.ref_teams_in_ref_bu(oml_bu):
            for full_repo_name in OrgHelper.ghe_repos_in_ref_team(oml_team):
                repo_team_bu[full_repo_name] = (oml_bu, oml_team)
                repo_list.append(full_repo_name)

    pool_outputs = run_parallel(cc.get_repo_codecov, repo_list, 10)

    for per_repo_result in pool_outputs:
        if per_repo_result is None:
            continue
        full_repo_name = per_repo_result['repo']
        loc_hit = per_repo_result['h']
        loc_total = per_repo_result['n']
        oml_bu, oml_team = repo_team_bu[full_repo_name]
        try:
            DBHelper.run_query(INSERT_RAW_REPO % (loc_all_id, ingest_time, loc_total, oml_bu, oml_team, full_repo_name))
            DBHelper.run_query(INSERT_RAW_REPO % (loc_hit_id, ingest_time, loc_hit, oml_bu, oml_team, full_repo_name))
            count += 1
            # A repo with no measured lines is reported as 0% (same convention as the summary below) instead
            # of raising ZeroDivisionError.
            coverage_percent = 100 * loc_hit / loc_total if loc_total else 0
            print("%s: %0.2f" % (full_repo_name, coverage_percent))
        except Exception as e:
            # Best effort: keep going with the other repos, but make the failure visible so that a
            # validation mismatch reported below can be traced back to its cause.
            print("Exception ingesting coverage data for %s: %s" % (full_repo_name, e))

    print("Done ingesting in %d seconds" % (time.time() - start))

    # validate
    for tsr_def_id, key in [(loc_all_id, loc_all_key), (loc_hit_id, loc_hit_key)]:
        actual = DBHelper.run_query("SELECT COUNT(*) FROM TIME_SERIES_RAW "
                                    "WHERE TIME_SERIES_DEF_ID=%s AND INGEST_BATCH='%s'"
                                    % (tsr_def_id, ingest_time))[0][0]
        if actual == count:
            print("Successfully ingested %d entries for %s" % (actual, key))
        else:
            print("WARNING: Ingestion failure for %s. Expected %d. Actual %d." % (key, count, actual))

    kpi_id = DBHelper.run_query_flat_results("SELECT ID FROM KPI WHERE `KEY`='CODECOV'")[0]
    # Generate summary data. Set "code coverage percent" value to 0% if total LOC = 0 (a repo must have source code)
    print("Generating summary metrics")
    generate_summary_ratio_of_2_independent_data_series(ingest_time, loc_hit_id, loc_all_id, kpi_id, 0, False, True)


def ingest_autoenv(inherit, override=None):
    """
    Ingest the 'AEC' series for a new batch and generate its summary rows.

    As of 10/12/2017 this data is not in any datastore, so samples come from two places: values copied
    forward from the most recent batch (when ``inherit`` is truthy) and values entered manually through
    the commandline --override flag.

    :param inherit: when truthy, every service gets the value it had in the latest prior batch; a service
                    with no prior value is stored as -1
    :param override: optional list of ``<BU>/<TEAM>/<SERVICE>=<1 or 0 or -1>`` strings; an entry whose
                     path does not have exactly three parts is stored at team level
    :return: None
    """
    aec_series_id = DBHelper.run_query("SELECT ID FROM TIME_SERIES_DEF WHERE `KEY`='AEC'")[0][0]
    ingest_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    print("Starting Ingest Batch %s..." % ingest_time)
    start = time.time()

    # Find the batch to inherit from (only when inheriting was requested)
    latest_batch = None
    if inherit:
        latest_batch = DBHelper.run_query("SELECT MAX(INGEST_BATCH) FROM TIME_SERIES_RAW "
                                          "WHERE TIME_SERIES_DEF_ID=%s" % aec_series_id)[0][0]
        if latest_batch is None:
            print("A prior ingestion does NOT exist. NOT inheriting any metric samples.")

    # Copy forward the previous sample of every known service
    if latest_batch is not None:
        prev_ingest_time = latest_batch.strftime('%Y-%m-%d %H:%M:%S')
        previous_value_sql = ("SELECT VALUE FROM TIME_SERIES_RAW TSR "
                              "JOIN OML_SERVICE OS ON OS.ID=TSR.OML_SERVICE_ID "
                              "JOIN OML_TEAM OT ON OT.ID=TSR.OML_TEAM_ID "
                              "JOIN OML_BU OB ON OB.ID=TSR.OML_BU_ID "
                              "WHERE OB.NAME='%s' AND OT.NAME='%s' AND OS.NAME='%s' "
                              "AND TSR.INGEST_BATCH='%s' AND TSR.TIME_SERIES_DEF_ID=%s")

        for oml_bu in OrgHelper.ref_BUs():
            for oml_team in OrgHelper.ref_teams_in_ref_bu(oml_bu):
                for oml_service in OrgHelper.ref_services_in_ref_team(oml_bu, oml_team):
                    previous = DBHelper.run_query(previous_value_sql % (oml_bu, oml_team, oml_service,
                                                                        prev_ingest_time, aec_series_id))
                    if len(previous) == 0:
                        print("A prior ingestion for %s does NOT exist. Default to Unknown." % oml_service)
                        DBHelper.run_query(INSERT_RAW_SERVICE % (aec_series_id, ingest_time, -1, oml_bu,
                                                                 oml_team, oml_service))
                        continue

                    DBHelper.run_query(INSERT_RAW_SERVICE % (aec_series_id, ingest_time, previous[0][0], oml_bu,
                                                             oml_team, oml_service))
                    print("Ingested inherited data for %s" % oml_service)

    # Process override values. Individual format is <BU>/<TEAM>/<SERVICE>=<1 or 0 or -1>
    if override is not None:
        for entry in override:
            service_path, requested_state = entry.split("=")
            path_parts = service_path.split("/")
            oml_bu, oml_team = path_parts[0], path_parts[1]
            print("Overriding sample value for %s to %s" % (service_path, requested_state))
            if len(path_parts) == 3:
                insert_sql = INSERT_RAW_SERVICE % (aec_series_id, ingest_time, requested_state,
                                                   oml_bu.strip(), oml_team.strip(), path_parts[2].strip())
            else:
                insert_sql = INSERT_RAW_TEAM % (aec_series_id, ingest_time, requested_state,
                                                oml_bu.strip(), oml_team.strip())
            DBHelper.run_query(insert_sql)

    print("Done ingesting in %d seconds" % (time.time() - start))

    kpi_id = DBHelper.run_query_flat_results("SELECT ID FROM KPI WHERE `KEY`='AEC'")[0]
    print("Generating summary metrics")
    generate_summary_ratio_of_true_vs_non_true_entries_in_single_series(ingest_time, aec_series_id, kpi_id,
                                                                        generate_service_rollup=True)


def find_repo_related_to_jenkins_job(jenkins_job_name, list_of_known_repos):
    """
    Find the first known repo whose dash-separated form occurs in the lower-cased jenkins job name.

    :param jenkins_job_name: jenkins job name
    :param list_of_known_repos: repo names to try, in order; each '/' is turned into '-' before matching
    :return: the first matching repo name (as given in the list), or None if nothing matches
    """
    matching_repos = (candidate for candidate in list_of_known_repos
                      if candidate.replace('/', '-') in jenkins_job_name.lower())
    return next(matching_repos, None)


def is_jenkins_job_blacklisted(jenkins_job_name, blacklist, whitelist):
    """
    Decide whether a jenkins job must be left out when counting towards the unit test metric.

    Patterns are plain substrings of the job name. A whitelist hit always wins over a blacklist hit.

    :param jenkins_job_name: jenkins job name
    :param blacklist: substrings that mark a job as blacklisted
    :param whitelist: substrings that exempt a job from the blacklist (precedence over blacklist)
    :return: True if job should be blacklisted
    """
    if any(allowed in jenkins_job_name for allowed in whitelist):
        return False

    return any(banned in jenkins_job_name for banned in blacklist)


def populate_build_count_map(all_repos, jenkins_build_names, build_count_map, graphite_search, blacklist, whitelist):
    """
    Queries Graphite and populate the provided dictionary with a mapping of jenkins job name to its number of builds.

    Only jobs that can be related to a known repo and that are not blacklisted are queried. For each
    returned series the build count is the difference between its last and its first non-null datapoint
    value; a series with fewer than two non-null values therefore contributes 0. Counts that resolve to
    the same map key are added together.

    :param all_repos: list of all known repos
    :param jenkins_build_names: list of jenkins build names (dicts whose 'text' entry is the job name)
    :param build_count_map: target map, updated in place
    :param graphite_search: graphite search prefix
    :param blacklist: blacklist
    :param whitelist: whitelist
    :return: None
    """
    graphite = OMLGraphite()
    param_list = []
    for entry in jenkins_build_names:
        jenkins_job_name = entry['text']
        related_repo = find_repo_related_to_jenkins_job(jenkins_job_name, all_repos)
        if related_repo is None or is_jenkins_job_blacklisted(jenkins_job_name, blacklist, whitelist):
            continue

        # NOTE(review): the 7 is presumably the rolling 7-day window described in ingest_unittest - confirm
        param_list.append(("%s.%s.sum" % (graphite_search, jenkins_job_name), 7))

    graphite_outputs = run_parallel(graphite.get_datapoints, param_list, 10)
    for datapoints_results in graphite_outputs:
        if not datapoints_results:
            continue

        series = datapoints_results[0]
        metric_name = series['target']
        # The map key is the second-to-last metric path element minus its first dash-separated token
        jenkins_job_name = '-'.join(metric_name.split('.')[-2].split('-')[1:])

        # Unfortunately, Graphite doesn't respect "noNullPoints: True", so we need to find non-null range ourselves.
        # Collecting the non-null values also covers index 0 when looking for the last one; the previous
        # backward scan skipped it and then subtracted from a null datapoint when only the first was set.
        non_null_values = [point[0] for point in series['datapoints'] if point[0] is not None]
        if non_null_values:
            per_entry_count = non_null_values[-1] - non_null_values[0]
        else:
            per_entry_count = 0

        build_count_map[jenkins_job_name] = build_count_map.get(jenkins_job_name, 0) + per_entry_count

        print("\t%s: %d" % (jenkins_job_name, build_count_map[jenkins_job_name]))


def increment(target_dict, key, value):
    """
    Add ``value`` to the entry stored under ``key``, creating the entry when it is missing.

    :param target_dict: dictionary to update in place
    :param key: entry to update
    :param value: amount to add; becomes the stored value when the key is new
    :return: None
    """
    if key not in target_dict:
        target_dict[key] = value
        return

    target_dict[key] = target_dict[key] + value


def ingest_unittest(inherit, override=None):
    """
    To get ratio of "test" builds to "build" builds, keep in mind that some build builds also run tests!

    The algorithm as follows (all queries are in a rolling 7-day window)
    1) Get all known build builds (from Graphite)
    2) Get all known test builds (from Graphite -- which uses assumption that *-test jenkins builds are test builds)
    3) Get builds that have test results (from RPS)
    4) Get builds that have code coverage results (a build that has code coverage must have been unit tested) (from Codecov)
    5) Determine which of the builds from step #1 are also test builds based on results from #3

    Finally, unit test ratio = test builds:build builds

    Per repo with at least one build, two raw samples are stored in the same ingest batch ('BUILD-ALL'
    and 'BUILD-UT') and the UNITTEST summary is generated from them.

    :param inherit: accepted so every ingest_* function shares one call signature; not read here
    :param override: accepted so every ingest_* function shares one call signature; not read here
    :return: None
    """
    all_builds_id = DBHelper.run_query("SELECT ID FROM TIME_SERIES_DEF WHERE `KEY`='BUILD-ALL'")[0][0]
    unittested_id = DBHelper.run_query("SELECT ID FROM TIME_SERIES_DEF WHERE `KEY`='BUILD-UT'")[0][0]

    all_repos_known_to_oml = []
    repo_team_bu = {}  # repo -> (bu, team)
    for oml_bu in OrgHelper.ref_BUs():
        for oml_team in OrgHelper.ref_teams_in_ref_bu(oml_bu):
            for full_repo_name in OrgHelper.ghe_repos_in_ref_team(oml_team):
                repo_team_bu[full_repo_name] = (oml_bu, oml_team)
                all_repos_known_to_oml.append(full_repo_name)

    all_builds_counts = {}
    unittested_builds_counts = {}

    ingest_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    print("Starting Ingest Batch %s..." % ingest_time)

    start = time.time()

    graphite = OMLGraphite()
    builds_names = graphite.get_metrics("stats.counters.devtools.jenkins.builds.*")
    tests_names = graphite.get_metrics("stats.counters.devtools.jenkins.tests.*")
    print("Build Builds (Source=Graphite):")
    populate_build_count_map(all_repos_known_to_oml, builds_names, all_builds_counts,
                             "stats.counters.devtools.jenkins.builds",
                             ["pr_builder", "-deploy", "smoca", "TEMPLATE", "test", "coverage", "env_creator"], [])
    print("Test Builds (Source=Graphite):")
    populate_build_count_map(all_repos_known_to_oml, tests_names, unittested_builds_counts,
                             "stats.counters.devtools.jenkins.tests",
                             ["pr_builder", "-deploy", "smoca", "TEMPLATE", "build", "env_creator"],
                             ["test"])
    print("Done requesting data from Graphite")

    builds_with_test_results_according_to_rps = OMLRPS().get_builds_with_test_results()
    print("Test Builds (Source=RPS):")
    for entry in builds_with_test_results_according_to_rps:
        print("\t%s: %d" % (entry, builds_with_test_results_according_to_rps[entry]))
    print("Done requesting data from RPS")

    cc = OMLCodeCov()
    codecov_outputs = run_parallel(cc.get_builds_codecov, all_repos_known_to_oml, 10)
    print("Repo Commits with Tests (Source=Codecov):")
    codecoveraged_builds = {}  # repo -> count
    for per_repo_result in codecov_outputs:
        if per_repo_result is None:
            continue
        owner_name = per_repo_result['owner']['username']
        short_repo_name = per_repo_result['repo']['name']
        per_repo_cc_build_count = sum(commit['count'] for commit in per_repo_result['commits'])
        print("\t%s/%s: %d" % (owner_name, short_repo_name, per_repo_cc_build_count))
        # Key by "<owner>/<repo>" so the count lines up with the repo names used in grand_total_builds;
        # keyed by the bare repo name it was never picked up. Lower-casing is safe because
        # find_repo_related_to_jenkins_job can only ever return names without upper-case letters.
        full_repo_name = ("%s/%s" % (owner_name, short_repo_name)).lower()
        codecoveraged_builds[full_repo_name] = per_repo_cc_build_count

    print("Done requesting data from Codecov")

    grand_total_builds = {}  # repo -> count
    grand_total_tests = {}  # repo -> count

    for jenkins_job_name in unittested_builds_counts:
        repo_name = find_repo_related_to_jenkins_job(jenkins_job_name, all_repos_known_to_oml)
        if repo_name is None:
            # The map keys are shortened job names and may no longer match any repo
            continue
        increment(grand_total_tests, repo_name, unittested_builds_counts[jenkins_job_name])

    for jenkins_job_name in all_builds_counts:
        repo_name = find_repo_related_to_jenkins_job(jenkins_job_name, all_repos_known_to_oml)
        if repo_name is None:
            # Without this guard a None key reaches repo_team_bu below and aborts the whole batch
            continue
        increment(grand_total_builds, repo_name, all_builds_counts[jenkins_job_name])
        # Also, check if the "build" build has tests. If so, count it as a test build
        if jenkins_job_name in builds_with_test_results_according_to_rps:
            increment(grand_total_tests, repo_name, all_builds_counts[jenkins_job_name])

    for repo_name in codecoveraged_builds:
        increment(grand_total_tests, repo_name, codecoveraged_builds[repo_name])

    ingested_count = 0
    for repo_name in grand_total_builds:
        total_builds = grand_total_builds[repo_name]
        if total_builds > 0:
            oml_bu, oml_team = repo_team_bu[repo_name]
            total_tests = grand_total_tests.get(repo_name, 0)
            try:
                DBHelper.run_query(
                    INSERT_RAW_REPO % (all_builds_id, ingest_time, total_builds, oml_bu, oml_team, repo_name))
                DBHelper.run_query(
                    INSERT_RAW_REPO % (unittested_id, ingest_time, total_tests, oml_bu, oml_team, repo_name))
                print("Ingested unit test metrics for %s : %d/%d (%.2f%%)" % (repo_name, total_tests, total_builds,
                                                                              100*total_tests/total_builds))
                ingested_count += 1
            except Exception as e:
                print("Exception ingesting data for %s: %s" % (repo_name, e))

    print("Done ingesting in %d seconds" % (time.time() - start))

    kpi_id = DBHelper.run_query_flat_results("SELECT ID FROM KPI WHERE `KEY`='UNITTEST'")[0]
    print("Generating summary metrics")
    generate_summary_ratio_of_2_independent_data_series(ingest_time, unittested_id, all_builds_id, kpi_id, 0)


def ingest_integtest(inherit, override=None):
    """
    Ingest the 'INTEGTEST' series and generate its summary rows, including the repo rollup.

    :param inherit: forwarded unchanged to low_level_ingest_repo_based_binary_metric
    :param override: forwarded unchanged to low_level_ingest_repo_based_binary_metric
    :return: None
    """
    metric_key = "INTEGTEST"
    batch = low_level_ingest_repo_based_binary_metric(metric_key, inherit, override)

    series_rows = DBHelper.run_query("SELECT ID FROM TIME_SERIES_DEF WHERE `KEY`='INTEGTEST'")
    series_id = series_rows[0][0]
    kpi_rows = DBHelper.run_query_flat_results("SELECT ID FROM KPI WHERE `KEY`='INTEGTEST'")
    integtest_kpi_id = kpi_rows[0]

    print("Generating summary metrics")
    generate_summary_ratio_of_true_vs_non_true_entries_in_single_series(batch, series_id, integtest_kpi_id,
                                                                        generate_repo_rollup=True)


def ingest_canaries(inherit, override=None):
    """
    Ingest per-repo deployment counts from Graphite and generate the CANARIES summary rows.

    For every org found under 'stats.timers.deploys.deployment.*', the '*prod*' and '*canary*'
    environment series are fetched. Each non-null datapoint counts as one deployment for the repo named
    in the metric path; datapoints of a metric whose name contains 'canary' are additionally counted as
    canary deployments. Two raw samples per repo ('DEPLOY-CAN' and 'DEPLOY-ALL') are stored in the same
    ingest batch. Metrics for repos that are not in GHE_REPO are reported and skipped.

    :param inherit: accepted so every ingest_* function shares one call signature; not read here
    :param override: accepted so every ingest_* function shares one call signature; not read here
    :return: None
    """
    deploy_canary_id = DBHelper.run_query("SELECT ID FROM TIME_SERIES_DEF WHERE `KEY`='DEPLOY-CAN'")[0][0]
    deploy_all_id = DBHelper.run_query("SELECT ID FROM TIME_SERIES_DEF WHERE `KEY`='DEPLOY-ALL'")[0][0]

    # Keep track of metric names. Do not process already processed names (wildcard matches can return the same set
    # of metrics, as there are environments named production-canary, prod-canary, etc
    processed_metric_names = set()

    ingest_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    print("Starting Ingest Batch %s..." % ingest_time)
    start = time.time()

    # Sets give constant-time membership tests inside the loops below
    all_repos = set(DBHelper.run_query_flat_results("SELECT DISTINCT(`NAME`) FROM GHE_REPO"))
    graphite = OMLGraphite()

    g_orgs = graphite.get_metrics('stats.timers.deploys.deployment.*')
    for g_org in g_orgs:
        org = g_org['text']
        # NOTE(review): the 7 is presumably a look-back window in days - confirm against get_datapoints
        org_deploys = run_parallel(graphite.get_datapoints,
                                   [("stats.timers.deploys.deployment.%s.*.*prod*.start.count" % org, 7),
                                    ("stats.timers.deploys.deployment.%s.*.*canary*.start.count" % org, 7)],
                                   10)
        total_count = {}
        canaries_count = {}
        for deploy_set in org_deploys:
            for env_deploys in deploy_set:
                metric_name = env_deploys['target']
                if metric_name in processed_metric_names:
                    continue
                processed_metric_names.add(metric_name)

                full_repo_name = '/'.join(metric_name.split('.')[4:6]).lower()
                if full_repo_name not in all_repos:
                    print("WARNING: Repo %s is not registered in OML" % full_repo_name)
                    # Skip only this metric. Breaking out here used to drop every remaining metric of
                    # the result set, including those of registered repos.
                    continue

                deploy_samples = sum(1 for deploy_sample in env_deploys['datapoints']
                                     if deploy_sample[0] is not None)
                # A registered repo gets an entry (and is ingested) even when it has no samples
                total_count[full_repo_name] = total_count.get(full_repo_name, 0) + deploy_samples
                canaries_count.setdefault(full_repo_name, 0)
                if 'canary' in metric_name:
                    canaries_count[full_repo_name] += deploy_samples

        for full_repo_name in total_count:
            oml_team_id = DBHelper.run_query_flat_results("SELECT OML_TEAM_ID FROM GHE_REPO WHERE `NAME`='%s'"
                                                          % full_repo_name)[0]
            oml_bu_id = DBHelper.run_query_flat_results("SELECT OML_BU_ID FROM OML_TEAM WHERE `ID`=%s"
                                                        % oml_team_id)[0]

            DBHelper.run_query(INSERT_RAW_REPO_OML_ID % (deploy_canary_id, ingest_time, canaries_count[full_repo_name],
                                                         oml_bu_id, oml_team_id, full_repo_name))
            DBHelper.run_query(INSERT_RAW_REPO_OML_ID % (deploy_all_id, ingest_time, total_count[full_repo_name],
                                                         oml_bu_id, oml_team_id, full_repo_name))

            print("Ingested deployments: %s: %d/%d" % (full_repo_name, canaries_count[full_repo_name],
                                                       total_count[full_repo_name]))

    print("Done ingesting in %d seconds" % (time.time() - start))
    kpi_id = DBHelper.run_query_flat_results("SELECT ID FROM KPI WHERE `KEY`='CANARIES'")[0]
    print("Generating summary metrics")
    generate_summary_ratio_of_2_independent_data_series(ingest_time, deploy_canary_id, deploy_all_id, kpi_id, 0)


def ingest_configmgmt(inherit, override=None):
    """
    Ingest per-BU host counts ('CFG-TOTAL' and 'CFG-MANAGED') and generate the CFGMGMT summary rows.

    Entries of the capacity summary that cannot be mapped to a reference BU are ignored. A failure while
    ingesting one BU is reported and does not stop the others.

    :param inherit: accepted so every ingest_* function shares one call signature; not read here
    :param override: accepted so every ingest_* function shares one call signature; not read here
    :return: None
    """
    capacity = OMLCapacity()
    cfg_all_key = "CFG-TOTAL"
    cfg_managed_key = "CFG-MANAGED"
    series_id_sql = "SELECT ID FROM TIME_SERIES_DEF WHERE `KEY`='%s'"
    cfg_all_id = DBHelper.run_query(series_id_sql % cfg_all_key)[0][0]
    cfg_managed_id = DBHelper.run_query(series_id_sql % cfg_managed_key)[0][0]

    ingest_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    ingested_bus = 0
    print("Starting Ingest Batch %s..." % ingest_time)
    start = time.time()

    for summary_entry in capacity.get_config_management_status_summary("bu"):
        oml_bu = OrgHelper.capacity_to_ref_bu(summary_entry['name'])
        if oml_bu is None:
            continue

        try:
            host_total = summary_entry['total_count']
            host_managed = summary_entry['total_config_mgmt_count']
            DBHelper.run_query(INSERT_RAW_BU % (cfg_all_id, ingest_time, host_total, oml_bu))
            DBHelper.run_query(INSERT_RAW_BU % (cfg_managed_id, ingest_time, host_managed, oml_bu))
            print("Ingested data for %s" % oml_bu)
            ingested_bus += 1
        except Exception as e:
            print("Exception ingesting data for %s: %s" % (oml_bu, e))

    print("Done ingesting in %d seconds" % (time.time() - start))

    # validate: each series must have exactly one row per ingested BU in this batch
    row_count_sql = "SELECT COUNT(*) FROM TIME_SERIES_RAW WHERE TIME_SERIES_DEF_ID=%s AND INGEST_BATCH='%s'"
    for series_id, series_key in ((cfg_all_id, cfg_all_key), (cfg_managed_id, cfg_managed_key)):
        actual = DBHelper.run_query(row_count_sql % (series_id, ingest_time))[0][0]
        if actual != ingested_bus:
            print("WARNING: Ingestion failure for %s. Expected %d. Actual %d." % (series_key, ingested_bus, actual))
        else:
            print("Successfully ingested %d entries for %s" % (actual, series_key))

    kpi_id = DBHelper.run_query_flat_results("SELECT ID FROM KPI WHERE `KEY`='CFGMGMT'")[0]
    # Generate summary data. Set "percent hosts with cfg mgmt" 0% if total # of hosts = 0 (there must be hosts)
    print("Generating summary metrics")
    generate_summary_ratio_of_2_independent_data_series(ingest_time, cfg_managed_id, cfg_all_id, kpi_id, 0)


def ingest_alerts(inherit, override=None):
    """
    Ingest per-team incident counts ('INC-ALL-1K' and 'INC-ALRT-1K') and generate the INC-ALERTS summary.

    An incident belongs to a team when the name of its first component equals one of the team's incident
    components. It counts as "has alert" when its customfield_13701 value is "Yes". Teams with no
    matching incident get no raw samples.

    :param inherit: accepted so every ingest_* function shares one call signature; not read here
    :param override: accepted so every ingest_* function shares one call signature; not read here
    :return: None
    """
    inc_all_key = 'INC-ALL-1K'
    inc_alerts_key = 'INC-ALRT-1K'
    series_id_sql = "SELECT ID FROM TIME_SERIES_DEF WHERE `KEY`='%s'"
    inc_all_id = DBHelper.run_query(series_id_sql % inc_all_key)[0][0]
    inc_alerts_id = DBHelper.run_query(series_id_sql % inc_alerts_key)[0][0]

    ingest_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    teams_ingested = 0
    print("Starting Ingest Batch %s..." % ingest_time)
    start = time.time()

    # O(N^2) to find mapping using stored values. No choice, since there's no formal structure in incidents tickets
    all_incidents = IngestHandlers().alerts("all", None, None, None)
    print("Found %d incidents from jira" % len(all_incidents))

    for oml_bu in OrgHelper.ref_BUs():
        for oml_team in OrgHelper.ref_teams_in_ref_bu(oml_bu):
            team_components = OrgHelper.inc_components_in_ref_team(oml_team)
            if len(team_components) == 0:
                continue

            incident_count = 0
            has_alert_count = 0
            for component_name in team_components:
                for incident in all_incidents:
                    fields = incident.fields
                    if len(fields.components) == 0 or fields.components[0].name != component_name:
                        continue
                    incident_count += 1
                    if fields.customfield_13701.value == "Yes":
                        has_alert_count += 1

            # At this point, we've summed up the matching incidents (with and without alerts) for each team
            if incident_count > 0:
                DBHelper.run_query(INSERT_RAW_TEAM % (inc_all_id, ingest_time, incident_count, oml_bu, oml_team))
                DBHelper.run_query(INSERT_RAW_TEAM % (inc_alerts_id, ingest_time, has_alert_count, oml_bu, oml_team))
                teams_ingested += 1

            print("Added to %s/%s : %d alerts/%d total" % (oml_bu, oml_team, has_alert_count, incident_count))

    print("Done ingesting in %d seconds" % (time.time() - start))

    # validate: both series must have exactly one row per ingested team in this batch
    row_count_sql = "SELECT COUNT(*) FROM TIME_SERIES_RAW WHERE TIME_SERIES_DEF_ID=%s AND INGEST_BATCH='%s'"
    actual_alerts = DBHelper.run_query(row_count_sql % (inc_alerts_id, ingest_time))[0][0]
    actual_all = DBHelper.run_query(row_count_sql % (inc_all_id, ingest_time))[0][0]

    if actual_all != teams_ingested:
        print("WARNING: Ingestion failure for %s. Expected %d. Actual %d." % (inc_all_key, teams_ingested, actual_all))
    elif actual_alerts != teams_ingested:
        print("WARNING: Ingestion failure for %s. Expected %d. Actual %d."
              % (inc_alerts_key, teams_ingested, actual_alerts))
    else:
        print("Successfully ingested %d entries for %s and %s" % (actual_all, inc_alerts_key, inc_all_key))

    kpi_id = DBHelper.run_query_flat_results("SELECT ID FROM KPI WHERE `KEY`='INC-ALERTS'")[0]
    # Generate summary data. Set "percent incidents with alerts" to 100% if # of incidents = 0
    print("Generating summary metrics")
    generate_summary_ratio_of_2_independent_data_series(ingest_time, inc_alerts_id, inc_all_id, kpi_id, 1)


def main():
    """
    Command line entry point: one sub-command per entry in METRICS, each dispatching to ingest_<metric>.

    Every sub-command accepts --inherit / --no-inherit (inherit is the default) and --override. The
    selected function is called as ``ingest_<metric>(inherit, override)``.

    Exits through ``parser.error`` (usage message, exit status 2) when no sub-command is given.

    :return: None
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='command')
    for metric in METRICS:
        sub = subparsers.add_parser(metric, help="query OML metrics: %s" % metric)
        # map cli to python function: eg "ri" will run ingest_ri(). A lookup by name in the module
        # globals does the same as eval() here without evaluating a string as code.
        sub.set_defaults(func=globals()["ingest_%s" % metric])
        sub.add_argument('--inherit', dest='inherit', action='store_true',
                         help="Default. Inherit previous metric value if current sample is missing")
        sub.add_argument('--no-inherit', dest='inherit', action='store_false',
                         help="DO NOT inherit previous metric value if current sample is missing")
        sub.set_defaults(inherit=True)
        # NOTE: Ingestion must NEVER change previously ingested data. Ingested data must be immutable.
        #       Override only applies to data in a set batch of ingestion.
        sub.add_argument('--override', nargs='*',
                         help="Set one or more overrides, in the format <metric identifier>=<value>. "
                              "Metric identifier depends on the metric. "
                              "For eg: importtool.py canaries --override: ads/hypeman-ex=1")

    args = parser.parse_args()
    # On Python 3 sub-commands are optional by default, so "func" is missing when none was given.
    # Report that as a usage error instead of failing with AttributeError.
    if getattr(args, "func", None) is None:
        parser.error("a metric is required (choose from: %s)" % ", ".join(METRICS))
    args.func(args.inherit, getattr(args, "override", []))


# Run the command line interface only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()