#!/bin/bash
# -e moved from the shebang into the body: shebang options are silently
# dropped when the script is invoked as `bash stockpile-gc ...`.
set -e

#~
#~ Script for deleting data from Stockpile of those sensors which
#~ were already deleted from Solomon or MegaGraphite metabases
#~
#~ USAGE
#~    ./stockpile-gc <command> <cluster> [<command_opts>]
#~
#~ CLUSTERS
#~    SAS     first production cluster
#~    VLA     second production cluster
#~
#~ COMMANDS
#~    find_gc      find garbage sensor ids in Stockpile and write them into
#~                 given file (opts: output file)
#~
#~    rm_sensors   read sensor ids from given file and delete them from
#~                 Stockpile (opts: input file)
#~
#~    rm_gc        delete garbage sensor ids from Stockpile (combination of
#~                 find_gc and rm_sensors)
#~


# Resolve this script's own directory so sibling Arcadia paths work from any
# CWD. All expansions are quoted so paths containing spaces do not break.
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
ARCADIA_DIR="${SCRIPT_DIR}/../../.."
SOLOMON_DIR="${ARCADIA_DIR}/solomon/j"
# Per-run scratch directory ($$ = this shell's PID) for dumps, logs, results.
DATA_DIR="$(pwd)/stockpile-gc-data-$$"


log() {
    # Write a diagnostic line to stderr. printf is used instead of
    # 'echo "$@"' so arguments that look like echo options ("-n", "-e")
    # are printed literally.
    printf '%s\n' "$*" >&2
}

usage() {
    # Print this script's "#~" help header (top of file) to stderr.
    # "$0" is quoted so a script path with spaces still works.
    sed -nr 's/^#~ ?//p' "$0" >&2
}

build() {
    # Build solomon-tool via ya make; full build output is captured in
    # ${DATA_DIR}/build.log. set -e aborts the script if the build fails.
    log "[-] building ..."
    "${ARCADIA_DIR}/ya" make "${SOLOMON_DIR}/solomon-tool" \
            > "${DATA_DIR}/build.log" 2>&1
    log "[-] building [OK]"
}

run_java() {
    # Launch the solomon-tool JVM (JDK 11, fixed 16 GiB heap, IPv6
    # preferred) and forward all arguments (main class + its args).
    local java_bin="/usr/local/jdk-11/bin/java"
    # '*' is passed to the JVM unexpanded: java does its own classpath
    # wildcard expansion.
    local classpath="${SOLOMON_DIR}/solomon-tool/solomon-tool/*"

    "${java_bin}" \
        -Xms16g -Xmx16g \
        -server -showversion \
        -classpath "${classpath}" \
        -Djava.net.preferIPv6Addresses=true \
        "$@"
}

dump_sensors_from() {
    # Dump sensor ids from one source into ${DATA_DIR}/<source>-<cluster>/;
    # java output is logged to ${DATA_DIR}/<source>-<cluster>.log.
    #   $1 - source name: "stockpile", "metabase" or "graphite"
    #   $2 - cluster id (or graphite instance: bs / gr / ps)
    # Variables are now 'local' — they previously leaked into global scope.
    local source="$1"
    local cluster="$2"
    local dir="${DATA_DIR}/${source}-${cluster}"

    log "[-] dumping sensors from ${source} at ${cluster} ..."
    mkdir -p "${dir}"
    # ${source^} upper-cases the first letter to form the Java class name,
    # e.g. stockpile -> DumpSensorIdsFromStockpile
    run_java "ru.yandex.solomon.tool.cleanup.DumpSensorIdsFrom${source^}" \
            "${cluster}" \
            "${dir}" \
            > "${DATA_DIR}/${source}-${cluster}.log" 2>&1
    log "[-] dumping sensors from ${source} at ${cluster} [OK]"
}

find_garbage() {
    # Compute Stockpile ids present in the storage dump but absent from all
    # metabase dumps, writing them as "<shard>/<id>" lines into ${gc_file}.
    #   $1 - stockpile cluster id
    #   $2 - output file for garbage ids
    # FIX: $2 was previously accepted but ignored — output was hard-wired
    # to ${DATA_DIR}/gc.ids, so a caller-supplied file was never honored.
    local cluster="$1"
    local gc_file="$2"
    local shard_id

    log "[-] finding garbage ..."
    mkdir -p "${DATA_DIR}/gc"
    mkdir -p "${DATA_DIR}/meta"
    rm -f "${gc_file}"

    log "  * merging metabase ids ..."
    # NOTE: paths are interpolated into the parallel command string, so they
    # must not contain whitespace (DATA_DIR is derived from pwd).
    seq 1 4096 | parallel -j32 \
        "sort --unique \
            --output ${DATA_DIR}/meta/{}.ids \
            ${DATA_DIR}/metabase-${cluster}/{}.ids \
            ${DATA_DIR}/graphite-gr/{}.ids \
            ${DATA_DIR}/graphite-bs/{}.ids \
            ${DATA_DIR}/graphite-ps/{}.ids"
    log "  * merging metabase ids [OK]"

    log "  * sorting stockpile ids ..."
    seq 1 4096 | parallel -j32 \
        "sort --unique \
            --output ${DATA_DIR}/stockpile-${cluster}/{}.ids \
            ${DATA_DIR}/stockpile-${cluster}/{}.ids"
    log "  * sorting stockpile ids [OK]"

    log "  * sets subtraction ..."
    # comm -23 keeps lines unique to the (sorted) stockpile dump = garbage.
    seq 1 4096 | parallel -j32 \
        "comm -23 \
            ${DATA_DIR}/stockpile-${cluster}/{}.ids \
            ${DATA_DIR}/meta/{}.ids \
            > ${DATA_DIR}/gc/{}.ids"
    log "  * sets subtraction [OK]"

    log "  * merging into one file ..."
    for shard_id in $(seq 1 4096); do
        # Prefix every id with its shard number: "<shard_id>/<sensor_id>".
        awk "{print \"${shard_id}/\" \$0}" "${DATA_DIR}/gc/${shard_id}.ids" >> "${gc_file}"
    done
    log "  * merging into one file [OK]"

    log "[-] finding garbage [OK]"
}

find_gc() {
    # Dump all sensor-id sources concurrently, then diff them to produce
    # the garbage id list in ${gc_file}.
    #   $1 - stockpile/metabase cluster id
    #   $2 - output file for garbage ids
    local cluster="$1"
    local gc_file="$2"
    local pids=()
    local pid

    dump_sensors_from stockpile "${cluster}" & pids+=("$!")
    dump_sensors_from metabase "${cluster}" & pids+=("$!")
    dump_sensors_from graphite bs & pids+=("$!")
    dump_sensors_from graphite gr & pids+=("$!")
    dump_sensors_from graphite ps & pids+=("$!")

    # FIX: a bare 'wait' discards background-job exit codes even under -e;
    # waiting on each pid makes any failed dump abort the script.
    for pid in "${pids[@]}"; do
        wait "${pid}"
    done

    find_garbage "${cluster}" "${gc_file}"
}

rm_sensors() {
    # Delete the sensor ids listed in ${gc_file} from the given Stockpile
    # cluster; java output goes to ${DATA_DIR}/stockpile-delete.log.
    #   $1 - stockpile cluster id
    #   $2 - file with garbage ids ("<shard>/<id>" per line)
    local cluster="$1"
    local gc_file="$2"

    log "[-] deleting garbage from stockpile ${cluster} ..."
    run_java ru.yandex.solomon.tool.cleanup.DeleteSensorsFromStockpile \
            "${cluster}" \
            "${gc_file}" --force \
            > "${DATA_DIR}/stockpile-delete.log" 2>&1
    # FIX: final log referenced undefined ${sp_cluster}; use ${cluster}.
    log "[-] deleting garbage from stockpile ${cluster} [OK]"
}

main() {
    # Entry point: parse <command> <cluster> [<command_opts>] and dispatch.
    local command cluster gc_file

    if [ $# -lt 2 ]; then
        log "at least two arguments expected"
        usage
        exit 1
    fi

    command="$1"

    case "$2" in
        SAS)
            cluster="PROD_STORAGE_SAS"
            ;;
        VLA)
            cluster="PROD_STORAGE_VLA"
            ;;
        *)
            # FIX: report the user-supplied value "$2"; ${cluster} was
            # still unset on this branch, producing an empty message.
            log "invalid stockpile cluster $2"
            log "known stockpile clusters { SAS | VLA }"
            exit 1
    esac

    # drop <command> and <cluster>, leaving command-specific opts in "$@"
    shift 2

    mkdir --parents "${DATA_DIR}"
    log "using DATA_DIR=${DATA_DIR}"

    build

    case "${command}" in
        find_gc)
            if [ $# -eq 0 ]; then
                gc_file="${DATA_DIR}/gc.ids"
            else
                gc_file="$1"
            fi
            find_gc "${cluster}" "${gc_file}"
            ;;

        rm_sensors)
            if [ $# -eq 0 ]; then
                log "expected one more arg with filename"
                usage
                exit 1
            fi
            gc_file="$1"
            rm_sensors "${cluster}" "${gc_file}"
            ;;

        rm_gc)
            gc_file="${DATA_DIR}/gc.ids"
            find_gc "${cluster}" "${gc_file}"
            rm_sensors "${cluster}" "${gc_file}"
            ;;

        *)
            log "invalid command ${command}"
            usage
            exit 1
    esac
}

# Forward CLI arguments verbatim; "$@" (quoted) preserves word boundaries
# in arguments containing spaces, unlike the previous bare $@.
main "$@"