#!/bin/bash

# Mirror everything the script writes (stdout and stderr) into log.out
# while still passing it through; tee -i ignores interrupt signals.
exec > >(tee -i log.out) 2>&1

# Stop at the first failing command; a pipeline fails when any stage fails.
set -eo pipefail

# Directory that receives every intermediate and final result file.
RES_FOLDER_PATH='./compare_get_raw'
mkdir -p "$RES_FOLDER_PATH"

# Day whose statistics are compared.
DATE='2018-02-13'

# OAuth token sent with the get_raw API requests.
TOKEN='...'
MSPORT='8010'  # Beta running master (mysql)
CHPORT='8011'  # Beta running the branch (clickhouse)

# Levels to compare. The key is the level name: the main loop uses it as the
# ClickHouse product_id and as the suffix of the MySQL table statistics_<key>.
# The value is the model name: the loop uses it for the rosetta_call accessor
# statistics_<value> and for the level= parameter of the get_raw request.
declare -A LEVELS=(
    # Disabled levels, kept for reference:
    #    [context_on_site_stripe]='advnet_context_on_site_stripe'
    #    [context_on_site_marke]='advnet_context_on_site_market'
    #    [context_on_site_mcb]='advnet_context_on_site_mcb'
    #    [search_on_site_market]='advnet_search_on_site_market'
    #    [search_on_site_market_api]='advnet_search_on_site_market_api'
    #    [context_on_site_market_api]='advnet_context_on_site_market_api'
    #    [search_on_site_mcb]='advnet_search_on_site_mcb'
    # NOTE(review): the key 'context_on_site_marke' looks like a typo for
    # 'context_on_site_market' - confirm before re-enabling it.

    # Context
    [context_on_site_rtb]='advnet_context_on_site_rtb'
    [context_on_site_direct]='advnet_context_on_site_direct'
    [context_on_site_adblock]='advnet_context_on_site_adblock'

    # Search
    [search_on_site_premium]='advnet_search_on_site_premium'
    [search_on_site_direct]='advnet_search_on_site_direct'

    # Mobile
    [mobile_app_rtb]='mobile_app_rtb'

    # Video
    [video_an_site_inpage]='video_an_site_inpage'
    [video_an_site_instream]='video_an_site_instream'
)



# see https://github.yandex-team.ru/partner/yandex-du-partner-rtb-sync-db-test/blob/master/partner-db/partnerdb-shrink-db.sh#L9
# Name of the column that holds the page id, per MySQL statistics table.
# Tables that are not listed here fall back to 'campaign_id' in the main loop.
declare -A PageIdFieldNames=(
    # mobile_app tables use context_page_id
    [statistics_mobile_app_page]='context_page_id'
    [statistics_mobile_app_rtb]='context_page_id'

    # every other listed table uses page_id
    [statistics_ssp_context_rtb_page]='page_id'
    [statistics_ssp_context_rtb_rtb]='page_id'
    [statistics_ssp_mobile_app_page]='page_id'
    [statistics_ssp_mobile_app_rtb]='page_id'
    [statistics_turkey_stripe]='page_id'
    [statistics_video_an_site_inpage]='page_id'
    [statistics_video_an_site_inpage_publisher]='page_id'
    [statistics_video_an_site_instream]='page_id'
    [statistics_video_an_site_instream_publisher]='page_id'
    [statistics_video_an_site_page]='page_id'
)


#######################################
# Compare two result files and report how many lines `diff` prints for them.
# Arguments:
#   $1 - path to the aggregated MySQL result file
#   $2 - path to the aggregated ClickHouse result file
#   $3 - exit-on-mismatch flag; currently ignored, the flag is pinned to 0
# Outputs:
#   A timestamp line, then either "Diff - OK" or the number of diff lines
#   followed by the `wc -l` line counts of both files.
# Returns:
#   0 when the comparison ran (whether or not the files differ);
#   1 when diff itself failed (exit status > 1, e.g. a file is missing).
#   Under `set -e` in the caller that non-zero return stops the script.
#######################################
function diff_files () {
    local MYSQL_FILE_PATH=$1
    local CH_FILE_PATH=$2
    local IS_EXIT_ON_ERROR=0; #$3
    # Keep the counter local so the script-level COUNT is not overwritten.
    local COUNT
    local DIFF_STATUS=0

    date +"    %Y/%m/%d %T [8. diff]"

    # pipefail is enabled inside the substitution so that the pipeline status
    # is diff's own status (wc succeeds): 0 = identical, 1 = different,
    # anything greater = diff could not compare the files.
    COUNT=$( set -o pipefail; diff "$MYSQL_FILE_PATH" "$CH_FILE_PATH" | wc -l ) || DIFF_STATUS=$?
    if [[ $DIFF_STATUS -gt 1 ]]; then
        # Without this check a missing file yields zero diff lines and would
        # be reported as "Diff - OK".
        echo "    Diff ERROR: cannot compare '$MYSQL_FILE_PATH' and '$CH_FILE_PATH'" >&2
        return 1
    fi

    if [[ $COUNT -ne 0 ]]; then
        echo "    Diff lines count: $COUNT"
        # Unquoted on purpose: word splitting collapses wc's padding.
        echo "      " $( wc -l "$MYSQL_FILE_PATH" )
        echo "      " $( wc -l "$CH_FILE_PATH"    )
    else
        echo "    Diff - OK"
    fi

    # Inactive while IS_EXIT_ON_ERROR is pinned to 0 above.
    if [[ $IS_EXIT_ON_ERROR -ne 0 && $COUNT -ne 0 ]]; then
        echo "Diff FAIL";
        exit 1
    fi
}

#-----------------------------------------

# Print the day being compared.
echo "Date: $DATE"

# Fetch the page ids of login "rambler-p" from all_pages as one GROUP_CONCAT
# value, ordered by page_id. The main loop interpolates it into the
# IN ( ... ) clauses of its count queries.
# NOTE(review): GROUP_CONCAT output is capped by group_concat_max_len on the
# MySQL side - confirm the list is not silently truncated.
PAGE_IDS=$(
echo '
  SELECT  group_concat( page_id order by page_id)
  FROM    all_pages
  WHERE   login = "rambler-p"
' | mysql_partner2 --server=dev   --   --skip-column-names
);

###### Create the ClickHouse table `statistics` in database `partner` if it is missing
date +"%Y/%m/%d %T [1. create ch table statistics]"
# grep exits non-zero when no line matches; with errexit and pipefail enabled
# that would abort the script, so errexit is suspended around the probe.
set +e
COUNT=$(echo "show tables" | clickhouse-client -m -d partner | grep '^statistics$' | wc -l)
set -e
if [[ $COUNT -ne 1 ]]; then
    # Ask rosetta_call for the table's create_sql as JSON, take .data[0],
    # drop lines containing '#END' and strip every double quote, then feed
    # the result to clickhouse-client.
    # NOTE(review): the unquoted $( ... ) is word-split and echo -e expands
    # backslash escapes - presumably to turn the JSON "\n" sequences into real
    # newlines; confirm before quoting or replacing with `jq -r`.
    echo -e $( ./rosetta_call --login=yndx-zurom --model=clickhouse_db --method=create_sql --args='["tables", ["statistics"]]' --format=json 2>/dev/null --no_lazy | jq '.data[0]' | grep -v '#END' | sed 's|"||g' 2>&1 ) \
        | clickhouse-client -m -d partner
fi

# Compare, level by level, the get_raw API output of the ClickHouse-backed
# and the MySQL-backed installations for $DATE.
#
# For every key of LEVELS the loop
#   * re-diffs the result files of a previous run when both already exist;
#   * otherwise refreshes statistics through ./rosetta_call, downloads
#     get_raw.json with curl and normalises each row to 35 tab-separated
#     columns (missing columns become 0);
#   * additionally aggregates the MySQL rows by the table's primary-key
#     columns with clickhouse-local;
#   * hands both sorted files to diff_files.
# Reads: LEVELS, PageIdFieldNames, DATE, TOKEN, PAGE_IDS, RES_FOLDER_PATH.
# NOTE(review): MSPORT/CHPORT are defined at the top of the script but the
# URLs below hard-code ports 8410/8411 - confirm which values are intended.
for LEVEL in "${!LEVELS[@]}"; do
    set +x

    echo ""
    echo "################ $LEVEL";

    CURL_OUT_FILE="${LEVEL}_${DATE}"
    CH_RES_FILE_PATH="$RES_FOLDER_PATH/${CURL_OUT_FILE}_clickhouse_aggregated.out"
    MYSQL_RES_FILE_PATH="$RES_FOLDER_PATH/${CURL_OUT_FILE}_mysql_aggregated.out"


    ####### Check previously processed files
    if [[ -f "$MYSQL_RES_FILE_PATH" && -f "$CH_RES_FILE_PATH" ]]; then
        echo 'EXIST'
        diff_files  "$MYSQL_RES_FILE_PATH"  "$CH_RES_FILE_PATH"
        continue;
    fi


    MODEL=${LEVELS[$LEVEL]}
    ACCESSOR="statistics_$MODEL"
    TABLE="statistics_$LEVEL"

    # Tables without an entry in PageIdFieldNames are filtered by campaign_id.
    PAGE_FIELD_NAME=${PageIdFieldNames[$TABLE]:-campaign_id}

    set -x

    ####### ClickHouse
    date +"    %Y/%m/%d %T [2. stat update clickhouse]"
    if ! [[ -f "$CH_RES_FILE_PATH" ]]; then

        # Same explicit failure report as for the MySQL update below.
        ./rosetta_call --login=yndx-zurom --model="$ACCESSOR" --method=update_statistics --no_lazy \
                --args='["from", "'"$DATE"'",  "to", "'"$DATE"'",  "for_month", 0,  "storage", "clickhouse" ]' \
             || { echo 'rosetta_call FAILED' ; exit 1; }

        # Trace the assignment so the query text lands in the log, then stop
        # tracing (the original `set =x` was a typo that only overwrote $1).
        set -x
        SQL="
            SELECT  count(*)
            FROM    statistics
            WHERE   dt = '${DATE}'
                    AND product_id='$LEVEL'
                    AND page_id IN ( ${PAGE_IDS} )
        ";
        set +x
        echo "$SQL" | clickhouse-client -m -d partner --format=Pretty


        CURL_FILE_PATH="$RES_FOLDER_PATH/${CURL_OUT_FILE}_clickhouse_curl.out"

        date +"    %Y/%m/%d %T [3. curl get_raw]"
        URL="https://dev-partner2.yandex.ru:8411/api/statistics/get_raw.json?oauth_token=${TOKEN}&from=${DATE}&to=${DATE}&level=${MODEL}&lang=ru"
        curl -s "$URL" > "$CURL_FILE_PATH"

        if grep -q '"result":"error"' "$CURL_FILE_PATH"; then
            echo "    curl FAILED - $URL"
            cat "$CURL_FILE_PATH"
            exit 1;
        fi

        date +"    %Y/%m/%d %T [6. perl add zero colunmns to ch file]"
        # grep -v exits non-zero when nothing is left after filtering; errexit
        # is suspended so an empty result does not abort the script.
        set +e
        # Drop the '#dt' header and '#END' trailer lines, pad every row to
        # 35 whitespace-split fields (undefined ones become 0), join with tabs.
        grep -v -e '#dt' -e '#END' "$CURL_FILE_PATH" \
            | perl -lane 'print join( "\t", map {$_ // 0} @F[0 .. 34]); ' \
            | sort > "$CH_RES_FILE_PATH"
        set -e
    fi

    ####### MySQL
    date +"    %Y/%m/%d %T [4. stat update mysql]"
    if ! [[ -f "$MYSQL_RES_FILE_PATH" ]]; then

        ./rosetta_call --login=yndx-zurom --model="$ACCESSOR" --method=update_statistics --no_lazy \
                --args='["from", "'"$DATE"'",  "to", "'"$DATE"'",  "for_month", 0,  "storage", "mysql" ]'  \
             || { echo 'rosetta_call FAILED' ; exit 1; }

        # Trace only the query text, as in the ClickHouse branch.
        set -x
        SQL="
            SELECT  count(*)
            FROM    $TABLE
            WHERE   dt = '${DATE}'
                    AND ${PAGE_FIELD_NAME}
                    IN  ( ${PAGE_IDS} )
        ";
        set +x
        echo "$SQL" | mysql_partner2 --server=dev  --  --table

        CURL_FILE_PATH="$RES_FOLDER_PATH/${CURL_OUT_FILE}_mysql_curl.out"
        PREPARED_FILE_PATH="$RES_FOLDER_PATH/${CURL_OUT_FILE}_mysql_prepared.out"

        date +"    %Y/%m/%d %T [5. curl get_raw]"
        URL="https://dev-partner2.yandex.ru:8410/api/statistics/get_raw.json?oauth_token=${TOKEN}&from=${DATE}&to=${DATE}&level=${MODEL}&lang=ru"
        curl -s "$URL" > "$CURL_FILE_PATH"

        if grep -q '"result":"error"' "$CURL_FILE_PATH"; then
            echo "$URL"
            cat "$CURL_FILE_PATH"
            exit 1;
        fi

        # Build the clickhouse-local schema without tracing the helper pipes.
        set +x
        # Rows of `desc $TABLE` that mention PRI, minus dsp_id and currency_id.
        MYSQL_DESC=$( echo "desc $TABLE" | mysql_partner2 --server=dev  --  --skip-column-names  | grep PRI | grep -v -e 'dsp_id' -e 'currency_id' )

        CH_PRI_COUNT=$(  echo "$MYSQL_DESC" | wc -l  )
        # "<col> UInt64, " for every key column, with dt typed as Date.
        CH_PRI_FIELDS=$( echo "$MYSQL_DESC" | awk '{print $1}' | tr $'\n' ' ' | sed  's/\( \|$\)/ UInt64, /g; s|dt UInt64|dt Date|'  )
        # Key column names joined with ", ".
        CH_GROUP_BY=$(   echo "$MYSQL_DESC" | awk '{print $1}' | tr $'\n' ' ' | sed  's/ /, /g; s|, $||'      )

        # The remaining columns (35 in total) are declared as F1..Fn metrics.
        # Shell arithmetic instead of expr: expr exits 1 on a zero result.
        METRICS_COUNT=$(( 35 - CH_PRI_COUNT ))
        METRICS_FIELDS=$( for i in $(seq 1 "$METRICS_COUNT"); do echo -n "F${i} UInt64, "; done )
        METRICS_FIELDS=$(echo "$METRICS_FIELDS" | sed 's|, $||')
        METRICS_SUM_FIELDS=$( for i in $(seq 1 "$METRICS_COUNT"); do echo -n "sum(F${i}), "; done )
        METRICS_SUM_FIELDS=$( echo "$METRICS_SUM_FIELDS" | sed 's|, $||' )
        set -x


        date +"    %Y/%m/%d %T [6. perl add zero colunmns to mysql file]"
        # Same normalisation as for the ClickHouse file; errexit is suspended
        # because grep -v exits non-zero on an empty result.
        set +e
        grep -v -e '#dt' -e '#END' "$CURL_FILE_PATH" \
            | perl -lane 'print join( "\t", map {$_ // 0} @F[0 .. 34]); ' \
            > "$PREPARED_FILE_PATH"
        set -e

        date +"    %Y/%m/%d %T [7. clickhouse-local group stat]"
        # Sum the metric columns per primary key. stderr of clickhouse-local
        # is discarded, as in the original.
        clickhouse-local --input-format=TabSeparated -S "$CH_PRI_FIELDS $METRICS_FIELDS" \
            -q "select $CH_GROUP_BY, $METRICS_SUM_FIELDS from table group by $CH_GROUP_BY" \
            < "$PREPARED_FILE_PATH" 2> /dev/null \
            | sort \
            > "$MYSQL_RES_FILE_PATH"
    fi

    ####### DIFF
    diff_files  "$MYSQL_RES_FILE_PATH"  "$CH_RES_FILE_PATH"  1

done
