#!/bin/bash
# Abort the whole run as soon as any command exits with a non-zero status.
set -o errexit
# Wait a random 0-9 seconds before doing any work
# (presumably to stagger simultaneous launches -- TODO confirm).
sleep "$(( RANDOM % 10 ))"
# Environment handed to every child process started by this script.
YT_SMART_FORMAT=1
YT_USE_YAMR_DEFAULTS=1
export YT_SMART_FORMAT YT_USE_YAMR_DEFAULTS

# YT_SPEC applies the same table_writer max_row_weight (134217728 = 128 * 1024 * 1024)
# to every *job_io section. The JSON is assembled in a subshell so the helper
# variables do not leak into this script's environment.
YT_SPEC=$(
    row_limit='{"table_writer": {"max_row_weight": 134217728}}'
    sections=''
    for io_key in job_io map_job_io reduce_job_io sort_job_io partition_job_io merge_job_io; do
        sections="${sections:+$sections, }\"$io_key\": $row_limit"
    done
    printf '%s' "{ $sections}"
)
export YT_SPEC

LC_ALL=ru_RU.UTF-8
export LC_ALL

# Make the mapreduce binaries reachable; appended so existing entries win.
PATH="${PATH}:/place/berkanavt/mapreduce/bin"
export PATH
# Positional arguments (all optional):
#   $1  DATE  day to process, YYYYMMDD; defaults to yesterday.
#   $2  DIR   base directory holding the "id" file and the per-type data
#             directories; empty or "." means the directory of this script.
#   $3  TYPE  sub-directory / report type; defaults to "unireports".
# Later sections compare $DATE with $1, so DATE must stay a verbatim copy of
# $1 whenever a date was supplied.
DATE="$1"
DIR="$2"
TYPE="$3"

if [[ -z "$DATE" ]]; then
    DATE=$(date +%Y%m%d --date="1 day ago")
fi

if [[ -z "$DIR" || "$DIR" == "." ]]; then
    DIR=$(dirname "$0")
fi

# Resolve DIR to an absolute path. The script later changes directory
# (cd $DIR/$DATE, then cd ../..) and keeps using $DIR afterwards; a relative
# DIR would then point at the wrong place. CDPATH is cleared so that cd
# neither searches elsewhere nor prints the directory it picked.
DIR=$(CDPATH= cd -- "$DIR" && pwd) || {
    echo "cannot resolve base directory" >&2
    exit 1
}

if [[ -z "$TYPE" ]]; then
    TYPE="unireports"
fi

# ID is the content of the "id" file in the base directory.
ID=$(cat "$DIR/id")
# From here on DIR is the per-type data directory.
DIR="$DIR/$TYPE"

#echo $DATE

# Archive old data, TRENCHER-24
# Runs only when $DATE is not a verbatim copy of $1. Moves the day directory
# dated 90 days ago out of $DIR: to ../archive-links when it contains more
# than one symbolic link at its top level, otherwise to ../archive.
if [[ "$DATE" != "$1" ]]; then
    cd "$DIR"
    old=$(date +%Y%m%d --date="90 days ago")
    if [[ -d "$old" ]]; then
        if (( $(find "$old" -maxdepth 1 -type l | wc -l) > 1 )); then
            archive_dir=../archive-links
        else
            archive_dir=../archive
        fi
        # Make sure the target is a directory; otherwise mv would rename the
        # day directory to "archive"/"archive-links" instead of moving it in.
        mkdir -p "$archive_dir"
        mv -v -- "$old" "$archive_dir"
    fi
    # Return to the previous working directory (cd - also prints it).
    cd -
fi

# Create the per-day working directory and run everything below from it;
# paths are quoted so a base directory containing whitespace still works.
mkdir -p "$DIR/$DATE"
cd "$DIR/$DATE"
pwd

# MapReduce / unireports
# parse.py runs under a lock file specific to this ID and date; its stdout and
# stderr (including the "time" report) are appended to parse.out / parse.err
# in the day directory.
echo "$(date +%T): ../../parse.py $DATE $ID $TYPE ..."
flock "/var/lock/latency-$ID-$DATE" -c "time ../../parse.py $DATE $ID $TYPE" >> parse.out 2>> parse.err

# R / KPI
# Only executed when $DATE differs from the first command-line argument.
if [[ "$DATE" != "$1" ]]; then
    # do not wait for spylog & technical reports, prepare KPI first
    # TODO: do we need global lock here? Replace with /var/lock/latency-$DATE
    # The subshell holds a lock on /var/lock/latency through file descriptor 9
    # while the comments checkout is updated and the KPI recalculation runs.
    (
        flock 9
        ../../svn.sh up ../../comments
        ../../recalc.sh "$DATE" kpi_simple,kpi_simple_50 web-russia-quality@yandex-team.ru
    ) 9> /var/lock/latency
fi

# MapReduce / Spylog
#if [ "$TYPE" != "$3" ]; then
#    echo $(date +%T): ../../parse.py $DATE $ID unireports-spylog ...
#    flock /var/lock/latency-$ID-$DATE-spy -c "time ../../parse.py $DATE $ID unireports-spylog" >> parse-spy.out 2>> parse-spy.err
#fi

# SpeedIndex (legacy)
#echo $(date +%T): Reading speedindex ...
#export PATH=$PATH:/place/berkanavt/mapreduce/bin
#export MR_USER=trencher
#export DEF_MR_SERVER=sakura.search.yandex.net
#mapreduce-dev-sakura --read trencher_speedindex/$DATE | awk -F '\t' 'NF==4' > speedindex.$DATE.txt
#if [ ! -s speedindex.$DATE.txt ]; then
#    rm -f speedindex.$DATE.txt
#fi

# R / Tech charts
# Only executed when $DATE differs from the first command-line argument.
if [[ "$DATE" != "$1" ]]; then
    # Both recalculations run while holding the lock on /var/lock/latency (fd 9).
    (
        # TODO: do we need global lock here? Replace with /var/lock/latency-$DATE
        flock 9
        ../../recalc.sh "$DATE" kpi,uaver,control,timeline,timelineperc,distr,detailed,https
        # trying to cut memory usage by calculating tech charts separately, TRENCHER-32
        ../../recalc.sh "$DATE" tech
    ) 9> /var/lock/latency

    # Report: from today's atop log lines mentioning "(R)", keep fields
    # 4, 5, 9 and 11 and print the five rows with the largest fourth column.
    separator="------------------------------------------"
    echo "$separator"
    echo "Virtual memory usage by R - top 5 samples"
    echo "Date Time State KBytes"
    atop -PPRM -r /var/log/atop/atop.log \
        | grep "$(date +%Y/%m/%d)" \
        | grep '(R)' \
        | cut -d ' ' -f 4,5,9,11 \
        | sort -k 4 -rn \
        | head -n 5
    echo "$separator"
fi

# Leave the per-day directory and go two levels up, where data_posting.sh and
# backup.py are invoked from. Output of both steps is appended to log files
# inside $DIR/$DATE. All expansions are quoted so that a path containing
# whitespace does not cause an "ambiguous redirect" or split arguments.
# NOTE(review): $DIR is resolved against the directory reached by this cd, so
# it must be absolute or still valid from there.
cd ../..
echo "$(date +%T): ./data_posting.sh ..."
./data_posting.sh "$DATE" >> "$DIR/$DATE/upload.out" 2>> "$DIR/$DATE/upload.err"
echo "$(date +%T): Done: $DATE"

echo "$(date +%T): ./backup.py ..."
./backup.py -d "$DIR" -s "$DATE" -e "$DATE" >> "$DIR/$DATE/backup.out" 2>> "$DIR/$DATE/backup.err"
echo "$(date +%T): Done: $DATE"
