#!/bin/bash

# Base name of the binary to run; also the stem of the properties file names.
PROGRAM=host2vec-model

# Absolute directory that contains this script. It has to be absolute: the
# script changes its working directory to RUN_DIR further down and keeps using
# BIN_DIR-based paths (binary, config files) afterwards, which would point to
# the wrong place if BIN_DIR were relative (e.g. when invoked as "bin/run.sh").
# CDPATH is cleared so that `cd` can neither jump elsewhere nor print to stdout.
BIN_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) || {
    printf '%s: cannot resolve the script directory\n' "$0" >&2
    exit 1
}

# ENV_TYPE is not set in this file; it is expected to come from the environment.
# NOTE(review): LOCAL_CONFIG and GLOBAL_CONFIG resolve to the very same file —
# confirm this is intended and not a copy-paste slip.
LOCAL_CONFIG=$BIN_DIR/$PROGRAM.$ENV_TYPE.properties
GLOBAL_CONFIG=$BIN_DIR/$PROGRAM.$ENV_TYPE.properties
# Working directory for the run (same as the script directory).
RUN_DIR=$BIN_DIR
LOG_DIR=/place/db/www/logs

#######################################
# Load a .properties-style file into global shell variables.
#
# Every `key=value` line defines a variable named after the key with each "."
# replaced by "_" (e.g. `yt.pool=x` sets yt_pool to "x"). The line is split at
# the first "=", so values may themselves contain "=". Whitespace around the
# key and around the value is trimmed. Blank lines, lines starting with "#" or
# "!", and lines without "=" are ignored.
#
# Values are assigned literally: nothing in the file is evaluated as shell
# code, so spaces, quotes, "$" and backticks in a value are kept as-is.
#
# Arguments:
#   $1 - path to the properties file
# Outputs:
#   diagnostics on stderr for an unreadable file or an unusable key
# Returns:
#   0 on success, 1 if the file cannot be read
#######################################
LoadConfig() {
    # Locals carry a prefix so they cannot shadow a variable that the config
    # file itself wants to define (printf -v would otherwise hit the local).
    local __lc_file=$1
    local __lc_line __lc_key __lc_value
    local __lc_skip_re='^[[:space:]]*($|[#!])'
    local __lc_name_re='^[A-Za-z_][A-Za-z0-9_]*$'

    if [[ -d $__lc_file || ! -r $__lc_file ]]; then
        printf 'LoadConfig: cannot read config file: %s\n' "$__lc_file" >&2
        return 1
    fi

    # `|| [[ -n ... ]]` keeps the last line when the file has no final newline.
    while IFS= read -r __lc_line || [[ -n $__lc_line ]]; do
        # Tolerate CRLF line endings.
        __lc_line=${__lc_line%$'\r'}

        if [[ $__lc_line =~ $__lc_skip_re || $__lc_line != *=* ]]; then
            continue
        fi

        __lc_key=${__lc_line%%=*}
        __lc_value=${__lc_line#*=}

        # Trim leading, then trailing whitespace of key and value.
        __lc_key=${__lc_key#"${__lc_key%%[![:space:]]*}"}
        __lc_key=${__lc_key%"${__lc_key##*[![:space:]]}"}
        __lc_value=${__lc_value#"${__lc_value%%[![:space:]]*}"}
        __lc_value=${__lc_value%"${__lc_value##*[![:space:]]}"}

        __lc_key=${__lc_key//./_}
        if [[ ! $__lc_key =~ $__lc_name_re ]]; then
            printf 'LoadConfig: skipping key that is not a valid variable name in %s: %s\n' \
                "$__lc_file" "$__lc_key" >&2
            continue
        fi

        printf -v "$__lc_key" '%s' "$__lc_value"
    done < "$__lc_file"

    return 0
}

# Import the properties as shell variables (dots in keys become underscores).
# The path is quoted so a directory name with spaces or glob characters is
# passed to LoadConfig as a single, unmodified argument.
LoadConfig "$GLOBAL_CONFIG"

export MR_RUNTIME=YT
#export YT_TOKEN=$yt_token #will be retrieved from Cloud secret vault

# Build the JSON spec exported as YT_SPEC: the pool name plus the same
# table-writer limit (max_row_weight 134217728 = 128 * 1024 * 1024) for every
# *_job_io section. yt_pool is not set in this block — presumably it comes from
# the loaded properties; verify. It is quoted so that a value containing
# whitespace fills the single "%s" instead of making printf repeat the format.
# NOTE(review): the pool name is inserted without JSON escaping.
_yt_writer_cfg='{"table_writer": {"max_row_weight": 134217728}}'
printf -v YT_SPEC '{ "pool": "%s", "job_io": %s, "map_job_io": %s, "reduce_job_io": %s, "sort_job_io": %s, "partition_job_io": %s, "merge_job_io": %s}' \
    "$yt_pool" \
    "$_yt_writer_cfg" "$_yt_writer_cfg" "$_yt_writer_cfg" \
    "$_yt_writer_cfg" "$_yt_writer_cfg" "$_yt_writer_cfg"
export YT_SPEC
unset _yt_writer_cfg

# All relative paths used below (the *.gz inputs and the "dataset" file) are
# resolved against RUN_DIR. `set -e` is not active yet at this point, so a
# failed cd must be handled explicitly; otherwise the script would go on and
# read/write those files in whatever directory it was started from.
cd -- "$RUN_DIR" || {
    printf '%s: cannot change directory to %s\n' "$0" "$RUN_DIR" >&2
    exit 1
}

# From here on: abort on the first failing command, trace every command, and
# treat a pipeline as failed if any of its stages fails.
set -e
set -x
set -o pipefail

# Common command-line prefix for every step of the binary. It is kept as a
# plain string and expanded unquoted by its users, so none of the embedded
# paths may contain whitespace. IPORT is not set in this file; it is expected
# to come from the environment.
RUN_CMD="$BIN_DIR/$PROGRAM -G $GLOBAL_CONFIG -C $LOCAL_CONFIG -L $LOG_DIR/current-webmaster-host2vec-model-$IPORT"

#######################################
# Run the host2vec_build_train_spy_log step, unpack its archives and train.
#
# Steps: run $RUN_CMD host2vec_build_train_spy_log with the caller's arguments,
# decompress every *.gz in the current directory into ./dataset (presumably
# the archives written by the previous step — verify), then run
# $BIN_DIR/train on that dataset with -min-count 100.
#
# Globals (read):
#   RUN_CMD, BIN_DIR, file_host2vec_model_vectors, file_host2vec_model_words
# Arguments:
#   forwarded unchanged to the host2vec_build_train_spy_log step
# Side effects:
#   overwrites ./dataset; train writes to the two file_host2vec_model_* paths
#######################################
do_train_spy_log() {
    # RUN_CMD holds a whitespace-separated command line, so it is expanded
    # unquoted on purpose. "$@" keeps every caller argument as one word.
    $RUN_CMD host2vec_build_train_spy_log "$@"
    pigz -p 4 -c -d *.gz > dataset
    "$BIN_DIR/train" -train dataset -min-count 100 -debug 1 -binary 2 -window 10 -size 200 -threads 16 \
        -output-vectors "$file_host2vec_model_vectors" -output "$file_host2vec_model_words"
}

#######################################
# Run the host2vec_build_train_similargroup step, unpack its archives and train.
#
# Steps: run $RUN_CMD host2vec_build_train_similargroup with the caller's
# arguments, decompress every *.gz in the current directory into ./dataset
# (presumably the archives written by the previous step — verify), then run
# $BIN_DIR/train on that dataset with -min-count 20.
#
# Globals (read):
#   RUN_CMD, BIN_DIR, file_host2vec_model_vectors, file_host2vec_model_words
# Arguments:
#   forwarded unchanged to the host2vec_build_train_similargroup step
# Side effects:
#   overwrites ./dataset; train writes to the two file_host2vec_model_* paths
#######################################
do_train_similargroup() {
    # RUN_CMD holds a whitespace-separated command line, so it is expanded
    # unquoted on purpose. "$@" keeps every caller argument as one word.
    $RUN_CMD host2vec_build_train_similargroup "$@"
    pigz -p 4 -c -d *.gz > dataset
    "$BIN_DIR/train" -train dataset -min-count 20 -debug 1 -binary 2 -window 10 -size 200 -threads 16 \
        -output-vectors "$file_host2vec_model_vectors" -output "$file_host2vec_model_words"
}

#######################################
# Train on the spy-log data, then run the host2vec_upload_model_spy_log step.
#
# Globals (read):
#   RUN_CMD
# Arguments:
#   forwarded unchanged to both do_train_spy_log and the upload step
#######################################
build_model_spy_log() {
    # "$@" (not $*) so arguments containing whitespace or glob characters
    # reach both steps exactly as the caller passed them.
    do_train_spy_log "$@"
    # RUN_CMD is a command line stored in a string; unquoted on purpose.
    $RUN_CMD host2vec_upload_model_spy_log "$@"
}

#######################################
# Train on the similargroup data, then run the
# host2vec_upload_model_similargroup step.
#
# Globals (read):
#   RUN_CMD
# Arguments:
#   forwarded unchanged to both do_train_similargroup and the upload step
#######################################
build_model_similargroup() {
    # "$@" (not $*) so arguments containing whitespace or glob characters
    # reach both steps exactly as the caller passed them.
    do_train_similargroup "$@"
    # RUN_CMD is a command line stored in a string; unquoted on purpose.
    $RUN_CMD host2vec_upload_model_similargroup "$@"
}

# Build both models, similargroup first. The script's own arguments are
# forwarded with "$@" so each one stays a single word. With `set -e` active, a
# failure in the first build stops the script before the second one starts.
build_model_similargroup "$@" #>> $LOG_DIR/current-webmaster-host2vec-model-sh-$IPORT 2>> $LOG_DIR/current-webmaster-host2vec-model-sh-err-$IPORT
build_model_spy_log "$@" #>> $LOG_DIR/current-webmaster-host2vec-model-sh-$IPORT 2>> $LOG_DIR/current-webmaster-host2vec-model-sh-err-$IPORT
