#!/bin/bash

# --- Configuration -----------------------------------------------------------
# Authenticate the yt CLI with the robot token stored in the home directory.
# Assignment and export are separate so that a failed read is not masked by
# the exit status of `export`.
YT_TOKEN=$(cat ~/.yt/robot_token) \
  || echo "warning: cannot read ~/.yt/robot_token; yt calls may fail to authenticate" >&2
export YT_TOKEN


# YT cluster (proxy alias) that hosts the destination table.
YT_TABLE_CLUSTER_1='freud'

# Create the "for_analytics" folder. --ignore-existing keeps the script
# re-runnable: without it the command fails whenever the node already exists.
yt create map_node //home/partner/for_analytics --ignore-existing --proxy="$YT_TABLE_CLUSTER_1" \
  || echo "warning: could not create //home/partner/for_analytics" >&2

# Suffix that distinguishes this run's table and local files from others.
SUFFIX='rtb01e'
YT_TABLE_PATH="//home/partner/for_analytics/nginx_statistics_logs_${SUFFIX}"


# Schema of the destination table, assembled from "name:type" pairs into the
# attribute text passed to `yt create table`. Column order matches the field
# order printed by the Perl filter in PCODE.
schema_columns=(
  ip:string
  login:string
  datetime:string
  method:string
  request:string
  status:uint16
  bytes_sent:uint64
  referer:string
  user_agent:string
  response_time:double
  request_time:double
  port:uint32
  request_id:string
  postdata:string
)

YT_TABLE_SCHEMA='['
schema_sep=$'\n'
for schema_column in "${schema_columns[@]}"; do
  # Text before the first ':' is the column name, text after it is the type.
  YT_TABLE_SCHEMA+="${schema_sep}    {name=${schema_column%%:*};type=${schema_column#*:}}"
  # Every entry except the first is preceded by ';' + newline.
  schema_sep=$';\n'
done
YT_TABLE_SCHEMA+=$'\n]'
unset schema_columns schema_sep schema_column


# Perl filter that converts raw nginx access-log lines (tab-separated, 15
# fields) into the 14 tab-separated columns of the YT table. Only requests to
# /v2/statistics or /v2/dashboard are kept.
#
# The program is meant to be run as `perl -n -e "$PCODE"` (the script uses
# `perl -F'\t' -lane`): the -n switch already supplies the per-line read loop,
# so the program must not contain a read loop of its own -- a nested one makes
# perl drop the first line of every input.
#
# A quoted here-document keeps the Perl text literal (Perl's own single quotes
# and "$" survive untouched) and keeps newlines, so "#" comments are safe.
# `read -d ''` returns non-zero at end of input, hence the `|| true`.
IFS= read -r -d '' PCODE <<'PERL' || true
use Date::Calc qw(Decode_Month);

# Harmless under -l (which already strips the newline); needed with plain -n.
chomp;

# Strip surrounding double quotes from every field and treat "-" as empty.
# The second field is ignored.
($ip, undef, $http_login, $time_str, $request, $status, $bytes_sent, $referer, $user_agent, $cookie, $response_time, $request_time, $port, $request_id, $postdata)=
  map{ s/^"//; s/"$//; $_ eq "-" ? "" : $_ }
    split/\t/;

next unless $request =~ m/\/v2\/(statistics|dashboard)/;

# "[10/Oct/2000:13:55:36 +0000]" -> "2000-10-10 13:55:36"
my (undef, $d,$m,$y,$t) = split(/[\[\/:]/, $time_str, 5);
$m=Decode_Month($m); $d=~s/^\[//; $t=~s/ .*$//;
$time_str = sprintf("%04d-%02d-%02d %s", $y,$m,$d,$t);

# "GET /path HTTP/1.1" -> method "GET", request "/path".
# $1 is read only when the substitution matched, so a stale capture from an
# earlier regex can never leak into the method column.
$request=~s| HTTP/.*$||;
$method = ($request=~s/^(\w+) //) ? $1 : "";

# Prefer the login from the yandex_login cookie over the HTTP login field.
($cookie_login)=($cookie=~m/yandex_login=([^;]+)/);

# Keep only the first value when several are listed ("0.1, 0.2" -> "0.1").
$response_time=~s/[, :].*$//;
$response_time ||= '0.0';
$request_time  ||= '0.0';

print join("\t", $ip, $cookie_login || $http_login, $time_str, $method, $request, $status, $bytes_sent, $referer, $user_agent,
  $response_time, $request_time, $port, $request_id, $postdata );
PERL


# Create the static (non-dynamic) destination table with the schema above.
# Success is reported only when yt actually succeeded; a failure (for example
# because the table already exists) is reported on stderr and the script
# carries on, as it did before.
if yt create table "$YT_TABLE_PATH" --attr "{
    dynamic=%false;
    schema=$YT_TABLE_SCHEMA
}" --proxy "$YT_TABLE_CLUSTER_1"; then
  echo "$YT_TABLE_PATH created"
else
  echo "warning: yt create table $YT_TABLE_PATH failed" >&2
fi


# Convert the current nginx access log and its two most recent rotations into
# TSV files in the current directory, then upload all of them to the YT table.
#   rotation 0  -> <log>          (plain)
#   rotation 1  -> <log>.1        (plain)
#   rotation 2+ -> <log>.<n>.gz   (gzip-compressed)
log_base='/var/log/nginx/partner2.yandex.access-post.log'

# An array keeps each output file a separate argument for `cat` below;
# a single space-joined string would be passed as one (non-existent) filename.
files=()
for i in {0..2}; do
  out_file="${i}_${SUFFIX}.log"
  # sudo is used because the script reads the nginx logs as another user.
  case "$i" in
    0) sudo cat "$log_base" ;;
    1) sudo cat "${log_base}.1" ;;
    *) sudo zcat "${log_base}.${i}.gz" ;;
  esac | perl -F'\t' -lane "$PCODE" > "$out_file"
  files+=("$out_file")
  echo "    file ${out_file} processed"
done


# Upload every converted file in one stream. Column order must match both the
# table schema and the field order printed by the Perl filter.
cat -- "${files[@]}" \
  | yt write "$YT_TABLE_PATH" --format \
    '<columns=[ip; login; datetime; method; request; status; bytes_sent; referer; user_agent; response_time; request_time; port; request_id; postdata];enable_type_conversion=%true>schemaful_dsv' \
    --proxy="$YT_TABLE_CLUSTER_1"

# Print the resulting number of rows as a quick sanity check.
yt get "${YT_TABLE_PATH}/@row_count" --proxy="$YT_TABLE_CLUSTER_1"
