#!/bin/bash
#
# infected-files.sh
#
# Scans yesterday's uploader event logs
# (uploader*/yandex_disk_uploader-events.log.<YYYYMMDD>.gz, relative to
# /u0/archive_logs/uploader) for lines matching "antivirus.*infected".
# For each such line, the third whitespace-separated token of the
# "<file>:<line>" match is taken as the job id, and the same log file is
# searched for that job's payloadInfo / patchedPayloadInfo stage line.
# The hash and content type taken from that line are collected, sorted,
# de-duplicated, written to the result file as "hash<TAB>contentType" and
# copied to the rsync target.
#
# Exits non-zero if cd, sort or rsync fails (set -e). Finding no infected
# lines is not an error: an empty result file is still uploaded.

set -eu

# Print the "value" part of one "key=value" field of the first line on stdin.
# Arguments: $1 - 1-based index of the whitespace-separated field
# Outputs:   the text after the first "=" (up to the next "=", if any) with
#            its tail trimmed by the substr() call below
# Only the first input line is used, so several matching log lines cannot
# produce several values.
# NOTE(review): awk strings are 1-based; with a start index of 0 some awk
# implementations (e.g. gawk) return one character fewer than the length
# argument. The expression is kept exactly as it was - confirm against real
# log lines before changing it.
payload_field() {
    awk -v idx="$1" 'NR == 1 { split($idx, str, "="); print substr(str[2], 0, length(str[2])-1) }'
}

# Remove the scratch file; installed as an EXIT trap so that it also runs
# when set -e aborts the script half-way (e.g. rsync failure).
cleanup() {
    echo "Remove tmp file"
    rm -f -- "$tmpFile"
}

echo "Start infected-files.sh at  $(date)"
cd /u0/archive_logs/uploader

rsyncTarget=rsync://vdb.yandex.ru/incoming/
echo "Rsync target: $rsyncTarget"

yesterday=$(date -d "yesterday" '+%Y%m%d')
log="yandex_disk_uploader-events.log.$yesterday.gz"
echo "Search for infected files in log: $log"

resultFile=/var/tmp/yandex/disk/logs-cron/$yesterday-infected
tmpFile=/var/tmp/yandex/disk/logs-cron/$yesterday-infected-tmp
trap cleanup EXIT
: > "$tmpFile" # ensure file is clear
echo "Write all infected files to $resultFile. Tmp file is $tmpFile"

echo "Search infected files in uploader*/$log"
logsFound=0
# Walk the log files one by one instead of handing the glob to a single
# zgrep: the file name is then known even when only one log exists (in that
# case grep-style tools do not prefix matches with "file:").
for logFile in uploader*/"$log"; do
    # An unmatched glob stays a literal pattern; skip it.
    [[ -e "$logFile" ]] || continue
    logsFound=$((logsFound + 1))

    # The exit status of zgrep inside <( ) is not seen by set -e; status 1
    # (no infected lines in this file) is a normal outcome anyway.
    while IFS= read -r line; do
        # Rebuild the "file:line" form so field positions stay the same as
        # in multi-file zgrep output.
        match="$logFile:$line"
        printf '%s\n' "Process $match"

        jobId=$(awk '{ print $3 }' <<<"$match")
        if [[ -z "$jobId" ]]; then
            # An empty id would turn the pattern below into ".*stage=..."
            # and match payload lines of unrelated jobs.
            printf '%s\n' "Skip line without job id: $match" >&2
            continue
        fi

        # zgrep returns 1 when nothing matches; that must not abort the run.
        payloadInfo=$(zgrep -E "$jobId.*stage=(payloadInfo|patchedPayloadInfo)" "$logFile") || true
        if [[ -n "$payloadInfo" ]]; then
            contentType=$(payload_field 7 <<<"$payloadInfo")
            hash=$(payload_field 9 <<<"$payloadInfo")
            printf '%s\n' "Found $contentType $hash"
            # printf instead of echo -e: backslashes in the data stay literal.
            printf '%s\t%s\n' "$hash" "$contentType" >> "$tmpFile"
        fi
    done < <(zgrep "antivirus.*infected" "$logFile")
done

if [[ "$logsFound" -eq 0 ]]; then
    printf '%s\n' "No log files match uploader*/$log" >&2
fi

sort -u -- "$tmpFile" > "$resultFile"

echo "Copy result to target server"
rsync "$resultFile" "$rsyncTarget"
