# -*- coding: utf-8 -*-
import SETTINGS

from yql.api.v1.client import YqlClient

# Shared YQL client used by every query below: targets the 'hahn' database and
# authenticates with the token stored in the SETTINGS module.
client = YqlClient(
    db='hahn',
    token=SETTINGS.YQL_TOKEN,
)

# noinspection PyTypeChecker
request1 = client.query(ur"""
PRAGMA yt.InferSchema = '1';

$get_day_table_name = @@
import datetime

def get_error_table_name(delta):
    return '%s' % (datetime.date.today() - datetime.timedelta(days=delta)).strftime("%Y-%m-%d")
@@;

$table_name_days_before = Python::get_error_table_name("(Int8)->String", $get_day_table_name);

$parse_message = Python::parse("(String?)->String?", @@
def parse(log_string):
    # -*- coding: windows-1251 -*-
    import re
    
    if log_string:
        search_id = re.search("request_id\":\"(.*?)\"}", log_string)
        if search_id:
            log_string = re.sub(search_id.group(1), "", log_string)
        search_ckey = re.search("ckey \((.*?)\)", log_string)
        if search_ckey:
            log_string = re.sub(re.escape(search_ckey.group(1)), "", log_string)
        search_uid = re.search("uid \((.*?)\)", log_string)
        if search_uid:
            log_string = re.sub(search_uid.group(1), "", log_string)
    
    return log_string
@@);

-- Cчитаем распределение логов с группировкой по дню-хосту
INSERT INTO [home/mailfront/qa/logAlerts/duffmanError/errors_20_days] WITH TRUNCATE
    SELECT *
    FROM (
        SELECT 
            COUNT(*) as c,
            day, 
            host,
            reason,
            name,
            message
        FROM RANGE([logs/mail-duffman-http-log/1d], $table_name_days_before(21), $table_name_days_before(1))
        WHERE reason NOT LIKE "%SUCCESS%" AND
            reason NOT LIKE "%FINISHED%" AND
            reason NOT LIKE "%RESOLVED%" AND
            reason NOT LIKE "" AND
            host like "%ub%.web-api%" AND
            DateTime::GetHour(DateTime::TimestampFromString(WeakField(iso_eventtime, "String"))) < 8
        GROUP BY 
            WeakField(reason, "String") as reason,  
            WeakField(qloud_path, "String") as host,  
            WeakField(name, "String") as name, 
            $parse_message(WeakField(message, "String")) as message, 
            DateTime::ToDate(DateTime::TimestampFromString(WeakField(iso_eventtime, "String"))) as day
        UNION ALL
        SELECT 
            COUNT(*) as c, 
            day,
            host,
            reason, 
            name, 
            message
        FROM RANGE([logs/mail-duffman-access-log/1d], $table_name_days_before(21), $table_name_days_before(1)) as T
        WHERE 
            reason NOT LIKE "%SUCCESS%" AND
            reason NOT LIKE "%FINISHED%" AND
            reason NOT LIKE "%RESOLVED%" AND
            reason NOT LIKE "" AND
            host like "%ub%.web-api%" AND
            DateTime::GetHour(DateTime::TimestampFromString(WeakField(timestamp, 'String'))) < 8
        GROUP BY 
            WeakField(reason, 'String') as reason, 
            WeakField(qloud_path, "String") as host, 
            WeakField(name, 'String') as name, 
            $parse_message(WeakField(message, 'String')) as message, 
            DateTime::ToDate(DateTime::TimestampFromString(WeakField(timestamp, 'String'))) as day
    )
    ORDER BY c DESC;
""")

# Query 2: keep only the "meaningful" host/day pairs.
#   1. $meanful_data  - (host, day) tuples from the access-log (same 21-days-ago
#      .. yesterday table range and the same reason / host / hour < 8 filters
#      as query 1) that have more than 300 matching rows.
#   2. $meanful_list  - those tuples aggregated into a single list.
#   3. The rows of errors_20_days (the table written by request1) whose
#      (host, day) is in that list are copied, WITH TRUNCATE, into
#      home/mailfront/qa/logAlerts/duffmanError/unic_errors_day-host.
# The date-to-table-name helper is redefined here because each query text is
# submitted to the client separately.
# noinspection PyTypeChecker
request2 = client.query(ur"""
PRAGMA yt.InferSchema = '1';

$get_day_table_name = @@
import datetime

def get_error_table_name(delta):
    return '%s' % (datetime.date.today() - datetime.timedelta(days=delta)).strftime("%Y-%m-%d")
@@;

$table_name_days_before = Python::get_error_table_name("(Int8)->String", $get_day_table_name);

-- Выбираем день-хост, в котором ночью набежало нормально ошибок (больше 300)
$meanful_data = (
Select asTuple(host, day) as meanful_errors
FROM RANGE([logs/mail-duffman-access-log/1d], $table_name_days_before(21), $table_name_days_before(1)) as T
WHERE 
    WeakField(reason, "String") NOT LIKE "%SUCCESS%" AND
    WeakField(reason, "String") NOT LIKE "%FINISHED%" AND
    WeakField(reason, "String") NOT LIKE "%RESOLVED%" AND
    WeakField(reason, "String") NOT LIKE "" AND
    WeakField(host, "String") like "%ub%.web-api%" AND
    DateTime::GetHour(DateTime::TimestampFromString(WeakField(timestamp, "String"))) < 8
GROUP BY WeakField(qloud_path, "String") as host, DateTime::ToDate(DateTime::TimestampFromString(WeakField(timestamp, "String"))) as day
HAVING COUNT(*)>300
);

-- Делаем список из зачений день-хост
$meanful_list = (
SELECT LIST(meanful_errors) as host_data_list FROM $meanful_data
);

-- Делаем общую таблицу, в которой будет распределение логов только за день-хост с нормальным количеством ошибок
INSERT INTO [home/mailfront/qa/logAlerts/duffmanError/unic_errors_day-host] WITH TRUNCATE
    SELECT * 
    FROM [home/mailfront/qa/logAlerts/duffmanError/errors_20_days]
    WHERE ListHas($meanful_list, asTuple(host, day));
""")

# Query 3: per-error statistics.  Rows of unic_errors_day-host (the table
# written by request2) are grouped by (name, message, reason); for each group
# the 25th and 75th percentiles of the count `c`, its median, the
# interquartile range IR = p75 - p25 and the list of all counts are computed.
# Only groups with more than 5 counted values are kept.  The result is written,
# WITH TRUNCATE, to home/mailfront/qa/logAlerts/duffmanError/ir_table.
# noinspection PyTypeChecker,SqlDialectInspection
request3 = client.query(ur"""
-- Делаем таблицу ошибка - список с количеством её появления в разные дни на разных хостах и посчитанными перцентилями
INSERT INTO [home/mailfront/qa/logAlerts/duffmanError/ir_table] WITH TRUNCATE
    SELECT name, message, reason, PERCENTILE(c, 0.25) as p25, PERCENTILE(c, 0.75) as p75, MEDIAN(c) as m, (PERCENTILE(c, 0.75) - PERCENTILE(c, 0.25)) as IR, LIST(c) as c_list
    FROM [home/mailfront/qa/logAlerts/duffmanError/unic_errors_day-host]
    GROUP BY name, message, reason
    HAVING COUNT(c)>5
""")


# Start the three queries strictly in order, printing each request object
# right after it has been started.
# NOTE(review): request2 reads the table request1 writes, and request3 reads
# the table request2 writes - confirm that run() (or printing the request)
# waits for completion before the next query is started.
for request in (request1, request2, request3):
    request.run()
    print(request)

