# -*- coding: utf-8 -*-
import SETTINGS

from yql.api.v1.client import YqlClient

# Single YQL client (db='hahn', token taken from SETTINGS.YQL_TOKEN) used to
# build every query in this script.
client = YqlClient(db='hahn', token=SETTINGS.YQL_TOKEN)

# noinspection PyTypeChecker
request1 = client.query(ur"""
$get_day_table_name = @@
import datetime

def get_error_table_name(delta):
    return 'errors-%s' % (datetime.date.today() - datetime.timedelta(days=delta)).strftime("%Y-%m-%d")
@@;

$table_name_days_before = Python::get_error_table_name("(Int8)->String", $get_day_table_name);

$parse = Python::parse("(String?)->String?", @@
def parse(text):
    # -*- coding: windows-1251 -*-
    import json
    import re
    import ast
    
    keys = ["reason", "id", "method", "code", "type", "result", "error"]
    text_dict = {}
    if "{" in text:
        if "}" in text:
            text = text[:text.find('}') + 1]
        try:
            text=re.sub("request_id[\\\"\ \:]*[a-zA-Z0-9]*","",text)
            text=re.sub("[a-z0-9]{15,}","",text)
            text = re.sub(",\"\"", "", text)
            try:
                text = ast.literal_eval(text)
                for key in text.keys():
                    if key in keys:
                        text_dict[key] = str(text[key])
            except:
                return text
        except SyntaxError:
            return text
    else:
        return text
    text_dict=str(text_dict)
    return text_dict
@@);

-- Cчитаем распределение логов с группировкой по дню-хосту
INSERT INTO [home/mailfront/qa/logAlerts/errors_20_days] WITH TRUNCATE
    SELECT error_type, name, text, canonized_vhost, day, COUNT(*) as c
    FROM RANGE([//home/mailfront/qa/errors-test], $table_name_days_before(21), $table_name_days_before(1)) as T
    WHERE error_type="HandlerError"
    GROUP BY error_type, $parse(text) as text, T.request_dict{"name"} as name, canonized_vhost, DateTime::ToDate(DateTime::TimestampFromString(iso_eventtime)) as day
    HAVING COUNT(*)>3
    ORDER BY c DESC;
""")

# Query 2: keep only the day/host pairs that have enough night-time errors.
#
# The YQL text below (its inline SQL comments are in Russian; English summary):
#   * redefines the same $table_name_days_before(delta) Python UDF that
#     query 1 uses ('errors-YYYY-MM-DD' for today - delta days);
#   * $meanful_data: (canonized_vhost, day) tuples from the tables in
#     //home/mailfront/qa/errors-test between the names for 21 days ago and
#     1 day ago, counting only HandlerError rows whose iso_eventtime hour is
#     below 8, and keeping pairs with more than 300 such rows;
#   * $meanful_list: those tuples collected into a single list;
#   * rewrites (WITH TRUNCATE) home/mailfront/qa/logAlerts/unic_errors_day-host
#     with the rows of home/mailfront/qa/logAlerts/errors_20_days (the table
#     query 1 writes) whose (canonized_vhost, day) is in that list.
#
# The query is only built here; it is started by request2.run() at the bottom
# of the script.
# noinspection PyTypeChecker
request2 = client.query(ur"""
$get_day_table_name = @@
import datetime

def get_error_table_name(delta):
    return 'errors-%s' % (datetime.date.today() - datetime.timedelta(days=delta)).strftime("%Y-%m-%d")
@@;

$table_name_days_before = Python::get_error_table_name("(Int8)->String", $get_day_table_name);

-- Выбираем день-хост, в котором ночью набежало нормально ошибок (больше 300)
$meanful_data = (
Select asTuple(canonized_vhost, day) as meanful_errors
FROM RANGE([//home/mailfront/qa/errors-test], $table_name_days_before(21), $table_name_days_before(1)) as T
WHERE DateTime::GetHour(DateTime::TimestampFromString(iso_eventtime)) < 8 AND error_type="HandlerError"
GROUP BY canonized_vhost, DateTime::ToDate(DateTime::TimestampFromString(iso_eventtime)) as day
HAVING COUNT(*)>300
);

-- Делаем список из значений день-хост
$meanful_list = (
SELECT LIST(meanful_errors) as host_data_list FROM $meanful_data
);

-- Делаем общую таблицу, в которой будет распределение логов только за день-хост с нормальным количеством ошибок
INSERT INTO [home/mailfront/qa/logAlerts/unic_errors_day-host] WITH TRUNCATE
    SELECT * 
    FROM [home/mailfront/qa/logAlerts/errors_20_days]
    WHERE ListHas($meanful_list, asTuple(canonized_vhost, day));
""")

# Query 3: per-error statistics over the filtered day/host counts.
#
# Rewrites (WITH TRUNCATE) home/mailfront/qa/logAlerts/ir_table with one row
# per (name, text) from home/mailfront/qa/logAlerts/unic_errors_day-host (the
# table query 2 writes):
#   m      - median of the counts c;
#   IR     - 75th percentile of c minus 25th percentile of c;
#   c_list - all counts c of the group as a list.
# Groups with 4 or fewer counted c values are dropped (HAVING COUNT(c)>4).
#
# The query is only built here; it is started by request3.run() at the bottom
# of the script.
# noinspection PyTypeChecker,SqlDialectInspection
request3 = client.query(ur"""
INSERT INTO [home/mailfront/qa/logAlerts/ir_table] WITH TRUNCATE
    SELECT name, text, MEDIAN(c) as m, (PERCENTILE(c, 0.75) - PERCENTILE(c, 0.25)) as IR, LIST(c) as c_list
    FROM [home/mailfront/qa/logAlerts/unic_errors_day-host]
    GROUP BY name, text
    HAVING COUNT(c)>4
""")


# Start the queries one after another, printing each request object right
# after its run() call. The order matters: query 2 reads the table query 1
# writes, and query 3 reads the table query 2 writes.
# NOTE(review): nothing here explicitly waits for a query to finish; if run()
# returns before completion, a later query may read a stale table - confirm
# against the YQL client documentation.
for request in (request1, request2, request3):
    request.run()
    print(request)

