use hahn;

$script = @@
import re

def substring_between(main_str, str_start, str_end):
    """Return the text located between two delimiters inside ``main_str``.

    Both delimiters are treated as literal text: they are passed through
    ``re.escape`` so that characters such as ``(``, ``[`` or ``.`` in a
    delimiter cannot change the meaning of the pattern. The capture is
    non-greedy, so the shortest text between ``str_start`` and the next
    ``str_end`` is returned. Because ``.`` is used without ``re.DOTALL``,
    the captured text never spans a newline.

    Args:
        main_str: text to search; may be ``None`` (the UDF signature declares
            this argument as optional).
        str_start: delimiter that precedes the wanted text.
        str_end: delimiter that follows the wanted text.

    Returns:
        The captured substring, or ``''`` when ``main_str`` is ``None`` or
        the delimiters are not found.
    """
    # A NULL value arrives as None; searching it would raise TypeError.
    if main_str is None:
        return ''
    pattern = re.escape(str(str_start)) + '(.*?)' + re.escape(str(str_end))
    match = re.search(pattern, main_str)
    return match.group(1) if match else ''
@@;

-- Bind the Python function defined in $script as a YQL callable:
-- (optional string, string, string) -> string.
$substring_between = Python::substring_between("(String?, String, String)->String", $script);

-- Count of FAIL_YQL log messages that contain DEADLINE_EXCEEDED.
-- Returns a single row; the per-message listing is produced by the
-- "yql queries with DEADLINE_EXCEEDED" query in this file.
SELECT
    count(*) AS deadline_exceeded_count
FROM hahn.[home/logfeller/logs/qloud-runtime-log/1d/2018-02-14]
WHERE qloud_project = 'qloud'
      AND qloud_application = 'logs'
      AND qloud_environment = 'kikimr'
      AND level = 10000
      AND message LIKE 'FAIL_YQL%'
      AND message LIKE '%DEADLINE_EXCEEDED%';

-- List every FAIL_YQL message that contains DEADLINE_EXCEEDED.
-- table_name is the text between 'tables=' and ',kikimrQueries';
-- yql_query is the text between 'kikimrQueries=' and '; result count ='.
SELECT
    timestamp,
    $substring_between(message, 'tables=', ',kikimrQueries') AS table_name,
    $substring_between(message, 'kikimrQueries=', '; result count =') AS yql_query
FROM hahn.[home/logfeller/logs/qloud-runtime-log/1d/2018-02-14]
WHERE
    qloud_project = 'qloud'
    AND qloud_application = 'logs'
    AND qloud_environment = 'kikimr'
    AND level = 10000
    AND message LIKE 'FAIL_YQL%'
    AND message LIKE '%DEADLINE_EXCEEDED%'
ORDER BY
    table_name,
    timestamp;

-- Possible error message substrings (written as LIKE patterns):
-- '%DEADLINE_EXCEEDED%'
-- '%datashards are overloaded%'
-- '%Type annotation%'
-- '%Table not found%'
-- '%Unexpected token absence%'
-- '%Unable to parse lucene query%';

-- Per-table error statistics.
-- The inner query turns every OK_YQL / FAIL_YQL message into a row of 0/1
-- flags (one flag per error substring); the outer query sums the flags per
-- table_name and sorts tables by the number of non-OK messages.
SELECT
    table_name,
    count(*) AS summary,
    sum(success) AS success,
    sum(fail) AS fail,
    sum(timeout) AS timeout,
    sum(overloaded) AS overloaded,
    sum(type_annotation) AS type_annotation,
    sum(table_not_found) AS table_not_found,
    sum(unexpected_token) AS unexpected_token,
    sum(lucene_parse) AS lucene_parse
FROM (
    SELECT
        $substring_between(message, 'tables=', ',kikimrQueries') AS table_name,
        -- success and fail are complementary: a message is either OK_YQL or not
        CASE WHEN String::StartsWith(message, 'OK_YQL') THEN 1 ELSE 0 END AS success,
        CASE WHEN String::StartsWith(message, 'OK_YQL') THEN 0 ELSE 1 END AS fail,
        CASE WHEN String::Contains(message, 'DEADLINE_EXCEEDED') THEN 1 ELSE 0 END AS timeout,
        CASE WHEN String::Contains(message, 'datashards are overloaded') THEN 1 ELSE 0 END AS overloaded,
        CASE WHEN String::Contains(message, 'Type annotation') THEN 1 ELSE 0 END AS type_annotation,
        CASE WHEN String::Contains(message, 'Table not found') THEN 1 ELSE 0 END AS table_not_found,
        CASE WHEN String::Contains(message, 'Unexpected token') THEN 1 ELSE 0 END AS unexpected_token,
        CASE WHEN String::Contains(message, 'Unable to parse lucene query') THEN 1 ELSE 0 END AS lucene_parse
    FROM hahn.[home/logfeller/logs/qloud-runtime-log/1d/2018-02-14]
    WHERE
        qloud_project = 'qloud'
        AND qloud_application = 'logs'
        AND qloud_environment = 'kikimr'
        AND level = 10000
        AND (message LIKE 'FAIL_YQL%' OR message LIKE 'OK_YQL%')
        AND loggerName LIKE 'ru.yandex.qloud.kikimr.services.LogProxyService'
)
GROUP BY table_name
ORDER BY fail DESC;

-- Kikimr timeout and overloaded statistics.
-- For every table_name that has at least one DEADLINE_EXCEEDED or
-- "datashards are overloaded" message, report the absolute counts and their
-- share of all OK_YQL / FAIL_YQL messages for that table.
-- Percentages are computed in floating point (100.0 * ...): integer division
-- would truncate any share below 1% to 0 although only tables with failures
-- are kept.
-- NOTE(review): this query reads the 2017-10-31 table while the other queries
-- in this file read 2018-02-14 -- confirm the date is intended.
SELECT
    table_name,
    summary,
    (timeout + overloaded) AS fail,
    (100.0 * (timeout + overloaded) / summary) AS fail_percent,
    timeout,
    (100.0 * timeout / summary) AS timeout_percent,
    overloaded,
    (100.0 * overloaded / summary) AS overloaded_percent
FROM (
    SELECT
        table_name,
        count(*) AS summary,
        sum(timeout) AS timeout,
        sum(overloaded) AS overloaded
    FROM (
        SELECT
            $substring_between(message, 'tables=', ',kikimrQueries') AS table_name,
            CASE WHEN String::Contains(message, 'DEADLINE_EXCEEDED') THEN 1 ELSE 0 END AS timeout,
            CASE WHEN String::Contains(message, 'datashards are overloaded') THEN 1 ELSE 0 END AS overloaded
        FROM hahn.[home/logfeller/logs/qloud-runtime-log/1d/2017-10-31]
        WHERE qloud_project = 'qloud'
              AND qloud_application = 'logs'
              AND qloud_environment = 'kikimr'
              AND level = 10000
              AND (message LIKE 'FAIL_YQL%' OR message LIKE 'OK_YQL%')
              AND loggerName LIKE 'ru.yandex.qloud.kikimr.services.LogProxyService'
    )
    GROUP BY table_name
)
WHERE (timeout + overloaded) > 0
ORDER BY fail_percent DESC, summary DESC;