from argparse import ArgumentParser
from collections import namedtuple
from mail.unistat.cpp.cython.meters import (
    AccessLogCount,
    AccessLogCountByFirstStatusDigit,
    AccessLogCountByPathAndFirstStatusDigit,
    AccessLogRequestTimeHist,
    AccessLogRequestTimeHistByPath,
    HttpClientHttpRequestTotalTimeHist,
    HttpClientHttpRequestCountByStatus,
    PaLogRequestTimeHist,
    SupervisorLogRestartMeters,
)
from mail.unistat.cpp.cython.count_by_condition import (
    Counter,
    Condition,
    Signal,
    Filter,
    update_signal_names,
)
from mail.message_body.unistat.cpp.run import (
    run as run_unistat,
)
import os
import yaml
import logging
import mail.unistat.cpp.cython.logs as logs
import re


# Configure the root logger at import time: WARNING threshold, timestamped format.
# NOTE(review): with this threshold the INFO record emitted by main() ('chdir ...')
# is not shown unless logging is reconfigured elsewhere - confirm this is intended.
logging.basicConfig(level=logging.WARNING, format='[%(asctime)s] [%(levelname)s]: %(message)s')
# Module-level logger used by main().
log = logging.getLogger(__name__)


# Bucket boundaries shared by every HTTP request-time histogram built in this file
# (access-log meters and http-client-log meters).
# Units are presumably milliseconds - TODO confirm against the meters implementation.
HTTP_HIST_BUCKETS = (0, 20, 100, 300, 1000)


# Signals keyed on substrings of the 'message' field; make_service_log_meters applies
# them to records filtered by where_name == 'sanitizer_strategy'.
# Names already carry the full 'mbody_..._dmmm' form and are used as-is
# (they are not passed through update_signal_names).
# NOTE(review): the constant name is misspelled ("SANITIEZER"); it is referenced by
# make_service_log_meters, so renaming must touch both places.
SANITIEZER_STRATEGY = [
    Signal(cond=Condition.Contains('fetched from real sanitizer'),                         name='mbody_sanitizer_strategy_real_dmmm'),
    Signal(cond=Condition.Contains('real sanitizer failed, falling back to tykva'),        name='mbody_sanitizer_strategy_tykva_as_fallback_dmmm'),
    Signal(cond=Condition.Contains('tykva only mode on'),                                  name='mbody_sanitizer_strategy_tykva_only_dmmm'),
    Signal(cond=Condition.Contains('real sanitizer failed, no tykva fallback enabled'),    name='mbody_sanitizer_strategy_real_failed_no_tykva_dmmm'),
]


# Signals classifying failure texts by substring. The names are bare fragments:
# make_service_log_meters expands them with update_signal_names into
# 'mbody_factextractor_error_{}' and 'mbody_storage_retry_reason_{}' signals.
# The last entry matches any string; presumably conditions are tried in list order so
# it acts as a catch-all for otherwise unclassified texts - TODO confirm against Counter.
# NOTE(review): the 'request_timeout' condition reuses the name 'status_read_timeout'.
# This looks like a copy-paste slip (expected 'request_timeout'?), but it may be a
# deliberate aggregation; confirm before renaming, since it changes emitted signal names.
RETRY_REASONS = [
    Signal(cond=Condition.Contains('Connection refused'),        name='connection_refused'),
    Signal(cond=Condition.Contains('Operation canceled'),        name='operation_canceled'),
    Signal(cond=Condition.Contains('content read timeout'),      name='content_read_timeout'),
    Signal(cond=Condition.Contains('headers read timeout'),      name='headers_read_timeout'),
    Signal(cond=Condition.Contains('status read line timeout'),  name='status_read_timeout'),
    Signal(cond=Condition.Contains('request_timeout'),           name='status_read_timeout'),
    Signal(cond=Condition.AnyStr(),                              name='unknown'),
]

# Mulcagate-specific signals followed by every RETRY_REASONS entry, so the list still
# ends with the AnyStr 'unknown' signal. make_service_log_meters expands the names
# into 'mbody_storage_fallback_reason_{}' signals.
FALLBACK_REASONS = [
    Signal(cond=Condition.Contains('Mulcagate service responded with 404'),  name='mulcagate_responded_404'),
    Signal(cond=Condition.Contains('mulcagate internal error'),              name='mulcagate_internal_error'),
] + RETRY_REASONS


# Signals for 'unknown mid' / 'unknown hid' texts; make_service_log_meters applies them
# to the 'exception.what' field with no extra filters. Names are already final
# (they are not passed through update_signal_names).
UNKNOWN_MID_HID = [
    Signal(cond=Condition.Contains('unknown mid'), name='mbody_error_unknown_mid_dmmm'),
    Signal(cond=Condition.Contains('unknown hid'), name='mbody_error_unknown_hid_dmmm'),
]


# Signals classifying storage failure texts by substring. make_service_log_meters
# expands the bare names into 'mbody_storage_error_{}' signals and applies them to
# 'exception.what' of records containing 'error in forming message'.
# Unlike RETRY_REASONS there is no AnyStr catch-all entry here.
STORAGE_ERRORS = [
    Signal(cond=Condition.Contains('can\'t get meta'),                           name='failed_get_meta'),
    Signal(cond=Condition.Contains('can\'t parse meta'),                         name='failed_parse_meta'),
    Signal(cond=Condition.Contains('Can\'t create message storage, bad stid'),   name='invalid_stid'),
    Signal(cond=Condition.Contains('getHeader error: corrupted data'),           name='invalid_header'),
    Signal(cond=Condition.Contains('range message storage getByRange'),          name='failed_range'),
]


# Filter on the 'level' key with an Equals('error') condition; used by the
# fact-extractor counter in make_service_log_meters.
ERROR_LEVEL_FILTER = Filter(key='level', cond=Condition.Equals('error'))


# Single match-anything signal; main() renames it via update_signal_names with
# template '{}' and suffix 'dmmm' and attaches it to the 'tskv_format' field of the
# phishing log. Presumably this counts every phishing-log record - TODO confirm.
PHISHING_SIGNAL = [
    Signal(cond=Condition.AnyStr(), name='phishing'),
]


def where_name_filter(where):
    """Build a Filter on the 'where_name' key with an Equals condition on *where*."""
    equals_where = Condition.Equals(where)
    return Filter(key='where_name', cond=equals_where)


def make_service_log_meters():
    """Build the Counter meters applied to the service (message_body) log.

    Returns a list of six Counters, in this order:
      1. sanitizer strategy, on 'message', filtered by where_name 'sanitizer_strategy';
      2. fact-extractor errors, on 'exception.what', filtered by error level and
         where_name 'fact_extractor';
      3. storage retry reasons, on 'message', for records whose message contains
         'retrying request to mulcagate due to error';
      4. unknown mid/hid errors, on 'exception.what', unfiltered;
      5. storage fallback reasons and
      6. storage errors, both on 'exception.what' of records containing
         'error in forming message'.
    """
    suffix = 'dmmm'

    def renamed(signals, template):
        # Expand bare signal names with the given template and the common suffix.
        return update_signal_names(signals, template, suffix)

    meters = [
        Counter('message', SANITIEZER_STRATEGY, [where_name_filter('sanitizer_strategy')]),
        Counter(
            'exception.what',
            renamed(RETRY_REASONS, 'mbody_factextractor_error_{}'),
            [ERROR_LEVEL_FILTER, where_name_filter('fact_extractor')],
        ),
        Counter(
            'message',
            renamed(RETRY_REASONS, 'mbody_storage_retry_reason_{}'),
            [Filter(key='message', cond=Condition.Contains('retrying request to mulcagate due to error'))],
        ),
        Counter('exception.what', UNKNOWN_MID_HID, []),
    ]

    # Both storage counters receive the very same filter list object.
    forming_error_filter = [Filter(key='exception.what', cond=Condition.Contains('error in forming message'))]
    storage_signal_sets = (
        (FALLBACK_REASONS, 'mbody_storage_fallback_reason_{}'),
        (STORAGE_ERRORS, 'mbody_storage_error_{}'),
    )
    for signals, template in storage_signal_sets:
        meters.append(Counter('exception.what', renamed(signals, template), forming_error_filter))

    return meters


def make_access_log_meters(name_prefix, endpoints):
    """Build the meters applied to the access log.

    The list starts with three overall meters (request count, count by first
    status digit, request-time histogram) and then, for every entry of
    *endpoints* in iteration order, adds a per-path count-by-first-status-digit
    meter followed by a per-path request-time histogram.

    name_prefix is handed to the count meters; the histograms are always
    constructed with 'access_log_request' and HTTP_HIST_BUCKETS.
    """
    hist_name = 'access_log_request'

    meters = [
        AccessLogCount(name_prefix),
        AccessLogCountByFirstStatusDigit(name_prefix),
        AccessLogRequestTimeHist(HTTP_HIST_BUCKETS, hist_name),
    ]

    for path in endpoints:
        meters.append(AccessLogCountByPathAndFirstStatusDigit(path, name_prefix))
        meters.append(AccessLogRequestTimeHistByPath(HTTP_HIST_BUCKETS, path, hist_name))

    return meters


def make_supervisor_meters():
    """Return the meters for the supervisord log: one SupervisorLogRestartMeters built with 'supervisor'."""
    restart_meters = SupervisorLogRestartMeters('supervisor')
    return [restart_meters]


def parse_args():
    """Parse the process command line (sys.argv) and return the argparse namespace.

    Options:
      -H/--host          listen host, default '::'
      -p/--port          listen port (int), default 8082
      -d/--dir           directory main() changes into, default '.'
      -s                 flag, help text 'read file logs from start'
      -l/--log           path for yplatform.log
      --supervisorlog    path for supervisord.log
      service_config     required positional: path to the service config
    """
    # (flags, keyword arguments) in the order the options are registered.
    argument_specs = (
        (('-H', '--host'), {'default': '::'}),
        (('-p', '--port'), {'default': 8082, 'type': int}),
        (('-d', '--dir'), {'default': '.'}),
        (('-s',), {'action': 'store_true', 'help': 'read file logs from start'}),
        (('-l', '--log'), {'help': 'path for yplatform.log'}),
        (('--supervisorlog',), {'help': 'path for supervisord.log'}),
        (('service_config',), {}),
    )

    parser = ArgumentParser()
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)
    return parser.parse_args()


def get_module_config(data, name):
    """Return the 'configuration' mapping of the module called *name*.

    Modules are read from data['config']['modules']['module']; the first entry
    whose ['system']['name'] equals *name* wins and later entries are not inspected.

    Raises:
        KeyError: if the expected keys are absent, or if no module is called
            *name* (previously a bare StopIteration with no message escaped
            from next(), which hid which module was missing).
    """
    for module in data['config']['modules']['module']:
        if module['system']['name'] == name:
            return module['configuration']
    raise KeyError('module %r not found in config' % (name,))


def get_logger_sink_path(data, name, sink_index=0):
    """Return the path of one sink of logger *name*, joined onto os.curdir.

    The path is read from data['config']['log'][name]['sinks'][sink_index]['path'].
    Because os.path.join is used, an absolute sink path is returned unchanged.
    """
    sinks = data['config']['log'][name]['sinks']
    sink_path = sinks[sink_index]['path']
    return os.path.join(os.curdir, sink_path)


def get_service_endpoints(tvm_guard_rules, exclude=frozenset(('/ping', '/pumpkin/enable', '/pumpkin/disable'))):
    """Collect the endpoint paths listed in the tvm_guard rules.

    Args:
        tvm_guard_rules: iterable of mappings, each with a 'paths' iterable.
        exclude: container of paths to drop. The default is an immutable
            frozenset, so the shared default cannot be modified by accident.

    Returns:
        A list of paths in rule order, then path order. Duplicates are kept.
    """
    return [
        path
        for rule in tvm_guard_rules
        for path in rule['paths']
        if path not in exclude
    ]


def make_service_config(data):
    """Extract the settings needed by this script from the parsed service config.

    Reads the 'msg_body', 'mulcagate_client' and 'sharpei_client' module
    configurations plus the 'http_client', 'access', 'message_body' and
    'phishing' logger sinks, and returns them as a ServiceConfig.

    Host/URL values have every 'http://' / 'https://' occurrence removed (the
    pattern is not anchored to the start) and surrounding whitespace stripped.
    profiler_log is taken verbatim from the 'msg_body' configuration, unlike
    the other log paths, which go through get_logger_sink_path.
    """
    scheme_pattern = re.compile(r"https?://")

    def without_scheme(url):
        # Drop the URL scheme, then trim surrounding whitespace.
        return scheme_pattern.sub('', url).strip()

    msg_body_cfg = get_module_config(data, 'msg_body')

    mulcagate_cfg = get_module_config(data, 'mulcagate_client')
    storage_host = without_scheme(mulcagate_cfg['hosts'])

    sharpei_cfg = get_module_config(data, 'sharpei_client')
    sharpei_host = without_scheme(sharpei_cfg['hosts'])

    factex_url = without_scheme(msg_body_cfg['factex']['url'])
    sanitizer_url = without_scheme(msg_body_cfg['sanitizer']['url'])

    return ServiceConfig(
        httpclient_log=get_logger_sink_path(data, 'http_client'),
        access_log=get_logger_sink_path(data, 'access'),
        service_log=get_logger_sink_path(data, 'message_body'),
        phishing_log=get_logger_sink_path(data, 'phishing'),
        profiler_log=msg_body_cfg['profiler_log'],
        storage_host=storage_host,
        sharpei_host=sharpei_host,
        factex_url=factex_url,
        sanitizer_url=sanitizer_url,
        service_endpoints=get_service_endpoints(msg_body_cfg['tvm_guard']['rules']),
    )


def make_http_client_log_meters(cfg):
    """Build the meters applied to the HTTP client log.

    For each of the four upstreams in *cfg* (storage, sharpei, factex,
    sanitizer) this creates a count-by-status meter and a total-time histogram
    over HTTP_HIST_BUCKETS. The returned list holds the four count meters
    first, then the four histograms, each group in the upstream order above.
    """
    # (host or url, count meter name, histogram name) per upstream.
    upstreams = (
        (cfg.storage_host, 'count_by_status_storage', 'time_hist_storage'),
        (cfg.sharpei_host, 'count_by_status_sharpei', 'time_hist_sharpei'),
        (cfg.factex_url, 'count_by_status_factex', 'time_hist_factex'),
        (cfg.sanitizer_url, 'count_by_status_sanitizer', 'time_hist_sanitizer'),
    )

    count_meters = [
        HttpClientHttpRequestCountByStatus(target, count_name)
        for target, count_name, _ in upstreams
    ]
    time_meters = [
        HttpClientHttpRequestTotalTimeHist(HTTP_HIST_BUCKETS, target, hist_name)
        for target, _, hist_name in upstreams
    ]
    return count_meters + time_meters


def make_pa_log_meters():
    """Return the meters for the profiler (pa) log: one request-time histogram built with 'postgresql' and 'pa'."""
    # Finer-grained and wider than HTTP_HIST_BUCKETS.
    time_buckets = (0, 10, 20, 30, 40, 50, 100, 500, 1000, 5000)
    postgresql_hist = PaLogRequestTimeHist(time_buckets, 'postgresql', 'pa')
    return [postgresql_hist]


# Immutable bundle of the settings make_service_config extracts from the service config:
# five log paths, four upstream hosts/urls (scheme stripped) and the endpoint list.
# Field order is part of the tuple's interface and must not change.
ServiceConfig = namedtuple(
    'ServiceConfig',
    'httpclient_log access_log service_log profiler_log phishing_log '
    'storage_host factex_url sanitizer_url sharpei_host '
    'service_endpoints'
)


def main():
    """Script entry point: read the service config, set up log readers, start unistat.

    Steps, in order:
      1. parse the command line and chdir into --dir;
      2. load the YAML service config (opened after the chdir, so a relative
         service_config path resolves against --dir);
      3. build one log reader per log file, each with its meters;
      4. optionally add the supervisord log reader when --supervisorlog is given;
      5. hand everything to run_unistat together with host, port and --log.
    """
    args = parse_args()
    workdir = args.dir
    log.info('chdir %s' % os.path.abspath(workdir))
    os.chdir(workdir)

    with open(args.service_config) as config_file:
        # NOTE(review): yaml.load with FullLoader can construct more than plain data;
        # if the config holds only plain data, yaml.safe_load is the conservative choice.
        raw_config = yaml.load(config_file, Loader=yaml.FullLoader)
        config = make_service_config(raw_config)

    # NOTE(review): the -s flag (help: 'read file logs from start') is forwarded
    # unchanged under the name fast_forward - confirm the polarity against the logs API.
    fast_forward = args.s

    phishing_meters = [Counter('tskv_format', update_signal_names(PHISHING_SIGNAL, '{}', 'dmmm'), [])]
    phishing_log = logs.TskvLog([], phishing_meters, fast_forward, config.phishing_log)

    service_meters = make_service_log_meters()
    service_log = logs.TskvLog([], service_meters, fast_forward, config.service_log)

    access_meters = make_access_log_meters('access_log', config.service_endpoints)
    access_log = logs.AccessTskv([], access_meters, fast_forward, config.access_log)

    http_client_meters = make_http_client_log_meters(config)
    http_client_log = logs.HttpClientLog([], http_client_meters, fast_forward, config.httpclient_log)

    pa_log = logs.PaLog([], make_pa_log_meters(), fast_forward, config.profiler_log)

    logs_list = [pa_log, service_log, phishing_log, access_log, http_client_log]

    supervisor_path = args.supervisorlog
    if supervisor_path is not None:
        logs_list.append(
            logs.SupervisorLog([], make_supervisor_meters(), fast_forward, supervisor_path)
        )

    run_unistat(args.host, args.port, logs_list, args.log, logLevel='info')


if __name__ == '__main__':
    main()
