# -*- coding: utf-8 -*-

import argparse
import gzip
import io
import logging
import os
import pickle
import re
import shutil
import sys
import traceback
import urllib.parse
import warnings
from distutils.dir_util import mkpath
from pathlib import Path
from time import sleep

import paramiko
sys.path.append(str(Path(__file__).parent.parent.parent))
scriptPath = os.path.dirname(os.path.abspath(__file__))
sys.path.append(scriptPath)
from utils.send_message import send_message
from set_secret import set_secret

import pysftp

from duffman_parse import import_duffman_files, duffman_log_parse
from fatalerrorsParse import parse_fatalerrors, find_equal_fatalerrors
from handlerParse import parse_handlers, find_equal_handler
from jserrorParse import parse_jserrors, find_equal_jserror
from nseventsParse import parse_nsevents, find_equal_nsevents
from promiseParse import parse_promise, find_equal_promise

# Suppress every UserWarning process-wide (applies to all modules, not only
# this one).
warnings.filterwarnings("ignore", category=UserWarning)
# Log record layout: timestamp, level, logger name, message.
FORMAT = '%(asctime)s:%(levelname)s:%(name)s:%(message)s'
# Configures the root logger at import time with INFO verbosity.
logging.basicConfig(level=logging.INFO, format=FORMAT)
# Module-wide logger used by the functions in this file.
log = logging.getLogger("logParser")

# Known nginx access-log file names. The literal "access.log.*" is not a real
# file name: import_test_logs() compares against it to decide whether to
# download every matching access log instead of a single file.
files_client = [
    "access.log",
    "access.log.0",
    "access.log.1",
    "access.log.2",
    "access.log.3",
    "access.log.*"
]


def import_test_logs(host, file_name):
    """Download nginx access logs from a test stand and merge them locally.

    The nginx host of the stand is derived from the stand id found in
    ``host``. Unless ``file_name`` is the wildcard ``"access.log.*"``, the
    remote log is first reduced over SSH to the lines containing
    ``errorType`` (stored remotely as ``<file_name>.errors``) and only that
    reduced file is downloaded. Everything downloaded is then concatenated
    into ``<scriptPath>/logs/logs``; gzip-compressed files are decompressed
    while being appended. The local ``logs`` directory is wiped and recreated
    on every call.

    Args:
        host: Host name or URL containing a stand id such as ``ub1``,
            ``pr-123`` or ``liza-rc-12-3``.
        file_name: Remote access-log file name, or ``"access.log.*"`` to
            download every access log found in the remote directory.

    Raises:
        ValueError: If no stand id can be found in ``host``.
        KeyError: If the ``SSH_KEY`` or ``SSH_PWD`` environment variable is
            not set.
    """
    stand = re.search("(ub[0-9]+|pr-[0-9]+|liza-rc-[0-9][0-9]+-[0-9]+)", host)
    if stand is None:
        # Fail with a readable message instead of AttributeError on None.
        raise ValueError('cannot derive a test stand id from host %r' % (host,))
    host = 'nginx-1.nginx.%s.verstka-qa.mail.stable.qloud-d.yandex.net' % stand.group(1)
    my_key = pysftp.RSAKey.from_private_key(io.StringIO(os.environ["SSH_KEY"]), os.environ["SSH_PWD"])
    cnopts1 = pysftp.CnOpts()
    # NOTE(review): this disables host key verification for the SFTP session.
    cnopts1.hostkeys = None
    remote_dir = '/ephemeral/var/log/nginx/mail/'
    path_to_test_logs = scriptPath + "/logs"

    # rmtree() is told to ignore errors, so keep retrying until the directory
    # has really disappeared before recreating it empty.
    while os.path.exists(path_to_test_logs):
        print('trying to delete directory')
        shutil.rmtree(path_to_test_logs, ignore_errors=True)
        sleep(1)
    print('directory removed')
    os.makedirs(path_to_test_logs)
    print('directory recreated')

    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(host, username='robot-pinkie-pie', pkey=my_key)
        log.info('started error prepare')
        if file_name != "access.log.*":
            # Pre-filter on the remote side so only error lines are transferred.
            _, ssh_stdout, _ = ssh.exec_command(f" cat /ephemeral/var/log/nginx/mail/{file_name} | grep 'errorType' > /ephemeral/var/log/nginx/mail/{file_name}.errors")
            # Block until the remote pipeline has finished writing the file.
            ssh_stdout.channel.recv_exit_status()
            log.info('exit received')
            log.info('ended error prepare')
            file_name = f'{file_name}.errors'
    finally:
        # Release the SSH session even when connect/exec_command fails.
        ssh.close()

    with pysftp.Connection(host, username='robot-pinkie-pie', private_key=my_key, cnopts=cnopts1) as sftp:
        # Keep only the remote entries that look like access logs.
        all_access_files = [
            remote_name for remote_name in sftp.listdir(remote_dir)
            if re.match('access.log[.0-3]*(gz)?', remote_name)
        ]

        if file_name == "access.log.*":
            dir_items = all_access_files
        else:
            dir_items = [file_name]
        for dir_item in dir_items:
            # Resolve the requested name to what is actually on the server
            # (the file may carry a gz suffix).
            for access_file in all_access_files:
                found = re.match(dir_item + '(.gz)?', access_file)
                if found:
                    dir_item = found.group(0)
                    break
            print(dir_item)
            sftp.get(
                remote_dir + str(dir_item),
                localpath=path_to_test_logs + '/logs_' + dir_item,
                preserve_mtime=True
            )
            print('ready ' + dir_item)

    # List the downloads BEFORE creating the merged file so that the merged
    # file is never appended to itself.
    files_in_dir = os.listdir(path_to_test_logs)
    with open(path_to_test_logs + '/logs', "a") as merged:
        for file_in_dir in files_in_dir:
            local_file = os.path.join(path_to_test_logs, file_in_dir)
            if re.match('.*gz', file_in_dir):
                print('gz file ' + file_in_dir)
                part = gzip.open(local_file, 'rt', errors='ignore')
            else:
                print('just file ' + file_in_dir)
                part = open(local_file, 'r', errors='ignore')
            with part:
                shutil.copyfileobj(part, merged)


def import_prod_logs():
    """Download the remote ``/ephemeral/LogParse/`` directory content.

    Files are stored in ``<scriptPath>/logs_prod`` (created when missing).
    Entries whose name starts with letters/dashes followed by ``.py`` and
    entries with ``cron`` in their name are skipped.

    Raises:
        KeyError: If the ``SSH_KEY`` or ``SSH_PWD`` environment variable is
            not set.
    """
    remote_dir = '/ephemeral/LogParse/'
    my_key = pysftp.RSAKey.from_private_key(io.StringIO(os.environ["SSH_KEY"]), os.environ["SSH_PWD"])
    cnopts1 = pysftp.CnOpts()
    # NOTE(review): this disables host key verification for the SFTP session.
    cnopts1.hostkeys = None
    local_dir = scriptPath + "/logs_prod"
    # os.makedirs() checks the file system on every call, whereas the
    # distutils mkpath() helper remembers paths it created earlier in the
    # process and would not recreate a directory that was deleted meanwhile.
    os.makedirs(local_dir, exist_ok=True)
    with pysftp.Connection('test-1.test.mail-scripts.mailfront.mail.stable.qloud-d.yandex.net',
                           username='robot-pinkie-pie', private_key=my_key,
                           cnopts=cnopts1) as sftp:
        for item in sftp.listdir_attr(remote_dir):
            remote_name = item.filename
            if re.match(r"[a-zA-Z\-]*\.py", remote_name) or re.match(".*cron.*", remote_name):
                continue
            print(remote_name)
            # The remote side uses POSIX paths regardless of the local OS, so
            # the remote path is concatenated by hand (remote_dir already ends
            # with a slash); only the local path goes through os.path.join.
            sftp.get(remote_dir + remote_name, os.path.join(local_dir, remote_name))


def pars_client_file():
    """Parse client error reports out of the merged access log.

    Reads ``<scriptPath>/logs/logs`` line by line. For every line containing
    ``errorType`` the text between the first ``[`` and ``]`` is taken as the
    timestamp, and the query string (after the first ``?``, up to ``HTTP``)
    is decoded. Known parameters are copied into a flat record and the whole
    decoded query is kept under ``request_dict``. Only records whose ``part``
    is empty or ``"0"`` are retained.

    The resulting list is also pickled as ``logs_pickle`` via ``save_obj``.

    Returns:
        list[dict]: One record per retained log line.

    Raises:
        ValueError: If a line containing ``errorType`` has no ``[``, ``]`` or
            ``?``.
    """
    columns = (
        "errorType",
        "login",
        "msg",
        "text",
        "name",
        "reason",
        "selector",
        "loc",
        "request_dict",
        "part",
    )
    logs_dictionary = []

    with open(scriptPath + '/logs/logs', 'r') as merged_log:
        for line in merged_log:
            if "errorType" not in line:
                continue
            record = {
                'timestamp': '',
                'errorType': '',
                'login': '',
                'msg': '',
                'text': '',
                'name': '',
                'reason': '',
                'selector': '',
                'request_dict': '',
                'loc': '',
                'part': ''
            }
            record["timestamp"] = line[line.index('[') + 1:line.index(']')]

            query = line[line.index('?') + 1:]
            try:
                # Cut the trailing protocol marker and the space before it.
                query = query[:query.index("HTTP") - 1]
            except ValueError:
                # Best effort: keep parsing the untrimmed remainder.
                print("no HTTP found")
                print(query)

            # parse_qs() yields a list per key; only the first value is used.
            params = {
                key: values[0]
                for key, values in urllib.parse.parse_qs(query).items()
            }
            for key, value in params.items():
                if key in columns:
                    record[key] = value

            record["request_dict"] = params
            if record["part"] in ('', "0"):
                logs_dictionary.append(record)

    save_obj(logs_dictionary, "logs_pickle")
    return logs_dictionary


def pars_duffman_file():
    """Collect and count distinct error records from the duffman access log.

    Reads ``<scriptPath>/duffman-access.tskv``. Lines mentioning ``ERROR``,
    ``EXCEPTION``, ``FAILURE`` or ``REJECTED`` are split on whitespace into
    ``key=value`` tokens; the ``reason``, ``status``, ``message`` and
    ``handle`` values form a record. A record is kept only when its
    ``reason`` contains one of those keywords before any ``=``. Identical
    records are merged and counted in ``count``.

    The result, ordered by ``sort_by_count``, is pickled as
    ``logs_duffman_pickle`` via ``save_obj`` and printed line by line.

    Returns:
        list[dict]: Distinct records, each with an added ``count`` key.
    """
    columns = ("reason", "status", "message", "handle")
    error_line_regexp = re.compile(r'.*(ERROR|EXCEPTION|FAILURE|REJECTED).*')
    error_reason_regexp = re.compile(r'[^=]*(ERROR|EXCEPTION|FAILURE|REJECTED)')

    # Maps the tuple of column values to the first record seen with them;
    # dict insertion order keeps first-seen order for equal counts.
    distinct = {}
    with open(scriptPath + '/duffman-access.tskv', 'r') as duffman_log:
        for line in duffman_log:
            if not error_line_regexp.match(line):
                continue
            record = dict.fromkeys(columns, '')
            # Split on runs of whitespace. The previous pattern '\s*' can
            # match the empty string, which on Python 3.7+ splits the line
            # between every character and hides every key=value token.
            for token in line.split():
                # Key is everything before the first '=', value everything
                # after it, so values that contain '=' are preserved.
                key, separator, value = token.partition('=')
                if separator and key in columns:
                    record[key] = value
            if not error_reason_regexp.match(record["reason"]):
                continue
            signature = tuple(record[column] for column in columns)
            if signature in distinct:
                distinct[signature]["count"] += 1
            else:
                record["count"] = 1
                distinct[signature] = record

    logs_dictionary = sort_by_count(list(distinct.values()))
    save_obj(logs_dictionary, "logs_duffman_pickle")
    for line in logs_dictionary:
        print(line)
    return logs_dictionary


def save_obj(obj, name):
    """Pickle ``obj`` into the file ``name`` inside the script directory.

    Args:
        obj: Any picklable object.
        name: File name, appended to ``scriptPath`` with a ``/`` separator.
    """
    target = '/'.join((scriptPath, name))
    with open(target, 'wb') as sink:
        pickle.dump(obj, sink, protocol=pickle.HIGHEST_PROTOCOL)


def load_obj(name):
    """Return the object unpickled from file ``name`` in the script directory.

    Args:
        name: File name, appended to ``scriptPath`` with a ``/`` separator.
    """
    source = '/'.join((scriptPath, name))
    # NOTE(review): pickle.load executes arbitrary code from the file; this is
    # only safe for files this tool wrote itself.
    with open(source, 'rb') as stream:
        restored = pickle.load(stream)
    return restored


def sort_by_count(logs_to_sort):
    """Sort records in place by their ``"count"`` value, highest first.

    The sort is stable: records with equal counts keep their original
    relative order. Uses the built-in sort instead of a hand-written
    quadratic bubble sort.

    Args:
        logs_to_sort: List of mappings, each providing a ``"count"`` key.

    Returns:
        The same list object that was passed in, now sorted.
    """
    logs_to_sort.sort(key=lambda entry: entry["count"], reverse=True)
    return logs_to_sort


def run_test_log_grep(host, file_name):
    """Download, parse and summarise client error logs for one log file.

    Runs ``import_test_logs``, then the parsing stage, then the
    ``find_equal_*`` collectors. An ``IOError`` in any stage is logged with
    its traceback; a failed parsing stage does not prevent the collectors
    from running, while a failed download skips both.

    Args:
        host: Host passed through to ``import_test_logs``.
        file_name: Log file name passed through to ``import_test_logs``.

    Returns:
        list: Everything the collectors returned, concatenated in call order
        (possibly partial or empty after an ``IOError``).
    """
    report = []
    try:
        import_test_logs(host, file_name)

        # Parsing stage (pars_duffman_file() is currently disabled here).
        try:
            pars_client_file()
            for parse_stage in (
                parse_handlers,
                parse_fatalerrors,
                parse_nsevents,
                parse_jserrors,
                parse_promise,
            ):
                parse_stage("logs_pickle")
        except IOError:
            log.error(traceback.format_exc())

        # Collection stage.
        try:
            for collect in (
                find_equal_handler,
                find_equal_jserror,
                find_equal_nsevents,
                find_equal_fatalerrors,
                find_equal_promise,
            ):
                report.extend(collect())
        except IOError:
            log.error(traceback.format_exc())
    except IOError:
        log.error(traceback.format_exc())
    return report


def run_duffman_log_grep(host_to_grep, files):
    """Download duffman logs for a host and return their parsed summary.

    An ``IOError`` raised by either the download or the parsing step is
    logged with its traceback and results in an empty list.

    Args:
        host_to_grep: Host passed through to ``import_duffman_files``.
        files: Second argument passed through to ``import_duffman_files``.

    Returns:
        list: The items returned by ``duffman_log_parse``, or an empty list
        after an ``IOError``.
    """
    collected = []
    try:
        import_duffman_files(host_to_grep, files)
        try:
            collected.extend(duffman_log_parse())
        except IOError:
            log.error(traceback.format_exc())
    except IOError:
        log.error(traceback.format_exc())
    return collected


def grep_2d_logs_for_host(host_to_grep):
    """Build the HTML report fragments for today's and yesterday's logs.

    Secrets are set, the ``logs_test`` directory is ensured and the
    production logs are downloaded on every call. For the production hosts
    ``mail.yandex.ru`` / ``https://mail.yandex.ru`` nothing else is done and
    an empty list is returned.

    Args:
        host_to_grep: Host whose logs should be inspected.

    Returns:
        list: HTML fragments and the items returned by the grep helpers, in
        report order.
    """
    set_secret.set_secrets()
    mkpath(scriptPath + '/logs_test')
    import_prod_logs()

    report = []
    if host_to_grep in ('mail.yandex.ru', 'https://mail.yandex.ru'):
        return report

    # (section heading, access log file, duffman heading, duffman file set)
    sections = (
        (
            '<h2>Errors for host «%s»</h2> <h2>Today</h2>' % host_to_grep,
            'access.log',
            '<br><h3>Duffman logs</h3>',
            'duffman',
        ),
        (
            '<br><br><h2>Yesterday</h2>',
            'access.log.0',
            '<br><h3>Duffman logs logs:</h3>',
            'duffman0',
        ),
    )
    for heading, access_file, duffman_heading, duffman_files in sections:
        report.append(heading)
        report.extend(run_test_log_grep(host_to_grep, access_file))
        report.append(duffman_heading)
        report.extend(run_duffman_log_grep(host_to_grep, duffman_files))
    report.append('<br>' * 4)
    return report


if __name__ == '__main__':
    # Script entry point: build the two-day report for a fixed test stand and
    # e-mail it. An earlier argparse-based CLI (-H/--host, -f/--file with
    # interactive prompts) was disabled here and has been dropped; see version
    # control history if it needs to be restored.
    set_secret.set_secrets()
    report_lines = grep_2d_logs_for_host('ub1-qa.mail.yandex.ru')
    report_body = '\n'.join(report_lines)
    send_message(['a-zoshchuk@yandex-team.ru'], 'Греп', report_body)
