#!/usr/bin/env python
#! -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import json
import sys
import os
import pdb
import dill
import pickle
import arrow
import datetime
import codecs
import argparse
import requests
import subprocess
import shlex
import re
from collections import defaultdict, Counter
import smtplib
import urllib

# SMTP server that send_email() connects to.
SERVER = "localhost"
# Logins that main() passes to both send_email() and send_sms();
# send_email() appends "@yandex-team.ru" to each of them.
RECIPIENTS = ['riddle', 'pecheny', 'qkrorlqr', 'manokk']
# Subject header of every e-mail built by send_email().
SUBJECT = "Atom distribution snapshots rollout monitoring"
# Envelope sender and "From:" header of those e-mails.
FROM = "atom-rollout@avatar.search.yandex.net"


def delete_first(string):
    """Drop the first space-delimited token of `string` and trim the rest.

    Everything up to and including the first ' ' is discarded; leading and
    trailing whitespace is then removed from what remains.  A string that
    contains no space yields ''.
    """
    # partition() keeps the text after the first space exactly as it was.
    _, _, remainder = string.partition(' ')
    return re.sub(r'^\s+|\s+$', r'', remainder)


def get_files_by_prefix(prefix, directory=None):
    """List the entries of `directory` whose names start with `prefix`.

    When `directory` is empty or None the current working directory is
    listed.  Names are returned in the order os.listdir() produced them.
    """
    target = directory or os.getcwd()
    matching = []
    for entry in os.listdir(target):
        if entry.startswith(prefix):
            matching.append(entry)
    return matching


def count_errors_in_file(filename):
    """Count the lines of the UTF-8 file `filename` that contain 'error'.

    The match is case-sensitive and a line is counted once no matter how
    many times the word occurs in it.
    """
    with codecs.open(filename, 'r', 'utf8') as log:
        return sum(1 for row in log if 'error' in row)


def tots(x):
    """Return the whole seconds between 1970-01-01 00:00:00 and naive `x`.

    The fractional part is truncated by int().
    """
    epoch = datetime.datetime(1970, 1, 1)
    elapsed = x - epoch
    return int(elapsed.total_seconds())


def tryint(x):
    """Convert `x` to int, returning -1 when the conversion is impossible.

    Besides malformed strings (ValueError) this also covers values int()
    rejects with TypeError (e.g. None) or OverflowError (e.g. infinity), so
    the helper never raises for bad input.
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        return -1


def makereport_errors(errors, hosts):
    """Render the "Details" section of a report.

    `errors` maps a message to the collection of host names it was seen on;
    `hosts` is the full host list and only its length is used.  One indented
    line is produced per message, after a "Details:" header.
    """
    rows = []
    for message in errors:
        affected = errors[message]
        rows.append('    `{}` on {}/{} hosts: {}'.format(
            message, len(affected), len(hosts), ', '.join(affected)))
    return '\nDetails:\n\n' + '\n'.join(rows)


def makereport(errors, errorscounter,
               exceptions, exceptionscounter, hosts):
    """Build the report body: an errors section, a blank line, an exceptions
    section.

    A section is rendered with makereport_errors() only when its counter is
    positive; otherwise it is the empty string.
    """
    errors_section = ''
    if errorscounter > 0:
        errors_section = makereport_errors(errors, hosts)

    exceptions_section = ''
    if exceptionscounter > 0:
        exceptions_section = makereport_errors(exceptions, hosts)

    return '{}\n\n{}'.format(errors_section, exceptions_section)


def send_sms(recipients, message):
    """Send `message` as an SMS to `recipients` through the golem HTTP API.

    `recipients` is a list of logins, joined with commas into the `resps`
    query parameter; `message` is percent-encoded into `msg`.  If the
    response body contains "slow down", a warning e-mail is sent instead of
    retrying.

    Raises whatever requests.get() raises, including a timeout error when
    the API does not answer within 30 seconds.
    """
    # urllib.quote() (Python 2) must be given UTF-8 *bytes*: handing it a
    # unicode string with non-ASCII characters raises KeyError.  The former
    # encode('utf8').decode('utf8') round trip was a no-op.
    if not isinstance(message, bytes):
        message = message.encode('utf8')
    url = ('https://golem.yandex-team.ru/api/sms/send.sbml?resps={}&msg={}'
           .format(','.join(recipients), urllib.quote(message)))
    # Without a timeout an unresponsive API would block the whole run.
    r = requests.get(url, timeout=30)
    # r.content is bytes, so compare against a bytes literal to avoid an
    # implicit ASCII decode of the response body.
    if b'slow down' in r.content:
        send_email(['pecheny'], 'you are ddosing golem')


def generate_sms(exceptionscounter, exceptions, hosts):
    """Compose the one-line SMS summary.

    `exceptions` maps a message to the hosts it was seen on; the text
    reports `exceptionscounter`, the number of distinct hosts across all
    messages, and the total number of `hosts`.
    """
    affected_hosts = set()
    for host_group in exceptions.values():
        affected_hosts.update(host_group)
    return '{} exceptions on {}/{} hosts'.format(
        exceptionscounter, len(affected_hosts), len(hosts))


def send_email(recipients, message):
    """E-mail `message` to `recipients` through the SMTP server SERVER.

    `recipients` is a list of logins; "@yandex-team.ru" is appended to each.
    The mail is sent from FROM with the fixed subject SUBJECT and is encoded
    as UTF-8.  Errors from smtplib propagate to the caller.
    """
    recipients = ['{}@yandex-team.ru'.format(rec) for rec in recipients]
    body = """\
From: {}
To: {}
Subject: {}

{}
""".format(FROM, ", ".join(recipients), SUBJECT, message).encode('utf8')
    server = smtplib.SMTP(SERVER)
    try:
        server.sendmail(FROM, recipients, body)
    finally:
        # Close the SMTP session even when sendmail() raises, so a failed
        # delivery does not leak the connection.
        server.quit()


def _load_state(path, load, default):
    """Return the object unpickled from `path`, or `default` if it is missing.

    `load` is the unpickling callable (pickle.load or dill.load).
    """
    if not os.path.isfile(path):
        return default
    # NOTE(review): unpickling can execute arbitrary code, so these state
    # files must only ever be produced by this script.
    with open(path, 'rb') as f:
        return load(f)


def _save_state(path, dump, obj):
    """Pickle `obj` into `path` with `dump`, closing the file afterwards."""
    with open(path, 'wb') as f:
        dump(obj, f)


def _is_recent(timestamp, window=600):
    """Tell whether Unix `timestamp` is less than `window` seconds old.

    Timestamps that datetime.fromtimestamp() rejects count as not recent.
    """
    try:
        moment = datetime.datetime.fromtimestamp(int(timestamp))
    except (ValueError, OverflowError, OSError):
        return False
    return (datetime.datetime.now() - moment).total_seconds() < window


def _scan_logfile(filename, host, since, errors, lasterrors,
                  exceptions, lastexceptions, lastupdated):
    """Fold the lines of one log that are newer than `since` into the
    aggregates.

    A line is considered only when its first tab-separated field parses as
    an integer greater than `since`.  For such lines:

    * 'TSystemError' lines add `host` to `errors` (and to `lasterrors` when
      the line is under 10 minutes old), keyed by delete_first(line);
    * 'exception' lines not mentioning 'rtmr' do the same for `exceptions`
      and `lastexceptions`;
    * 'updated, version:' lines whose last token contains ten digits update
      the latest version, and add `host` to `lastupdated[version]` when that
      version is under 10 minutes old.

    Returns a tuple (newest, latest, n_errors, n_exceptions): the highest
    timestamp seen (never below `since`), the datetime of the last version
    found (the epoch when there was none) and the numbers of matching
    error and exception lines.
    """
    newest = since
    latest = datetime.datetime.fromtimestamp(0)
    n_errors = 0
    n_exceptions = 0
    with codecs.open(filename, 'r', 'utf8') as f:
        for line in f:
            ts = tryint(line.split('\t')[0])
            if ts <= since:
                continue
            # Track the high-water mark separately from `ts`, so that it is
            # always an int and survives unparseable trailing lines.
            newest = max(newest, ts)

            if 'TSystemError' in line:
                n_errors += 1
                message = delete_first(line)
                errors[message].add(host)
                if _is_recent(ts):
                    lasterrors[message].add(host)

            if 'exception' in line and 'rtmr' not in line:
                n_exceptions += 1
                message = delete_first(line)
                exceptions[message].add(host)
                if _is_recent(ts):
                    lastexceptions[message].add(host)

            if 'updated, version:' in line:
                token = line.split()[-1]
                if re.search(r'[0-9]{10}', token):
                    try:
                        version = int(token)
                        updated_at = datetime.datetime.fromtimestamp(version)
                    except (ValueError, OverflowError, OSError):
                        # Keep the previous `latest`; a malformed token must
                        # not replace it with a non-datetime value.
                        pass
                    else:
                        latest = updated_at
                        if _is_recent(version):
                            lastupdated[version].add(host)
    return newest, latest, n_errors, n_exceptions


def main():
    """Fetch the updater logs of every production host and alert on problems.

    Steps, all relative to the directory containing this script:

    1. list the hosts with `sky list I@production_atom_search`;
    2. rsync each host's `current-rerankd_updater-*` logs into a directory
       named after the host and scan the ones ending in '7300' for lines
       newer than the timestamp remembered in lastchecked.pkl;
    3. e-mail a report when errors or exceptions under 10 minutes old were
       found, send an SMS for such exceptions, and e-mail the list of
       versions updated in the last 10 minutes;
    4. persist lastchecked.pkl, errors.pkl and exceptions.pkl.

    With --debug, progress is printed and a pdb session is opened at the end
    so the collected locals can be inspected.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()
    cwd = os.path.dirname(os.path.abspath(__file__))
    os.chdir(cwd)

    listing = subprocess.check_output(
        'sky list I@production_atom_search', shell=True)
    # splitlines() + filtering keeps the last host even when the output has
    # no trailing newline, and never yields an empty host name.
    hosts = [name.strip()
             for name in listing.decode('utf8', errors='replace').splitlines()
             if name.strip()]

    errors = defaultdict(set)
    lasterrors = defaultdict(set)
    exceptions = defaultdict(set)
    lastexceptions = defaultdict(set)
    lastupdated = defaultdict(set)  # version timestamp (int) -> hosts
    versions = defaultdict(set)
    profiling = []    # seconds spent in rsync, per host
    profiling1 = []   # seconds spent scanning, per log file
    errorscounter = 0
    exceptionscounter = 0
    # The state is saved as a plain dict, so never rely on defaultdict
    # behaviour when reading it back (a new host would raise KeyError).
    lastchecked = _load_state('lastchecked.pkl', dill.load, {})

    for host in hosts:
        if args.debug:
            print('checking {}...'.format(host))
        if not os.path.isdir(host):
            os.mkdir(host)
        os.chdir(host)
        bgn = datetime.datetime.now()
        # Argument list instead of a shell string: the host name is never
        # interpreted by a shell, and the wildcard still reaches rsync
        # unexpanded, exactly as the quoted shell form did.
        subprocess.call([
            'rsync', '-az',
            'rsync://{}/logs/rerankd/current-rerankd_updater-*'.format(host),
            '.'])
        profiling.append((datetime.datetime.now() - bgn).total_seconds())
        filenames = get_files_by_prefix('current-rerankd_updater')
        if len(filenames) != 2:
            errors['number of logfiles is {} and not 2: {}'
                   .format(len(filenames), ', '.join(filenames))].add(host)
            errorscounter += 1

        for filename in filenames:
            if not filename.endswith('7300'):
                continue
            if args.debug:
                print('    processing {}...'.format(filename))
            bgn = datetime.datetime.now()
            # Older state files may hold a string or -1 here; normalise to a
            # non-negative int before comparing timestamps against it.
            since = max(
                tryint(lastchecked.get(host, {}).get(filename, 0)), 0)
            newest, latest, n_errors, n_exceptions = _scan_logfile(
                filename, host, since, errors, lasterrors,
                exceptions, lastexceptions, lastupdated)
            errorscounter += n_errors
            exceptionscounter += n_exceptions
            lastchecked.setdefault(host, {})[filename] = newest
            profiling1.append(
                (datetime.datetime.now() - bgn).total_seconds())
            versions[latest.strftime('%Y%m%d %H:%M:%S')].add(host)
        os.chdir(cwd)

    # dill is kept for this file: state written by earlier runs may contain
    # lambda-backed defaultdicts that plain pickle cannot serialise.
    _save_state('lastchecked.pkl', dill.dump, dict(lastchecked))
    olderrors = _load_state('errors.pkl', pickle.load, {})
    oldexceptions = _load_state('exceptions.pkl', pickle.load, {})

    # Messages not seen in the previous run.  They no longer drive alerting
    # but remain available in the --debug pdb session below.
    differrors = {error: errors[error]
                  for error in errors if error not in olderrors}
    diffexceptions = {error: exceptions[error]
                      for error in exceptions if error not in oldexceptions}

    if lasterrors or lastexceptions:
        # Gate each section on the recent findings actually being rendered,
        # so a section with nothing to list is omitted instead of showing a
        # bare "Details:" header.
        report = makereport(lasterrors, len(lasterrors),
                            lastexceptions, len(lastexceptions), hosts)
        send_email(RECIPIENTS, report)
    if lastexceptions:
        send_sms(RECIPIENTS,
                 generate_sms(len(lastexceptions), lastexceptions, hosts))
    if lastupdated:
        # The raw version number is reported as read from the log; deriving
        # it back from a local-time datetime would shift it by the UTC
        # offset of the machine.
        report = 'Updated in last 10 minutes: \n\n{}'.format(
            '\n'.join(['{} ({}) on {}/{} hosts'.format(
                version, datetime.datetime.fromtimestamp(version),
                len(lastupdated[version]), len(hosts))
                for version in lastupdated]))
        send_email(RECIPIENTS, report)
    _save_state('errors.pkl', pickle.dump, dict(errors))
    _save_state('exceptions.pkl', pickle.dump, dict(exceptions))
    if args.debug:
        pdb.set_trace()
# Run the monitoring pass only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
