#!/usr/bin/python
# -*- coding: utf-8 -*-


import sys
import os
import commands
import urllib2
from optparse import OptionParser, Option
import socket
import time
import multiprocessing
import re

# Keep a handle on the stock resolver before it is monkey-patched below.
origGetAddrInfo = socket.getaddrinfo


def getAddrInfoWrapper(host, port, family=0, socktype=0, proto=0, flags=0):
    """Resolve ``host``/``port`` with the address family pinned to IPv4.

    ``family`` is accepted only so the signature matches
    ``socket.getaddrinfo``; its value is ignored and ``socket.AF_INET`` is
    always handed to the original resolver. All other arguments are passed
    through unchanged, and the original resolver's result is returned.
    """
    forced_family = socket.AF_INET
    return origGetAddrInfo(host, port, forced_family, socktype, proto, flags)


# Install the IPv4-only wrapper in place of socket.getaddrinfo, so every
# lookup made through the socket module in this process goes through it.
socket.getaddrinfo = getAddrInfoWrapper


# Environment name -> URL (or tuple of URLs) that is fetched to obtain the
# host list for that environment. Each response is read as a
# newline-separated list of host names. A value may be a bare string or a
# tuple; the consumer normalises both forms.
all_hosts = {
    'production': (
        "http://c.yandex-team.ru/api/groups2hosts/disk_mpfs",
        "http://c.yandex-team.ru/api/groups2hosts/disk_webdav",
    ),
    'test': (
        "http://c.yandex-team.ru/api/groups2hosts/disk_test_mpfs",
        "http://c.yandex-team.ru/api/groups2hosts/disk_test_api",
        "http://c.yandex-team.ru/api/groups2hosts/sync_test_sync",
    ),
    'qa': "http://c.yandex-team.ru/api/groups2hosts/disk_mpfs_qa",
    'dev': "http://c.yandex-team.ru/api/groups2hosts/disk_mpfs_dev",
    'browser': (
        "http://c.yandex-team.ru/api/groups2hosts/disk_sync",
        "http://c.yandex-team.ru/api/groups2hosts/disk_xmpp",
    ),
    'api': "http://c.yandex-team.ru/api/groups2hosts/disk_api",
}

# Environments whose host list is fixed here instead of being fetched over
# HTTP. These take precedence over all_hosts when the environment is resolved.
static_env = {
    'logstore': (
        'logstore.disk.yandex.net',
        'logstore1m.disk.yandex.net',
        # Currently disabled hosts:
        # 'logstore-fol.mail.yandex.net',
        # 'logstore-myt.mail.yandex.net',
        # 'logstore-iva.mail.yandex.net',
    ),
}

# Usage line handed to OptionParser (%prog is expanded by optparse).
usage = "usage: ./%prog -h"

# Log path prefix used for every environment that has no entry in log_path.
DEFAULT_PATH = '/var/log/mpfs/'

# Per-environment log path prefixes; the requested log name (or a default
# glob) is appended to each prefix when the grep command is built.
log_path = {
    'logstore': (
        '/u0/archive_logs/mpfs/mpfs*/mpfs_',
        '/u0/archive_logs/webdav/webdav*/mpfs_*',
    ),
}

# Command-line options, handed to OptionParser through ``option_list``.
#
# Help texts are built with adjacent string literals rather than a backslash
# continuation inside one literal: the latter embeds the source indentation
# (a long run of spaces) into the text shown by ``-h``.
option_list = (
    Option(
        '-l', '--log',
        action='store',
        type='string',
        dest='log',
        help='log name in /var/log/mpfs/, '
             'by default search in all logs for this day',
    ),
    Option(
        '-s', '--string',
        action='append',
        type='string',
        dest='string',
        help='search string, may be used several times to '
             'clarify request: -s string1 -s string2',
    ),
    Option(
        '-H', '--host',
        action='append',
        type='string',
        dest='hosts',
        help='grep on selected host, may be used several times '
             'to define different hostnames: -H host1 -H host2',
    ),
    Option(
        '-e', '--env',
        action='store',
        dest='env',
        # Lists every environment the script knows about (the keys of
        # all_hosts plus static_env).
        help='env for grep: qa, test, production, dev, browser, api, '
             'logstore; default: production',
        default='production',
    ),
    Option(
        '-L', '--list',
        action='store_true',
        dest='list',
        help='list all logs on selected host',
    ),
    Option(
        '-A', '--addr',
        action='store_true',
        dest='list_hosts',
        help='list all known hosts',
    ),
    Option(
        '-C', '--ctx',
        action='store',
        # Validated as an integer because the value is interpolated into the
        # remote grep command line as ``-C <n>``.
        type='int',
        default=None,
        dest='ctx',
        help='context, number of lines before and after',
    ),
    Option(
        '--cluster',
        action='store',
        default=None,
        dest='cluster',
        help='cluster: webdav, mpfs',
    ),
    Option(
        '-v', '--verbose',
        action='store_true',
        default=False,
        dest='verbose',
        help='Verbose output.',
    ),
)

# The command line is parsed once, at import time. ``options`` is read as a
# module-level global by the functions defined below.
parser = OptionParser(usage=usage, option_list=option_list)
options, args = parser.parse_args()


class WorkerDescription(object):
    """Plain record describing one grep worker.

    Attributes set by the constructor:
      id      -- value passed as ``i`` (grep_all passes the worker's index)
      process -- value passed as ``p`` (grep_all passes the
                 multiprocessing.Process running the worker)
      busy    -- value passed as ``b`` (grep_all passes the shared
                 multiprocessing.Value the worker uses as its busy flag)
    """

    def __init__(self, i, p, b):
        self.id, self.process, self.busy = i, p, b


def grep_log(**kwargs):
    """Worker loop: take (host, grep command) jobs off a queue and run them.

    Expected keyword arguments:
      parent -- pid of the launching process; the worker exits as soon as
                os.getppid() no longer equals it
      queue  -- queue yielding ``(host, grep_cmd)`` tuples
      busy   -- shared value; set to 1 while a job is being handled and to 0
                whenever a poll of the queue comes back empty

    Each job is executed as ``ssh -4 <host><grep_cmd>``. Output lines that
    start with a ``YYYY-MM-DD `` date are printed prefixed with the host name
    up to its first dot; every other line is dropped. The loop ends after
    more than ten failed polls in total (the counter is never reset).
    Reads the module-level ``options`` for the verbose flag.
    """
    parent_pid = kwargs['parent']
    jobs = kwargs['queue']
    busy_flag = kwargs['busy']
    # Output ending with this text is treated as containing no matches;
    # presumably it is the tail of the ssh login banner -- TODO confirm.
    banner_tail = 'logged and monitored.'
    dated_line = r"^\d\d\d\d\-\d\d\-\d\d .*"
    idle_polls = 0
    while True:
        if os.getppid() != parent_pid:
            sys.exit()
        try:
            job = jobs.get(timeout=0.1)
        except Exception:
            # Any failure to fetch a job counts as "nothing to do".
            busy_flag.value = 0
            if idle_polls > 10:
                break
            idle_polls += 1
            time.sleep(0.01)
            continue

        host, grep_cmd = job
        busy_flag.value = 1
        command = 'ssh -4 ' + host + grep_cmd
        if options.verbose:
            print(command)
        output = commands.getoutput(command)
        if not output:
            continue
        if output.replace('\n', '').rstrip().endswith(banner_tail):
            continue
        prefix = host[:host.find('.')] + ': '
        for chunk in output.split(banner_tail):
            for raw_line in chunk.split('\n'):
                stripped = raw_line.strip()
                if stripped and re.match(dated_line, stripped):
                    print(prefix + raw_line)


def build_log_patterns(bases, log_name=None):
    """Turn log path prefix(es) into the path patterns to grep.

    ``bases`` is a single prefix string or a list/tuple of them. When
    ``log_name`` is given it is appended to every prefix; otherwise the
    default glob ``*.log`` is appended. Always returns a list.
    """
    if not isinstance(bases, (list, tuple)):
        bases = (bases,)
    suffix = log_name if log_name else '*.log'
    return [base + suffix for base in bases]


def grep_all(hosts, worker_count=20):
    """Run the configured grep on every host, in parallel worker processes.

    Reads the module-level ``options``: ``env`` selects the log prefixes
    (``log_path`` with ``DEFAULT_PATH`` as fallback), ``log`` the log name,
    ``ctx`` the optional ``-C`` context and ``string`` the search strings.
    ``options.string`` must contain at least one entry; the first one goes to
    ``zfgrep`` and each further one is applied as an extra ``| grep`` filter.

    ``hosts`` is the list of host names to ssh to; ``worker_count`` is the
    number of ``grep_log`` worker processes to start (default 20). One job
    per (log pattern, host) pair is queued. Returns None once every worker
    has gone idle (or died) and has been terminated.
    """
    # The former no-log branch evaluated ``'*.log' % options.log``, which
    # raises TypeError because the format string has no placeholder.
    logs = build_log_patterns(log_path.get(options.env, DEFAULT_PATH),
                              options.log)

    # These parts of the remote command do not depend on the log pattern.
    # NOTE(review): search strings are interpolated into the shell command
    # unescaped, so a string containing a quote character breaks the command.
    grep_util = 'zfgrep'
    if options.ctx:
        grep_util += ' -C %s' % options.ctx
    extra_grep = ''.join(
        ''' | grep '%s' ''' % arg for arg in options.string[1:]
    )

    queue = multiprocessing.Queue()
    parent = os.getpid()
    workers = []
    for i in range(worker_count):
        # Workers start out flagged busy so they are not reaped before they
        # have polled the queue at least once.
        busy = multiprocessing.Value('B', 1)
        worker_process = multiprocessing.Process(
            target=grep_log,
            kwargs={'queue': queue, 'busy': busy, 'parent': parent},
        )
        worker_process.start()
        workers.append(WorkerDescription(i, worker_process, busy))

    for log in logs:
        grep_cmd = ''' "%s '%s' %s %s "''' % \
            (grep_util, options.string[0], log, extra_grep)
        for host in hosts:
            queue.put((host, grep_cmd))

    # Reap workers one at a time, last started first. A worker that died
    # while still flagged busy would otherwise keep this loop spinning
    # forever, so a dead process is treated as finished too.
    while workers:
        worker = workers[-1]
        if worker.busy.value and worker.process.is_alive():
            time.sleep(0.1)
            continue
        worker.process.terminate()
        worker.process.join()
        workers.pop()


def list_logs(hosts):
    """Print an ``ls -l`` listing of /var/log/mpfs/*.log from the first host.

    Only ``hosts[0]`` is queried, over ssh as user ``developer``. When
    ``hosts`` is empty (for example because a cluster filter matched
    nothing) a short message is printed instead of failing with IndexError.
    Returns None.
    """
    if not hosts:
        print('no hosts to list logs on')
        return
    cmd = '''ssh -4 developer@%s "ls -l /var/log/mpfs/*.log"''' % hosts[0]
    print(commands.getoutput(cmd))


def host_matches_cluster(host, cluster):
    """Tell whether ``host`` passes the ``--cluster`` filter.

    ``cluster`` may be empty/None (everything passes) or one of:
      'x'     -- single character: keep hosts whose short name (the part
                 before the first dot) ends with that character
      'name'  -- longer value: keep hosts whose name starts with it
      '-x'    -- drop hosts whose short name ends with 'x' or whose name
                 starts with 'x'
      '-name' -- drop hosts whose name starts with 'name'
    A lone '-' filters nothing.
    """
    if not cluster:
        return True
    # Slicing (rather than indexing) keeps an empty short name from raising.
    last_char = host.split('.')[0][-1:]
    if cluster[0] == '-':
        if len(cluster) == 2 and last_char == cluster[1]:
            return False
        if len(cluster) > 1 and host.startswith(cluster[1:]):
            return False
        return True
    if len(cluster) == 1 and last_char != cluster:
        return False
    if len(cluster) > 1 and not host.startswith(cluster):
        return False
    return True


def resolve_hosts(env, cluster=None):
    """Return the list of hosts for environment ``env``.

    Environments listed in ``static_env`` use their fixed host list as is
    (the cluster filter is not applied to them). Otherwise every URL
    registered for the environment in ``all_hosts`` is fetched, its body is
    read as a newline-separated host list, and hosts rejected by
    ``host_matches_cluster`` are dropped.

    Raises ValueError when ``env`` is in neither table.
    """
    if env in static_env:
        return list(static_env[env])
    sources = all_hosts.get(env)
    if sources is None:
        raise ValueError('unknown env: %s' % env)
    if not isinstance(sources, (tuple, list)):
        sources = [sources]
    found = []
    for url in sources:
        response = urllib2.urlopen(url)
        try:
            listing = response.read()
        finally:
            response.close()
        for line in listing.split('\n'):
            host = line.strip()
            if host and host_matches_cluster(host, cluster):
                found.append(host)
    return found


if __name__ == "__main__":
    # Hosts given explicitly with -H win over environment resolution.
    if options.hosts:
        hosts = options.hosts
    else:
        try:
            hosts = resolve_hosts(options.env, options.cluster)
        except ValueError as err:
            print(str(err))
            sys.exit(1)

    if options.list:
        list_logs(hosts)
        sys.exit()
    if options.list_hosts:
        print('\n'.join(hosts))
        sys.exit()
    if not options.string:
        print('empty search string')
        sys.exit(1)
    grep_all(hosts)
