#!/usr/bin/env python
# Provides: msearch-lucene-shard-check
# author: okkk

import urllib2
import json
import gevent
import gevent.monkey
from gevent import Timeout
from gevent.pool import Pool
from itertools import islice
import sys
import time
import os
from optparse import OptionParser

gevent.monkey.patch_all()

import subprocess

# --- Timeouts and retries ---------------------------------------------------
# Seconds a single host is given to answer; used as the gevent Timeout
# around the per-host stat request in fetch_host().
HOST_ANSWER_TIMEOUT = 131
# NOTE(review): not read by any active code visible in this file
# (make_auth_req passes its own literal timeout to urlopen) - confirm whether
# it is still needed.
URLLIB_ANSWER_TIMEOUT = 60
# Seconds to sleep after a failed request attempt in make_auth_req().
SLEEP = 3
# Maximum number of request attempts in make_auth_req().
TRIES = 2

# --- Service description source ---------------------------------------------
# Token sent in the "Authorization: OAuth ..." header when the service
# description is requested.
OAUTH_TOKEN = "nanny_token"
# The service description URL is BASE_URL + SERVICE_NAME + COMMAND_URL.
BASE_URL = "https://nanny.yandex-team.ru/v2/services/"
SERVICE_NAME = "mail_search_prestable"
# Alternative value kept for reference:
#COMMAND_URL="/current_state/hosts/"
COMMAND_URL = ""

# --- Shard health thresholds ------------------------------------------------
# Upper bound used when creating and walking the "number of failed replicas"
# buckets of every service.
REPLICA_COUNT = 16
# A shard with at least this many failed replicas is reported as critical.
CRITICAL_REPLICA_COUNT = 3
# A shard with exactly this many failed replicas is reported as a warning.
WARNING_REPLICA_COUNT = 2
# Added to a host's search_port when the searchmap is parsed.
LUCENE_STAT_PORT_OFFSET = 1

# File that stores the release string seen by the previous run.
TEMP_RELEASE_INFO = '/tmp/mail_lucene_stat_release.txt'

# For testing purposes only: processing is meant to stop before the chunk with
# this number is reached. For example, with 4 services,
# SHARD_REQUESTS_CHUNK = 100 and STOP_BEFORE_CHUNK_NUMBER = 1 the original
# note says 400 requests are made (one chunk of size 100). (fix it later)
# NOTE(review): no active code visible in this file reads this constant.
STOP_BEFORE_CHUNK_NUMBER = 0

# When this value is truthy, get_searchmap_url() returns it instead of looking
# the searchmap up in the service description. Alternatives kept for reference:
#MANUAL_SEARCHMAPURL = False
#MANUAL_SEARCHMAPURL = "http://cmsearch.yandex.ru/res/gencfg/stable-102-r80/generated/lucene/mail.cfg"
MANUAL_SEARCHMAPURL = "http://mage.n.yandex-team.ru/api/v1.0/mail/getmap?revision=recluster_searchmap_split:tags/stable-131-r218&full=true"
# Greenlet pool; its size is the number of host requests executed in parallel.
pool = Pool(100)

# Command line: the only option is -v/--verbose, which enables progress and
# diagnostic output. Parsing happens at import time.
optparser = OptionParser()
optparser.add_option("-v", "--verbose", dest="verbose", action="store_true", help="verbose output")
(options, args) = optparser.parse_args()

def exceptionlist():
    """Return the lines of ``hostlist.txt`` without their line endings.

    The file is looked up relative to the current working directory.

    :return: list of strings, one per line of the file
    """
    with open('hostlist.txt', 'r') as hosts_file:
        content = hosts_file.read()
    return content.splitlines()

def make_auth_req(url, token=None):
    """Request ``url`` and return the response body.

    The request (including reading the body) is attempted up to ``TRIES``
    times. After a failed attempt the function sleeps ``SLEEP`` seconds, but
    only when another attempt will follow.

    :param url: string, address to request
    :param token: optional OAuth token; when given it is sent in the
        ``Authorization`` header
    :return: the response body as returned by ``read()``
    :raises Exception: when no attempt succeeded; the message contains the url
        and the last error
    """
    request = urllib2.Request(url)
    if token:
        request.add_header("Authorization", "OAuth {0}".format(token))
    last_error = None
    for attempt in range(1, TRIES + 1):
        try:
            response = urllib2.urlopen(request, timeout=20)
            try:
                return response.read()
            finally:
                # Release the connection even when reading the body fails.
                response.close()
        except Exception as e:
            last_error = e
            if options.verbose: print("Error reached {0} Url: {2} Retry count:{1}".format(e, attempt, url))
            if attempt < TRIES:
                # No point in waiting after the final attempt.
                time.sleep(SLEEP)
    if options.verbose: print("Timeout!")
    raise Exception("Timeout exception! Url: {0} Last error: {1}".format(url, last_error))

def parse_json(data):
    """Decode the JSON document ``data`` (a string) and return the result."""
    decoded = json.loads(data)
    return decoded


def get_searchmap_url(service_dict):
    """Return the address the searchmap should be loaded from.

    A truthy ``MANUAL_SEARCHMAPURL`` always wins. Otherwise the url of the
    first ``url_files`` resource whose ``local_path`` is ``searchmap.txt`` is
    returned.

    :param service_dict: parsed service description
    :return: url string, or None when no matching resource exists
    """
    if MANUAL_SEARCHMAPURL:
        return MANUAL_SEARCHMAPURL
    url_files = service_dict['runtime_attrs']['content']['resources']['url_files']
    matching_urls = (
        entry['url'] for entry in url_files
        if entry.get('local_path') == 'searchmap.txt'
    )
    return next(matching_urls, None)

def get_file_unparsed(filename):
    """Read ``filename`` and return its lines, line endings included.

    :param filename: path of the file to read
    :return: list of strings
    """
    with open(filename, 'r') as source:
        return list(source)

def get_file_rbtorrent(link, downpath, cached=False):
    """Return the lines of the first file found under ``downpath``.

    Unless ``cached`` is true, ``sky get -up -d <downpath> <link>`` is run
    first to download the resource into ``downpath``.

    :param link: resource link handed to ``sky get``
    :param downpath: directory the resource is downloaded to and read from
    :param cached: when true, skip the download and only read what is already
        in ``downpath``
    :return: list of lines of the first file found, or None when ``downpath``
        contains no file
    """
    if not cached:
        # Download the requested link itself (not whatever a global variable
        # happens to hold).
        cmd = ["sky", "get", "-up", "-d", "{0}".format(downpath), "{0}".format(link)]
        prc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = prc.communicate()
        if options.verbose: print("Rbtorrent out:{0}, Error:{1}".format(out, err))
    # Take the first file found in the directory tree and ignore the others.
    for root, dirs, files in os.walk(downpath):
        for file_one in files:
            if options.verbose: print(file_one)
            # Join with the walked directory so files in sub-directories
            # resolve to an existing path.
            return get_file_unparsed(os.path.join(root, file_one))

def get_file_by_http(searchmap_url):
    """Download ``searchmap_url`` without a token and return the body split on newlines.

    :param searchmap_url: address of the searchmap
    :return: list of strings
    """
    body = make_auth_req(searchmap_url)
    return body.split("\n")

def _load_searchmap_lines(searchmap_url, release):
    """Return the raw searchmap lines, reusing an rbtorrent download when the release is unchanged."""
    downpath = "/tmp/mail_lucene_searchmap"
    release_old = None
    if os.path.isfile(TEMP_RELEASE_INFO):
        # Read the marker as a single string: a list of lines never equals
        # the release string, which would disable the cache completely.
        with open(TEMP_RELEASE_INFO, 'r') as marker:
            release_old = marker.read()
    release_changed = release_old != release

    if searchmap_url.startswith('rbtorrent:'):
        searchmap_unparsed = None
        if not release_changed:
            searchmap_unparsed = get_file_rbtorrent(searchmap_url, downpath, cached=True)
        if searchmap_unparsed is None:
            # Either the release changed or nothing usable was cached.
            searchmap_unparsed = get_file_rbtorrent(searchmap_url, downpath, cached=False)
    else:
        # Nothing is cached for plain HTTP sources, so always download.
        searchmap_unparsed = get_file_by_http(searchmap_url)

    if searchmap_unparsed is None:
        raise Exception("No searchmap file found in {0}".format(downpath))

    if release_changed:
        # Remember the release only after the searchmap was obtained.
        with open(TEMP_RELEASE_INFO, 'w') as out:
            out.write(release)
    return searchmap_unparsed


def _parse_searchmap_lines(searchmap_unparsed):
    """Turn searchmap lines into {service: {"iNum:<n>": [{host: stat_port}, ...]}}."""
    return_dict = {}
    for raw_line in searchmap_unparsed:
        # Strip first so a trailing newline never ends up inside a field.
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        fields = line.split(" ")
        services = fields[0].split(",")

        # The key:value part is the same for every service named on the
        # line, so it is parsed once.
        line_dict = {}
        for el in fields[1].split(","):
            if el.startswith("zk:"):
                line_dict["zk"] = el.split("zk:")[1]
            else:
                parts = el.split(":")
                line_dict[parts[0]] = parts[1]

        iNum = "{0}:{1}".format("iNum", line_dict['iNum'])
        host = line_dict['host']
        lucene_stat_port = int(line_dict['search_port']) + LUCENE_STAT_PORT_OFFSET
        for service in services:
            return_dict.setdefault(service, {}).setdefault(iNum, []).append({host: lucene_stat_port})
    return return_dict


def get_searchmap_parsed(searchmap_url, release):
    """Load the searchmap and group its hosts by service and shard.

    The release string is stored in ``TEMP_RELEASE_INFO``. For ``rbtorrent:``
    urls the previously downloaded file is reused while the release is
    unchanged; other urls are always downloaded over HTTP.

    Each non-empty, non-comment line is expected to look like
    ``service1,service2 key:value,key:value,...`` and must provide the keys
    ``iNum``, ``host`` and ``search_port``.

    :param searchmap_url: string
    :param release: string, release of the service the searchmap belongs to
    :return: dict[service]["iNum:<n>"] -> list of {host: port} dicts, where
        port is ``search_port + LUCENE_STAT_PORT_OFFSET``
    :raises Exception: when no searchmap file could be found after an
        rbtorrent download
    """
    if options.verbose: print(searchmap_url)
    searchmap_unparsed = _load_searchmap_lines(searchmap_url, release)
    return _parse_searchmap_lines(searchmap_unparsed)

def chunklist(iterable, SIZE=100):
    """Yield consecutive slices of ``iterable``, each at most ``SIZE`` items long.

    :param iterable: a sequence supporting ``len()`` and slicing
    :param SIZE: maximum length of one chunk
    """
    total = len(iterable)
    for start in range(0, total, SIZE):
        # Slicing past the end is clamped, so the last chunk may be shorter.
        yield iterable[start:start + SIZE]


def fetch_host(host, pid):
    """Request the ``/stat`` page of every host in ``host`` and extract the watched metrics.

    A host that does not answer within ``HOST_ANSWER_TIMEOUT`` seconds, or
    whose request or JSON decoding fails, is treated as having returned no
    metrics at all.

    :param host: dict {hostname: port}
    :param pid: job number, used only in verbose output
    :return: dict {"hostname:port": metrics}, metrics being the result of
        ``parse_stat``
    """
    if options.verbose: print("Pid currently runing {0}".format(pid))
    result_dict = {}
    for hostname, port in host.items():
        # NOTE(review): ports produced by get_searchmap_parsed already include
        # LUCENE_STAT_PORT_OFFSET; one more is added here - confirm intended.
        stat_url = "http://{0}:{1}/stat".format(hostname, int(port) + 1)
        stat_json = []
        if options.verbose: print("Actual url is:{0}".format(stat_url))
        # With False as second argument the timeout ends the block silently
        # and stat_json keeps its empty default.
        with Timeout(HOST_ANSWER_TIMEOUT, False):
            try:
                stat_json = json.loads(make_auth_req(stat_url))
            except Exception as e:
                if options.verbose: print("Failed to get stat from {0}: {1}".format(stat_url, e))
                stat_json = []
        result_dict["{0}:{1}".format(hostname, port)] = parse_stat(stat_json)
    return result_dict


def dosomeasync_list(items):
    """ Run fetch_host for every element in the greenlet pool and collect the answers
    @param items
    List of dicts [{host:port},{host2:port2}]
    @return
    List of dicts [{"host:port": metrics}, ...], one per element, in input order
    """
    items = list(items)
    greenlets = [
        pool.spawn(fetch_host, element, pid)
        for pid, element in enumerate(items, 1)
    ]
    gevent.joinall(greenlets)

    results = []
    for element, greenlet in zip(items, greenlets):
        value = greenlet.value
        if value is None:
            # The greenlet died with an unhandled error: report its hosts as
            # "no data" so that later lookups by host key still succeed.
            value = dict(
                ("{0}:{1}".format(hostname, port), parse_stat([]))
                for hostname, port in element.items()
            )
        results.append(value)
    return results

def parse_stat(stat_json):
    """Pick the two watched metrics out of a host's stat answer.

    :param stat_json: iterable of entries whose item 0 is the metric name and
        item 1 its value
    :return: dict with the keys ``index-empty-total`` and
        ``index-copy-progress_perc``. A metric name only has to contain the
        key, the last matching entry wins, and a metric that is absent (or
        whose value is None) is reported as False.
    """
    watched = ("index-empty-total", "index-copy-progress_perc")
    collected = dict.fromkeys(watched)
    for metric in stat_json:
        metric_name = metric[0]
        for key in watched:
            if key in metric_name:
                collected[key] = metric[1]
    return dict(
        (key, False if value is None else value)
        for key, value in collected.items()
    )

def generate_uniq_hostlist(searchmap_parsed):
    """Return every distinct host entry of the searchmap once, in first-seen order.

    :param searchmap_parsed: dict[service][iNum] -> list of {host: port} dicts
    :return: list of {host: port} dicts without duplicates
    """
    uniq_hostlist = []
    # Dicts are not hashable, so uniqueness is tracked through a frozenset of
    # their items; this replaces a linear scan of the result list per entry.
    seen = set()
    for data in searchmap_parsed.values():
        for hostlist in data.values():
            for hostname in hostlist:
                key = frozenset(hostname.items())
                if key not in seen:
                    seen.add(key)
                    uniq_hostlist.append(hostname)
    return uniq_hostlist

def get_databychunk(splitted_list):
    """Query all hosts in one go and merge the answers into a single dict.

    :param splitted_list: list of {host: port} dicts handed to
        ``dosomeasync_list``
    :return: dict {"host:port": metrics}; when the same key is answered more
        than once, the last answer wins
    """
    merged = {}
    for answer in dosomeasync_list(splitted_list):
        for hostname, hostdata in answer.items():
            merged[hostname] = hostdata
    return merged

def get_aggregated_byshard(hostsdata, searchmap_parsed):
    """Group the shards of every service by their number of failed replicas.

    A replica counts as failed when its ``index-copy-progress_perc`` is not
    100.0 or its ``index-empty-total`` is not 0.

    :param hostsdata: dict {"host:port": metrics} with the two metric keys
    :param searchmap_parsed: dict[service][iNum] -> list of {host: port} dicts
    :return: dict[service][failed_count] -> list of
        {iNum: {"host:port": metrics}} dicts; buckets 0..REPLICA_COUNT + 1
        always exist, larger ones are created on demand
    :raises KeyError: when a searchmap host has no entry in ``hostsdata``
    """
    result_dict = {}
    for service, data in searchmap_parsed.items():
        # Pre-create the buckets so consumers can index them unconditionally.
        buckets = dict((count, []) for count in range(REPLICA_COUNT + 2))
        for iNum, inumdata in data.items():
            False_Count = 0
            shard_hosts = {}
            for hostname in inumdata:
                for host, port in hostname.items():
                    key = "{0}:{1}".format(host, port)
                    metrics = hostsdata[key]
                    # A metric that parse_stat reported as False (no data)
                    # fails the progress comparison, because False != 100.0.
                    if metrics['index-copy-progress_perc'] != 100.0 or metrics['index-empty-total'] != 0:
                        False_Count += 1
                    shard_hosts[key] = metrics
            # setdefault keeps shards with more failed replicas than
            # pre-created buckets from raising KeyError.
            buckets.setdefault(False_Count, []).append({iNum: shard_hosts})
        result_dict[service] = buckets
    return result_dict

def get_json(filename):
    """Decode the JSON document stored on the first line of ``filename``.

    :param filename: path of the file to read
    :return: the decoded object, or None when the file cannot be opened or
        read (IOError)
    """
    try:
        with open(filename, 'r') as source:
            first_line = source.readlines()[0]
    except IOError:
        return None
    return json.loads(first_line)

def _print_shard_bucket(service, False_Count, level, shards, overall_list):
    """Print one bucket header and all its hosts; add hosts without any data to ``overall_list``."""
    print("======SERVICE: {0} {1}of{2} {3}========".format(service, False_Count, REPLICA_COUNT, level))
    for inum in shards:
        for inumkey, hosts in inum.items():
            for host in hosts.items():
                # host is a ("hostname:port", metrics) pair.
                hostname = host[0].split(":")[0]
                if hostname not in overall_list:
                    metrics = host[1]
                    # Both metrics being False means the host returned none
                    # of the watched metrics.
                    if str(metrics['index-copy-progress_perc']) == "False" and str(metrics['index-empty-total']) == "False":
                        overall_list.append(hostname)
                print("inum {1}, Data: {2}".format(service, inumkey, host))


def print_result_verbose(result):
    """Print every shard of every service grouped by its number of failed replicas.

    Buckets 0..REPLICA_COUNT are printed. A bucket is labelled OK for zero
    failures, WARNING up to ``WARNING_REPLICA_COUNT`` and CRITICAL from
    ``CRITICAL_REPLICA_COUNT`` on. Finally the hosts that delivered no metrics
    at all are counted and listed.

    :param result: dict[service][failed_count] -> list of
        {iNum: {"host:port": metrics}} dicts
    """
    overall_list = []
    for service, service_data in result.items():
        for False_Count in range(REPLICA_COUNT + 1):
            # The conditions are deliberately independent: a bucket is
            # printed once for every label it qualifies for.
            levels = []
            if False_Count < WARNING_REPLICA_COUNT and False_Count == 0:
                levels.append("OK")
            if False_Count < WARNING_REPLICA_COUNT and False_Count > 0:
                levels.append("WARNING")
            if False_Count == WARNING_REPLICA_COUNT:
                levels.append("WARNING")
            if False_Count >= CRITICAL_REPLICA_COUNT:
                levels.append("CRITICAL")
            for level in levels:
                _print_shard_bucket(service, False_Count, level, service_data[False_Count], overall_list)

    print('So we found {0} failed hosts.'.format(len(overall_list)))
    print(' +'.join(overall_list))

def get_juggler_errors(result):
    """Collect the shards that must be reported as warning or critical.

    A shard is a warning when its number of failed replicas is at least
    ``WARNING_REPLICA_COUNT`` but below ``CRITICAL_REPLICA_COUNT``, and
    critical from ``CRITICAL_REPLICA_COUNT`` on. Every bucket present in
    ``result`` is inspected, so no failure count is skipped.

    :param result: dict[service][failed_count] -> list of {iNum: hosts} dicts
    :return: tuple (criticals, warnings, crit_flag, warn_flag); the first two
        are dicts {service: [iNum, ...]}, the flags are 0 or 1. ``warn_flag``
        is only raised by services that have no critical shard.
    """
    crit_flag = 0
    warn_flag = 0
    warnings = {}
    criticals = {}
    for service, service_data in result.items():
        warnings[service] = []
        criticals[service] = []
        for False_Count in sorted(service_data):
            inumkeys = [inumkey for inum in service_data[False_Count] for inumkey in inum]
            if WARNING_REPLICA_COUNT <= False_Count < CRITICAL_REPLICA_COUNT:
                warnings[service].extend(inumkeys)
            if False_Count >= CRITICAL_REPLICA_COUNT:
                criticals[service].extend(inumkeys)
        if criticals[service]:
            crit_flag = 1
        elif warnings[service]:
            warn_flag = 1
    return criticals, warnings, crit_flag, warn_flag
    
def die(check, level, message):
    """Print the passive-check result line and terminate the process.

    The line has the form ``PASSIVE-CHECK:<check>;<level>;<message>``. The
    process always exits with status 0; the check status is carried by
    ``level`` only.

    :param check: string, name of the check
    :param level: string, status code of the check
    :param message: string, human readable description
    """
    print('PASSIVE-CHECK:' + check + ';' + level + ';' + message)
    sys.exit(0)

# ---------------------------------------------------------------------------
# Script body. Everything below runs at import time, in this order:
#   1. download and parse the service description,
#   2. locate and parse the searchmap,
#   3. query every distinct host in parallel,
#   4. aggregate the answers per shard and report the result via die().
# ---------------------------------------------------------------------------

# Step 1: service description (requested with the OAuth token).
url = BASE_URL+SERVICE_NAME+COMMAND_URL
if options.verbose: print("Getting service {0} config by url {1}".format(SERVICE_NAME, url))
data = make_auth_req(url, OAUTH_TOKEN)
if options.verbose: print("Done")
if options.verbose: print("Parse service config.")
service_dict = parse_json(data)
# The release of the first gencfg group is passed on to
# get_searchmap_parsed together with the searchmap url.
release=service_dict['runtime_attrs']['content']['instances']['extended_gencfg_groups']['groups'][0]['release']
if options.verbose: print("Release is: {0}".format(release))
if options.verbose: print("Done")

# Step 2: searchmap.
if options.verbose: print("Get service {0} searchmap".format(SERVICE_NAME))
searchmap_url = get_searchmap_url(service_dict)
if options.verbose: print ("Searchmap_url {0}".format(searchmap_url))
if options.verbose: print("Done")
if options.verbose: print("Parse service {0} searchmap".format(SERVICE_NAME))
searchmap_parsed = get_searchmap_parsed(searchmap_url,release)
if options.verbose: print("Done")
if options.verbose: print("Start async jobs")

# Step 3: every distinct host is queried once; the whole list is handed over
# in one piece (the chunked variant is kept below, disabled).
uniq_hostlist = generate_uniq_hostlist(searchmap_parsed)
#splitted_list = chunklist(uniq_hostlist)
#result_dict_host = get_databychunk(splitted_list)
result_dict_host = get_databychunk(uniq_hostlist)
# Step 4: group the shards of every service by their number of failed replicas.
result = get_aggregated_byshard(result_dict_host, searchmap_parsed)

if options.verbose: print_result_verbose(result)

criticals,warnings,crit_flag,warn_flag = get_juggler_errors(result)

# Build one summary fragment per service for each severity.
crit_string=""
for k, v in criticals.iteritems():
    crit_string += " Service: {0} Count: {1} Inumlist: {2}".format(k, len(v), ', '.join(v))

warn_string=""
for k, v in warnings.iteritems():
    warn_string += " Service: {0} Count: {1} Inumlist: {2}".format(k, len(v), ', '.join(v))

# Report the most severe status found; die() prints the passive-check line
# and exits the process.
# NOTE(review): the "3of4"/"2of4" texts are literals and do not follow
# REPLICA_COUNT or the *_REPLICA_COUNT thresholds - confirm they are still
# the intended wording.
if crit_flag:
    die('msearch-lucene-shard-check', '2', "3of4 FAILED. {0}".format(crit_string))
elif warn_flag:
    die('msearch-lucene-shard-check', '1', "2of4 FAILED. {0}".format(warn_string))
else:
    die('msearch-lucene-shard-check', '0', "OK")


