#!/usr/bin/env python
# Provides: disk_lucene_halted_shard
# author: okkk

import urllib2
import json
import gevent
import gevent.monkey
from gevent import Timeout
from gevent.pool import Pool
from itertools import islice
import sys
import time
import os
from optparse import OptionParser

gevent.monkey.patch_all()

import subprocess

# Upper bound, in seconds, for polling one host entry; fetch_host wraps the
# whole (retried) request in a gevent Timeout of this length.
HOST_ANSWER_TIMEOUT = 170
# NOTE(review): not referenced in the visible code - make_auth_req passes a
# hard-coded timeout=20 to urlopen instead. Confirm which value is intended.
URLLIB_ANSWER_TIMEOUT = 30
# Pause between HTTP retries in make_auth_req, in seconds.
SLEEP = 2
# Maximum number of HTTP attempts made by make_auth_req.
TRIES = 5

# Sent by make_auth_req as "Authorization: OAuth <token>".
# NOTE(review): looks like a placeholder value - confirm how the real token is supplied.
OAUTH_TOKEN = "nanny_token"
# The service description is fetched from BASE_URL + SERVICE_NAME + COMMAND_URL.
BASE_URL = "https://nanny.yandex-team.ru/v2/services/"
SERVICE_NAME = "disk_search_backend_prestable"
#COMMAND_URL="/current_state/hosts/"
COMMAND_URL = ""
# Shards are bucketed by the number of replicas reporting a halted shard,
# from 0 up to REPLICA_COUNT (larger counts are clamped to REPLICA_COUNT).
REPLICA_COUNT = 4
# Buckets with at least this many affected replicas are reported as critical.
CRITICAL_REPLICA_COUNT = 3
# The bucket with exactly this many affected replicas is reported as warning.
WARNING_REPLICA_COUNT = 2

# NOTE(review): not referenced in the visible code.
TEMP_RELEASE_INFO = '/tmp/disk_lucene_stat_release.txt'
# When set to a file path, get_file_by_http reads the searchmap from that
# file instead of downloading it.
#FORCEMAP = "diskmap.txt"
FORCEMAP = False

# For testing purposes
#HOST_REQUESTS_CHUNK = 11000
# For tests: stop before the given chunk number is reached. For example, with
# 4 services, HOST_REQUESTS_CHUNK = 100 and STOP_BEFORE_CHUNK_NUMBER = 1 we do
# 400 requests (one chunk of size 100 per service). (fix it later)
# NOTE(review): not referenced in the visible code.
STOP_BEFORE_CHUNK_NUMBER = 0

#MANUAL_SEARCHMAPURL="http://cmsearch.yandex.ru/res/gencfg/stable-101-r32/generated/lucene/disk.cfg"
#MANUAL_SEARCHMAPURL="http://cmsearch.yandex.ru/res/gencfg/stable-105-r178/generated/lucene/disk.cfg"
# Searchmap location used by the script body (a pinned revision).
MANUAL_SEARCHMAPURL = "http://mage.n.yandex-team.ru/api/v1.0/disk/getmap?revision=searchmap_split:tags/stable-131-r213&full=true"
# NOTE(review): printed unconditionally (options are not parsed yet), so this
# line precedes the PASSIVE-CHECK output even without --verbose. Looks like a
# debugging leftover - confirm before relying on the output format.
print MANUAL_SEARCHMAPURL 
# Shared greenlet pool; its size limits how many requests run in parallel.
pool = Pool(30)

# Command line: -v/--verbose enables the diagnostic output used across the script.
optparser = OptionParser()
optparser.add_option("-v", "--verbose", dest="verbose", action="store_true", help="verbose output")
(options, args) = optparser.parse_args()

def make_auth_req(url, token=None):
    """Fetch *url* over HTTP and return the response body.

    Opening the URL is attempted up to TRIES times with SLEEP seconds between
    attempts. Reading the body is not retried.

    :param url: string, address to fetch
    :param token: optional OAuth token, sent as an ``Authorization`` header
    :return: response body as returned by ``read()``
    :raises Exception: when no attempt succeeded; the message carries the
        last underlying error
    """
    request = urllib2.Request(url)
    if token:
        request.add_header("Authorization", "OAuth {0}".format(token))

    response = None
    last_error = None
    for attempt in range(1, TRIES + 1):
        try:
            response = urllib2.urlopen(request, timeout=20)
            break
        except Exception as e:
            # Keep a reference: on Python 3 the name bound by "except" is
            # cleared when the handler exits.
            last_error = e
            if options.verbose: print("Error reached {0} Url: {2} Retry count:{1}".format(e, attempt, url))
            # Sleeping after the final attempt would only delay the failure.
            if attempt < TRIES:
                time.sleep(SLEEP)

    if response is None:
        if options.verbose: print("Timeout!")
        # repr() keeps the message safe for non-ASCII error texts.
        raise Exception("Timeout exception! Last error: {0!r}".format(last_error))

    try:
        return response.read()
    finally:
        # Release the socket even when read() fails.
        response.close()

def parse_json(data):
    """Decode the JSON document in *data* and return the resulting object."""
    decoded = json.loads(data)
    return decoded

def get_file_by_http(searchmap_url):
    """Return the searchmap as a list of lines without line terminators.

    When FORCEMAP is set, the map is read from that local file and
    *searchmap_url* is ignored; otherwise it is downloaded from
    *searchmap_url*.

    :param searchmap_url: string, address of the searchmap
    :return: list of strings, one per line
    """
    if FORCEMAP:
        with open(FORCEMAP, 'r') as f:
            # Split exactly like the HTTP branch: readlines() would keep the
            # trailing "\n" on every line and leak it into the last field.
            return f.read().split("\n")
    return make_auth_req(searchmap_url).split("\n")

def get_searchmap_parsed(searchmap_url,release):
    """Fetch the searchmap and index its hosts by service and shard.

    Every non-empty, non-comment line is parsed as
    ``service1,service2 key:value,key:value,...``; the ``iNum``, ``host`` and
    ``search_port`` keys are required. A ``zk:`` element keeps everything
    after the prefix as its value.

    :param searchmap_url: string, where the searchmap is fetched from
    :param release: not used by the parser; kept for existing callers
    :return: dict[service]["iNum:<n>"] -> list of {host: search_port} dicts
    :raises IndexError: on a line without a key:value section or an element
        without ":"
    :raises KeyError: on a line missing iNum, host or search_port
    """
    if options.verbose:
        print(searchmap_url)

    return_dict = {}
    for raw_line in get_file_by_http(searchmap_url):
        # Strip first so that trailing "\r"/"\n" never ends up inside the
        # last value and indented comments are still recognised.
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue

        fields = line.split()
        services = fields[0].split(",")

        # The key:value section is the same for every service on the line,
        # so parse it once.
        line_dict = {}
        for el in fields[1].split(","):
            if el.startswith("zk:"):
                line_dict["zk"] = el.split("zk:")[1]
            else:
                parts = el.split(":")
                line_dict[parts[0]] = parts[1]

        iNum = "{0}:{1}".format("iNum", line_dict['iNum'])
        host = line_dict['host']
        search_port = line_dict['search_port']

        for service in services:
            shards = return_dict.setdefault(service, {})
            # A fresh dict per service so entries are never shared.
            shards.setdefault(iNum, []).append({host: search_port})
    return return_dict

def chunklist(iterable, SIZE=100):
    """Yield consecutive chunks of at most SIZE items from *iterable*.

    Sequences (objects supporting ``len()`` and slicing) are sliced, so each
    chunk has the type of the sequence. Iterables without ``len()`` - for
    example generators - are consumed lazily and yielded as lists.

    :param iterable: sequence or arbitrary iterable to split
    :param SIZE: maximum number of items per chunk
    :return: generator of chunks
    """
    try:
        total = len(iterable)
    except TypeError:
        # No len(): pull SIZE items at a time until the source is exhausted.
        iterator = iter(iterable)
        while True:
            chunk = list(islice(iterator, SIZE))
            if not chunk:
                return
            yield chunk
    else:
        # Slicing clamps at the end of the sequence by itself.
        for start in range(0, total, SIZE):
            yield iterable[start:start + SIZE]

def fetch_host(host, pid):
    """Poll the stat handle of every instance in *host*.

    The stat handle is queried at ``search_port + 2``. A request that fails
    or exceeds HOST_ANSWER_TIMEOUT is treated as an empty stat list, so the
    instance gets parse_stat's default metrics.

    :param host: dict {hostname: search_port}
    :param pid: sequence number of the job, used in verbose output only
    :return: dict {"hostname:search_port": metrics dict from parse_stat}
    """
    if options.verbose: print("Pid currently runing {0}".format(pid))
    result_dict = {}
    # Distinct loop names: the original rebound the "host" parameter here.
    for hostname, port in host.items():
        stat_url = "http://{0}:{1}/stat".format(hostname, int(port)+2)
        stat_json = []
        if options.verbose: print("Actual url is:{0}".format(stat_url))
        # Timeout(..., False) leaves the block silently when time is up,
        # keeping stat_json at its empty default.
        with Timeout(HOST_ANSWER_TIMEOUT, False):
            try:
                stat_json = json.loads(make_auth_req(stat_url))
            except Exception as e:
                # Best effort: keep going, but do not hide the reason.
                if options.verbose: print("Failed to get stat from {0}: {1!r}".format(stat_url, e))
                stat_json = []
        metrics = parse_stat(stat_json)
        result_dict["{0}:{1}".format(hostname, port)] = metrics
    return result_dict


def dosomeasync_list(items):
    """ Run fetch_host for every element of items on the shared greenlet pool.

    @param items
    List of dicts [{host:port},{host2:port2}]
    @return
    List with the fetch_host result of every element, in the order of items:
    [{"host:port": {metric: value}}, ...]
    """
    # The second argument is a 1-based job number used for verbose output.
    threads = [pool.spawn(fetch_host, element, pid)
               for pid, element in enumerate(items, 1)]
    gevent.joinall(threads)
    # All greenlets are finished, so get() does not block. Unlike .value it
    # re-raises the exception of a greenlet that died instead of handing
    # None to the caller.
    return [thread.get() for thread in threads]

def parse_stat(stat_json):
    """Extract the halted-shards metric from a stat response.

    *stat_json* is iterated as a sequence of ``[name, value, ...]`` entries.
    The value of the last entry whose name contains
    ``has-halted-shards_ammx`` is returned under that key; when there is no
    such entry, or its value is None, the value is False.
    """
    metric_key = "has-halted-shards_ammx"
    halted = None
    for entry in stat_json:
        name = entry[0]
        if 'has-halted-shards_ammx' in name:
            halted = entry[1]
    if halted is None:
        halted = False
    return {metric_key: halted}

def generate_uniq_hostlist(searchmap_parsed):
    """Collect every distinct host entry of the parsed searchmap.

    :param searchmap_parsed: dict[service][iNum] -> list of {host: port} dicts
    :return: list of {host: port} dicts without duplicates, in first-seen order
    """
    uniq_hostlist = []
    # Dicts are not hashable, so remember a frozenset of their items. This
    # replaces a linear "not in list" scan for every entry.
    seen = set()
    for data in searchmap_parsed.values():
        for hostlist in data.values():
            for hostname in hostlist:
                key = frozenset(hostname.items())
                if key not in seen:
                    seen.add(key)
                    uniq_hostlist.append(hostname)
    return uniq_hostlist

def get_databychunk(splitted_list):
    """Poll all hosts of *splitted_list* and merge the answers into one dict.

    :param splitted_list: list of {host: port} dicts, passed to dosomeasync_list
    :return: dict {"host:port": metrics}; a later answer overwrites an
        earlier one with the same key
    """
    merged = {}
    for per_host in dosomeasync_list(splitted_list):
        merged.update(per_host)
    return merged

def get_aggregated_byshard(hostsdata, searchmap_parsed):
    """Bucket the shards of every service by their number of affected replicas.

    A replica counts as affected when its ``has-halted-shards_ammx`` value
    differs from 0. Counts above REPLICA_COUNT go into the REPLICA_COUNT
    bucket.

    :param hostsdata: dict {"host:port": metrics}
    :param searchmap_parsed: dict[service][iNum] -> list of {host: port} dicts
    :return: dict[service][count] -> list of {iNum: {"host:port": metrics}}
    """
    aggregated = {}
    for service, shards in searchmap_parsed.iteritems():
        # One (initially empty) bucket per possible count, 0..REPLICA_COUNT.
        buckets = dict((count, []) for count in range(REPLICA_COUNT + 1))
        aggregated[service] = buckets
        for shard_id, replicas in shards.iteritems():
            affected = 0
            replica_metrics = {}
            for replica in replicas:
                for host, port in replica.iteritems():
                    key = "{0}:{1}".format(host, port)
                    metrics = hostsdata[key]
                    if metrics['has-halted-shards_ammx'] != 0:
                        affected += 1
                    replica_metrics[key] = metrics
            bucket = min(affected, REPLICA_COUNT)
            buckets[bucket].append({shard_id: replica_metrics})
    return aggregated

def _bucket_statuses(false_count):
    """Return the status labels to print for a bucket, in output order."""
    statuses = []
    if false_count < WARNING_REPLICA_COUNT and false_count == 0:
        statuses.append("OK")
    if 0 < false_count < WARNING_REPLICA_COUNT:
        statuses.append("WARNING")
    if false_count == WARNING_REPLICA_COUNT:
        statuses.append("WARNING")
    if false_count >= CRITICAL_REPLICA_COUNT:
        statuses.append("CRITICAL")
    return statuses


def _print_bucket(service, false_count, status, shards, overall_list, seen_hosts):
    """Print one bucket section and record hosts whose metric value is False."""
    print("======SERVICE: {0} {1}of{2} {3}========".format(service, false_count, REPLICA_COUNT, status))
    for shard in shards:
        for inumkey, hosts in shard.items():
            # Each item is a ("host:port", metrics) tuple, printed as is.
            for host in hosts.items():
                hostname = host[0].split(":")[0]
                if hostname not in seen_hosts and str(host[1]['has-halted-shards_ammx']) == "False":
                    seen_hosts.add(hostname)
                    overall_list.append(hostname)
                print("inum {0}, Data: {1}".format(inumkey, host))


def print_result_verbose(result):
    """Print the aggregated result bucket by bucket, then list the failed hosts.

    A host is listed as failed when its ``has-halted-shards_ammx`` value
    prints as "False", which is the value parse_stat uses when the metric is
    absent from the host's answer.

    :param result: dict[service][count] -> list of {iNum: {"host:port": metrics}}
    """
    overall_list = []
    # Mirrors overall_list for constant-time membership checks.
    seen_hosts = set()
    for service in result:
        for False_Count in range(REPLICA_COUNT+1):
            for status in _bucket_statuses(False_Count):
                _print_bucket(service, False_Count, status,
                              result[service][False_Count], overall_list, seen_hosts)

    print('So we found {0} failed hosts.'.format(len(overall_list)))
    print(' +'.join(overall_list))

def get_juggler_errors(result):
    """Collect the shards that should raise a warning or a critical event.

    Shards in the bucket equal to WARNING_REPLICA_COUNT are warnings; shards
    in buckets of CRITICAL_REPLICA_COUNT and above are criticals.

    :param result: dict[service][count] -> list of {iNum: ...} dicts
    :return: tuple (criticals, warnings, crit_flag, warn_flag); the first two
        map every service to a list of iNum keys, the flags are 0 or 1.
        warn_flag is raised only by services that have no criticals.
    """
    criticals = {}
    warnings = {}
    crit_flag = 0
    warn_flag = 0
    for service in result:
        service_warnings = warnings[service] = []
        service_criticals = criticals[service] = []
        for count in range(REPLICA_COUNT + 1):
            is_warning = count == WARNING_REPLICA_COUNT
            is_critical = count >= CRITICAL_REPLICA_COUNT
            if not (is_warning or is_critical):
                continue
            shard_ids = [shard_id for shard in result[service][count] for shard_id in shard]
            if is_warning:
                service_warnings.extend(shard_ids)
            if is_critical:
                service_criticals.extend(shard_ids)
        if service_criticals:
            crit_flag = 1
        elif service_warnings:
            warn_flag = 1
    return criticals,warnings,crit_flag,warn_flag

def die(check, level, message):
    """Print ``PASSIVE-CHECK:<check>;<level>;<message>`` and exit with status 0.

    All three arguments must be strings.
    """
    report = 'PASSIVE-CHECK:' + ';'.join((check, level, message))
    print(report)
    sys.exit(0)

# --- Script body -----------------------------------------------------------

# Fetch the service description and take the release of its first gencfg group.
url = "".join((BASE_URL, SERVICE_NAME, COMMAND_URL))
if options.verbose:
    print("Getting service {0} config by url {1}".format(SERVICE_NAME, url))
data = make_auth_req(url, OAUTH_TOKEN)
if options.verbose:
    print("Done")
    print("Parse service config.")
service_dict = parse_json(data)
gencfg_groups = service_dict['runtime_attrs']['content']['instances']['extended_gencfg_groups']['groups']
release = gencfg_groups[0]['release']

# The searchmap address is pinned by MANUAL_SEARCHMAPURL.
searchmap_url = MANUAL_SEARCHMAPURL
if options.verbose:
    print("Done")
    print("Get service {0} searchmap".format(SERVICE_NAME))
    print("Searchmap_url {0}".format(searchmap_url))
    print("Done")
    print("Parse service {0} searchmap".format(SERVICE_NAME))
searchmap_parsed = get_searchmap_parsed(searchmap_url, release)
# The parsed map is dumped to test.txt in the current working directory.
with open("test.txt", "w+") as dump_file:
    dump_file.write(json.dumps(searchmap_parsed))

if options.verbose:
    print("Done")
    print("Start async jobs")

# Poll every distinct host once, then regroup the answers per shard.
uniq_hostlist = generate_uniq_hostlist(searchmap_parsed)
result_dict_host = get_databychunk(uniq_hostlist)
result = get_aggregated_byshard(result_dict_host, searchmap_parsed)

if options.verbose:
    print_result_verbose(result)

criticals, warnings, crit_flag, warn_flag = get_juggler_errors(result)

# One " Service: ... Count: ... Inumlist: ..." fragment per service.
crit_string = "".join(
    " Service: {0} Count: {1} Inumlist: {2}".format(name, len(inums), ', '.join(inums))
    for name, inums in criticals.iteritems())

warn_string = "".join(
    " Service: {0} Count: {1} Inumlist: {2}".format(name, len(inums), ', '.join(inums))
    for name, inums in warnings.iteritems())

# Critical wins over warning; die() prints the passive-check line and exits.
if crit_flag:
    level, message = '2', "3of4 FAILED. {0}".format(crit_string)
elif warn_flag:
    level, message = '1', "2of4 FAILED. {0}".format(warn_string)
else:
    level, message = '0', "OK"
die('dsearch-lucene-shard-check-halted', level, message)

