#!/usr/bin/env python
# -*- coding: utf-8 -*-

import time
import datetime
from datetime import datetime as dt, date as dd
from urllib import urlencode
import urllib2
from lxml import html as lh, etree as le
import socket
import sys
from json import dumps as json_dumps
import yaml
import xml.etree.ElementTree as ET
from os import path,mkdir,chdir
import logging
from collections import OrderedDict as odict
from functools import wraps

def die(msg, exit_code=1):
    """Log *msg* at ERROR level and terminate the process.

    msg -- text passed to logging.error
    exit_code -- process exit status (default 1)

    Raises SystemExit, exactly as sys.exit() would.
    """
    logging.error(msg)
    raise SystemExit(exit_code)


def retry(ExceptionToCheck, tries=3, delay=3, backoff=2):
    """Retry calling the decorated function using an exponential backoff.

    http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    ExceptionToCheck -- exception class (or tuple of classes) that triggers
                        a retry; any other exception propagates immediately
    tries            -- total number of attempts
    delay            -- seconds to sleep after the first failed attempt
    backoff          -- factor the delay is multiplied by after each failure

    The wrapper returns the result of the first successful call.  When every
    attempt fails, the last error is reported through die(), which logs it
    and terminates the process.
    """
    def deco_retry(f):
        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            last_error = None
            while mtries >= 1:
                try:
                    return f(*args, **kwargs)
                except ExceptionToCheck as e:
                    # Keep the error explicitly: the name bound by "except"
                    # must not be relied upon after the loop.
                    last_error = e
                    mtries -= 1
                    # Only wait when another attempt will actually follow.
                    if mtries >= 1:
                        time.sleep(mdelay)
                        mdelay *= backoff
            if last_error is None:
                # tries < 1: the function was never called.
                die("retry: no attempts were made (tries=%r)" % (tries,))
            # Not every exception has a (non-empty) strerror; fall back to
            # the exception's own text so the real cause is always logged.
            die(str(getattr(last_error, 'strerror', None) or last_error))
        return f_retry
    return deco_retry


@retry((urllib2.HTTPError), tries=2, delay=20, backoff=2)
def getGolem(start_date, end_date, golem_user, priority, page_format, start_end):
    """Download the event history page from Golem and return the raw body.

    start_date, end_date -- bounds of the requested time window
    golem_user           -- value of the "resp" request field
    priority             -- value of the "priority" request field
    page_format          -- value of the "format" request field
                            (the script uses "xml" and "html")
    start_end            -- "start" filters by sdate_s/sdate_e,
                            "end" filters by edate_s/edate_e

    Returns the response body as read from the HTTP response.
    urllib2.HTTPError is retried by the decorator.
    """
    api_host = "https://golem.yandex-team.ru/events.sbml?"
    myreq = {}
    myreq["type"] = "history"
    myreq["priority"] = priority
    myreq["resp"] = golem_user
    if start_end == "start":
        myreq["sdate_s"] = start_date
        myreq["sdate_e"] = end_date
    elif start_end == "end":
        myreq["edate_s"] = start_date
        myreq["edate_e"] = end_date
    else:
        # The request is still sent (as before), but without any date
        # filter; make that visible instead of failing silently.
        logging.warning("Unknown start_end value %r, request has no date filter" % (start_end,))
    myreq["format"] = page_format
    myreq["show_events_show_only_unreachable"] = ""
    myreq["object"] = ""
    request = api_host + urlencode(myreq)
    logging.debug("Get " + request)
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()


def _golem_time_to_timestamp(time_text):
    """Convert the text of one "time" cell into a local unix timestamp."""
    time_text = time_text.strip()
    if time_text == '':
        # Empty cell: no time recorded. The main script compares against
        # float('inf') to recognise events that have not finished yet.
        return float('inf')
    try:
        moment = dt.strptime(time_text, "%Y-%m-%d %H:%M:%S")
    except ValueError:
        # Cell holds only a time of day: combine it with today's date.
        time_of_day = dt.strptime(time_text, "%H:%M:%S").time()
        moment = dt.combine(dt.now().date(), time_of_day)
    return time.mktime(moment.timetuple())


def parse_html(data):
    """Collect the timestamps shown for every event on the Golem HTML page.

    data -- HTML document containing an element with id "events_table"

    Returns a dict mapping each row's "data-id" attribute to the list of
    timestamps (floats, in document order) found in that row's elements of
    class "time".  Rows without a "data-id" are skipped.  An empty cell is
    stored as float('inf').

    Raises KeyError if the document has no "events_table" element and
    ValueError if a cell matches neither supported time format.
    """
    html_events = {}
    root = lh.document_fromstring(data)
    events_table = root.get_element_by_id('events_table')
    for event in events_table.iter('tr'):
        event_id = event.get('data-id')
        if not event_id:
            continue
        html_events[event_id] = [
            _golem_time_to_timestamp(cell.text_content())
            for cell in event.find_class('time')
        ]
    return html_events


def parse_xml(data):
    """Yield one dict per <event> element of the Golem XML export.

    data -- XML document as accepted by ElementTree.fromstring

    Known attributes are moved out of the element's attribute mapping into
    renamed keys (missing ones get the listed default); whatever attributes
    remain are serialised as JSON under "etc".  "start_time_unixtime" has no
    default: an event without it raises KeyError.
    """
    # (result key, XML attribute, default), in the order the keys are stored.
    before_history = (
        ("has_downtime", "has_downtime", "0"),
        ("active_responsible", "active_responsible", ""),
        ("seq", "seq", ""),
        ("object_type", "object_type", ""),
        ("event_type_id", "eventtype_id", ""),
        ("object_location", "object_location", ""),
        ("event_up_unixtime", "up_time_unixtime", ""),
        ("event_type_downtime", "eventtype_downtime", "0"),
        ("event_type", "eventtype", ""),
        ("object_id", "object_id", ""),
        ("priority", "priority", ""),
        ("object_name", "object_name", ""),
    )
    after_history = (
        ("parent_seq", "parent_seq", ""),
        ("object_downtime", "object_downtime", "0"),
        ("has_children", "has_children", "0"),
        ("urgency", "urgency", ""),
    )
    for node in ET.fromstring(data).iter("event"):
        attrs = node.attrib
        record = {}
        record["event_id"] = attrs.pop("id", "")
        started_at = int(attrs.pop("start_time_unixtime"))
        record["event_start_unixtime"] = started_at
        record["event_date"] = dd.fromtimestamp(started_at)
        for key, attr_name, fallback in before_history:
            record[key] = attrs.pop(attr_name, fallback)
        # Drop the check mark, spell the phone symbol out as "call ",
        # then store the text UTF-8 encoded.
        history = attrs.pop("last_manual_eventhistory", "")
        history = history.replace(u"\u2713", "")
        history = history.replace(u"\u260E", "call ")
        record["last_manual_event_history"] = history.encode('utf8')
        for key, attr_name, fallback in after_history:
            record[key] = attrs.pop(attr_name, fallback)
        # Everything that was not consumed above.
        record["etc"] = json_dumps(attrs)
        yield record


#@retry((urllib.HTTPError), tries=2, delay=10, backoff=2)
def writetoCH(event, cl_user, cl_password, cl_host, cl_port, table):
    """Insert one event into ClickHouse over its HTTP interface.

    event                -- dict column name -> value; empty-string values
                            are omitted from the INSERT
    cl_user, cl_password -- accepted for interface compatibility; not used
                            by the request
    cl_host, cl_port     -- ClickHouse HTTP endpoint (cl_port must be an int)
    table                -- table name, already URL-escaped by the caller

    The row is sent as a single TabSeparated line.  Failures are logged and
    not raised, so one bad row does not stop the caller.
    """
    columns = []
    values = []
    # Sorted by column name so the generated statement is deterministic.
    for column, value in sorted(event.items()):
        if value == '':
            continue
        columns.append(column)
        # NOTE(review): values are not escaped for TabSeparated; an embedded
        # tab, newline or backslash would presumably corrupt the row.
        values.append(str(value).strip())
    toSend = "\t".join(values)
    chUrlPrefix = 'http://%s:%d/?query=INSERT' % (cl_host, cl_port) + '%20INTO'
    url = '%20'.join((chUrlPrefix, table, '(' + '%2C'.join(columns) + ')', 'FORMAT', 'TabSeparated'))
    try:
        response = urllib2.urlopen(urllib2.Request(url, toSend), timeout=20)
        try:
            response.read()
        finally:
            response.close()
    except Exception as e:
        # Format the exception itself: not every exception has "strerror",
        # and reading it here used to raise inside the error handler.
        logging.error("Failed to send data to ClickHouse! Error: %s. Bad server: %s:%s." % (e, cl_host, cl_port))


def write_new_date(last_date):
    """Store a two-line time span in the file 'golem' in the current directory.

    last_date -- object with strftime(); written as the first line

    The second line is the start of tomorrow (today's date plus one day).
    Both lines use the "%Y-%m-%d %H:%M:%S" format.
    """
    stamp_format = "%Y-%m-%d %H:%M:%S"
    tomorrow = dd.today() + datetime.timedelta(days = 1)
    second_line = tomorrow.strftime(stamp_format)
    with open('golem', 'w') as state_file:
        first_line = last_date.strftime(stamp_format)
        state_file.write("%s\n%s\n" % (first_line, second_line))


def get_date_from_file(filename):
    """Read the (start, end) time span from a two-line state file.

    filename -- file whose first line is the start and second line the end
                of the span, both formatted as "%Y-%m-%d %H:%M:%S"

    Returns a (start_date, end_date) tuple of datetime objects.  Fallbacks:
      * file cannot be read       -> (start of yesterday, now)
      * first line cannot be used -> (start of yesterday, now)
      * only second line is bad   -> (parsed start, now)
    Terminates through die() if the start date lies in the future.
    """
    time_format = "%Y-%m-%d %H:%M:%S"
    now = dt.now()
    yesterday = now - datetime.timedelta(days=1)
    yesterday_begin = dt(yesterday.year, yesterday.month, yesterday.day, 0, 0, 0, 0)
    # None means "start not parsed yet", so the handler below can tell which
    # of the two lines failed.
    start_date = None
    try:
        with open(filename, 'r') as f:
            spane_bordr = [line.strip() for line in f]
        start_date = dt.strptime(spane_bordr[0], time_format)
        end_date = dt.strptime(spane_bordr[1], time_format)
    except IOError:
        logging.error("Can't read time file, use last day")
        start_date = yesterday_begin
        end_date = now
    except (IndexError, ValueError):
        if start_date is None:
            logging.error("Can't parse time file")
            start_date = yesterday_begin
        else:
            logging.error("Can parse only start date time file")
        end_date = now
    else:
        logging.info("Use spane from %s to %s" % (str(start_date), str(end_date)))
    if dt.today() < start_date:
        die("Can't get info from future ;-) . Exit")
    return (start_date, end_date)


def get_date_from_clickhouse(cl_host, cl_port, table):
    """Ask ClickHouse for the newest Timestamp stored in *table*.

    cl_host, cl_port -- ClickHouse HTTP endpoint (cl_port must be an int)
    table            -- table name, already URL-escaped by the caller

    Returns a tuple (newest stored timestamp, current time), both as
    datetime objects.  Network errors and an unparsable reply propagate.
    """
    query = "?query=Select%20toDateTime(max(Timestamp))%20from%20"
    url = "http://%s:%d/" % (cl_host, cl_port) + query + table
    reply = urllib2.urlopen(url).read()
    newest = dt.strptime(reply.strip(), "%Y-%m-%d %H:%M:%S")
    return (newest, dt.now())

if __name__ == '__main__':
    # Script entry point: read the config, fetch events from Golem for the
    # span [newest Timestamp in ClickHouse, now], and insert them into
    # ClickHouse one row at a time.
    #start_date = dt.strptime("2016-12-28 00:00:00", "%Y-%m-%d %H:%M:%S")
    #end_date = dt.strptime("2017-01-08 00:05:00", "%Y-%m-%d %H:%M:%S")
    conffile = '/etc/yandex-du-monitoring-count/golem.conf'

    # The log directory must exist before file logging is configured.
    if not path.exists('/var/log/mon-count'):
        mkdir('/var/log/mon-count')
    # The file name contains year and month only, i.e. one log file per month.
    logfile = '/var/log/mon-count/log.golem.%s' % dt.now().date().strftime("%Y%m")
    # A 'debug' command-line argument sends the log to stdout instead of the
    # log file; the level is DEBUG in both cases.
    if 'debug' in sys.argv[1:]:
        logging.basicConfig(format = '%(levelname)-8s [%(asctime)s] %(message)s',level = logging.DEBUG,stream = sys.stdout)
    else:
        logging.basicConfig(format = '%(levelname)-8s [%(asctime)s] %(message)s',level = logging.DEBUG,filename = logfile)

    logging.info("Try to read config")
    try:
        with open(conffile, 'r') as cfg_file:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary objects; consider yaml.safe_load if this file could
            # ever be untrusted.
            cfg = yaml.load(cfg_file)
    except IOError as e:
        die("Can't open config file %s. Error: %s" % (conffile, e.strerror))
    except yaml.YAMLError as e:
        die("Can't parse config file %s. Error: %s" % (conffile, str(e).replace("\n", " "))) 
    # A missing config key raises KeyError here.
    golem_user = cfg["golem_users"]
    priority = cfg["priority"]
    cl_user = cfg["cl_user"]
    # The password is not read from the config; it is always empty.
    cl_password = ''
    cl_port = cfg["cl_port"]
    cl_host = cfg["cl_host"]
    # The table name is concatenated into request URLs, so '_' and '.' are
    # percent-encoded up front.
    table = cfg["clickhouse_table"].replace("_","%5F").replace(".","%2E")
    workdir = '/var/cache/yandex-du-monitoring-count'
    # Enter the working directory, creating it on first use.
    try:
        chdir(workdir)
    except OSError:
        mkdir(workdir)
        chdir(workdir)
    logging.info("---" * 10 + "Start" + "---" * 10)
    # Span to fetch: from the newest Timestamp stored in ClickHouse until now.
    start_date, end_date = get_date_from_clickhouse(cl_host, cl_port, table)
    #start_date, end_date = get_date_from_file('golem')
    logging.info("Start getting info from Golem")
    logging.info("Get xml from Golem")
    # The HTTP requests run here; parse_xml is a generator, so the XML itself
    # is parsed lazily while the loops below iterate.
    xml_events_started = parse_xml(getGolem(start_date, end_date, golem_user, priority, "xml", "start"))
    xml_events_ended = parse_xml(getGolem(start_date, end_date, golem_user, priority, "xml", "end"))
    logging.info("Get html from Golem")
    # The HTML pages supply, per event id, the list of timestamps shown in
    # the row; the end time is taken from these lists below.
    html_events_started = parse_html(getGolem(start_date, end_date, golem_user, priority, "html", "start"))
    html_events_ended = parse_html(getGolem(start_date, end_date, golem_user, priority, "html", "end"))
    # Number of events handed to writetoCH.
    i = 0
    # Should the table be created here?
    """
    create table default.monitoring(event_id UInt32, event_date Date, active_responsible String, event_type String, event_type_downtime Enum8('0' = 0, '1' = 1), event_type_id UInt8, has_children Enum8('0' = 0, '1' = 1), has_downtime Enum8('0' = 0, '1' = 1), last_manual_event_history String, object_downtime Enum8('0' = 0, '1' = 1), object_id UInt32, object_location String, object_name String, object_type String, parent_seq UInt8, priority String, seq UInt8, event_start_unixtime DateTime, event_end_unixtime DateTime, event_up_unixtime DateTime, urgency String, etc String, start_end Enum8('In progress' = 1, 'Finished' = 2), Timestamp UInt32) ENGINE = MergeTree(event_date, (event_id, event_date), 8192)
    """
    # One write timestamp is shared by every row inserted during this run.
    write_timestmp = int(time.time())
    logging.info("Writing to clickhouse")
    # Events that started inside the span.
    for xml_event in xml_events_started:
        ev_id = xml_event.get("event_id")
        # Remove the start time from the HTML time list; presumably what is
        # left at index 0 is the end time. Raises KeyError if the HTML page
        # has no such event and ValueError if the start time is not listed.
        html_events_started[ev_id].remove(xml_event.get("event_start_unixtime"))
        # parse_html stores float('inf') for an empty time cell.
        if html_events_started.get(ev_id)[0] != float('inf'):
            xml_event["event_end_unixtime"] = int(html_events_started.get(ev_id)[0])
            xml_event["start_end"] = "Finished"
        else:
            # An empty string makes writetoCH leave the column out.
            xml_event["event_end_unixtime"] = ""
            xml_event["start_end"] = "In progress"
        xml_event["Timestamp"] = write_timestmp
        i += 1
        writetoCH(xml_event, cl_user, cl_password, cl_host, cl_port, table)
    logging.info("Writing to clickhouse")
    # Events that ended inside the span; they are always written as
    # "Finished" (there is no float('inf') check in this loop).
    for xml_event in xml_events_ended:
        ev_id = xml_event.get("event_id")
        html_events_ended[ev_id].remove(xml_event.get("event_start_unixtime"))
        xml_event["event_end_unixtime"] = int(html_events_ended.get(ev_id)[0])
        xml_event["start_end"] = "Finished"
        xml_event["Timestamp"] = write_timestmp
        i += 1
        writetoCH(xml_event, cl_user, cl_password, cl_host, cl_port, table)
    # writetoCH logs failures instead of raising, so this counts attempted
    # writes rather than confirmed rows.
    logging.info("write %d rows to clickhouse" % i)
    # Record the processed span in the file 'golem' inside workdir.
    write_new_date(end_date)
    logging.info("---" * 10 + "End" + "---" * 10)
