# coding=utf-8
import yt.wrapper as yt
import argparse
from xml.etree import ElementTree
import requests
from datetime import date

# Base URL of the holiday-calendar export. The query string fixes the start of
# the range at 2005-01-01 and requests every day (out_mode=all); the URL ends
# with an open `end_date=` parameter to which get_holidays() appends a date.
# NOTE(review): country_id=225 is presumably Russia -- confirm against the
# calendar service documentation.
template = 'http://calendar.yandex.ru/export/holidays.xml?start_date=2005-01-01&country_id=225&for_yandex=1&out_mode=all&end_date='


def get_holidays():
    """Download the holiday calendar and build two per-date lookup tables.

    The calendar is requested from ``template`` with today's date appended as
    the end of the range. Every ``day`` element of the returned XML is read in
    document order.

    Returns:
        A ``(days_off, calendar_of_holidays)`` tuple of dicts keyed by the
        ``date`` attribute of each ``day`` element:

        * ``days_off[d]`` -- number of ``day`` elements flagged
          ``is-holiday="1"`` that appear *before* ``d`` in the document
          (a running count, so the difference of two entries gives the number
          of days off between them).
        * ``calendar_of_holidays[d]`` -- the ``is-holiday`` attribute of ``d``
          converted to ``int``.

    Raises:
        requests.RequestException: if the request times out, fails, or the
            server answers with an HTTP error status.
        xml.etree.ElementTree.ParseError: if the response body is not
            well-formed XML.
        TypeError: if a ``day`` element has no ``is-holiday`` attribute.
    """
    url = template + date.today().strftime("%Y-%m-%d")

    # Without a timeout requests waits forever on a stalled connection, which
    # would hang the whole script at import time.
    response = requests.get(url, timeout=60)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    tree = ElementTree.fromstring(response.content)

    days_off = {}
    calendar_of_holidays = {}
    holidays_seen = 0

    for day in tree.iter('day'):
        day_date = day.get('date')
        holiday_flag = day.get('is-holiday')

        # Store the count *before* this day is taken into account.
        days_off[day_date] = holidays_seen
        calendar_of_holidays[day_date] = int(holiday_flag)
        if holiday_flag == '1':
            holidays_seen += 1

    return days_off, calendar_of_holidays

# Fetched once at import time (this performs an HTTP request). The two tables
# are bound as default arguments of calc_duration_reducer below.
# NOTE(review): presumably done so that the tables travel with the reducer when
# it is shipped to YT jobs -- confirm.
days_off, calendar_of_holidays = get_holidays()


def calc_duration_reducer(key, row_iterator, days_off=days_off, calendar_of_holidays=calendar_of_holidays):
    """Re-emit the rows of one reduce group, adding a working-time ``duration``.

    Rows are consumed in the order the iterator yields them. Each row is
    emitted with ``duration`` set to the working time, in milliseconds, between
    its ``time_stamp`` and the ``time_stamp`` of the following row. The last
    row of the group is emitted with ``duration`` 0. An empty iterator yields
    nothing.

    Working time is counted only between 10:00 and 19:00 (9 hours) on dates
    whose ``calendar_of_holidays`` entry is 0. Timestamps are epoch
    milliseconds and are converted with ``datetime.fromtimestamp``, i.e. in
    the local time zone of the machine running the job.

    Args:
        key: reduce key of the group (unused).
        row_iterator: iterable of row dicts; every row must contain all
            fields copied to the output (see ``OUTPUT_FIELDS``).
        days_off: dict mapping ISO date -> running count of days off before
            that date.
        calendar_of_holidays: dict mapping ISO date -> 0 for a working day,
            non-zero otherwise.

    Yields:
        dict: the copied input fields plus ``duration``.

    Raises:
        KeyError: if a row lacks an expected field or a date is missing from
            the calendar tables.
    """
    from datetime import datetime

    ONE_HOUR = 3600000
    ONE_DAY = 86400000

    # Fields copied verbatim from the input row, in output order.
    OUTPUT_FIELDS = (
        'issue',
        'time_stamp',
        'assignee',
        'assignee_name',
        'author',
        'categoryType',
        'component',
        'dt',
        'execution_type',
        'month',
        'old_status',
        'priority',
        'raw_status',
        'status',
        'subtype',
        'support_component',
        'support_type',
        'tags',
        'ticket_url',
        'type',
        'week',
    )

    def get_date(ts):
        # ISO date (YYYY-MM-DD) of a millisecond timestamp, local time.
        return datetime.fromtimestamp(ts / 1000).date().isoformat()

    def at_hour(ts, hour):
        # Millisecond timestamp of `hour`:00:00 on the same local day as `ts`.
        moment = datetime.fromtimestamp(ts / 1000).replace(hour=hour, minute=0, second=0, microsecond=0)
        return int(moment.timestamp() * 1000)

    def start_of_working_day(ts):
        return at_hour(ts, 10)

    def end_of_working_day(ts):
        return at_hour(ts, 19)

    def days_off_between(previous_ts, current_ts):
        # Days off among the dates strictly between the two timestamps' dates:
        # the running counts are "before this date", so the difference covers
        # [day after previous, current day).
        return days_off[get_date(current_ts)] - days_off[get_date(previous_ts + ONE_DAY)]

    def is_working_day(iso_date):
        return calendar_of_holidays[iso_date] == 0

    def days_between(previous_ts, current_ts):
        # Number of whole calendar days strictly between the two dates:
        # measured from the very end of the previous day to the start of the
        # current one (sub-second remainder is irrelevant for `.days`).
        current_midnight = datetime.fromtimestamp(current_ts / 1000).replace(hour=0, minute=0, second=0)
        previous_day_end = datetime.fromtimestamp(previous_ts / 1000).replace(
            hour=23, minute=59, second=59, microsecond=999999)
        return (current_midnight - previous_day_end).days

    def calc_duration(previous_ts, current_ts):
        previous_date = get_date(previous_ts)
        current_date = get_date(current_ts)

        if previous_date == current_date:
            # Same calendar day: the overlap of [previous, current] with the
            # working window, and only if the day is a working day at all.
            if not is_working_day(previous_date):
                return 0
            overlap = (min(current_ts, end_of_working_day(current_ts)) -
                       max(start_of_working_day(previous_ts), previous_ts))
            # Negative when the whole interval lies outside working hours.
            return max(overlap, 0)

        duration = 0
        # Remainder of the first day's working window.
        if is_working_day(previous_date):
            duration += max(end_of_working_day(previous_ts) -
                            max(start_of_working_day(previous_ts), previous_ts), 0)
        # Elapsed part of the last day's working window.
        if is_working_day(current_date):
            duration += max(min(current_ts, end_of_working_day(current_ts)) -
                            start_of_working_day(current_ts), 0)
        # Full working days in between: whole days minus days off, 9 h each.
        working_days = days_between(previous_ts, current_ts) - days_off_between(previous_ts, current_ts)
        duration += working_days * 9 * ONE_HOUR
        return duration

    def make_row(row, duration):
        # Copy the known fields and attach the computed duration.
        result = {field: row[field] for field in OUTPUT_FIELDS}
        result['duration'] = duration
        return result

    previous = None

    for current in row_iterator:
        if previous is not None:
            yield make_row(previous, calc_duration(previous['time_stamp'], current['time_stamp']))
        previous = current

    # The last row has no successor, so no time can be attributed to it.
    # Guard against an empty group instead of failing on None.
    if previous is not None:
        yield make_row(previous, 0)


def main():
    """Parse command-line options, configure the YT client and run the reduce.

    Options: ``--yt_token``, ``--source_table`` and ``--target_table`` are
    mandatory, ``--yt_pool`` is optional. The reduce groups rows of the source
    table by ``issue`` and writes the reducer output to the target table.
    """
    cli = argparse.ArgumentParser()
    for flag, is_required, description in (
        ('--yt_token', True, 'Hahn Yt token'),
        ('--yt_pool', False, 'yt_pool'),
        ('--source_table', True, 'Source table at yt'),
        ('--target_table', True, 'Result table at yt'),
    ):
        cli.add_argument(flag, required=is_required, help=description)
    options = cli.parse_args()

    # Client configuration: cluster, credentials and (optional) compute pool.
    yt.config["proxy"]["url"] = "hahn"
    yt.config["token"] = options.yt_token
    yt.config["pool"] = options.yt_pool

    yt.run_reduce(
        calc_duration_reducer,
        options.source_table,
        options.target_table,
        reduce_by=['issue'],
        spec={"data_size_per_job": 100000},
    )


# Run the job only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
