#!/usr/bin/env python
# -*- coding: utf-8 -*-

import yt.wrapper as yt
import sys
import argparse
import datetime

def HandleOption():
    """Build the command-line parser for this script.

    The only option is ``--server``: an optional value stored as
    ``server`` that defaults to ``hahn.yt.yandex.net:80``.

    Returns the configured ``argparse.ArgumentParser``; the caller is
    responsible for invoking ``parse_args()`` on it.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--server",
        dest="server",
        default='hahn.yt.yandex.net:80',
        required=False,
        help="mapreduce server",
    )
    return cli

def retention(key, recs):
    """Reducer: summarise the activity days of a single ``key_uid``.

    Every record's ``subkey_unixtime`` is converted to a calendar day in
    the local time zone of the machine running the reducer, and the set
    of distinct days is then reduced to two numbers.

    Args:
        key: mapping holding the reduce key; ``key['key_uid']`` is read.
        recs: iterable of records, each with a numeric
            ``subkey_unixtime`` field.  It is consumed exactly once, so a
            one-shot iterator is fine.

    Yields:
        A single dict with:
          * ``yuid``      - the value of ``key['key_uid']``;
          * ``rolling``   - whole days between the earliest and the
            latest active day;
          * ``retention`` - how many day-to-day steps, starting from the
            earliest active day, are consecutive (the streak ends at the
            first gap longer than one day).
        For an empty group both numbers are 0.
    """
    yuid = key['key_uid']

    # A set de-duplicates days in O(1) per record (the list membership
    # test this replaces was quadratic for very active users).
    active_days = sorted({
        datetime.datetime.fromtimestamp(rec['subkey_unixtime']).date()
        for rec in recs
    })

    if not active_days:
        # Nothing to measure; still emit one row so every key is
        # represented in the output.
        yield {'yuid': yuid, 'rolling': 0, 'retention': 0}
        return

    # Walk neighbouring days from the first one and stop at the first gap.
    one_day = datetime.timedelta(1)
    streak = 0
    for previous, current in zip(active_days, active_days[1:]):
        if current - previous > one_day:
            break
        streak += 1

    # The span comes from the data itself rather than from "today" or the
    # epoch, so it is correct for timestamps in the future as well and
    # does not depend on when the job runs.
    yield {'yuid': yuid,
           'rolling': (active_days[-1] - active_days[0]).days,
           'retention': streak}

def main():
    """Run the retention pipeline and print the two histograms.

    Steps:
      1. Parse the command line and point the YT client at ``--server``.
      2. Sort the source table in place by ``key_uid``.
      3. Reduce it with ``retention`` into ``<source>_processed``.
      4. Read the processed table back and count rows per ``rolling``
         value and per ``retention`` value (keys are the stringified
         numbers).
      5. Print both dictionaries.

    NOTE(review): the ``'0'`` entry of each histogram is bumped for every
    row and is bumped again when a row's value is itself 0, so it mixes
    "all rows" with "rows equal to 0" - confirm that this is intended.
    """
    options = HandleOption().parse_args()
    yt.update_config({'proxy': {'url': options.server}})

    source_path = '//home/freshness/staff/itajn/FR-2386/yuids_timestamp'
    processed_path = source_path + '_processed'

    # The reduce below groups by key_uid, so the table is sorted by it
    # first (source and destination are the same table).
    yt.run_sort(
        source_table=source_path,
        destination_table=source_path,
        sort_by='key_uid',
    )
    yt.run_reduce(
        retention,
        source_table=source_path,
        destination_table=processed_path,
        reduce_by='key_uid',
    )

    rows = yt.read_table(processed_path)
    rolling_hist = {'0': 0}
    streak_hist = {'0': 0}
    for row in rows:
        rolling_hist['0'] += 1
        streak_hist['0'] += 1

        rolling_key = str(row['rolling'])
        rolling_hist[rolling_key] = rolling_hist.get(rolling_key, 0) + 1

        streak_key = str(row['retention'])
        streak_hist[streak_key] = streak_hist.get(streak_key, 0) + 1

    # Single-argument print(...) produces the same output whether it is
    # parsed as a Python 2 statement or a Python 3 function call.
    print(rolling_hist)
    print(streak_hist)

if __name__ == '__main__':
    main()
