#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
import yt.wrapper as yt
import sys
import argparse
import datetime
import time
import json

def HandleOption():
    """Build and return the command-line parser for this script.

    Options:
        --server  YT proxy address (optional, defaults to hahn).
        --ts      timestamp the report is anchored to (required).
        --mode    log flavour, "android" or "ios" (optional, defaults
                  to android).
        --out     path of the file the report is written to (required).

    The parser itself is returned; the caller invokes ``parse_args``.
    """
    # (flag, keyword arguments for add_argument), in registration order.
    option_table = (
        ("--server", dict(dest="server", help="mapreduce server",
                          default='hahn.yt.yandex.net:80', required=False)),
        ("--ts", dict(dest="timestamp", help="timestamp", required=True)),
        ("--mode", dict(dest="mode", help="android or ios",
                        default='android', required=False)),
        ("--out", dict(dest="output", help="output file", required=True)),
    )

    parser = argparse.ArgumentParser()
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    return parser

def makeinput(timestamp, mode, days):
    """Return the existing daily log tables for the last ``days`` days.

    Args:
        timestamp: Unix timestamp (seconds); its local calendar date is
            the newest day considered.
        mode: sub-directory name under ``//home/freshness/keyboard_logs/``.
        days: how many consecutive days to look at, counting backwards
            from the date of ``timestamp`` (inclusive).

    Returns:
        A list of table paths, newest day first, containing only those
        paths for which ``yt.exists`` is true.
    """
    newest_day = datetime.datetime.fromtimestamp(timestamp).date()
    root = '//home/freshness/keyboard_logs/' + mode + '/'

    # Build every candidate path first, then filter by existence.
    candidates = [
        root + (newest_day - datetime.timedelta(back)).strftime('%Y-%m-%d')
        for back in range(days)
    ]
    return [table for table in candidates if yt.exists(table)]

def processuser(key, recs):
    """Summarise one device's records into a single output row.

    Used as the reducer passed to ``yt.run_reduce`` in ``main``.

    Args:
        key: mapping holding the reduce key; ``key['DeviceID']`` is read.
        recs: iterable of record dicts. Each must carry
            ``StartTimestamp``; ``SessionType``, ``EventType`` and
            ``EventValue`` are read when the record can move the
            first/last foreground marks. A missing ``ParsedParams_Key1``
            is filled in on the record as ``''``.

    Yields:
        Exactly one dict with:
            user                - the device id,
            first_action / first_action_value / first_fg
                                - timestamp, EventValue and
                                  ParsedParams_Key1 of the earliest
                                  SESSION_FOREGROUND + EVENT_CLIENT record,
            last_action / last_action_value / last_fg
                                - the same for the latest such record,
            bg_action           - the smallest StartTimestamp of any record.
        When no foreground client record exists, ``last_action`` is 0 and
        ``first_action`` keeps its initial value 241000000000; the value
        fields stay ''.
    """
    device = key['DeviceID']

    earliest_any = 241000000000
    first_ts, first_key1, first_value = 241000000000, '', ''
    last_ts, last_key1, last_value = 0, '', ''

    for rec in recs:
        rec.setdefault('ParsedParams_Key1', '')
        stamp = int(rec['StartTimestamp'])
        earliest_any = min(earliest_any, stamp)

        # Session/event type only matter if this record could extend the
        # foreground range in either direction.
        if not (stamp > last_ts or stamp < first_ts):
            continue
        if rec['SessionType'] != 'SESSION_FOREGROUND' or rec['EventType'] != 'EVENT_CLIENT':
            continue

        if stamp > last_ts:
            last_ts = stamp
            last_key1 = rec['ParsedParams_Key1']
            last_value = rec['EventValue']
        if stamp < first_ts:
            first_ts = stamp
            first_key1 = rec['ParsedParams_Key1']
            first_value = rec['EventValue']

    summary = {
        'user': device,
        'first_action': first_ts,
        'first_action_value': first_value,
        'first_fg': first_key1,
        'last_action': last_ts,
        'last_action_value': last_value,
        'last_fg': last_key1,
        'bg_action': earliest_any,
    }
    yield summary


def _count_cohorts(rows, slices):
    """Aggregate per-user rows into per-cohort retention counters.

    Args:
        rows: iterable of dicts carrying ``first_action`` and
            ``last_action`` Unix timestamps (the rows ``processuser``
            yields).
        slices: iterable of day counts to measure retention at.

    Returns:
        ``{'YYYY-MM-DD': {'total': n, '<s>': retained, ...}}`` keyed by
        the local date of each user's first action. ``total`` counts the
        users in the cohort; ``'<s>'`` counts those whose last action is
        at least ``s`` days after the first and is absent when no user
        qualified.
    """
    cohorts = {}
    for row in rows:
        # last_action is 0 for users without any foreground client event.
        if row['last_action'] == 0:
            continue
        first = datetime.datetime.fromtimestamp(row['first_action'])
        last = datetime.datetime.fromtimestamp(row['last_action'])

        cohort = cohorts.setdefault(first.strftime('%Y-%m-%d'), {'total': 0})
        cohort['total'] += 1

        lifetime = last - first
        for s in slices:
            if lifetime >= datetime.timedelta(s):
                cohort[str(s)] = cohort.get(str(s), 0) + 1
    return cohorts


def _write_report(out, cohorts, mode, timestamp, slices):
    """Write one tab-separated retention line per (variant, slice) pair.

    Two variants are reported: ``fast`` is anchored one day before the
    date of ``timestamp``, ``true`` seven days before. For each slice
    ``s`` the cohort of day ``anchor - s`` is looked up; cohorts with no
    users are skipped. Each line is::

        retention_<mode>_<variant>_<s>d \\t <retained/total> \\t <cohort day as Unix time>

    Args:
        out: writable text file object.
        cohorts: mapping produced by ``_count_cohorts``.
        mode: value inserted into the metric name.
        timestamp: Unix timestamp (seconds) the report is anchored to.
        slices: iterable of day counts.
    """
    report_day = datetime.datetime.fromtimestamp(timestamp).date()
    for variant, lag_days in (('fast', 1), ('true', 7)):
        anchor = report_day - datetime.timedelta(lag_days)
        for s in slices:
            cohort_day = anchor - datetime.timedelta(s)
            cohort = cohorts.get(cohort_day.strftime('%Y-%m-%d'))
            if cohort is None:
                continue
            # True division: the module enables it via
            # ``from __future__ import division``.
            share = cohort.get(str(s), 0) / cohort['total']
            metric = '_'.join(['retention', mode, variant, str(s) + 'd'])
            cohort_ts = int(time.mktime(cohort_day.timetuple()))
            out.write(metric + '\t' + str(share) + '\t' + str(cohort_ts) + '\n')


def main():
    """Run the retention pipeline end to end.

    Parses the command line, reduces the last 100 days of existing log
    tables by ``DeviceID`` with ``processuser`` into a temporary table,
    aggregates the result into 1/7/14-day retention counters per
    first-action date, and writes the report to the ``--out`` file.

    Only the first 10 characters of ``--ts`` are used (presumably so a
    millisecond timestamp is read as seconds -- confirm with callers).
    """
    args = HandleOption().parse_args()
    yt.update_config({'proxy': {'url': args.server}})

    slices = [1, 7, 14]
    timestamp = int(args.timestamp[:10])

    with yt.TempTable(prefix="tmp_freshness") as output:
        tables = makeinput(timestamp, args.mode, 100)
        yt.run_reduce(processuser,
                      source_table=tables,
                      destination_table=output,
                      reduce_by='DeviceID',
                      spec={'data_size_per_job': 16000000000})  # ~16GB
        # Consume the table while the temporary table still exists.
        cohorts = _count_cohorts(yt.read_table(output), slices)

    with open(args.output, "w") as out:
        _write_report(out, cohorts, args.mode, timestamp, slices)


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
