#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division

import yt.wrapper as yt
import sys
import argparse
import datetime
import time
import json
import copy
import numpy

# App versions (matched against a record's "AppVersionName") whose records
# user_metrics() skips entirely.
forbidden_versions = [
    "1.5.5",
    "1.5.4",
    "1.5.3",
    "1.5.2",
]

def HandleOption():
    """Build the command-line parser for this script.

    Returns:
        An ``argparse.ArgumentParser`` with two optional flags:
        ``--server`` (stored as ``server``) and ``--bs`` (stored as
        ``blockstat``), each with a built-in default.
    """
    # (flag, destination attribute, help text, default value)
    option_specs = (
        ("--server", "server", "mapreduce server",
         'hahn.yt.yandex.net:80'),
        ("--bs", "blockstat", "path to blockstat.dict",
         '/home/itajn/serploader/blockstat.dict'),
    )

    arg_parser = argparse.ArgumentParser()
    for flag, dest, help_text, default in option_specs:
        arg_parser.add_argument(
            flag,
            dest=dest,
            help=help_text,
            default=default,
            required=False,
        )
    return arg_parser

def average(a):
    """Return the arithmetic mean of the values in *a* as a float.

    Raises:
        ZeroDivisionError: if *a* is empty.
    """
    total = float(sum(a))
    count = len(a)
    return total / count

def processuser(key, recs):
    """Reducer: find the first and last typing/swiping timestamp of a device.

    Args:
        key: reduce key; ``key['DeviceID']`` identifies the device.
        recs: iterable of log records (dict-like). Only records whose
            ``EventValue`` is non-empty and contains the substring
            ``"Swiping"`` or ``"Typing"`` are considered; their
            ``StartTimestamp`` is converted with ``int()``.

    Yields:
        At most one row ``{'user', 'first_action', 'last_action'}``; nothing
        is yielded when no qualifying record lowered the "first action"
        below the sentinel value.
    """
    # Sentinel meaning "no qualifying record seen yet"; kept identical to the
    # historical value so the emitted rows do not change.
    no_action = 241000000000

    user = key['DeviceID']
    first_action = no_action
    last_action = 0

    for r in recs:
        event_value = r.get("EventValue")
        if not event_value:
            continue
        if "Swiping" not in event_value and "Typing" not in event_value:
            continue
        # Parse the timestamp once instead of on every comparison.
        timestamp = int(r['StartTimestamp'])
        first_action = min(first_action, timestamp)
        last_action = max(last_action, timestamp)

    if first_action != no_action:
        yield {'user': user,
               'first_action': first_action,
               'last_action': last_action,
               }


def user_metrics(key, recs):
    """Reducer: aggregate one device's typing/swiping counters into metrics.

    Each record's ``EventValue`` is parsed as JSON. Its ``"Swiping"`` and
    ``"Typing"`` entries, when present, are themselves JSON-encoded strings
    holding counters under short field names (see ``translate``). Counters
    are summed per ``AppVersionName`` and under the pseudo-version ``"all"``.
    Records whose version is listed in ``forbidden_versions`` or whose
    ``EventValue`` cannot be parsed are skipped.

    Args:
        key: reduce key; ``key["DeviceID"]`` identifies the device.
        recs: iterable of log records (dict-like).

    Yields:
        At most one row ``{"user", "metrics", "device"}`` where ``metrics``
        maps version -> {metric name -> value}. Versions for which no metric
        could be computed are dropped; nothing is yielded if none remain.
        ``device`` is ``"Fly"``, ``"other"`` or ``""`` (no record carried a
        ``Manufacturer``); the last record with a manufacturer wins.
    """
    # Long counter name -> short field name used inside the logged payloads.
    translate = {"press" : "p", "time" : "t", "show" : "s", "suggest" : "su", "longtap" : "l", "autocorrect" : "a", "revert" : "r", "total" : "total", "del" : "d", "autocorrect_num" : "autocorrect_num", "revert_num" : "revert_num", "swipe_time" : "swd", "swipe_show" : "sws", "swipe_cancel" : "swcc", "swipe_del" : "swcl", "swipe_suggest" : "swcsc", "swipe_suggest_symbols" : "swcsl", "all_show" : "all_show", "swipe_num" : "swipe_num"}
    initial = {"total" : 0, "longtap" : 0, "time" : 0, "press" : 0, "autocorrect" : 0, "show" : 0, "del" : 0, "suggest" : 0, "revert" : 0, "autocorrect_num" : 0, "revert_num" : 0}
    swipe_track = {"swipe_num" : 0, "swipe_time" : 0, "swipe_show" : 0, "swipe_cancel" : 0, "swipe_del" : 0, "swipe_suggest" : 0, "swipe_suggest_symbols" : 0, "all_show" : 0}

    user = key["DeviceID"]
    # Both templates are flat dicts of ints, so a shallow copy is sufficient.
    params = {"all": dict(initial)}
    swipe_params = {"all": dict(swipe_track)}
    device = ""

    for r in recs:
        manufacturer = r.get("Manufacturer")
        if manufacturer:
            device = "Fly" if manufacturer == "Fly" else "other"

        try:
            raw_event = json.loads(r.get("EventValue"))
        except (TypeError, ValueError):
            # TypeError: EventValue missing (None); ValueError: malformed JSON.
            continue

        version = r.get("AppVersionName", "undefined")
        if version in forbidden_versions:
            continue

        if version not in params:
            params[version] = dict(initial)
            swipe_params[version] = dict(swipe_track)

        if "Swiping" in raw_event:
            swiping = json.loads(raw_event["Swiping"])
            swipe_targets = (swipe_params["all"], swipe_params[version])
            for name in swipe_track:
                short = translate[name]
                if short in swiping:
                    for counters in swipe_targets:
                        counters[name] += swiping[short]
            for counters in swipe_targets:
                counters["swipe_num"] += 1

        if "Typing" in raw_event:
            typing_stats = json.loads(raw_event["Typing"])
            typing_targets = (params["all"], params[version])
            for name in initial:
                short = translate[name]
                # Prefer the short field name, fall back to the long one.
                if short in typing_stats:
                    amount = typing_stats[short]
                elif name in typing_stats:
                    amount = typing_stats[name]
                else:
                    continue
                for counters in typing_targets:
                    counters[name] += amount
            # Count the typing event exactly once (mirrors "swipe_num");
            # previously this ran once per counter name inside the loop.
            for counters in typing_targets:
                counters["total"] += 1
            # Number of typing events that had at least one autocorrect/revert.
            for flagged in ("autocorrect", "revert"):
                short = translate[flagged]
                if short in typing_stats and typing_stats[short] > 0:
                    for counters in typing_targets:
                        counters[flagged + "_num"] += 1

    # Debug dump of the raw counters; same text the print statement produced.
    sys.stderr.write("%s\n" % (params,))

    metrics = {}
    name = "useful_symbols_per_second"
    for v in params:
        typed = params[v]
        swiped = swipe_params[v]
        metrics[v] = {}

        # Sanity filter: some activity, a plausible amount of presses and
        # time, and more symbols shown than deleted (typing or swiping).
        passes_filter = (
            (typed["press"] > 0 or swiped["swipe_num"] > 0)
            and typed["press"] < 100000
            and (typed["time"] > 1000 or swiped["swipe_time"] > 0)
            and (typed["show"] > 0 or swiped["swipe_show"] > 0)
            and (typed["show"] > typed["del"]
                 or swiped["swipe_show"] > swiped["swipe_del"])
        )

        try:
            # The factor 1000 presumably converts milliseconds to seconds
            # (metric name says "per second") - TODO confirm the unit of "t".
            value = 1000 * (typed["show"] - typed["del"]) / typed["time"]
        except ZeroDivisionError:
            # No typing time recorded: leave this version without metrics.
            continue
        metrics[v][name] = value
        if passes_filter:
            metrics[v][name + "_filter"] = value

    metrics = purge(metrics)
    if metrics:
        yield {"user" : user,
               "metrics" : metrics,
               "device" : device
              }


#cleaning up versions with no statistics sent
def purge(metricsdict):
    """Drop every entry whose value is an empty dict.

    The dictionary is modified in place and the same object is returned.

    Args:
        metricsdict: mapping of version -> metrics dict.

    Returns:
        ``metricsdict`` itself, without the entries that compared equal
        to ``{}``.
    """
    # Iterate over a snapshot of the keys: deleting while iterating a live
    # key view raises RuntimeError on Python 3.
    for version in list(metricsdict):
        if metricsdict[version] == {}:
            del metricsdict[version]
    return metricsdict


def main():
    """Print per-day metric averages bucketed by device and user age.

    Steps, as implemented below:
      1. Parse the command line and point the YT client at ``--server``.
      2. Make sure the table of first/last user actions exists and load
         every user's ``first_action`` timestamp from it.
      3. For each of the 180 days ending at ``startdate``, read that day's
         per-user metrics table and group the values by device
         ("Fly"/"other"/"all"), by age slice (time between the user's first
         action and the day) and by app version.
      4. For the "all" version only, print one dict per (device, slice) with
         the average of each metric and the number of contributing users.

    The map-reduce steps that (re)build the input tables are kept as
    commented-out snippets and must be enabled by hand when needed.
    """
    args = HandleOption().parse_args()
    yt.update_config({'proxy': {'url': args.server}})

    # (lower, upper] bounds, in days, on the user's age for each slice.
    slices = [(0, 1), (1, 7), (7, 14), (14, 30), (30, 90), (90, 241000)]

    startdate = datetime.datetime(2017, 10, 30)
    output = '//home/freshness/staff/itajn/KA-1227/newusers'
    if not yt.exists(output):
        yt.create_table(path=output, recursive=True)

    # Uncomment if the start date has moved forward: rebuilds the table of
    # first/last actions per user from the raw daily logs.
    # source_tables = []
    # for i in range(180 + 90):
    #     day = (startdate - datetime.timedelta(i)).strftime("%Y-%m-%d")
    #     table = '//home/freshness/keyboard_logs/android/' + day
    #     if yt.exists(table):
    #         source_tables.append(table)
    # yt.run_reduce(processuser,
    #               source_table=source_tables,
    #               destination_table=output,
    #               reduce_by='DeviceID')
    # yt.run_sort(source_table=output,
    #             destination_table=output,
    #             sort_by="user")

    # user id -> datetime of the user's first recorded action.
    users = {}
    for row in yt.read_table(output):
        users[row["user"]] = datetime.datetime.fromtimestamp(row["first_action"])

    for i in range(180):
        current = startdate - datetime.timedelta(i)
        day = current.strftime("%Y-%m-%d")
        tmptable = '//home/freshness/staff/itajn/KA-1227/' + day

        # Recompute days - uncomment if new days have appeared.
        # if datetime.datetime(2017, 7, 15) <= current < datetime.datetime(2017, 8, 11):
        #     yt.run_reduce(user_metrics,
        #                   source_table='//home/freshness/keyboard_logs/android/' + day,
        #                   destination_table=tmptable,
        #                   reduce_by="DeviceID")

        # device -> slice -> version -> metric name -> list of user values.
        allmetrics = {"Fly": {}, "other": {}, "all": {}}
        for s in slices:
            allmetrics["Fly"][s] = {}
            allmetrics["other"][s] = {}
            allmetrics["all"][s] = {}

        for row in yt.read_table(tmptable):
            d = row.get("device") or "other"
            if row["user"] not in users:
                continue
            # Age of the user on this day; computed once per row.
            age = current - users[row["user"]]
            for s in slices:
                if not (datetime.timedelta(s[0]) < age <= datetime.timedelta(s[1])):
                    continue
                for v in row["metrics"].keys():
                    # Every value is recorded for the row's own device and
                    # for the "all" device aggregate.
                    for bucket in (allmetrics[d][s], allmetrics["all"][s]):
                        bucket.setdefault(v, {})
                    for mt in row["metrics"][v].keys():
                        value = row["metrics"][v][mt]
                        for bucket in (allmetrics[d][s], allmetrics["all"][s]):
                            bucket[v].setdefault(mt, []).append(value)

        for d in allmetrics.keys():
            for s in allmetrics[d].keys():
                for v in allmetrics[d][s].keys():
                    # Only the cross-version aggregate is reported.
                    if v != "all":
                        continue
                    data = {"fielddate": day, "version": v, "slice": s, "device": d}
                    for mt in allmetrics[d][s][v].keys():
                        values = allmetrics[d][s][v][mt]
                        data[mt] = average(values)
                        data[mt + "_users"] = len(values)
                        # data[mt + "_50"] = numpy.percentile(values, 50)
                        # data[mt + "_90"] = numpy.percentile(values, 90)
                    # Parenthesised form prints the same text on Python 2
                    # and is also valid on Python 3.
                    print(data)

# Run the report only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
