#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division

import requests
import sys
import argparse
import datetime
import json
import numpy
import time
from collections import defaultdict

def HandleOption():
    """Create the argument parser for the command line interface.

    Three options are registered, all of them mandatory:
    ``--file`` (input with CH data), ``--date`` (date) and
    ``--out`` (file for calculated output).

    Returns:
        The configured ``argparse.ArgumentParser`` (not yet parsed).
    """
    # (option name, help text) pairs, registered in this order.
    option_specs = (
        ("file", "input with CH data"),
        ("date", "date"),
        ("out", "file for calculated output"),
    )
    cli = argparse.ArgumentParser()
    for option_name, description in option_specs:
        cli.add_argument(
            "--" + option_name,
            dest=option_name,
            help=description,
            required=True,
        )
    return cli


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is falsy."""
    # Relies on true division (the file enables it via __future__ on Python 2).
    return numerator / denominator if denominator else 0


def calc_metrics(userdata):
    """Compute per-subject and total solving metrics for a single user.

    Args:
        userdata: mapping of timestamp -> event dict. Every event must carry
            a "params" mapping. Two optional entries of "params" are read:

            * "results_by_variant_id": variant id -> dict with the keys
              "subject_id", "solved" and "unsolved";
            * "check_answer_button": dict whose "by_subject_id" entry maps
              subject -> dict with a "by_status" container.

    Returns:
        ``{"all": {subject: {metric: value}}}``. Besides one entry per
        subject seen in the events, the inner mapping has an "all" entry
        holding the totals over all subjects. Per-subject entries contain
        "variants_solved", "problems_variants_solved",
        "problems_variants_correct", "problems_variants_rate",
        "problems_standalone_solved", "problems_standalone_correct",
        "problems_standalone_rate", "activity_solved" and
        "activity_correct"; the "all" entry contains the same keys.

    Raises:
        KeyError: if an event lacks "params" or one of the nested keys
            listed above.
    """
    variants = defaultdict(int)
    problems = defaultdict(int)
    problems_correct = defaultdict(int)
    st_problems = defaultdict(int)
    st_problems_correct = defaultdict(int)

    # Visit events in sorted key order so accumulation is deterministic.
    for ts in sorted(userdata):
        params = userdata[ts]["params"]

        if "results_by_variant_id" in params:
            for variant in params["results_by_variant_id"].values():
                subj = str(variant["subject_id"])
                variants[subj] += 1
                problems[subj] += variant["solved"] + variant["unsolved"]
                problems_correct[subj] += variant["solved"]

        if "check_answer_button" in params:
            by_subject = params["check_answer_button"]["by_subject_id"]
            for subj, info in by_subject.items():
                # One standalone attempt per event and subject; it counts as
                # correct when a "true" status is present.
                st_problems[subj] += 1
                if "true" in info["by_status"]:
                    st_problems_correct[subj] += 1

    totals = {
        "variants_solved": 0,
        "problems_variants_solved": 0,
        "problems_variants_correct": 0,
        "problems_standalone_solved": 0,
        "problems_standalone_correct": 0,
        "activity_solved": 0,
        "activity_correct": 0,
    }
    result = {}

    # Set union instead of `keys() + keys()`: dict views cannot be
    # concatenated on Python 3.
    for subj in set(problems) | set(st_problems):
        entry = {
            "variants_solved": variants[subj],
            "problems_variants_solved": problems[subj],
            "problems_variants_correct": problems_correct[subj],
            "problems_standalone_solved": st_problems[subj],
            "problems_standalone_correct": st_problems_correct[subj],
            "activity_solved": st_problems[subj] + problems[subj],
            "activity_correct": st_problems_correct[subj] + problems_correct[subj],
        }
        for key, value in entry.items():
            totals[key] += value
        entry["problems_variants_rate"] = _safe_ratio(
            problems_correct[subj], problems[subj])
        entry["problems_standalone_rate"] = _safe_ratio(
            st_problems_correct[subj], st_problems[subj])
        result[subj] = entry

    totals["problems_variants_rate"] = _safe_ratio(
        totals["problems_variants_correct"], totals["problems_variants_solved"])
    totals["problems_standalone_rate"] = _safe_ratio(
        totals["problems_standalone_correct"], totals["problems_standalone_solved"])

    # NOTE(review): a subject whose id stringifies to "all" would collide
    # with the totals entry; the totals take precedence here.
    result["all"] = totals

    return {"all": result}

def main():
    """Read the tab-separated input, compute metrics and write them as JSON.

    Input lines are split on tabs; the fields at index 0, 1, 3, 4 and 5 are
    used as user, JSON-encoded params, timestamp, url and ref. Events are
    grouped per user and timestamp (a later line with the same user and
    timestamp replaces the earlier one). ``calc_metrics`` is run per user,
    and for every segment/subject/metric the average and the median over
    users are written to the ``--out`` file as a JSON list.

    Lines with fewer than six fields are reported on stderr and skipped.
    Params that are not valid JSON, or not a JSON object, are treated as
    an empty dict.
    """
    args = HandleOption().parse_args()

    userdata = {}
    with open(args.file) as infile:
        for lineno, line in enumerate(infile):
            if lineno % 10000 == 241:
                sys.stderr.write("Processing line %s\n" % lineno)
            # Strip only the line terminator: a plain strip() would also eat
            # leading/trailing tabs and thereby drop empty first/last fields.
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) < 6:
                sys.stderr.write(
                    "Skipping malformed line %s: expected at least 6 "
                    "tab-separated fields, got %s\n" % (lineno, len(fields)))
                continue
            user = fields[0]
            try:
                params = json.loads(fields[1])
            except ValueError:
                # Unparseable params are deliberately treated as "no params".
                params = {}
            if not isinstance(params, dict):
                # calc_metrics looks keys up in params, so it must be a dict.
                params = {}
            timestamp = fields[3]
            url = fields[4]
            ref = fields[5]
            userdata.setdefault(user, {})[timestamp] = {
                "ref": ref, "url": url, "params": params}

    # segments -> subjects -> metrics -> list of per-user values
    allmetrics = {"all": {}}
    for user in userdata:
        metrics = calc_metrics(userdata[user])
        for label, subjects in metrics.items():
            segment = allmetrics.setdefault(label, {})
            for subj, values in subjects.items():
                per_subject = segment.setdefault(subj, {})
                for metric, value in values.items():
                    per_subject.setdefault(metric, []).append(value)

    output_metrics = []
    for seg in allmetrics:
        for subj in allmetrics[seg]:
            out_avg = {"fielddate": args.date, "segment": seg,
                       "subject": subj, "calc": "average"}
            out_med = {"fielddate": args.date, "segment": seg,
                       "subject": subj, "calc": "median"}
            for metric, samples in allmetrics[seg][subj].items():
                # Convert numpy scalars to plain floats for JSON encoding.
                out_avg[metric] = float(numpy.average(samples))
                out_med[metric] = float(numpy.median(samples))
            output_metrics.append(out_avg)
            output_metrics.append(out_med)

    with open(args.out, "w") as output:
        json.dump(output_metrics, output, indent=4)

# Run the command line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
