import sys
import urlparse
import re
import urllib
import urllib2
import math
from mapreducelib import MapReduce, Record, TemporaryTable
import argparse
import datetime
import time
import json
from os import listdir
from os.path import isfile, join
from datetime import date, datetime, timedelta


def parse_redir(recs):
    """Map step: extract suggest parameters from one redir-log record.

    The record text (``recs.value``) is kept only when it carries a
    yandex.by ``HTTP_REFERER`` and the exact ``dtype=stred / pid=0 /
    cid=2873`` marker.  It is then split on ``@@`` into ``name=value``
    pairs, and if the ``path`` parameter mentions ``morda_by_desktop.``
    one ``Record`` is yielded for each of ``since_first_change``,
    ``ratio``, ``text`` and ``path`` that is present.

    :param recs: a single input record; only its ``value`` attribute is read.
    :returns: generator of ``Record(name, "", value)``.
    """
    referer_re = re.compile(r'HTTP_REFERER=(https?://)(www\.)?yandex\.by')
    pid_re = re.compile(r'@@dtype=stred@@pid=0@@cid=2873@@')
    try:
        record = recs.value
    except AttributeError:
        # Nothing to parse; skip the record instead of failing later with
        # a NameError on an unbound variable.
        return
    if not (referer_re.search(record) and pid_re.search(record)):
        return

    params = {}
    for item in record.split("@@"):
        # Split on the first '=' only, so values that themselves contain
        # '=' are kept whole.
        name, sep, value = item.partition("=")
        if sep:
            params[name] = value

    if 'morda_by_desktop.' not in params.get('path', ''):
        return
    # Fixed key order keeps the output deterministic.
    for key in ('since_first_change', 'ratio', 'text', 'path'):
        if key in params:
            yield Record(key, "", params[key])

def time_metrics(times):
    """Summarise input times into average, trimmed mean and bucket shares.

    :param times: iterable of values convertible with ``float``.  The
        bucket labels treat them as milliseconds (5000 -> "5 seconds").
    :returns: list of ``'name<TAB>value'`` strings in a fixed order
        (average, 95% trimmed mean, six bucket percentages, five
        cumulative percentages), or ``None`` when ``times`` is empty.
    """
    values = sorted(float(t) for t in times)
    n = len(values)
    if n == 0:
        return None

    # Inclusive upper bounds of the first five buckets; anything above the
    # last bound goes into the final "more than 25 seconds" bucket.  A
    # value of exactly 25000 belongs to the 20-25 bucket.
    bounds = (5000, 10000, 15000, 20000, 25000)
    counts = [0] * (len(bounds) + 1)
    for value in values:
        for idx, upper in enumerate(bounds):
            if value <= upper:
                counts[idx] += 1
                break
        else:
            counts[-1] += 1

    mean = sum(values) / n

    # Mean of the lowest 95% of samples: divide by the number of samples
    # actually summed.  Tiny inputs where that number is 0 report 0.0.
    kept = int(9.5 * n / 10)
    trimmed_mean = sum(values[:kept]) / kept if kept else 0.0

    shares = [count * 100.0 / n for count in counts]

    result = [
        'input_time_average' + '\t' + str(mean),
        'average_quantile_95%' + '\t' + str(trimmed_mean),
    ]
    bucket_names = (
        'input_time_less_5_seconds_%',
        'input_time_5_10_seconds_%',
        'input_time_10_15_seconds_%',
        'input_time_15_20_seconds_%',
        'input_time_20_25_seconds_%',
        'input_time_more_25_seconds_%',
    )
    for name, share in zip(bucket_names, shares):
        result.append(name + '\t' + str(share))

    cumulative_names = (
        '0_5_seconds_%',
        '0_10_seconds_%',
        '0_15_seconds_%',
        '0_20_seconds_%',
        '0_25_seconds_%',
    )
    running = 0.0
    # zip stops after the five bounded buckets; the open-ended one is not
    # part of any cumulative figure.
    for name, share in zip(cumulative_names, shares):
        running += share
        result.append(name + '\t' + str(running))
    return result

def used_metrics(path):
    """Compute the share of shown suggests that were actually used.

    :param path: sequence of path strings.  Entries containing
        ``.not_shown.p`` are excluded from the base; entries containing
        ``.not_used.p`` count as shown but unused.
    :returns: ``['USED_%<TAB>value']``, or ``None`` when no entry was shown.
    """
    total = float(len(path))
    unused = 0
    hidden = 0
    for entry in path:
        if '.not_used.p' in entry:
            unused += 1
        if '.not_shown.p' in entry:
            hidden += 1

    shown = total - hidden
    if shown == 0:
        # Nothing was shown, so the ratio is undefined.
        return None
    share = (total - unused - hidden) / shown * 100.0
    return ['USED_%' + '\t' + str(share)]

def query_metrics(queries):
    """Summarise query texts: average length and word-count distribution.

    Words are separated by any run of whitespace, so repeated, leading or
    trailing spaces do not inflate the word count.  Queries that contain
    no words at all are left out of every figure, including
    ``total_queries``.

    :param queries: iterable of query strings.
    :returns: list of ``'name<TAB>value'`` strings (average length in
        characters, total, then the percentage of queries with 1..7 and
        more than 7 words), or ``None`` when there is nothing to count.
    """
    max_bucket = 8  # buckets: 1..7 words, and a last one for 8 or more
    counts = [0] * max_bucket
    total = 0
    length_sum = 0
    for item in queries:
        n_words = len(item.split())
        if n_words == 0:
            continue
        total += 1
        length_sum += len(item)
        counts[min(n_words, max_bucket) - 1] += 1

    if total == 0:
        return None

    result = [
        'average_query_length' + '\t' + str(float(length_sum) / total),
        'total_queries' + '\t' + str(total),
    ]
    labels = ['%d_word_queries_%%' % i for i in range(1, max_bucket)]
    labels.append('>7_word_queries_%')
    for label, count in zip(labels, counts):
        result.append(label + '\t' + str(count * 100.0 / total))
    return result

def saved_metrics(ratios):
    """Average the savings encoded in dot-separated ``ratio`` values.

    Each ratio is split on ``.`` into numbers.  With ``a`` the first and
    ``b`` the second number, the saved share is ``(b - a) / b * 100`` and
    the saved amount is ``b - a``; the last number is averaged as the
    action count.  (Presumably ``a`` is the number of typed characters and
    ``b`` the final query length -- confirm against the log format.)

    Entries that are not numeric, have fewer than three parts, or have a
    zero second part are skipped.

    :param ratios: iterable of strings such as ``'2.10.3'``.
    :returns: ``['saved_%<TAB>..', 'saved_symbols<TAB>..',
        'average_actions<TAB>..']``, or ``None`` when no entry is usable.
    """
    percents = []
    symbols = []
    actions = []
    for item in ratios:
        try:
            parts = [float(piece) for piece in item.split(".")]
        except ValueError:
            # Malformed entry: skip it rather than reuse the numbers of
            # the previously parsed entry.
            continue
        if len(parts) < 3 or parts[1] == 0:
            continue
        first, second = parts[0], parts[1]
        percents.append((second - first) / second * 100)
        symbols.append(second - first)
        actions.append(parts[-1])

    if not percents:
        return None
    count = float(len(percents))
    return [
        'saved_%' + '\t' + str(sum(percents) / count),
        'saved_symbols' + '\t' + str(sum(symbols) / count),
        'average_actions' + '\t' + str(sum(actions) / count),
    ]

def all_suggest_metrics(key, recs):
    """Reduce step: gather raw parameters by kind and emit metric records.

    Records are sorted into four lists by their ``key`` attribute
    (``since_first_change``, ``ratio``, ``path``, ``text``), each list is
    passed to its metric function, and every ``'name<TAB>value'`` line
    that comes back is yielded as ``Record('by_morda_' + name, "", value)``.

    :param key: reduce key; not used by this function.
    :param recs: iterable of records exposing ``key`` and ``value``.
    :returns: generator of ``Record`` objects.
    """
    times = []
    ratios = []
    queries = []
    path = []
    for rec in recs:
        kind = rec.key
        if kind == 'since_first_change':
            # Keep the raw value, but only when it parses as a positive int.
            try:
                is_positive = int(rec.value) > 0
            except ValueError:
                is_positive = False
            if is_positive:
                times.append(rec.value)
        elif kind == 'ratio':
            ratios.append(rec.value)
        elif kind == 'path':
            path.append(rec.value)
        elif kind == 'text':
            query = urllib.unquote(rec.value)
            if query not in ('', ' '):
                queries.append(query)

    used = used_metrics(path)
    saved = saved_metrics(ratios)
    query_stats = query_metrics(queries)
    timing = time_metrics(times)

    # Emission order: timing, usage, savings, query statistics.
    for group in (timing, used, saved, query_stats):
        if not isinstance(group, list):
            # The metric function had nothing to report.
            continue
        for line in group:
            if isinstance(line, str) and '\t' in line:
                fields = line.split('\t')
                yield Record("by_morda_" + str(fields[0]), "", str(fields[1]))

if __name__ == '__main__':
    # Processes the redir log of two days ago: runs the map and reduce
    # steps, then posts every resulting metric to the razladki launcher,
    # plus one derived metric (trimmed mean input time per query symbol).
    try:
        day_stamp = (datetime.now().date() - timedelta(days=2)).strftime('%Y%m%d')
        src_table = 'redir_log/' + day_stamp
        dst_table = 'galamaj/suggest_metrics/by/desktop_morda_' + day_stamp
    except Exception as e:
        print("Error: %s" % (str(e)))
        sys.exit(1)

    # Every metric of the run shares one timestamp, so compute it once.
    timestamp = time.mktime(time.strptime(day_stamp, '%Y%m%d'))
    save_url = 'http://launcher.razladki.yandex-team.ru/save_new_data/suggest_metrics_desktop'

    MapReduce.useDefaults(verbose=True, mrExec="./mapreduce", server="sakura00.search.yandex.net:8013", username="suggest")
    with TemporaryTable(project="suggest_input_times") as tmpTable:
        MapReduce.runMap(parse_redir, srcTable=src_table, dstTable=tmpTable, files=["blockstat.dict"])
        MapReduce.runReduce(all_suggest_metrics, srcTable=tmpTable, dstTable=dst_table, files=["blockstat.dict"])

        quantile_time = None
        query = None
        for sample in MapReduce.getSample(dst_table, count=None):
            avgMetric = float(sample.value)
            avgName = str(sample.key)
            # Metric names contain '%' and '>', which must be escaped to
            # form a valid query string.
            razladkiUrl = save_url + '?' + urllib.urlencode([(avgName, avgMetric), ('ts', timestamp)])
            print(urllib2.urlopen(razladkiUrl).read())
            if avgName == 'by_morda_average_quantile_95%':
                quantile_time = avgMetric
            if avgName == 'by_morda_average_query_length':
                query = avgMetric

        if quantile_time is None or not query:
            # Without both inputs (or with a zero query length) the derived
            # metric is undefined; report it and keep a failing exit status.
            print("Error: %s" % ("cannot compute by_morda_avrg_time_per_symb: source metrics are missing or zero"))
            sys.exit(1)
        metric = quantile_time / query
        razladkiUrl = save_url + '?' + urllib.urlencode([('by_morda_avrg_time_per_symb', metric), ('ts', timestamp)])
        print(urllib2.urlopen(razladkiUrl).read())
