import yt.wrapper

import json
import sys
import threading
import sys
import urlparse
import urllib2
import datetime
import math
import time
from datetime import date, datetime, timedelta
import argparse

def time_metrics(times):
    """Build input-time statistics for a collection of durations.

    Durations are compared against the thresholds 5000..25000, which the
    metric names label as 5..25 seconds, so the values are presumably
    milliseconds (TODO confirm against the log format).

    Non-positive durations are ignored by the running average and by the
    bucket counters, but they still count towards the denominator of every
    percentage and towards the "average_quantile_95%" value.

    Args:
        times: iterable of values convertible to ``float``.

    Returns:
        A list of 13 ``"<metric name><TAB><value>"`` strings, or ``None``
        when ``times`` is empty.

    Raises:
        ValueError, TypeError: if a value cannot be converted to ``float``.
    """
    durations = sorted(float(value) for value in times)
    if not durations:
        return None

    total = len(durations)
    average = 0
    positive = 0
    # Inclusive upper bounds of the first five buckets; the sixth bucket
    # collects everything strictly above the last bound.
    bounds = (5000, 10000, 15000, 20000, 25000)
    counts = [0] * (len(bounds) + 1)
    for duration in durations:
        if duration <= 0:
            continue
        # Incremental mean: avoids summing potentially huge totals.
        average += (duration - average) / (positive + 1)
        positive += 1
        for index, bound in enumerate(bounds):
            # The 20-25 bucket used to exclude exactly 25000, so that value
            # fell into no bucket at all; every upper bound is inclusive now.
            if duration <= bound:
                counts[index] += 1
                break
        else:
            if duration > bounds[-1]:
                counts[-1] += 1

    # Sum of the lowest int(0.95 * N) values divided by 0.95 * N (not by the
    # number of summed elements); kept as is so historical series stay
    # comparable.
    cutoff = 9.5 * total / 10
    quantile = sum(durations[:int(cutoff)]) / cutoff

    less5, less10, less15, less20, less25, more25 = [
        count * 100.0 / total for count in counts
    ]
    upto10 = less5 + less10
    upto15 = upto10 + less15
    upto20 = upto15 + less20
    upto25 = upto20 + less25

    metrics = [
        ('input_time_average', average),
        ('average_quantile_95%', quantile),
        ('input_time_less_5_seconds_%', less5),
        ('input_time_5_10_seconds_%', less10),
        ('input_time_10_15_seconds_%', less15),
        ('input_time_15_20_seconds_%', less20),
        ('input_time_20_25_seconds_%', less25),
        ('input_time_more_25_seconds_%', more25),
        ('0_5_seconds_%', less5),
        ('0_10_seconds_%', upto10),
        ('0_15_seconds_%', upto15),
        ('0_20_seconds_%', upto20),
        ('0_25_seconds_%', upto25),
    ]
    return [name + '\t' + str(value) for name, value in metrics]

def used_metrics(path):
    """Compute the share of shown entries that were actually used.

    An entry counts as "not used" when it contains ``.not_used.p`` and as
    "not shown" when it contains ``.not_shown.p``.  The result is
    ``(total - not_used - not_shown) / (total - not_shown) * 100``.

    Args:
        path: sized collection of strings.

    Returns:
        ``['used_%<TAB><value>']`` or ``None`` when every entry is
        "not shown" (including the empty collection).
    """
    total = float(len(path))
    unused = sum(1 for entry in path if entry.find('.not_used.p') != -1)
    hidden = sum(1 for entry in path if entry.find('.not_shown.p') != -1)

    shown = total - hidden
    if shown == 0:
        # Nothing was shown, so the ratio is undefined.
        return None

    used = (total - unused - hidden) / shown * 100.0
    return ['used_%' + '\t' + str(used)]

def query_metrics(queries):
    """Compute the distribution of queries by their word count.

    Args:
        queries: iterable of word counts (anything ``int()`` accepts).

    Returns:
        A list of ``"<metric name><TAB><value>"`` strings: the total number
        of queries (as a float) followed by the percentage of queries with
        exactly 1..7 words and the percentage of all remaining ones, or
        ``None`` when there are no queries.
    """
    word_counts = [int(count) for count in queries]

    # Slots 0..6 hold queries of exactly 1..7 words; slot 7 takes the rest
    # (more than 7 words, as well as any count below 1).
    buckets = [0] * 8
    for count in word_counts:
        slot = count - 1 if 1 <= count <= 7 else 7
        buckets[slot] += 1

    total = float(len(word_counts))
    if total == 0:
        return None

    labels = [
        '1_word_queries_%',
        '2_word_queries_%',
        '3_word_queries_%',
        '4_word_queries_%',
        '5_word_queries_%',
        '6_word_queries_%',
        '7_word_queries_%',
        '>7_word_queries_%',
    ]
    result = ['total_queries' + '\t' + str(total)]
    for label, hits in zip(labels, buckets):
        result.append(label + '\t' + str(hits * 100.0 / total))
    return result




def saved_metrics(ratios):
    """Compute how much typing was saved, from dot-separated ratio strings.

    Each usable item is a string of at least three dot-separated numbers.
    The second number is reported as the query length, the difference
    between the second and the first as the saved symbols, and the last
    number as the number of actions.  The first number is presumably the
    count of symbols actually typed (TODO confirm against the log format).

    Items that cannot be parsed (non-strings, non-numeric parts), that have
    fewer than three parts, or whose second number is zero are skipped.

    Args:
        ratios: iterable of ratio strings.

    Returns:
        A list of four ``"<metric name><TAB><value>"`` strings, or ``None``
        when no item is usable.
    """
    saved_percents = []
    saved_symbols = []
    actions = []
    lengths = []
    for item in ratios:
        try:
            parts = [float(part) for part in item.split(".")]
        except (ValueError, AttributeError):
            # Skip the malformed item.  Previously the values parsed from the
            # preceding item were silently reused here (counting it twice),
            # or a TypeError was raised if the very first item was malformed.
            continue
        if len(parts) < 3 or parts[1] == 0:
            continue
        typed, full_length = parts[0], parts[1]
        saved_percents.append((full_length - typed) / full_length * 100)
        saved_symbols.append(full_length - typed)
        actions.append(parts[-1])
        lengths.append(full_length)

    if not lengths:
        return None

    count = float(len(lengths))
    average_length = sum(lengths) / count
    saved_mid = sum(saved_percents) / count
    saved_symb = sum(saved_symbols) / count
    average_actions = sum(actions) / count
    return [
        'average_query_length' + '\t' + str(average_length),
        'saved_%' + '\t' + str(saved_mid),
        'saved_symbols' + '\t' + str(saved_symb),
        'average_actions' + '\t' + str(average_actions),
    ]

def mapper(row):
    """Map step: keep touch rows that belong to one of the tracked countries.

    A row is yielded when its "path" contains "_touch." and its
    "country_by_ip" is one of the listed codes.  Rows that lack one of these
    fields, or whose "path" has no ``find`` method, are yielded unchanged as
    well.
    """
    tracked_countries = (225, 171, 170, 209, 168, 167, 169, 208, 207, 187, 149, 159, 983)
    try:
        country = row["country_by_ip"]
        is_touch = row["path"].find("_touch.") != -1
    except (KeyError, AttributeError):
        # Incomplete rows are passed through rather than dropped.
        yield row
        return
    if is_touch and country in tracked_countries:
        yield row



def count_metrics(key, records):
    """Reduce step: aggregate the records of one group into metric lists.

    Every usable record contributes to the overall group and, additionally,
    to the "serp" group when its path contains "serp_" and to the "morda"
    group when its path contains "morda_".  Records with a blank query, a
    non-positive "since_first_change", a missing field or a field of an
    unexpected type are skipped entirely, so the four per-group lists always
    stay aligned.

    Args:
        key: reduce key; not used, the region is read from the records.
        records: iterable of dict-like records with the fields
            "country_by_ip", "text", "since_first_change", "ratio", "path".

    Yields:
        One dict with the region under "ip" and the results of
        ``time_metrics``, ``saved_metrics``, ``query_metrics`` and
        ``used_metrics`` for the overall, serp and morda groups.  "ip" is
        ``None`` when no record carried "country_by_ip".
    """
    def empty_group():
        return {'times': [], 'ratios': [], 'queries': [], 'path': []}

    overall = empty_group()
    serp = empty_group()
    morda = empty_group()
    # Pre-bound so the final yield cannot hit an unbound local when no record
    # provides "country_by_ip".
    region = None

    for record in records:
        try:
            region = record["country_by_ip"]
            query = record["text"]
            elapsed = record["since_first_change"]
            if query == '' or query == ' ' or elapsed <= 0:
                continue
            words = len(query.rstrip(" ").split(" "))
            ratio = record["ratio"]
            path = record["path"]
            groups = [overall]
            if path.find("serp_") != -1:
                groups.append(serp)
            if path.find("morda_") != -1:
                groups.append(morda)
        except (KeyError, AttributeError):
            # Incomplete record: skip it as a whole instead of leaving it in
            # only some of the lists.
            continue
        for group in groups:
            group['times'].append(elapsed)
            group['ratios'].append(ratio)
            group['queries'].append(words)
            group['path'].append(path)

    yield {
        'ip' : region,
        'input_time_serp' : time_metrics(serp['times']),
        'input_time_morda' : time_metrics(morda['times']),
        'input_time' : time_metrics(overall['times']),
        'saved_serp' : saved_metrics(serp['ratios']),
        'saved_morda' : saved_metrics(morda['ratios']),
        'saved' : saved_metrics(overall['ratios']),
        'queries_serp' : query_metrics(serp['queries']),
        'queries_morda' : query_metrics(morda['queries']),
        'queries' : query_metrics(overall['queries']),
        'ctr_serp' : used_metrics(serp['path']),
        'ctr_morda' : used_metrics(morda['path']),
        'ctr' : used_metrics(overall['path'])}

# Short country names keyed by the numeric country code; the names are used
# as prefixes of per-country metric names.
IP_TO_COUNTRY_NAME_MAP = {
    149: "By",
    159: "Kz",
    167: "Az",
    168: "Am",
    169: "Ge",
    170: "Tm",
    171: "Uz",
    187: "Ua",
    207: "Kg",
    208: "Md",
    209: "Tj",
    225: "Ru",
    983: "Tr",
}


def get_country_name(ip):
    """Return the short country name for a country code, or None if unknown."""
    return IP_TO_COUNTRY_NAME_MAP.get(ip, None)

def parse_args():
    """Parse the command line and return the resulting argparse namespace.

    Supported options: --timestamp, --from_date and --to_date; all are
    optional and default to None.
    """
    parser = argparse.ArgumentParser(description='Suggest metrics calc', add_help=True)
    options = (
        ('--timestamp', 'date timestamp for calculation'),
        ('--from_date', 'from date for calculation (format: YYYY-MM-DD)'),
        ('--to_date', 'to date for calculation (format: YYYY-MM-DD)'),
    )
    for flag, description in options:
        parser.add_argument(flag, help=description)
    # Currently disabled options:
    #   --mr_server (default 'hahn'): MR server (hahn, banach, ...)
    #   --yt_pool (default 'robot-suggestor-dev'): YT pool
    return parser.parse_args()

def get_dates(timestamp, from_date, to_date):
    """Build the list of 'YYYY-MM-DD' dates to process.

    Args:
        timestamp: optional string; its first 10 characters are read as a
            Unix timestamp in seconds and converted to a local date.
        from_date: optional first day of a range ('YYYY-MM-DD').
        to_date: optional last day of the range (inclusive); defaults to
            today when only ``from_date`` is given and is ignored when
            ``from_date`` is missing.

    Returns:
        The timestamp date (if any) followed by every day of the range.
    """
    date_format = '%Y-%m-%d'
    dates = []

    if timestamp:
        moment = datetime.fromtimestamp(int(timestamp[:10]))
        dates.append(moment.strftime(date_format))

    if not from_date:
        return dates

    if not to_date:
        to_date = datetime.now().strftime(date_format)

    day = datetime.strptime(from_date, date_format)
    last_day = datetime.strptime(to_date, date_format)
    while day <= last_day:
        dates.append(day.strftime(date_format))
        day += timedelta(days=1)
    return dates

def calc(date):
    """Run the metrics map-reduce for one day and return the output table path.

    The input table is the raw redir-log preparate for ``date``; the output
    table is created first if it does not exist.  The operation is reduced by
    "country_by_ip" with ``mapper`` and ``count_metrics``.
    """
    source_table = ''.join(('//home/suggest-dev/suggest_logs/redir-log_preparates/raw/', date))
    result_table = ''.join(('//home/suggest-dev/galamaj/suggest_metrics/touch/old_', date))

    yt.wrapper.config['memory_limit'] = 100 * 1024 ** 3
    yt.wrapper.create(
        "table",
        path=result_table,
        recursive=True,
        ignore_existing=True,
        attributes=None,
    )
    destination = yt.wrapper.TablePath(result_table)  # , append = True

    print("%s   %s -> %s" % (datetime.now(), source_table, result_table))
    yt.wrapper.run_map_reduce(
        mapper,
        count_metrics,
        source_table,
        destination,
        reduce_by='country_by_ip',
        format=yt.wrapper.JsonFormat(),
    )
    return result_table

def _push_metric_items(project, prefix, items, timestamp):
    """Send every "name<TAB>value" item of one metric group to razladki."""
    for item in items:
        fields = item.split("\t")
        name = prefix + fields[0]
        value = fields[-1]
        # NOTE(review): name and value are inserted unescaped although metric
        # names contain '%' and '>'; left as is so the names received by the
        # service do not change - confirm before adding URL quoting.
        url = 'http://launcher.razladki.yandex-team.ru/save_new_data/%s?%s=%s&ts=%s&override=1' % (project, name, value, timestamp)
        print("%s %s" % (name, value))
        print(urllib2.urlopen(url).read())


def push_to_razladki(outputTable):
    """Read the metrics table and push every metric to razladki over HTTP.

    The date is taken from the part of the table name after the last "_" and
    converted to a local-time Unix timestamp that is sent with every metric.

    Rows with "ip" == 225 go to the project "suggest_metrics_touch" with the
    name prefixes "serp_", "morda_" or none.  Rows of other known countries go
    to "suggest_metrics_touch_rkubt" with "<country>_serp_" /
    "<country>_morda_" prefixes; their overall (non-serp, non-morda) metrics
    are not pushed.  Rows of unknown countries are skipped.

    When pushing a metric group fails, the error is reported on stderr, the
    remaining items of that group are dropped and the function waits 300
    seconds before moving on to the next group.

    Args:
        outputTable: path of the table produced by ``calc``.
    """
    date_str = str(outputTable).split('/')[-1].split('_')[-1]
    timestamp = time.mktime(time.strptime(date_str, '%Y-%m-%d'))
    print(timestamp)

    for row in yt.wrapper.read_table(outputTable, raw = False):
        country = row["ip"]
        if country == 225:
            project = "suggest_metrics_touch"
            serp_prefix = "serp_"
            morda_prefix = "morda_"
            total_prefix = ""
        else:
            reg = get_country_name(country)
            if reg is None:
                # Previously this ended in a TypeError that was swallowed and
                # followed by a 300 second sleep for every metric group.
                sys.stderr.write("skipping metrics of unknown region %r\n" % (country,))
                continue
            project = "suggest_metrics_touch_rkubt"
            serp_prefix = reg + "_serp_"
            morda_prefix = reg + "_morda_"
            # Overall metrics are only pushed for region 225.
            total_prefix = None

        for key in row.keys():
            items = row[key]
            if not isinstance(items, list):
                # Scalars ("ip") and groups without data (None).
                continue
            if key.find("_serp") != -1:
                prefix = serp_prefix
            elif key.find("_morda") != -1:
                prefix = morda_prefix
            else:
                prefix = total_prefix
            if prefix is None:
                continue
            try:
                _push_metric_items(project, prefix, items, timestamp)
            except Exception as exc:
                # Best effort: report the failure, back off, then go on with
                # the next group.  KeyboardInterrupt and SystemExit are no
                # longer swallowed.
                sys.stderr.write("push to razladki failed for %s: %s\n" % (key, exc))
                time.sleep(300)


if __name__ == '__main__':
    # Script entry point: work out which days to process, then for each day
    # calculate the metrics table and push its contents to razladki.
    args = parse_args()
    dates = get_dates(args.timestamp, args.from_date, args.to_date)
    print(dates)
    for day in dates:
        push_to_razladki(calc(day))

