import yt.wrapper

import threading
import sys
import urlparse
import re
import urllib
import urllib2
import math
import time
import datetime
from datetime import date, datetime, timedelta
import argparse

# ``country_by_ip`` values whose touch traffic is included in the metrics
# (the same ids that get_country_name() knows how to label).
_TOUCH_COUNTRY_IDS = frozenset(
    (225, 187, 149, 159, 983, 171, 170, 209, 168, 167, 169, 208, 207))


def mapper(row):
    """Map one log row to a per-query typing-speed record.

    Reads ``country_by_ip``, ``since_first_change``, ``ratio``, ``service``,
    ``query_length``, ``text``, ``prev_query`` and ``path`` from ``row``.

    A record is yielded only when all of the following hold:
      * ``service`` contains ``"_touch"`` and the country id is one of
        ``_TOUCH_COUNTRY_IDS``;
      * ``0 < since_first_change < 600000`` and ``0 < query_length < 121``;
      * ``text`` differs from ``prev_query`` and is neither ``''`` nor ``' '``.

    The yielded dict has the keys ``ip``, ``avg_time_per_symbol``,
    ``avg_time_per_action``, ``time``, ``path`` and ``query_length``.  A single
    trailing space in ``text`` is not counted in ``query_length``.

    Rows that cannot be processed (missing column, unparsable number, zero
    length or zero action count, ...) are skipped on purpose: this is a
    best-effort filter over raw logs, so a bad row yields nothing.
    """
    record = None
    try:
        ip = row["country_by_ip"]
        input_time = float(row["since_first_change"])
        # Every "."-separated component must parse as a float (otherwise the
        # row is skipped); only the last one is used as the action count.
        # A list is built explicitly because indexing the result of map()
        # only works on Python 2.
        # NOTE(review): splitting on "." looks unusual for a field called
        # "ratio" -- confirm the upstream format.
        actions = [float(part) for part in row["ratio"].split(".")][-1]

        wanted = "_touch" in row["service"] and ip in _TOUCH_COUNTRY_IDS
        text = row["text"]
        if (wanted
                and 0 < input_time < 600000
                and 0 < row["query_length"] < 121
                and text != row["prev_query"]
                and text != '' and text != ' '):
            len_query = row["query_length"]
            if text.endswith(' '):
                len_query -= 1
            record = {'ip': ip,
                      'avg_time_per_symbol': input_time / len_query,
                      'avg_time_per_action': input_time / actions,
                      'time': input_time,
                      'path': row["path"],
                      'query_length': len_query}
    except Exception:
        # Deliberate best-effort: drop the row.  Unlike a bare ``except`` this
        # lets KeyboardInterrupt / SystemExit / GeneratorExit propagate.
        record = None

    # Yield outside the try block so errors raised by the consumer are not
    # swallowed here.
    if record is not None:
        yield record

def average(measures):
    """Return the arithmetic mean of ``measures`` as a float.

    The mean is updated incrementally (one pass, no intermediate sum), so any
    iterable of numbers is accepted.  An empty iterable gives ``0.0``.
    """
    mean = 0.
    for seen, value in enumerate(measures, 1):
        # Move the current mean towards the new value by 1/seen of the gap.
        mean += (value - mean) / seen
    return mean

def count_metrics(key, recs):
    """Reduce the mapper records of one ``ip`` group into averaged metrics.

    Each record must carry ``ip``, ``path``, ``avg_time_per_symbol``,
    ``avg_time_per_action`` and ``query_length``.  Three means (time per
    symbol, time per action, query length) are computed for six overlapping
    slices of the records, selected by substrings of ``path``:

      * ``''``            -- every record;
      * ``'serp_'``       -- path contains ``"serp_"``;
      * ``'morda_'``      -- path contains ``"morda_"``;
      * ``'used_'``       -- path contains ``"touch.mouse.p"``,
        ``"touch.tpah.p"`` or ``"touch.keyboard.p"``;
      * ``'used_serp_'`` / ``'used_morda_'`` -- both conditions at once.

    The slice name is the prefix of the output keys, e.g.
    ``used_serp_avrg_time_per_symb_by_query``.  A slice with no records
    reports ``0.0`` for all three means.

    Means are maintained incrementally, so the records are never buffered in
    memory.  ``key`` is accepted for the reducer interface but is not used;
    the ``ip`` value is taken from the records.  Nothing is yielded when
    ``recs`` is empty.
    """
    prefixes = ('', 'serp_', 'morda_', 'used_', 'used_serp_', 'used_morda_')
    # prefix -> [mean time/symbol, mean time/action, mean length, count]
    stats = dict((prefix, [0., 0., 0., 0]) for prefix in prefixes)
    # NOTE(review): "touch.tpah.p" is reproduced verbatim -- confirm it is
    # not a typo in the path marker.
    used_markers = ("touch.mouse.p", "touch.tpah.p", "touch.keyboard.p")

    def update(prefix, rec):
        # Fold one record into the running means of the given slice.
        values = (rec['avg_time_per_symbol'],
                  rec['avg_time_per_action'],
                  rec['query_length'])
        acc = stats[prefix]
        acc[3] += 1
        for i, value in enumerate(values):
            acc[i] += (value - acc[i]) / acc[3]

    ip = None
    seen_any = False
    for rec in recs:
        seen_any = True
        ip = rec["ip"]
        path = rec["path"]
        on_serp = "serp_" in path
        on_morda = "morda_" in path

        update('', rec)
        if on_serp:
            update('serp_', rec)
        if on_morda:
            update('morda_', rec)
        if any(marker in path for marker in used_markers):
            update('used_', rec)
            if on_serp:
                update('used_serp_', rec)
            if on_morda:
                update('used_morda_', rec)

    if not seen_any:
        return

    result = {'ip': ip}
    for prefix in prefixes:
        per_symbol, per_action, length = stats[prefix][:3]
        result[prefix + 'avrg_time_per_symb_by_query'] = per_symbol
        result[prefix + 'avrg_time_per_act_by_query'] = per_action
        result[prefix + 'average_query_length'] = length
    yield result


# (country id, short label) pairs understood by get_country_name().
_COUNTRY_LABELS = (
    (225, "Ru"),
    (187, "Ua"),
    (149, "By"),
    (159, "Kz"),
    (983, "Tr"),
    (171, "Uz"),
    (170, "Tm"),
    (209, "Tj"),
    (168, "Am"),
    (167, "Az"),
    (169, "Ge"),
    (207, "Kg"),
    (208, "Md"),
)


def get_country_name(ip):
    """Return the two-letter label for country id ``ip``.

    The id is compared with ``==`` against each known id in turn.  ``None``
    is returned when the id is not in the table.
    """
    for country_id, label in _COUNTRY_LABELS:
        if ip == country_id:
            return label
    return None

def parse_args():
    """Parse the command line and return the argparse namespace.

    Recognised options (all optional, all plain strings):
    ``--timestamp``, ``--from_date`` and ``--to_date``.
    """
    cli = argparse.ArgumentParser(description='Suggest metrics calc', add_help=True)
    options = (
        ('--timestamp', 'date timestamp for calculation'),
        ('--from_date', 'from date for calculation (format: YYYY-MM-DD)'),
        ('--to_date', 'to date for calculation (format: YYYY-MM-DD)'),
    )
    for flag, description in options:
        cli.add_argument(flag, help=description)
    # Currently disabled options, kept for reference:
    # cli.add_argument('--mr_server', default='hahn', help='MR server (hahn, banach, ...)')
    # cli.add_argument('--yt_pool', default='robot-suggestor-dev', help='YT pool')
    return cli.parse_args()

def get_dates(timestamp, from_date, to_date):
    """Build the list of ``YYYY-MM-DD`` day strings to process.

    * ``timestamp`` -- if truthy, its first 10 characters are read as epoch
      seconds and the corresponding local-time day is added.
    * ``from_date`` / ``to_date`` -- if ``from_date`` is given, every day from
      it up to and including ``to_date`` is added; a missing ``to_date``
      defaults to today.  ``to_date`` alone is ignored.

    Both sources may contribute to the same result; no de-duplication is done.
    """
    day_format = '%Y-%m-%d'
    result = []

    if timestamp:
        moment = datetime.fromtimestamp(int(timestamp[:10]))
        result.append(moment.strftime(day_format))

    if not from_date:
        return result
    if not to_date:
        to_date = datetime.strftime(datetime.now(), day_format)

    first_day = datetime.strptime(from_date, day_format)
    last_day = datetime.strptime(to_date, day_format)
    # Both ends are midnights, so the gap is a whole number of days; a
    # negative gap produces an empty range.
    for offset in range((last_day - first_day).days + 1):
        result.append((first_day + timedelta(offset)).strftime(day_format))
    return result

def calc(date):
    """Run the typing-speed map-reduce for one day and return the output path.

    ``date`` is appended to fixed YT path prefixes to form the input and
    output table names.  The output table is created if it does not exist,
    then ``mapper`` / ``count_metrics`` are run over the input, reduced by
    ``ip``, using JSON format.  Returns the output table path as a string.
    """
    source_table = '//home/suggest-dev/suggest_logs/redir-log_preparates/clean/' + date
    result_table = '//home/suggest-dev/suggest/galamaj/suggest_metrics/time_per_symb/old_' + date

    # 100 GiB memory limit for the operation.
    yt.wrapper.config['memory_limit'] = 100 * 1024 * 1024 * 1024
    yt.wrapper.create("table", path=result_table, recursive=True,
                      ignore_existing=True, attributes=None)
    destination = yt.wrapper.TablePath(result_table)  # append mode is not used

    print("%s   %s -> %s" % (str(datetime.now()), source_table, result_table))
    yt.wrapper.run_map_reduce(mapper, count_metrics, source_table, destination,
                              reduce_by='ip', format=yt.wrapper.JsonFormat())
    return result_table


def push_to_razladki(outputTable):
    """Send every metric stored in ``outputTable`` to the razladki launcher.

    The day is taken from the last ``_``-separated piece of the table name
    and converted to a local-time epoch timestamp.  For each row, every
    column except ``ip`` is sent as one HTTP request:

      * rows with ``ip == 225`` go to project ``suggest_metrics_touch`` under
        the column name as is;
      * all other rows go to ``suggest_metrics_touch_rkubt`` with the metric
        name prefixed by the country label from ``get_country_name``.

    The metric name/value and the server reply are printed.  Each HTTP
    response is closed after it has been read.
    """
    day = str(outputTable).split('/')[-1].split('_')[-1]
    timestamp = time.mktime(time.strptime(day, '%Y-%m-%d'))
    print(timestamp)
    for row in yt.wrapper.read_table(outputTable, raw=False):
        reg = get_country_name(row["ip"])
        for key in row.keys():
            if key == "ip":
                continue
            if row["ip"] == 225:
                project = "suggest_metrics_touch"
                name = key
            else:
                project = "suggest_metrics_touch_rkubt"
                name = reg + "_" + key
            value = row[key]
            url = 'http://launcher.razladki.yandex-team.ru/save_new_data/%s?%s=%s&ts=%s&override=1' % (project, name, value, timestamp)
            print("%s %s" % (name, value))
            response = urllib2.urlopen(url)
            try:
                print(response.read())
            finally:
                # Release the connection even if reading or printing fails.
                response.close()


if __name__ == '__main__':
    # Script entry point: resolve the days requested on the command line,
    # then compute and publish the metrics one day at a time.
    args = parse_args()
    dates = get_dates(args.timestamp, args.from_date, args.to_date)
    print(dates)
    for day in dates:
        push_to_razladki(calc(day))
