import yt.wrapper as yt
import json
from datetime import date, datetime, timedelta
import argparse
import requests
from collections import defaultdict, Counter

########################################################################################################################

def parse_args(argv=None):
    """Parse the command-line options of the suggest metrics calculation.

    Args:
        argv: optional list of argument strings to parse instead of the
            process command line. The default ``None`` makes argparse read
            ``sys.argv[1:]``, exactly as before.

    Returns:
        argparse.Namespace with the attributes ``timestamp``, ``from_date``,
        ``to_date``, ``blockstat``, ``mr_server`` and ``output``.
    """
    parser = argparse.ArgumentParser(add_help=True, description='Suggest metrics calc')
    parser.add_argument('--timestamp', help='date timestamp for calculation')
    parser.add_argument('--from_date', default='2018-01-11',  help='from date for calculation (format: YYYY-MM-DD)')
    parser.add_argument('--to_date', default='2018-01-11', help='to date for calculation (format: YYYY-MM-DD)')
    parser.add_argument("--bs", dest="blockstat", help="path to blockstat.dict", default='/home/galamaj/blockstat.dict', required=False)
    parser.add_argument('--mr_server', default='hahn', help='MR server (hahn, banach, ...)')
    parser.add_argument('--output', default='output', help='output date')
    return parser.parse_args(argv)

def get_dates(timestamp, from_date, to_date):
    """Build the list of days to process, as 'YYYY-MM-DD' strings.

    Args:
        timestamp: optional string with a Unix timestamp; when truthy, the
            local-time day it falls on is added first.
        from_date: optional first day of a range ('YYYY-MM-DD').
        to_date: optional last day of the range, inclusive ('YYYY-MM-DD').
            When ``from_date`` is given without ``to_date``, the range runs
            up to the current day.

    Returns:
        List of date strings: the timestamp day (if any) followed by every
        day of the range (if any). Empty when ``from_date`` is later than
        ``to_date`` and no timestamp was given.

    Raises:
        ValueError: if a date string does not match 'YYYY-MM-DD' or the
            timestamp is not numeric.
    """
    date_format = '%Y-%m-%d'
    dates = []
    if timestamp:
        # Only the first 10 characters are used -- presumably so that a
        # 13-digit millisecond timestamp is read as whole seconds.
        day = datetime.fromtimestamp(int(timestamp[:10]))
        dates.append(day.strftime(date_format))
    if from_date and not to_date:
        to_date = datetime.now().strftime(date_format)
    if from_date and to_date:
        current_day = datetime.strptime(from_date, date_format)
        # Parse the upper bound once instead of on every loop iteration.
        last_day = datetime.strptime(to_date, date_format)
        one_day = timedelta(1)
        while current_day <= last_day:
            dates.append(current_day.strftime(date_format))
            current_day += one_day
    return dates

########################################################################################################################

def count_actions(key, rows):
    """Reducer: summarise the suggest-related actions of one group of rows.

    Only rows whose ``app_id`` is 'ru.yandex.searchplugin' and whose
    ``params`` is not None are inspected. Rows are expected to arrive in
    chronological order, because "a request used the suggest" is detected as
    a SEARCH_REQUEST that directly follows a suggest click.

    Args:
        key: reduce key of the group (not used).
        rows: iterable of dict-like log rows with the fields ``app_id``,
            ``uuid``, ``params``, ``app_version`` and ``action``.

    Yields:
        At most one dict per group (nothing if no row of the target
        application was seen) with the fields:
            actions_list   - sequence of 'suggest' / 'request' markers;
            all_requests   - number of SEARCH_REQUEST actions;
            requests_used  - requests immediately preceded by a suggest click;
            word, history, zero_suggest - suggest clicks by ``ui_path``;
            history_pos    - last path component of every 'history/' ui_path;
            times          - positive ``request_time`` values of SUGGEST_SHOWN;
            voice_requests - requests whose ``querySource`` is 'DIALOG';
            queries        - query strings of the requests;
            app_version    - version taken from the last inspected row, or
                             None if no row provided one;
            KeyError       - number of rows skipped because a field was missing.
    """
    voice_req = 0
    history = 0
    zero_suggest = 0
    word = 0
    queries = []
    suggest_shown = []
    history_pos = []
    # Only two kinds of elements are possible: 'suggest' for a suggest click
    # and 'request' for a SEARCH_REQUEST; they are kept in arrival order.
    actions = []
    error = 0
    uuid = None
    # Initialised up front: a row may provide 'uuid' and then fail on
    # 'params' / 'app_version', which used to leave this name unbound at the
    # yield below (UnboundLocalError).
    app_version = None
    for row in rows:
        try:
            if row['app_id'] != 'ru.yandex.searchplugin':
                continue
            uuid = row['uuid']
            params = row['params']
            app_version = row['app_version']
            if params is None:
                continue
            action = row['action']
            if action == 'UI_ACTION' and 'possible_screens' in params:
                if params['possible_screens'] == ['SUGGEST']:
                    ui_path = params['ui_path']
                    if ui_path != 'voice search' and 'original_path' not in params:
                        actions.append('suggest')
                        if ui_path == 'suggest/HISTORY':
                            history += 1
                        elif ui_path == 'suggest/WORD':
                            word += 1
                        elif ui_path == 'zero_suggest':
                            zero_suggest += 1
                    if 'history/' in ui_path:
                        history_pos.append(ui_path.split('/')[-1])
            elif action == 'SUGGEST_SHOWN':
                if params['request_time'] > 0:
                    suggest_shown.append(params['request_time'])
            elif action == 'SEARCH_REQUEST':
                actions.append('request')
                queries.append(params['query'])
                if params['querySource'] == 'DIALOG':
                    voice_req += 1
        except KeyError:
            # A missing field invalidates the rest of this row only.
            error += 1
    all_requests = actions.count('request')
    used = sum(1 for previous, current in zip(actions, actions[1:])
               if previous == 'suggest' and current == 'request')
    if uuid is not None:
        yield {
            'actions_list': actions,
            'all_requests': all_requests,
            'requests_used': used,
            'word': word,
            'history': history,
            'history_pos': history_pos,
            'zero_suggest': zero_suggest,
            'times': suggest_shown,
            'voice_requests': voice_req,
            'queries': queries,
            'app_version': app_version,
            'KeyError': error,
        }

########################################################################################################################

def average(measures):
    """Return the arithmetic mean of the non-None items of *measures*.

    The mean is updated incrementally, one item at a time, so the values are
    never summed up as a whole. Returns 0.0 when there is nothing to average.
    """
    mean = 0.
    seen = 0.
    for value in measures:
        if value is None:
            continue
        seen += 1
        mean += (value - mean) / seen
    return mean

def median(lst):
    """Return the median of *lst*, or None when *lst* is empty.

    For an odd number of items the middle item of the sorted sequence is
    returned unchanged; for an even number the result is the float mean of
    the two middle items. The input is not modified.
    """
    ordered = sorted(lst)
    size = len(ordered)
    if size < 1:
        return None
    # Floor division keeps the index an int regardless of the interpreter's
    # division mode (plain `/` yields a float under true division).
    middle = size // 2
    if size % 2 == 1:
        return ordered[middle]
    return float(sum(ordered[middle - 1:middle + 1])) / 2.0

########################################################################################################################

def used_metrics(date):
    """Run the per-day YT pipeline and return the aggregated 'all users' row.

    Steps:
      1. sort the day's session log by uuid / app_session_id / timestamp;
      2. reduce it with ``count_actions`` into the ``preparates`` table;
      3. sort that table by app_version and reduce it with
         ``used_by_versions`` into the ``by_versions`` table;
      4. read the ``preparates`` table back and aggregate it over all rows.

    Args:
        date: day to process, used as the table name suffix and as the
            ``fielddate`` of the result.

    Returns:
        A one-element list holding a dict of string values: ``fielddate``,
        ``app_version`` ("\\tall_users\\t"), the summed counters, the
        ``history<N>`` / ``history10`` click shares and the median / average
        of the suggest request times. Entries whose value would be "None"
        (e.g. the median of an empty list) are left out.
    """
    inputTable = '//home/mobilesearch/analytics/app_sessions/clean/' + date
    outputTable = '//home/suggest-dev/galamaj/analytics/searchapp/metrics/preparates/' + date
    outputTable2 = '//home/suggest-dev/galamaj/analytics/searchapp/metrics/by_versions/' + date
    yt.config['memory_limit'] = 100 * 1024 * 1024 * 1024
    yt.config["proxy"]["url"] = "hahn.yt.yandex.net"
    yt.config["pool"] = 'robot-suggestor-dev'
    yt.create("table", path = outputTable, recursive = True, ignore_existing = True, attributes = None)
    yt.create("table", path = outputTable2, recursive = True, ignore_existing = True, attributes = None)
    with yt.TempTable('//home/suggest-dev/galamaj/analytics/searchapp', prefix='logs_') as tmpTable:
        yt.run_sort(inputTable, destination_table=tmpTable, sort_by=['uuid', 'app_session_id', 'timestamp'])
        yt.run_reduce(count_actions, tmpTable, outputTable, reduce_by=['uuid','app_session_id'], spec = {'data_size_per_job': 16000000000})
        yt.run_sort(outputTable, sort_by=['app_version'])
        yt.run_reduce(used_by_versions, outputTable, outputTable2, reduce_by=['app_version'], spec = {'data_size_per_job': 16000000000})

    # The temporary table is only needed by the operations above; the
    # aggregation below reads the persistent output table.
    history_positions = []
    times_shown_suggest = []
    result = {'total_requests':0, 'used_requests':0, 'word_clicks':0, 'history_clicks':0, 'zero_suggest_clicks':0, 'voice_requests':0}
    for row in yt.read_table(outputTable):
        result['total_requests'] += int(row['all_requests'])
        result['used_requests'] += int(row['requests_used'])
        result['word_clicks'] += int(row['word'])
        result['history_clicks'] += int(row['history'])
        history_positions += row['history_pos']
        times_shown_suggest += row['times']
        result['zero_suggest_clicks'] += int(row['zero_suggest'])
        result['voice_requests'] += int(row['voice_requests'])
    positions = Counter(history_positions)

    # Shares are taken relative to the non-voice requests; with none of them
    # the shares are reported as 0 (same convention as used_by_versions)
    # instead of failing with ZeroDivisionError.
    typed_requests = result['total_requests'] - result['voice_requests']
    d = {'fielddate': date, 'app_version': "\tall_users\t"}
    pos10 = 0
    for i in positions:
        try:
            rank = int(i)
        except ValueError:
            # Non-numeric position labels are ignored.
            continue
        if rank >= 10:
            # Positions 10 and above are pooled into a single bucket.
            pos10 += float(positions[i])
        elif typed_requests > 0:
            d['history' + i] = str(float(positions[i]) / typed_requests)
        else:
            d['history' + i] = str(0)
    if typed_requests > 0:
        d['history10'] = str(pos10 / typed_requests)
    else:
        d['history10'] = '0'
    d['median_suggest_shown'] = str(median(times_shown_suggest))
    if times_shown_suggest:
        # NOTE(review): with integer request times this is an integer
        # division under Python 2, i.e. the average is truncated -- confirm
        # whether that is intended.
        d['avg_suggest_shown'] = str(sum(times_shown_suggest)/len(times_shown_suggest))
    else:
        d['avg_suggest_shown'] = '0'
    for key in result:
        d[key] = str(result[key])
    # Drop the values that stringified to "None" without mutating the dict
    # while iterating over it.
    d = dict((name, value) for name, value in d.items() if value != "None")
    data = [d]
    print(data)
    return data

########################################################################################################################

def used_by_versions(key, rows):
    """Reducer: aggregate the per-session rows of one app version.

    Sums the request / click counters of all rows, collects the history click
    positions and the suggest request times, and yields a single dict of
    string values: the counters, the ``history<N>`` and ``history10`` click
    shares relative to the non-voice requests ('0' when there are none), the
    median and average of the request times, and ``app_version`` taken from
    the last row read.
    """
    # (output counter name, source column) pairs that are summed row by row.
    summed_columns = (
        ('total_requests', 'all_requests'),
        ('used_requests', 'requests_used'),
        ('word_clicks', 'word'),
        ('history_clicks', 'history'),
        ('zero_suggest_clicks', 'zero_suggest'),
        ('voice_requests', 'voice_requests'),
    )
    totals = dict((name, 0) for name, _ in summed_columns)
    clicked_positions = []
    shown_times = []
    for record in rows:
        app_ver = record['app_version']
        for name, column in summed_columns:
            totals[name] += int(record[column])
        clicked_positions.extend(record['history_pos'])
        shown_times.extend(record['times'])

    has_typed = totals['total_requests'] > totals['voice_requests']
    typed = totals['total_requests'] - totals['voice_requests']

    out = {}
    tail_clicks = 0
    for position, clicks in Counter(clicked_positions).items():
        try:
            rank = int(position)
        except ValueError:
            # Position labels that are not numbers are skipped.
            continue
        if rank >= 10:
            tail_clicks += float(clicks)
            continue
        if has_typed:
            share = float(clicks)/typed
        else:
            share = 0
        out['history' + position] = str(share)

    out['history10'] = str(tail_clicks/typed) if has_typed else '0'
    out['median_suggest_shown'] = str(median(shown_times))
    if shown_times:
        out['avg_suggest_shown'] = str(sum(shown_times)/len(shown_times))
    else:
        out['avg_suggest_shown'] = '0'
    out['app_version'] = app_ver
    for name in totals:
        out[name] = str(totals[name])
    yield out

########################################################################################################################

def read_data(date):
    """Read the per-version metrics table of *date* as rows ready for upload.

    Every table row becomes a dict of strings: ``fielddate`` is set to *date*,
    ``app_version`` is wrapped in tab characters, all other columns are
    stringified, and entries whose string value is "None" are dropped.
    Returns the list of these dicts.
    """
    inputTable = '//home/suggest-dev/galamaj/analytics/searchapp/metrics/by_versions/' + date
    yt.config['memory_limit'] = 100 * 1024 * 1024 * 1024
    print("%s ->  %s" % (str(datetime.now()), inputTable))
    data = []
    for row in yt.read_table(inputTable):
        record = {
            'fielddate': date,
            'app_version': "\t" + row['app_version'] + "\t",
        }
        for column in row.keys():
            if column != 'app_version':
                record[column] = str(row[column])
        # Keep only the entries that hold an actual value.
        data.append(dict((name, value) for name, value in record.items() if value != "None"))
    return data


def post_to_stat(data, scale='d', beta = False, _append_mode=1):
    """Upload *data* to the Stat report and print the server response.

    Args:
        data: list of dicts of values, one dict per report row, e.g.
            data = [
                {'fielddate': '2015-02-03', 'project': 'Portal', 'name': '400', 'field1': 99},
                {'fielddate': '2015-02-03', 'project': 'Morda', 'name': '400', 'field1': 30},
                {'fielddate': '2015-02-04', 'project': 'Portal', 'name': '400', 'field1': 19},
                {'fielddate': '2015-02-04', 'project': 'Morda', 'name': '400', 'field1': 102},
            ]
        scale: value of the ``scale`` form field ('d' by default).
        beta: currently unused; kept for compatibility with existing callers.
        _append_mode: value of the ``_append_mode`` form field.

    The OAuth token is read from '/home/galamaj/.statistica/token'. The HTTP
    status code and response body are printed; nothing is returned and a
    failed upload does not raise.
    """
    # Read the token through a context manager so the file handle is closed.
    with open('/home/galamaj/.statistica/token', 'r') as token_file:
        stat_token = token_file.read().rstrip('\n')

    print(data)
    name = '/Adhoc/galamaj/searchapp_suggest_metrics/used_metrics_from_searchapp_preparates'
    r = requests.post(
        'https://upload.stat.yandex-team.ru/_api/report/data',
        headers = {
            'Authorization' : 'OAuth ' + stat_token
        },
        data={
            'name': name,
            'scale': scale,
            'json_data': json.dumps({'values': data}),
            '_append_mode':_append_mode
        },
    )
    print(r.status_code)
    print(r.text)


########################################################################################################################


def main():
    """Compute and upload the suggest metrics for every requested day.

    For each day the aggregated all-users row is calculated and uploaded
    first, then the per-version rows are read back and uploaded.
    """
    args = parse_args()
    dates = get_dates(args.timestamp, args.from_date, args.to_date)
    print(dates)
    # The loop variable is local to main(), so it no longer rebinds the
    # module-level `date` imported from datetime.
    for day in dates:
        print(day)
        data = used_metrics(day)
        post_to_stat(data)
        data2 = read_data(day)
        post_to_stat(data2)
    print("Done")


if __name__ == '__main__':
    main()

