import yt.wrapper as yt
import json
from datetime import date, datetime, timedelta
import argparse
import requests
from collections import defaultdict, Counter
from qb2.api.v1 import QB2, extractors as qe, filters as qf, resources as qr
from nile.api.v1 import (
    clusters,
    Record,
    aggregators as na
)

########################################################################################################################

def parse_args():
    """Parse the command-line options of the suggest-metrics script.

    Returns:
        argparse.Namespace with the attributes ``timestamp``, ``from_date``,
        ``to_date``, ``blockstat``, ``mr_server`` and ``output``.
    """
    parser = argparse.ArgumentParser(add_help=True, description='Suggest metrics calc')
    # (flags, keyword arguments) of every option, in the order shown by --help.
    options = (
        (('--timestamp',), dict(help='date timestamp for calculation')),
        (('--from_date',), dict(default='2018-01-11',
                                help='from date for calculation (format: YYYY-MM-DD)')),
        (('--to_date',), dict(default='2018-01-11',
                              help='to date for calculation (format: YYYY-MM-DD)')),
        (('--bs',), dict(dest="blockstat", help="path to blockstat.dict",
                         default='/home/galamaj/blockstat.dict', required=False)),
        (('--mr_server',), dict(default='hahn', help='MR server (hahn, banach, ...)')),
        (('--output',), dict(default='output', help='output date')),
    )
    for flags, settings in options:
        parser.add_argument(*flags, **settings)
    return parser.parse_args()

def get_dates(timestamp, from_date, to_date):
    """Build the list of 'YYYY-MM-DD' dates the script has to process.

    Args:
        timestamp: optional epoch timestamp as a string. Only its first ten
            characters are parsed, so a 13-digit millisecond value is cut
            down to its seconds part. Converted with local-time semantics
            (``datetime.fromtimestamp``).
        from_date: optional first day of an inclusive range, 'YYYY-MM-DD'.
        to_date: optional last day of the range; when ``from_date`` is given
            without it, today is used.

    Returns:
        The timestamp's day (if any) followed by every day of the range
        (if any). Both sources are combined, not deduplicated.
    """
    day_format = '%Y-%m-%d'
    collected = []

    if timestamp:
        stamp_day = datetime.fromtimestamp(int(timestamp[:10]))
        collected.append(stamp_day.strftime(day_format))

    if from_date and not to_date:
        to_date = datetime.now().strftime(day_format)

    if not (from_date and to_date):
        return collected

    cursor = datetime.strptime(from_date, day_format)
    last_day = datetime.strptime(to_date, day_format)
    while cursor <= last_day:
        collected.append(cursor.strftime(day_format))
        cursor += timedelta(days=1)
    return collected


def extract_time(str):
    """Pull an integer "time" value out of a flat JSON-like string.

    The string is split on ``","`` and every fragment containing the
    substring ``time`` is parsed; the value of the last such fragment wins.
    Note that the match is a plain substring test, so keys such as
    ``timestamp`` match as well.

    Args:
        str: the raw text to scan (the name shadows the builtin; it is kept
            because it is part of the existing signature).

    Returns:
        The value as int, or None when no fragment mentions ``time``
        (previously this case raised UnboundLocalError).

    Raises:
        ValueError: if a matching fragment's value is not an integer.
    """
    result = None
    for item in str.split('","'):
        if "time" in item:
            result = int(item.split('":"')[-1].rstrip('"}'))
    return result

def extract_type(str):
    """Pull the "type" value out of a flat JSON-like string.

    The string is split on ``","`` and every fragment containing the
    substring ``type`` is inspected; the value of the last such fragment
    wins. The match is a plain substring test, not an exact key comparison.

    Args:
        str: the raw text to scan (the name shadows the builtin; it is kept
            because it is part of the existing signature).

    Returns:
        The value as a string, or None when no fragment mentions ``type``
        (previously this case raised UnboundLocalError).
    """
    result = None
    for item in str.split('","'):
        if "type" in item:
            result = item.split('":"')[-1].rstrip('"}')
    return result

########################################################################################################################

def parse_event(EventValue):
    """Decode a raw JSON event payload.

    Args:
        EventValue: JSON text of the event, or None.

    Returns:
        The decoded object, or None when the value is not valid JSON
        (ValueError) or is not a string at all, e.g. None (TypeError).
    """
    try:
        return json.loads(EventValue)
    except (TypeError, ValueError):
        # Best effort: an undecodable payload is reported as "no event".
        return None

def get_event_name(event_value_parsed):
    """Return the event name stored in a parsed event payload.

    The ``type`` field is preferred; ``scarab:type`` is the fallback.

    Args:
        event_value_parsed: decoded event payload (a dict), or None / any
            non-mapping value when the payload could not be decoded.

    Returns:
        The first truthy value of ``type`` / ``scarab:type``, otherwise None.
    """
    try:
        return (event_value_parsed.get('type')
                or event_value_parsed.get('scarab:type')
                or None)
    except AttributeError:
        # parse_event() returns None for undecodable payloads; treat every
        # value without ``.get`` as "no name" instead of crashing.
        return None

def get_sessionid(event_value_parsed):
    """Return the ``mobileSessionId`` of a parsed event payload.

    Args:
        event_value_parsed: decoded event payload (a dict), or None / any
            non-mapping value when the payload could not be decoded.

    Returns:
        The field's value when present and truthy, otherwise None.
    """
    try:
        return event_value_parsed.get('mobileSessionId') or None
    except AttributeError:
        # parse_event() returns None for undecodable payloads.
        return None

def get_process(event_value_parsed):
    """Return the ``process`` field of a parsed event payload.

    Args:
        event_value_parsed: decoded event payload (a dict), or None / any
            non-mapping value when the payload could not be decoded.

    Returns:
        The field's value when present and truthy, otherwise None.
    """
    try:
        return event_value_parsed.get('process') or None
    except AttributeError:
        # parse_event() returns None for undecodable payloads.
        return None

def get_time(event_value_parsed):
    """Return the ``timestamp`` field of a parsed event payload.

    Args:
        event_value_parsed: decoded event payload (a dict), or None / any
            non-mapping value when the payload could not be decoded.

    Returns:
        The field's value when present and truthy, otherwise None. A
        timestamp of 0 or '' therefore also yields None.
    """
    try:
        return event_value_parsed.get('timestamp') or None
    except AttributeError:
        # parse_event() returns None for undecodable payloads.
        return None

def get_exprt(event_value_parsed):
    """Return the ``configTestBuckets`` field of a parsed event payload.

    Args:
        event_value_parsed: decoded event payload (a dict), or None / any
            non-mapping value when the payload could not be decoded.

    Returns:
        The field's value when present and truthy, otherwise None.
    """
    try:
        return event_value_parsed.get('configTestBuckets') or None
    except AttributeError:
        # parse_event() returns None for undecodable payloads.
        return None

########################################################################################################################

def parse_log(date):
    # Build and run a Nile job that reads 'statbox/metrika-mobile-log/<date>',
    # extracts the listed fields with qb2, applies the filters below and
    # stores the result at the job's '$job_root' template path
    # ('home/suggest-dev/galamaj/suggest_metrics/searchapp/preparates/<date>').
    #
    # date: day to process as a string; it is concatenated into both paths.
    # Returns nothing; progress is reported on stdout.
    #
    # NOTE(review): the cluster is hard-coded to Hahn; this function does not
    # consult the --mr_server command-line option.
    #cluster = clusters.Hahn().env(templates=dict(job_root='home/suggest-dev/galamaj/suggest_metrics/searchapp/parsed_' + date))
    cluster = clusters.Hahn(pool='robot-suggestor-dev').env(templates=dict(job_root='home/suggest-dev/galamaj/suggest_metrics/searchapp/preparates/' + date))
    job = cluster.job()

    log = job.table('statbox/metrika-mobile-log/' + date)
    #log = job.table('//logs/metrika-mobile-log/30min/2018-01-15T19:00:00')

    records = log.qb2(
        log='metrika-mobile-log',
        fields = [
            'device_id',
            'uuid',
            'session_type',
            'event_date',
            'api_key',
            'app_version',
            'session_start_date',
            'event_timestamp',
            'raw_event_value',
            'device_type',
            'geo_id',
            # NOTE(review): no dependency is listed for this extractor, unlike
            # the ones below; presumably qb2 derives the input field from the
            # lambda's argument name, so do not rename `raw_event_value` or
            # replace the lambda with parse_event itself without confirming.
            qe.custom('event_value_parsed', lambda raw_event_value: parse_event(raw_event_value)),
            # The remaining extractors are computed from 'event_value_parsed'.
            qe.custom('mobileSessionId', get_sessionid, 'event_value_parsed').allow_override(),
            qe.custom('timestamp', get_time, 'event_value_parsed').allow_override(),
            qe.custom('event_name', get_event_name, 'event_value_parsed').allow_override(),
            qe.custom('event_process', get_process, 'event_value_parsed').allow_override(),
            qe.custom('configTestBuckets', get_exprt, 'event_value_parsed').allow_override()
        ],
        filters = [
            qf.equals('device_type', 'PHONE'),
            qf.equals('api_key', 10321),
            qf.compare('app_version', '>=', '5.20'),
            qf.compare('event_process', '!=', 'APPLAUNCH'),
            qf.compare('event_process', '!=', 'DATASYNC'),
            # Keeps only non-empty event names that are entirely upper-case.
            qf.custom(lambda a: a and a.upper() == a,'event_name'),
            # NOTE(review): 'experiment_config_received' is lower-case and so
            # fails the upper-case check above; if the filters are AND-ed
            # (presumably they are) this entry can never match.
            qf.one_of('event_name', [
                'TIMING_EVENT',
                'PREFETCH_COMMIT_EVENT',
                'MOBILE_HISTORY_OPENED_EVENT',
                'EXPERIMENT_CONFIG_RECEIVED_EVENT',
                'experiment_config_received',
                'SUGGEST_EVENT'])
        ]

    )

    # Debug output of the stream object's type.
    print type(records)

    # `prep` is not used afterwards; the call itself registers the output.
    prep = records.put('$job_root')
    job.run()
    print "Parsing is finished"

########################################################################################################################

def average(measures, max_value=300000):
    """Return the mean of the usable values in ``measures``.

    None entries and outliers strictly greater than ``max_value`` are
    ignored. None is tested first, so the function also works on Python 3,
    where comparing None with a number raises TypeError.

    Args:
        measures: iterable of numbers and/or None.
        max_value: inclusive upper bound for a value to be counted
            (defaults to the historical cutoff of 300000).

    Returns:
        The mean as a float; 0.0 when nothing is usable.
    """
    avg = 0.
    n = 0
    for item in measures:
        if item is None or item > max_value:
            continue
        n += 1
        # Incremental mean: avoids building one large sum.
        avg += (item - avg) / n
    return avg

def median(lst):
    """Return the median of ``lst``.

    Args:
        lst: iterable of numbers (it is not modified; a sorted copy is used).

    Returns:
        None for empty input; the middle element for an odd number of
        values; the float mean of the two middle elements otherwise.
    """
    ordered = sorted(lst)
    count = len(ordered)
    if count < 1:
        return None
    # Floor division keeps the index an int on Python 3 as well.
    middle = count // 2
    if count % 2 == 1:
        return ordered[middle]
    return float(ordered[middle - 1] + ordered[middle]) / 2.0

########################################################################################################################

def reduce_event_times(key, rows):
    """Reducer: collect suggest request timings of one group of rows.

    Only rows with a non-None ``mobileSessionId`` whose event is a
    ``TIMING_EVENT`` of process ``SUGGEST`` are used. ``REQUEST_STARTED``
    times go to ``starts``; ``REQUEST_FINISHED`` and ``REQUEST_ERROR`` times
    go to ``ends`` (errors are also counted). Rows with missing fields, a
    None payload or a non-integer ``time`` are skipped, matching the
    (KeyError, ValueError) tolerance of the other reducers in this file.

    Args:
        key: reduce key (unused; identifying fields are taken from the rows).
        rows: iterable of row mappings.

    Yields:
        At most one dict with ``uuid``, ``app_version``, ``mobileSessionId``,
        ``input_time`` (last end minus first start), ``errors``, the sorted
        ``starts`` / ``ends`` and ``times`` (i-th end minus i-th start, kept
        only inside the open interval (0, 300000)). Nothing is yielded
        unless ``input_time`` lies in (0, 600000) and ``times`` is not empty.
    """
    starts = []
    ends = []
    error = 0
    uuid = app_version = sessionid = None
    for row in rows:
        if row['mobileSessionId'] is None:
            continue
        uuid = row['uuid']
        app_version = row["app_version"]
        sessionid = row['mobileSessionId']
        try:
            if row['event_name'] != 'TIMING_EVENT':
                continue
            event = row['event_value_parsed']
            if event['process'] != 'SUGGEST':
                continue
            stage = event['stage']
            if stage == 'REQUEST_STARTED':
                starts.append(int(event['time']))
            elif stage in ('REQUEST_FINISHED', 'REQUEST_ERROR'):
                ends.append(int(event['time']))
                if stage == 'REQUEST_ERROR':
                    error += 1
        except (KeyError, TypeError, ValueError):
            # Incomplete or malformed event: ignore this row only.
            continue

    if not starts or not ends:
        return

    starts.sort()
    ends.sort()
    input_time = ends[-1] - starts[0]
    # Pair the i-th start with the i-th end; surplus entries stay unpaired.
    times = [end - start for start, end in zip(starts, ends)
             if 0 < end - start < 300000]
    if 0 < input_time < 600000 and times:
        yield {'uuid': uuid,
               'app_version': app_version,
               'mobileSessionId': sessionid,
               'input_time': input_time,
               'errors': error,
               'starts': starts,
               'ends': ends,
               'times': times}

def request_started_finished_metrics(key, rows):
    """Reducer: aggregate the rows produced by reduce_event_times.

    Args:
        key: reduce key (unused; ``app_version`` is read from the rows).
        rows: iterable of mappings with ``app_version``, ``input_time``,
            ``times`` (a list) and ``errors``.

    Yields:
        One dict with the app version (taken from the last row), average
        and median of all ``times`` entries, average and median of the
        ``input_time`` values, and the summed error count.
    """
    session_spans = []      # one "input_time" per incoming row
    request_durations = []  # every row's "times" list, concatenated
    error_count = 0
    for record in rows:
        app_version = record["app_version"]
        session_spans.append(record["input_time"])
        request_durations.extend(record["times"])
        error_count += record["errors"]

    yield {
        "app_version": app_version,
        "avg_start_finish_suggest_request_time": average(request_durations),
        "avg_input_time_by_suggest_requests": average(session_spans),
        "median_start_finish_suggest_request_time": median(request_durations),
        "median_input_time_by_suggest_requests": median(session_spans),
        "errors_in_suggest_requests": error_count,
    }


########################################################################################################################

def used_metrics(key, rows):
    """Reducer: count suggest usage events for one group of rows.

    Per row the function counts the three tracked event names, splits
    ``SUGGEST_EVENT`` payloads into SDK (``suggestType == "SUGGEST_SDK"``)
    and non-SDK counters by action and source type, and records an ordered
    timeline of 'speechkit' / 'suggest' / 'request' actions. A row whose
    payload lacks a needed key, or names an unknown action/source type, is
    abandoned at that point; counters already incremented for it are kept.

    Args:
        key: reduce key (unused).
        rows: iterable of mappings with ``uuid``, ``event_name``,
            ``app_version`` and, for timing/suggest events,
            ``event_value_parsed``.

    Yields:
        One dict of counters. SDK counters carry an ``_SDK`` suffix. When
        the last row's uuid is not None the dict also holds ``uuid``,
        ``app_version``, ``requests`` (prefetch commits), ``used`` (commits
        directly preceded by a suggest action) and ``voice_requests``
        (commits directly preceded by a speechkit action).
    """
    counters = {"MOBILE_HISTORY_OPENED_EVENT":0, "PREFETCH_COMMIT_EVENT":0, "SUGGEST_EVENT":0}
    instant_counts = {"WORD":0, "FULLTEXT":0, "SEARCH_HISTORY":0, "FACT":0, "NAV":0, "INSERT":0, "SHOWN":0, "CLICK": 0}
    sdk_counts = {"SUGGEST_EVENT":0, "WORD":0, "FULLTEXT":0, "SEARCH_HISTORY":0, "FACT":0, "NAV":0, "INSERT":0, "SHOWN":0, "CLICK":0}
    timeline = []

    for row in rows:
        uuid = row['uuid']
        event_name = row['event_name']
        app_version = row['app_version']
        if event_name in counters:
            counters[event_name] += 1
        try:
            if event_name == 'TIMING_EVENT' and row['event_value_parsed']['process'] == "SPEECHKIT":
                # Consecutive speech events collapse into a single action.
                if not timeline or timeline[-1] != 'speechkit':
                    timeline.append('speechkit')
            elif event_name == 'SUGGEST_EVENT':
                payload = row['event_value_parsed']
                action_type = payload['suggestActionType']
                source_type = payload['suggestSourceType']
                suggest_type = payload['suggestType']
                if suggest_type == "SUGGEST_SDK":
                    sdk_counts['SUGGEST_EVENT'] += 1
                    sdk_counts[action_type] += 1
                    sdk_counts[source_type] += 1
                else:
                    instant_counts[source_type] += 1
                    instant_counts[action_type] += 1
                if action_type == "CLICK" or source_type == "SEARCH_HISTORY":
                    timeline.append('suggest')
            elif event_name == "PREFETCH_COMMIT_EVENT":
                timeline.append("request")
        except KeyError:
            continue

    counters.update(instant_counts)
    counters.update((name + "_SDK", count) for name, count in sdk_counts.items())

    request_total = 0
    used_total = 0
    voice_total = 0
    previous = None
    for action in timeline:
        if action == 'request':
            request_total += 1
            if previous == 'suggest':
                used_total += 1
            if previous == 'speechkit':
                voice_total += 1
        previous = action

    if uuid is not None:
        counters["uuid"] = uuid
        counters["app_version"] = app_version
        counters["requests"] = request_total
        counters["used"] = used_total
        counters["voice_requests"] = voice_total
    yield counters

'''
"suggestActionType": "INSERT",
    "suggestSourceType": "SEARCH_HISTORY"
"suggestType": "SUGGEST"

"suggestActionType": "SHOWN",
    "suggestSourceType": "FACT",
    "suggestType": "INSTANT",

"suggestActionType": "SHOWN",
    "suggestSourceType": "NAV",
    "suggestType": "INSTANT",

"suggestActionType": "CLICK",
    "suggestSourceType": "WORD",
    "suggestType": "INSTANT"

"suggestActionType": "CLICK",
    "suggestSourceType": "WORD",
    "suggestType": "SUGGEST_SDK"

"suggestActionType": "CLICK",
    "suggestSourceType": "FULLTEXT",
    "suggestType": "SUGGEST_SDK" '''

########################################################################################################################

def time_till_first_request_started(key, rows):
    """Reducer: delay between opening history and the first suggest request.

    Rows are processed in the order given. Each MOBILE_HISTORY_OPENED_EVENT
    remembers its timestamp; the next TIMING_EVENT with process SUGGEST and
    stage REQUEST_STARTED closes the pair, and later request starts are
    ignored until another open event arrives. Rows with a None timestamp,
    missing fields or a non-integer timestamp are skipped.

    Args:
        key: reduce key (unused).
        rows: iterable of mappings with ``uuid``, ``app_version``,
            ``timestamp``, ``event_name`` and ``event_value_parsed``.

    Yields:
        At most one dict with ``uuid``, ``app_version`` (both from the last
        row) and ``time_from_opened_till_request_started``: the list of
        delays that fall inside the open interval (0, 60000).
    """
    opened_at = 0
    delays = []
    for row in rows:
        uuid = row['uuid']
        app_version = row["app_version"]
        if row['timestamp'] is None:
            continue
        try:
            started_at = 0
            if row['event_name'] == 'MOBILE_HISTORY_OPENED_EVENT':
                opened_at = int(row['timestamp'])
            if (row['event_name'] == 'TIMING_EVENT'
                    and row['event_value_parsed']['process'] == 'SUGGEST'
                    and row['event_value_parsed']['stage'] == 'REQUEST_STARTED'):
                started_at = int(row['timestamp'])
        except (KeyError, ValueError):
            continue
        # A pair needs a pending open event and a request start on this row.
        if opened_at == 0 or started_at == 0:
            continue
        delay = started_at - opened_at
        if 0 < delay < 60000:
            delays.append(delay)
        opened_at = 0

    if delays:
        yield {'uuid': uuid,
               'app_version': app_version,
               'time_from_opened_till_request_started': delays}


########################################################################################################################

def time_till_prefetch(key, rows):
    """Reducer: delay between opening history and the next prefetch commit.

    Rows are processed in the order given. Each MOBILE_HISTORY_OPENED_EVENT
    remembers its timestamp; the next PREFETCH_COMMIT_EVENT closes the
    pair, and later commits are ignored until another open event arrives.
    Rows with a None timestamp, a missing ``event_name`` or a non-integer
    timestamp are skipped.

    Args:
        key: reduce key (unused).
        rows: iterable of mappings with ``uuid``, ``app_version``,
            ``timestamp`` and ``event_name``.

    Yields:
        At most one dict with ``uuid``, ``app_version`` (both from the last
        row) and ``time_from_opened_till_prefetch``: the list of delays that
        fall inside the open interval (0, 300000).
    """
    opened_at = 0
    delays = []
    for row in rows:
        uuid = row['uuid']
        app_version = row["app_version"]
        if row['timestamp'] is None:
            continue
        try:
            committed_at = 0
            if row['event_name'] == 'MOBILE_HISTORY_OPENED_EVENT':
                opened_at = int(row['timestamp'])
            if row['event_name'] == 'PREFETCH_COMMIT_EVENT':
                committed_at = int(row['timestamp'])
        except (KeyError, ValueError):
            continue
        # A pair needs a pending open event and a commit on this very row.
        if opened_at == 0 or committed_at == 0:
            continue
        delay = committed_at - opened_at
        if 0 < delay < 300000:
            delays.append(delay)
        opened_at = 0

    if delays:
        yield {'uuid': uuid,
               'app_version': app_version,
               'time_from_opened_till_prefetch': delays}

########################################################################################################################

def spread_test_id(key, rows):
    """Reducer: forward-fill ``configTestBuckets`` across a group of rows.

    Every row is yielded in its original order with ``configTestBuckets``
    overwritten by the most recent non-None value seen so far (None until
    the first such value appears). The rows are modified in place.

    Args:
        key: reduce key (unused).
        rows: iterable of mutable row mappings.
    """
    last_seen = None
    for record in rows:
        bucket = record['configTestBuckets']
        if bucket is not None:
            last_seen = bucket
        record['configTestBuckets'] = last_seen
        yield record

########################################################################################################################

def times_by_version(key, rows):
    """Reducer: aggregate open-to-request and open-to-prefetch delays.

    The input mixes rows carrying a ``time_from_opened_till_request_started``
    list with rows carrying a ``time_from_opened_till_prefetch`` list; each
    kind is concatenated separately.

    Args:
        key: reduce key (unused; ``app_version`` is read from the rows).
        rows: iterable of row mappings.

    Yields:
        One dict with the app version (taken from the last row) plus the
        average and median of both delay collections.
    """
    request_delays = []
    prefetch_delays = []
    for record in rows:
        app_version = record["app_version"]
        if "time_from_opened_till_request_started" in record:
            request_delays.extend(record["time_from_opened_till_request_started"])
        if "time_from_opened_till_prefetch" in record:
            prefetch_delays.extend(record["time_from_opened_till_prefetch"])

    yield {
        "app_version": app_version,
        "avg_time_till_request_started": average(request_delays),
        "avg_time_till_prefetch": average(prefetch_delays),
        "median_time_till_request_started": median(request_delays),
        "median_time_till_prefetch": median(prefetch_delays),
    }

########################################################################################################################

def used_by_versions(key, rows):
    """Reducer: sum the per-user usage counters of a group of rows.

    Args:
        key: reduce key (unused; ``app_version`` is read from the rows).
        rows: iterable of mappings as produced by used_metrics. Every
            column except ``uuid``, ``app_version`` and ``test_id`` must be
            one of the known counters and convertible with ``int``.

    Yields:
        One dict holding every known counter (0 when never seen) and the
        ``app_version`` of the last row.

    Raises:
        KeyError: if a row contains a column that is not a known counter.
    """
    counter_names = (
        "MOBILE_HISTORY_OPENED_EVENT", "PREFETCH_COMMIT_EVENT", "SUGGEST_EVENT",
        "WORD", "FULLTEXT", "SEARCH_HISTORY", "FACT", "NAV", "INSERT", "SHOWN", "CLICK",
        "SUGGEST_EVENT_SDK", "WORD_SDK", "FULLTEXT_SDK", "SEARCH_HISTORY_SDK", "FACT_SDK",
        "NAV_SDK", "INSERT_SDK", "SHOWN_SDK", "CLICK_SDK",
        "used", "requests", "voice_requests",
    )
    identity_columns = ("uuid", "app_version", "test_id")
    totals = dict.fromkeys(counter_names, 0)
    for record in rows:
        app_ver = record['app_version']
        for column in record.keys():
            if column in identity_columns:
                continue
            totals[column] += int(record[column])
    totals['app_version'] = app_ver
    yield totals

########################################################################################################################

def read_data(TableName, fielddate=None):
    """Read a per-version metrics table and shape it into report rows.

    Args:
        TableName: YT path of the table to read.
        fielddate: value stored under ``fielddate`` in every row. When
            omitted, the module-level name ``date`` is used; the script's
            main loop rebinds that name to the day being processed. Pass it
            explicitly when calling this function from anywhere else,
            because outside that loop ``date`` is the imported
            ``datetime.date`` class.

    Returns:
        A list with one dict per table row: ``fielddate``, ``app_version``
        (wrapped in tab characters) and every other column under its
        lower-cased name with the value converted by ``str``. Entries whose
        value is the string "None" are dropped.
    """
    if fielddate is None:
        # Legacy behaviour: rely on the global set by the main loop.
        fielddate = date
    yt.config['memory_limit'] = 100 * 1024 * 1024 * 1024
    print("%s ->  %s" % (str(datetime.now()), TableName))
    data = []
    for row in yt.read_table(TableName):
        d = {}
        d['fielddate'] = fielddate
        d['app_version'] = "\t" + row['app_version'] + "\t"
        for key in row.keys():
            if key == 'app_version':
                continue
            d[key.lower()] = str(row[key])
        # Build a filtered copy rather than deleting while iterating, which
        # raises RuntimeError on Python 3.
        data.append(dict((name, value) for name, value in d.items() if value != "None"))
    return data


def post_to_stat(data, scale='d', beta = False, _append_mode=1):
    """Upload rows to the 'used_and_clicks' Stat report.

    The OAuth token is read from '/home/galamaj/.statistica/token'. The
    payload and the HTTP status code are printed to stdout.

    Args:
        data: list of row dicts, for example::

            [
                {'fielddate': '2015-02-03', 'project': 'Portal', 'name': '400', 'field1': 99},
                {'fielddate': '2015-02-03', 'project': 'Morda', 'name': '400', 'field1': 30},
                {'fielddate': '2015-02-04', 'project': 'Portal', 'name': '400', 'field1': 19},
                {'fielddate': '2015-02-04', 'project': 'Morda', 'name': '400', 'field1': 102},
            ]
        scale: sent as the ``scale`` form field.
        beta: unused; kept so existing callers keep working.
        _append_mode: sent as the ``_append_mode`` form field.

    Returns:
        The response body as text.
    """
    # Close the token file deterministically instead of leaking the handle.
    with open('/home/galamaj/.statistica/token', 'r') as token_file:
        stat_token = token_file.read().rstrip('\n')
    print(data)
    name = '/Adhoc/galamaj/searchapp_suggest_metrics/used_and_clicks'
    r = requests.post(
        'https://upload.stat.yandex-team.ru/_api/report/data',
        headers={
            'Authorization': 'OAuth ' + stat_token
        },
        data={
            'name': name,
            'scale': scale,
            'json_data': json.dumps({'values': data}),
            '_append_mode': _append_mode
        },
    )
    print(r.status_code)
    return r.text

def post_to_stat2(data, scale='d', beta = False, _append_mode=1):
    """Upload rows to the 'suggest_times_metrics' Stat report.

    The OAuth token is read from '/home/galamaj/.statistica/token'. The
    payload and the HTTP status code are printed to stdout.

    Args:
        data: list of row dicts, for example::

            [
                {'fielddate': '2015-02-03', 'project': 'Portal', 'name': '400', 'field1': 99},
                {'fielddate': '2015-02-03', 'project': 'Morda', 'name': '400', 'field1': 30},
                {'fielddate': '2015-02-04', 'project': 'Portal', 'name': '400', 'field1': 19},
                {'fielddate': '2015-02-04', 'project': 'Morda', 'name': '400', 'field1': 102},
            ]
        scale: sent as the ``scale`` form field.
        beta: unused; kept so existing callers keep working.
        _append_mode: sent as the ``_append_mode`` form field.

    Returns:
        The response body as text.
    """
    # Close the token file deterministically instead of leaking the handle.
    with open('/home/galamaj/.statistica/token', 'r') as token_file:
        stat_token = token_file.read().rstrip('\n')
    print(data)
    name = '/Adhoc/galamaj/searchapp_suggest_metrics/suggest_times_metrics'
    r = requests.post(
        'https://upload.stat.yandex-team.ru/_api/report/data',
        headers={
            'Authorization': 'OAuth ' + stat_token
        },
        data={
            'name': name,
            'scale': scale,
            'json_data': json.dumps({'values': data}),
            '_append_mode': _append_mode
        },
    )
    print(r.status_code)
    return r.text

########################################################################################################################

def total_metrics(date):
    """Compute the all-versions summary row for one day.

    Reads the per-version usage table and the per-session timing table,
    merges the two "time till ..." tables into 'joined_times_<date>' and
    reads the merge result, then aggregates everything into a single row.

    Args:
        date: day as a string; it is appended to every table path and
            stored as ``fielddate``.

    Returns:
        A one-element list holding a dict with ``fielddate``,
        ``app_version`` ("\\tall_versions\\t") and every metric under its
        lower-cased name, converted with ``str``. Entries whose value is
        the string "None" are dropped.
    """
    prefetch_table = '//home/suggest-dev/galamaj/suggest_metrics/searchapp/times/times_pref_' + date
    first_request_table = '//home/suggest-dev/galamaj/suggest_metrics/searchapp/times/times_start_' + date
    sessions_table = '//home/suggest-dev/galamaj/suggest_metrics/searchapp/times/start_fin_times_' + date
    usage_table = '//home/suggest-dev/galamaj/suggest_metrics/searchapp/used/by_version_' + date
    merged_table = '//home/suggest-dev/galamaj/suggest_metrics/searchapp/times/joined_times_' + date
    yt.config["pool"] = 'robot-suggestor-dev'

    # Usage counters summed over every app version.
    totals = {}
    for row in yt.read_table(usage_table):
        for column in row.keys():
            if column in ("uuid", "app_version", "test_id"):
                continue
            if column not in totals:
                totals[column] = 0
            totals[column] += int(row[column])

    # Request timings collected per session.
    session_spans = []
    request_durations = []
    error_count = 0
    for row in yt.read_table(sessions_table):
        session_spans.append(row["input_time"])
        request_durations.extend(row["times"])
        error_count += row["errors"]
    totals["avg_start_finish_suggest_request_time"] = average(request_durations)
    totals["median_start_finish_suggest_request_time"] = median(request_durations)
    totals["avg_input_time_by_suggest_requests"] = average(session_spans)
    totals["median_input_time_by_suggest_requests"] = median(session_spans)
    totals["errors_in_suggest_requests"] = error_count

    # Delays from opening history to the first request / prefetch commit.
    request_delays = []
    prefetch_delays = []
    yt.run_merge([prefetch_table, first_request_table], merged_table)
    for row in yt.read_table(merged_table):
        if "time_from_opened_till_request_started" in row:
            request_delays.extend(row["time_from_opened_till_request_started"])
        if "time_from_opened_till_prefetch" in row:
            prefetch_delays.extend(row["time_from_opened_till_prefetch"])
    totals["avg_time_till_request_started"] = average(request_delays)
    totals["avg_time_till_prefetch"] = average(prefetch_delays)
    totals["median_time_till_request_started"] = median(request_delays)
    totals["median_time_till_prefetch"] = median(prefetch_delays)

    summary = {'fielddate': date, 'app_version': "\tall_versions\t"}
    for metric in totals.keys():
        if metric == 'app_version':
            continue
        summary[metric.lower()] = str(totals[metric])
    summary = dict((name, value) for name, value in summary.items() if value != "None")
    return [summary]

########################################################################################################################

def join_results(key, rows):
    """Reducer: merge all rows of a group into one record.

    Args:
        key: reduce key mapping; its fields seed the result.
        rows: iterable of row mappings. Their fields are copied over in
            order, so for a repeated field the value of the last row wins.

    Yields:
        A single merged dict.
    """
    merged = dict((field, key[field]) for field in key.keys())
    for record in rows:
        merged.update((field, record[field]) for field in record)
    yield merged

########################################################################################################################

def times_metrics(date):
    # Run the whole chain of YT sort/reduce operations for one day and
    # return the path of the final joined per-app_version table.
    #
    # date: day as a string; it is appended to every table path.
    #
    # The statement order matters: several reduces rely on the sort order
    # left behind by an earlier run_sort, and the input table is removed
    # part-way through.
    inputTable = '//home/suggest-dev/galamaj/suggest_metrics/searchapp/preparates/' + date
    #outputTable = '//home/suggest-dev/galamaj/suggest_metrics/searchapp/times/with_test_ids_' + date
    outputTable1 = '//home/suggest-dev/galamaj/suggest_metrics/searchapp/times/times_pref_' + date
    outputTable2 = '//home/suggest-dev/galamaj/suggest_metrics/searchapp/times/times_start_' + date
    outputTable3 = '//home/suggest-dev/galamaj/suggest_metrics/searchapp/times/input_times_metrics_' + date
    outputTable4 = '//home/suggest-dev/galamaj/suggest_metrics/searchapp/times/start_fin_times_' + date
    outputTable5 = '//home/suggest-dev/galamaj/suggest_metrics/searchapp/times/start_fin_times_by_version_' + date
    outputTable6 = '//home/suggest-dev/galamaj/suggest_metrics/searchapp/used/' + date
    outputTable7 = '//home/suggest-dev/galamaj/suggest_metrics/searchapp/used/by_version_' + date
    outputTable8 = '//home/suggest-dev/galamaj/suggest_metrics/searchapp/suggest_metrics/' + date
    # Global yt.wrapper configuration; the proxy is hard-coded to Hahn.
    yt.config['memory_limit'] = 100 * 1024 * 1024 * 1024
    yt.config["proxy"]["url"] = "hahn.yt.yandex.net"
    yt.config["pool"] = "robot-suggestor-dev"
    # Make sure every output table (and its parent directories) exists.
    yt.create("table", path = outputTable1, recursive = True, ignore_existing = True, attributes = None)
    yt.create("table", path = outputTable2, recursive = True, ignore_existing = True, attributes = None)
    yt.create("table", path = outputTable3, recursive = True, ignore_existing = True, attributes = None)
    yt.create("table", path = outputTable4, recursive = True, ignore_existing = True, attributes = None)
    yt.create("table", path = outputTable5, recursive = True, ignore_existing = True, attributes = None)
    yt.create("table", path = outputTable6, recursive = True, ignore_existing = True, attributes = None)
    yt.create("table", path = outputTable7, recursive = True, ignore_existing = True, attributes = None)
    yt.create("table", path = outputTable8, recursive = True, ignore_existing = True, attributes = None)
    print "%s   %s -> %s -> %s -> %s" % (str(datetime.now()), inputTable, outputTable3, outputTable5, outputTable7)
    #yt.run_sort(inputTable, sort_by=['uuid','event_name'])
    #yt.run_reduce(spread_test_id, inputTable, outputTable, reduce_by=['uuid'], spec = {'data_size_per_job': 16000000000})#, format = yt.JsonFormat())
    # Stage 1: request timings per (uuid, mobileSessionId) -> outputTable4,
    # then aggregated per app_version -> outputTable5.
    # run_sort is called with a single table, i.e. without a separate
    # destination.
    yt.run_sort(inputTable, sort_by=['uuid', 'mobileSessionId'])
    yt.run_reduce(reduce_event_times, inputTable, outputTable4, reduce_by=['uuid','mobileSessionId'], spec = {'data_size_per_job': 16000000000})#, format = yt.JsonFormat())
    yt.run_sort(outputTable4, sort_by=['app_version'])
    yt.run_reduce(request_started_finished_metrics, outputTable4, outputTable5, reduce_by=['app_version'], spec = {'data_size_per_job': 16000000000})
    print "%s -> %s -> %s" %(inputTable, outputTable6, outputTable7)
    # Stage 2: usage counters per uuid -> outputTable6, then summed per
    # app_version -> outputTable7.
    yt.run_sort(inputTable, sort_by=['uuid', 'timestamp'])
    yt.run_reduce(used_metrics, inputTable, outputTable6, reduce_by=['uuid'], spec = {'data_size_per_job': 16000000000})#, format = yt.JsonFormat())
    yt.run_sort(outputTable6, sort_by=['app_version'])
    yt.run_reduce(used_by_versions, outputTable6, outputTable7, reduce_by=['app_version'], spec = {'data_size_per_job': 16000000000})#, format = yt.JsonFormat())
    print "%s -> %s -> %s -> %s" %(inputTable, outputTable1, outputTable2, outputTable3)
    # Stage 3: open-to-prefetch and open-to-first-request delays per uuid.
    # There is no re-sort here: both reducers pair events in row order and
    # reuse the (uuid, timestamp) sort done at the start of stage 2.
    yt.run_reduce(time_till_prefetch, inputTable, outputTable1, reduce_by=['uuid'], spec = {'data_size_per_job': 16000000000})
    yt.run_reduce(time_till_first_request_started, inputTable, outputTable2, reduce_by=['uuid'], spec = {'data_size_per_job': 16000000000})
    yt.run_sort(outputTable1, sort_by=['app_version'])
    yt.run_sort(outputTable2, sort_by=['app_version'])
    yt.run_reduce(times_by_version, [outputTable1,outputTable2], outputTable3, reduce_by=['app_version'])#, spec = {'data_size_per_job': 16000000000})
    # The input table is deleted once all per-uuid stages have consumed it.
    # NOTE(review): presumably this is the table written by parse_log(), so
    # a re-run for the same date needs parse_log() first - confirm.
    yt.remove(inputTable)
    # Stage 4: join the three per-app_version tables into outputTable8.
    yt.run_sort(outputTable3, sort_by=['app_version'])
    yt.run_sort(outputTable5, sort_by=['app_version'])
    yt.run_sort(outputTable7, sort_by=['app_version'])
    yt.run_reduce(join_results, [outputTable3, outputTable5, outputTable7], outputTable8, sort_by=['app_version'], reduce_by=['app_version'])
    return outputTable8

########################################################################################################################

if __name__ == '__main__':
    # Script entry point: for every requested day, prepare the log, compute
    # the per-version metrics, upload them, then upload the all-versions row.
    args = parse_args()
    # Nothing is written to the output file, but it has always been created
    # (or truncated) here. Keep that side effect and close the handle at
    # once instead of leaking it for the whole run.
    with open(args.output, 'w'):
        pass
    dates = get_dates(args.timestamp, args.from_date, args.to_date)
    print(dates)
    # NOTE: the loop variable must remain a module-level name `date`.
    # read_data() picks the report's fielddate up from this global when it
    # is not given one explicitly.
    for date in dates:
        print(date)
        parse_log(date)
        table = times_metrics(date)
        data = read_data(table)
        post_to_stat(data)
        data_all = total_metrics(date)
        post_to_stat(data_all)
    print("Done")

