import yt.wrapper as yt
import sys
import argparse
import datetime
from datetime import date, datetime, timedelta
import re

########################################################################################################################

def parse_args():
    """Parse the command-line options of the DAU calculation script.

    Returns:
        argparse.Namespace with the attributes ``timestamp``, ``from_date``,
        ``to_date``, ``mr_server``, ``yt_pool``, ``experiments``, ``output1``
        and ``output2``. Options without a default are None when omitted.
    """
    # (flag, add_argument keyword arguments), registered in this order.
    option_table = (
        ('--timestamp', {'help': 'date timestamp for calculation'}),
        ('--from_date', {'help': 'from date for calculation (format: YYYY-MM-DD)'}),
        ('--to_date', {'help': 'to date for calculation (format: YYYY-MM-DD)'}),
        ('--mr_server', {'default': 'hahn', 'help': 'MR server (hahn, banach, ...)'}),
        ('--yt_pool', {'default': 'robot-suggestor-dev', 'help': 'YT pool'}),
        ('--experiments', {'help': 'Comma separated experiments numbers (format: 12345,67891,35474)'}),
        ('--output1', {'default': 'output_by_days', 'help': 'results by days'}),
        ('--output2', {'default': 'output_all_days', 'help': 'result all days'}),
    )
    cli = argparse.ArgumentParser(add_help=True, description='Ya.Stroka metrics DAU calc')
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()

def get_dates(timestamp, from_date, to_date):
    """Build the list of dates (as 'YYYY-MM-DD' strings) to process.

    Args:
        timestamp: Unix timestamp as a string, or None. Only its first 10
            characters are used (presumably so that millisecond timestamps
            are accepted as well); it is converted with local-time
            ``datetime.fromtimestamp``.
        from_date: first day of the range, 'YYYY-MM-DD', or None.
        to_date: last day of the range (inclusive), 'YYYY-MM-DD', or None.

    Returns:
        A list of date strings: the timestamp's date first (if given),
        followed by every day of the [from_date, to_date] range.

        * ``from_date`` without ``to_date``: the range runs up to today.
        * nothing given at all: the single date "yesterday".
        * ``to_date`` without ``from_date``: no range is produced.

    Raises:
        ValueError: if a date string does not match 'YYYY-MM-DD' or the
            timestamp is not numeric.
    """
    date_format = '%Y-%m-%d'
    dates = []
    if timestamp:
        day = datetime.fromtimestamp(int(timestamp[:10]))
        dates.append(day.strftime(date_format))
    if from_date and not to_date:
        to_date = datetime.now().strftime(date_format)
    # Fall back to "yesterday" only when the caller specified nothing at all;
    # an explicit timestamp must not silently pull in an extra day.
    if not timestamp and not from_date and not to_date:
        from_date = (datetime.now() - timedelta(days=1)).strftime(date_format)
        to_date = from_date
    if from_date and to_date:
        current_date = datetime.strptime(from_date, date_format)
        last_date = datetime.strptime(to_date, date_format)  # parsed once, not per iteration
        while current_date <= last_date:
            dates.append(current_date.strftime(date_format))
            current_date += timedelta(days=1)
    return dates

########################################################################################################################

class StartAction:
    """Mapper that labels every row with the distribution type of its clid.

    The instance is constructed with a dict holding the clid collections
    under the keys "portal", "switch" and "commercial"; calling the instance
    with a row sets ``row['distr_type']`` and yields the row.
    """

    # Lookup priority: a clid present in several collections gets the first
    # matching type in this order.
    _TYPE_ORDER = ("portal", "switch", "commercial")

    def __init__(self, distr_dict):
        """Store the clid collections and print the size of each one.

        Args:
            distr_dict: dict with the keys "portal", "switch" and
                "commercial", each mapping to an iterable of clid values.
        """
        self.distr_dict = distr_dict
        # Membership is tested once per row, so build hash-based lookups once
        # here instead of scanning the lists for every row.
        self._lookup = []
        for type_name in self._TYPE_ORDER:
            clids = distr_dict[type_name]
            print(len(clids))
            self._lookup.append((type_name, frozenset(clids)))

    def __call__(self, row):
        """Set ``row['distr_type']`` from ``row['clid']`` and yield the row.

        A clid containing "-" is cut at the first "-" before the lookup.
        ``distr_type`` is None when the clid is None or unknown.
        """
        clid = row['clid']
        if clid is not None and "-" in clid:
            clid = clid.split("-")[0]
        distr_type = None
        for type_name, clids in self._lookup:
            if clid in clids:
                distr_type = type_name
                break
        row['distr_type'] = distr_type
        yield row

def parse_websessions(row):
    """Mapper: extract the session parameters from one raw session row.

    Args:
        row: mapping with the string fields ``key`` and ``value``. ``value``
            is a tab-separated list of items; an item is either a single
            ``name=value`` pair or a comma-separated list of variables
            (optionally prefixed with ``vars=``) in which only names starting
            with "-" are considered.

    Yields:
        At most one dict with the keys ``yandexuid``, ``path``, ``ui``,
        ``ver``, ``dayuse``, ``str_view`` and ``clid1``; it is produced only
        when both ``path`` and ``ui`` were found. Unknown parameters are
        ignored, missing ones stay None.
    """
    BUTTON = re.compile(r'\{\"children\"\:\[\{\"ctag\"\:\"button\"\}\]\,\"ctag\"\:\"str\_view\"\}')
    FIELD = re.compile(r'\{\"children\"\:\[\{\"ctag\"\:\"field\"\}\]\,\"ctag\"\:\"str\_view\"\}')
    VARS_PREFIX = 'vars='
    params = {'yandexuid' : None, 'path' : None, 'ui' : None, 'ver' : None, 'dayuse' : None, 'str_view' : None, 'clid1' : None}

    # The uid is the last "/"-separated component of the key, or the whole
    # key when it has no "." and is longer than 13 characters.
    row_key = row['key']
    if '/' in row_key:
        uid = row_key.split('/')[-1]
    elif '.' not in row_key and len(row_key) > 13:
        uid = row_key
    else:
        uid = None
    params["yandexuid"] = uid

    for item in row['value'].split("\t"):
        if BUTTON.search(item):
            params['str_view'] = 'button'
        if FIELD.search(item):
            params['str_view'] = 'field'
        if item.count("=") == 1:
            name, val = item.split("=")
            if name in params:
                params[name] = val
        else:
            # Remove the literal prefix only; str.lstrip('vars=') would strip
            # any leading run of the characters v, a, r, s and "=".
            if item.startswith(VARS_PREFIX):
                item = item[len(VARS_PREFIX):]
            for var in item.split(","):
                pieces = var.split("=")
                name = pieces[0]
                if name.startswith("-"):
                    name = name.lstrip("-")
                    if name in params:
                        params[name] = pieces[-1]
        # Derive the version from an "exp.<ver>..." path as soon as the path
        # is known; an explicit ver seen later still overrides it.
        if params['ver'] is None and params['path'] is not None and 'exp.' in params['path']:
            params['ver'] = params['path'].split(".")[1]

    # Emit the record once, after the whole value has been parsed, instead of
    # re-emitting the same (still changing) dict after every item.
    if params['path'] is not None and params['ui'] is not None:
        yield params

def check_dayuse(dayuse):
    """Map a ``dayuse`` value to its bucket label.

    Args:
        dayuse: integer or numeric string.

    Returns:
        '1'..'5' for 0..4, '5-10' for 5..9, '11-50' for 10..49,
        '51-100' for 50..99, '101-200' for 100..199, '200+' for 200 and
        above, and None for negative values.

    Raises:
        ValueError: if ``dayuse`` cannot be converted with ``int``.
    """
    dayuse = int(dayuse)
    if dayuse < 0:
        return None
    if dayuse <= 4:
        return str(dayuse + 1)
    if dayuse < 10:
        return '5-10'
    if dayuse < 50:
        return '11-50'
    if dayuse < 100:
        return '51-100'
    if dayuse < 200:
        return '101-200'
    # Includes exactly 200, which previously matched no bucket at all.
    return '200+'



def group_by_one_ui(key, recs):
    """Reducer: merge all records of one group into a single user row.

    Args:
        key: reduce key of the group (not read here).
        recs: iterable of records with the fields ``ui``, ``ver``,
            ``yandexuid``, ``str_view``, ``clid1``, ``dayuse`` and ``path``.

    Yields:
        At most one dict with ``ui``, ``dayuse``, ``yandexuid``, ``ver``,
        ``str_view``, ``path`` and ``clid``. Nothing is yielded when the
        group carries more than one distinct non-missing yandexuid.
        ``ui`` and ``ver`` come from the last record; ``ver`` becomes
        'exp.<second path component>' when that record's path contains
        'exp.'. ``path`` is the list of paths matching a known action.
    """
    action_re = re.compile(r'(voice|searchbar|tablo|text)\.')
    exp_re = re.compile(r'exp\.')
    action_paths = []
    seen_uids = set()
    views = []
    clids = []
    dayuse_buckets = []

    for rec in recs:
        ui = rec['ui']
        ver = rec['ver']
        # A missing uid is tracked under the placeholder 'none'.
        seen_uids.add('none' if rec['yandexuid'] is None else rec['yandexuid'])
        if rec['str_view'] is not None:
            views.append(rec['str_view'])
        if rec['clid1'] is not None:
            clids.append(rec['clid1'])
        if rec['dayuse'] is not None:
            dayuse_buckets.append(check_dayuse(rec['dayuse']))
        rec_path = rec['path']
        if action_re.search(rec_path):
            action_paths.append(rec_path)
        if exp_re.search(rec_path):
            ver = 'exp.' + rec_path.split('.')[1]

    # Accept the group only when it resolves to one uid, optionally mixed
    # with records that had no uid at all.
    if len(seen_uids) == 1:
        yandexuid = next(iter(seen_uids))
    elif len(seen_uids) == 2 and 'none' in seen_uids:
        yandexuid = (seen_uids - set(['none'])).pop()
    else:
        yandexuid = None

    if yandexuid is None:
        return
    yield {
        'ui': ui,
        'dayuse': check_param(dayuse_buckets),
        'yandexuid': yandexuid,
        'ver': ver,
        'str_view': check_param(views),
        'path': action_paths,
        'clid': check_param(clids),
    }


def check_param(param):
    """Return the first element of ``param``, or None when it is empty."""
    return param[0] if len(param) else None


# def join_tables(key, recs):
#     ACTION = re.compile(r'(exp|voice|searchbar|tablo|text)\.')
#     path = []
#     dayuse = None
#     version = None
#     str_view = None
#     record = {}
#     for rec in recs:
#         record['yandexuid'] = rec['yandexuid']
#         record['ui'] = rec['ui']
#         if 'distr_type' in rec:
#             record['distr_type'] = rec['distr_type']
#         else:
#             str_view = rec['str_view']
#             version = rec['ver']
#             if 'path' in rec and rec['path'] is not None and ACTION.search(rec['path']):
#                 path.append(rec['path'])
#             if 'dayuse' in rec:
#                 dayuse = rec['dayuse']
#     record['path'] = path
#     record['dayuse'] = dayuse
#     record['ver'] = version
#     record['str_view'] = str_view
#     if record['path'] is not None:
#         yield record




def check_actions(path):
    """Report which actions occur in a list of dotted path strings.

    Args:
        path: iterable of strings; each one is split on "." into components.

    Returns:
        dict of booleans keyed by 'voice', 'text', 'tablo', 'voice_in',
        'text_in' and 'tablo_click'. A flag is True when its name is a
        component of at least one path; 'tablo_click' requires both 'tablo'
        and 'click' in the same path.
    """
    direct_flags = ('voice', 'voice_in', 'text', 'text_in', 'tablo')
    found = dict.fromkeys(direct_flags + ('tablo_click',), False)
    for entry in path:
        components = entry.split(".")
        for flag in direct_flags:
            if flag in components:
                found[flag] = True
        if "tablo" in components and "click" in components:
            found['tablo_click'] = True
    return found

def count_dau(key, rows):
    """Reducer: count users and their actions inside one reduce group.

    Args:
        key: mapping holding the reduce key column(s); the name of the last
            one is used as the group column.
        rows: iterable of rows with the fields ``yandexuid``, ``ui``,
            ``path`` (a list of path strings) and the group column.

    Yields:
        One dict with the group column value, the absolute counters
        ('total', 'only_start', 'at_least_1_action', 'voice', 'voice_in',
        'text', 'text_in', 'tablo', 'tablo_click', 'voice_and_text',
        'voice_and_tablo', 'text_and_tablo', 'all_actions') and a
        '<counter>_part' share of 'total' for every counter except 'total'.
        Shares are 0.0 when no row was counted.
    """
    counter_names = ('voice', 'voice_in', 'total', 'only_start', 'at_least_1_action', 'all_actions',
                     'text', 'text_in', 'tablo', 'tablo_click',
                     'voice_and_text', 'voice_and_tablo', 'text_and_tablo')
    counted = dict.fromkeys(counter_names, 0)
    for name in key.keys():
        key_name = name
    for row in rows:
        counted[key_name] = row[key_name]
        # Rows with neither a yandexuid nor a usable ui are not users.
        if row["yandexuid"] is None and (row["ui"] == '{none}' or row["ui"] is None):
            continue
        counted['total'] += 1
        path = row["path"]
        if path == []:
            counted['only_start'] += 1
            continue
        counted['at_least_1_action'] += 1
        actions = check_actions(path)
        for action, happened in actions.items():
            if happened:
                counted[action] += 1
        # Combination counters are based on the "real use" flags only.
        voice = actions['voice_in']
        text = actions['text_in']
        tablo = actions['tablo_click']
        if voice and text and not tablo:
            counted['voice_and_text'] += 1
        if voice and tablo and not text:
            counted['voice_and_tablo'] += 1
        if text and tablo and not voice:
            counted['text_and_tablo'] += 1
        if text and tablo and voice:
            counted['all_actions'] += 1
    total = counted['total']
    parts = {}
    for name in counter_names:
        if name == 'total':
            continue
        # Guard against a group in which every row was skipped.
        parts[name + '_part'] = float(counted[name]) / total if total else 0.0
    counted.update(parts)
    yield counted


def distr_type(date):
    """Load the distribution report of ``date`` and group its ids by type.

    Reads '//statbox/statbox-dict/<date>/distr_report' through
    ``yt.read_file``; every item produced by iterating the result is treated
    as a tab-separated line whose first field is the distribution type and
    whose second field is the id.

    Returns:
        dict with the lists 'portal' (type "ya-self-distribution"),
        'switch' (type "ya-switch-distribution") and 'commercial'
        (every other type).
    """
    portal, switch, commercial = [], [], []
    bucket_by_type = {
        "ya-self-distribution": portal,
        "ya-switch-distribution": switch,
    }
    for line in yt.read_file('//statbox/statbox-dict/' + date + '/distr_report'):
        fields = line.split("\t")
        distr_name, cid = fields[0], fields[1]
        # Anything that is neither self- nor switch-distribution is commercial.
        bucket_by_type.get(distr_name, commercial).append(cid)
    return {'portal': portal, 'switch': switch, 'commercial': commercial}

def count_total_dau(input, output):
    """Count users and their actions over a whole table and store the result.

    Reads every row of the YT table ``input`` (fields ``yandexuid``, ``ui``
    and ``path``), accumulates the same counters as the per-group reducer,
    adds a '<counter>_part' share of 'total' for every counter except
    'total' (0.0 when no row was counted), prints the resulting dict and
    writes it as the single row of the YT table ``output``.

    Args:
        input: path of the YT table to read.
        output: path of the YT table to write.
    """
    counter_names = ('voice', 'voice_in', 'total', 'only_start', 'at_least_1_action', 'all_actions',
                     'text', 'text_in', 'tablo', 'tablo_click',
                     'voice_and_text', 'voice_and_tablo', 'text_and_tablo')
    counted = dict.fromkeys(counter_names, 0)
    for row in yt.read_table(input):
        # Rows with neither a yandexuid nor a usable ui are not users.
        if row["yandexuid"] is None and (row["ui"] == '{none}' or row["ui"] is None):
            continue
        counted['total'] += 1
        path = row["path"]
        if path == []:
            counted['only_start'] += 1
            continue
        counted['at_least_1_action'] += 1
        actions = check_actions(path)
        for action, happened in actions.items():
            if happened:
                counted[action] += 1
        # Combination counters are based on the "real use" flags only.
        voice = actions['voice_in']
        text = actions['text_in']
        tablo = actions['tablo_click']
        if voice and text and not tablo:
            counted['voice_and_text'] += 1
        if voice and tablo and not text:
            counted['voice_and_tablo'] += 1
        if text and tablo and not voice:
            counted['text_and_tablo'] += 1
        if text and tablo and voice:
            counted['all_actions'] += 1
    total = counted['total']
    parts = {}
    for name in counter_names:
        if name == 'total':
            continue
        # Guard against an empty table or a table where every row was skipped.
        parts[name + '_part'] = float(counted[name]) / total if total else 0.0
    counted.update(parts)
    print(counted)
    yt.write_table(output, [counted], raw=False)




def main(date, distr_dict):
    """Run the DAU pipeline on YT for a single date.

    The ``parsed`` table is expected to exist already: the map/sort steps
    that would build it from the raw sessions are commented out below.

    Steps: reduce ``parsed`` by ``ui`` into ``joined``, tag every row with
    its distribution type into ``final_data``, then for each breakdown
    column sort ``final_data`` and reduce it into that column's DAU table,
    and finally compute the overall totals.

    Args:
        date: date string appended to every table path.
        distr_dict: clid collections passed to ``StartAction``.
    """
    # access_log = '//statbox/export-access-log/' + date
    websessions = '//home/desktop/searchband/sessions/' + date
    # started = '//home/suggest-dev/galamaj/tmp/yastroka_started_5' + date
    parsed = '//home/suggest-dev/galamaj/tmp/yastroka_parsed_' + date
    joined = '//home/suggest-dev/galamaj/tmp/yastroka_joined_' + date
    final_data = '//home/suggest-dev/galamaj/tmp/yastroka_final_data_' + date
    ver_dau = '//home/suggest-dev/galamaj/tmp/yastroka_dau/ver_' + date
    dayuse_dau = '//home/suggest-dev/galamaj/tmp/yastroka_dau/dayuse_' + date
    distr_dau = '//home/suggest-dev/galamaj/tmp/yastroka_dau/distr_' + date
    strview_dau = '//home/suggest-dev/galamaj/tmp/yastroka_dau/strview_' + date
    total_dau = '//home/suggest-dev/galamaj/tmp/yastroka_dau/total_' + date

    # Make sure every output table exists before the operations start.
    for table in (ver_dau, dayuse_dau, distr_dau, strview_dau, final_data, total_dau):
        if not yt.exists(table):
            yt.create_table(path=table, recursive=True)

    # yt.run_map(parse_websessions, websessions, parsed)
    # yt.run_sort(parsed, sort_by=['ui','yandexuid'])
    yt.run_reduce(group_by_one_ui, parsed, joined, sort_by=['ui'], reduce_by=['ui'])
    yt.run_map(StartAction(distr_dict), joined, final_data)

    # One sort + reduce per breakdown column, each into its own table.
    breakdowns = (
        ('ver', ver_dau),
        ('dayuse', dayuse_dau),
        ('str_view', strview_dau),
        ('distr_type', distr_dau),
    )
    for column, dau_table in breakdowns:
        yt.run_sort(final_data, sort_by=[column])
        yt.run_reduce(count_dau, final_data, dau_table, sort_by=[column], reduce_by=[column])

    count_total_dau(final_data, total_dau)



if __name__ == '__main__':
    # Script entry point: run the pipeline for every requested day, loading
    # that day's distribution report first.
    args = parse_args()
    for day in get_dates(args.timestamp, args.from_date, args.to_date):
        main(day, distr_type(day))

