import yt.wrapper
import sys
import urlparse
import re
import urllib
import urllib2
import math
import json
import time
import datetime
from datetime import date, datetime, timedelta
import argparse
import ast

def top_func(str_dict):
    """Return the keys of the three largest values in a dict literal.

    ``str_dict`` is the text of a Python dict literal mapping names to
    comparable values; it is parsed with ``ast.literal_eval``. An
    ``'unknown'`` entry, when present, is dropped before ranking.

    Returns a list of at most three keys ordered from the largest value to
    the smallest, or an empty list when ``str_dict`` is empty or ``None``.
    """
    if not str_dict:
        return []
    class_dict = ast.literal_eval(str_dict)
    # Supplying a default keeps pop() from raising KeyError when the dict
    # carries no 'unknown' entry at all.
    class_dict.pop('unknown', None)
    # Ascending sort followed by a reversal keeps the tie order the original
    # implementation produced.
    ranked = sorted(class_dict.items(), key=lambda x: x[1])[::-1]
    return [name for name, _ in ranked[:3]]


def mapper(row):
    """Yield one flattened record for every matching voice-input row.

    ``row`` must provide ``'key'`` and ``'value'`` strings. A row is
    processed only when its value matches all three patterns compiled below
    (``=1_9_<digit>``, ``voice.voice_in.`` and ``-dayuse=0,-``); any other
    row yields nothing.

    The value is split on tabs and each record is handled as follows:

    * ``url=`` (empty url) stores ``None`` under ``url``;
    * ``vars=k1=v1,k2=v2,...`` is unpacked into individual fields;
    * a record starting with ``path`` sets ``action`` to the text after the
      last ``.p0.`` marker, or after the last ``.p1.`` marker when that one
      is present;
    * any other ``name=value`` record is stored under ``name``.

    Values keep everything after the first ``=``, fragments without ``=``
    are ignored, and fields that never appear are emitted as ``None``.
    """
    VERSION = re.compile(r'=1_9_\d')
    VOICE = re.compile(r'voice\.voice_in\.')
    DAYUSE = re.compile(r'\-dayuse=0\,\-')
    value = row['value']
    key = row['key']
    if not (VERSION.search(value) and VOICE.search(value) and DAYUSE.search(value)):
        return

    # Presumably the key looks like "<ip>/<yandexuid>" (going by the original
    # variable name); any other shape gives no uid.
    key_parts = key.split('/')
    yuid = key_parts[-1] if len(key_parts) == 2 else None

    fields = {}
    for rec in value.split('\t'):
        if rec == "url=":
            # Must be part of the elif chain: otherwise the generic branch
            # below overwrites the None with an empty string.
            fields["url"] = None
        elif rec.startswith('vars='):
            # Slice the prefix off. str.lstrip("vars=") strips a *set of
            # characters* and would also eat a leading v/a/r/s of the first
            # parameter name.
            for item in rec[len('vars='):].split(","):
                name, sep, val = item.partition("=")
                if sep:
                    fields[name] = val
        elif rec.startswith("path"):
            # ".p1." is checked last so that it wins when both markers occur.
            for marker in (".p0.", ".p1."):
                if marker in rec:
                    fields["action"] = rec.split(marker)[-1]
        else:
            # partition() keeps values that themselves contain '=' intact
            # (e.g. URLs with a query string) and tolerates records that
            # have no '=' at all.
            name, sep, val = rec.partition("=")
            if sep:
                fields[name] = val

    yield {
        'yandexuid': yuid,
        'dayuse': fields.get("-dayuse"),
        'ui': fields.get("-ui"),
        'voiceid': fields.get("-voiceid"),
        'text': fields.get("text"),
        'url': fields.get("url"),
        'user_input': fields.get("user_input"),
        'log': fields.get("log"),
        'action': fields.get("action"),
    }

def group_by_action(key, records):
    """Reducer placeholder: emit every record of the group unchanged.

    The loop originally had no body, which is a syntax error and prevented
    the whole file from being parsed. Until real aggregation logic is
    written, the function acts as an identity reducer. ``key`` is accepted
    to match the reducer signature but is not used.
    """
    for record in records:
        yield record



def parse_args():
    """Parse the script's command-line options.

    Recognised options: ``--timestamp``, ``--from_date`` and ``--to_date``
    (both defaulting to ``2017-02-28``) and ``--bs``, which is stored as
    ``blockstat``. Returns the resulting ``argparse`` namespace.
    """
    cli = argparse.ArgumentParser(description='Suggest metrics calc', add_help=True)
    cli.add_argument('--timestamp', help='date timestamp for calculation')
    # Both range bounds share the same default day.
    range_options = (
        ('--from_date', 'from date for calculation (format: YYYY-MM-DD)'),
        ('--to_date', 'to date for calculation (format: YYYY-MM-DD)'),
    )
    for flag, description in range_options:
        cli.add_argument(flag, default='2017-02-28', help=description)
    cli.add_argument(
        "--bs",
        dest="blockstat",
        required=False,
        default='/home/galamaj/blockstat.dict',
        help="path to blockstat.dict",
    )
    # Currently disabled options:
    # cli.add_argument('--mr_server', default='hahn', help='MR server (hahn, banach, ...)')
    # cli.add_argument('--yt_pool', default='robot-suggestor-dev', help='YT pool')
    return cli.parse_args()

def get_dates(timestamp, from_date, to_date):
    """Build the list of ``YYYY-MM-DD`` day strings to process.

    When ``timestamp`` is given, its first ten characters are read as epoch
    seconds and the corresponding local day is listed first. When
    ``from_date`` is given, every day from ``from_date`` to ``to_date``
    inclusive is appended; a missing ``to_date`` is replaced by today.
    Without ``from_date`` no range is produced, even if ``to_date`` is set.
    """
    fmt = '%Y-%m-%d'
    result = []
    if timestamp:
        moment = datetime.fromtimestamp(int(timestamp[:10]))
        result.append(moment.strftime(fmt))
    if from_date:
        if not to_date:
            to_date = datetime.now().strftime(fmt)
        first_day = datetime.strptime(from_date, fmt)
        last_day = datetime.strptime(to_date, fmt)
        # Both bounds are midnights, so the difference is a whole number of
        # days; a negative span gives an empty range.
        span = (last_day - first_day).days
        result.extend(
            (first_day + timedelta(offset)).strftime(fmt)
            for offset in range(span + 1)
        )
    return result

def calc(date):
    """Run ``mapper`` over one day's table and store the result.

    ``date`` is appended to the fixed input and output directory paths to
    form the table names. The output table is created first if it does not
    exist yet, a progress line is printed, and the map operation is started
    with JSON as the row format.
    """
    source = '//home/desktop/searchband/sessions/' + date
    target = '//home/suggest-dev/galamaj/yastroka/' + date
    # 100 * 1024 ** 3 -- presumably a limit in bytes (100 GiB); confirm the
    # unit against the YT client configuration.
    yt.wrapper.config['memory_limit'] = 100 * 1024 * 1024 * 1024
    yt.wrapper.create(
        "table",
        path=target,
        recursive=True,
        ignore_existing=True,
        attributes=None,
    )
    destination = yt.wrapper.TablePath(target)  # append=True is not used
    # Parenthesised so that the line prints the same text under Python 2
    # and also parses under Python 3.
    print("%s   %s -> %s" % (datetime.now(), source, target))
    yt.wrapper.run_map(mapper, source, destination, format=yt.wrapper.JsonFormat())
    # Map-reduce variant, currently disabled:
    # yt.wrapper.run_map_reduce(mapper, group_by_action, source, destination, reduce_by='dayuse', format=yt.wrapper.JsonFormat())


if __name__ == '__main__':
    # Entry point: resolve the requested days from the command line, show
    # them, then process the days one after another.
    options = parse_args()
    days = get_dates(options.timestamp, options.from_date, options.to_date)
    print(days)
    for day in days:
        calc(day)



