# -*- coding: utf8 -*-

import time
import yt.yson as yson


def datetime2python_datetime(s):
    """(String) -> time.struct_time

    Parse a "YYYY-MM-DD HH:MM:SS" string with time.strptime.

    Despite the function name, the result is a time.struct_time, not a
    datetime.datetime object. A string that does not match the format makes
    time.strptime raise ValueError.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    parsed = time.strptime(s, timestamp_format)
    return parsed


def datetime2unixtimestamp(s):
    """(String?) -> YsonUint64?

    Convert a "YYYY-MM-DD HH:MM:SS" string into a unix timestamp wrapped in
    yson.YsonUint64. A falsy input (None or "") yields None.

    The conversion goes through time.mktime, which interprets the parsed
    time as local time.
    """
    if s:
        parsed = datetime2python_datetime(s)
        seconds = time.mktime(parsed)
        return yson.YsonUint64(seconds)
    return None


def datetime2date(s):
    """(String?) -> String?

    Reduce a "YYYY-MM-DD HH:MM:SS" string to its "YYYY-MM-DD" date part.
    A falsy input (None or "") yields None.

    The value is parsed and re-formatted rather than sliced, so a string
    that does not match the expected format raises ValueError.
    """
    if s:
        parsed = datetime2python_datetime(s)
        return time.strftime("%Y-%m-%d", parsed)
    return None


def get_maybe_bool(val):
    """(String?) -> bool?

    Return None for a falsy value (None, ""), otherwise bool(val).

    Note: the conversion is plain truthiness, so every non-empty string,
    including "0" and "false", maps to True.
    """
    if not val:
        return None
    return bool(val)


def get_maybe_int(val):
    """(String?) -> int?

    Convert a truthy value with int(); any falsy value (None, "", 0) gives
    None. Non-numeric strings make int() raise ValueError.
    """
    return int(val) if val else None
    

def get_maybe_uint(val):
    """(String?) -> YsonUint64?

    Wrap a truthy value in yson.YsonUint64; any falsy value (None, "")
    gives None.
    """
    return yson.YsonUint64(val) if val else None


def get_maybe_float(val):
    """(String?) -> float?

    Convert a truthy value with float(); any falsy value (None, "", 0)
    gives None. Non-numeric strings make float() raise ValueError.
    """
    return float(val) if val else None


def parse_dsv(s, val_sep, kv_sep):
    """
    (String?, String, String) -> Dict

    Parse a delimiter-separated "key<kv_sep>value" list into a dict.

    Empty items and items without kv_sep are skipped. Each item is split on
    the first kv_sep only, so values may themselves contain kv_sep. When a
    key occurs more than once, the last occurrence wins. A falsy input
    gives an empty dict.

    >>> parse_dsv("desktop=false&isMobile=true", "&", "=")
    {'desktop': 'false', 'isMobile': 'true'}
    """
    parsed = {}
    if s:
        for item in s.split(val_sep):
            if not item:
                continue
            pair = item.split(kv_sep, 1)
            if len(pair) == 2:
                key, value = pair
                parsed[key] = value
    return parsed


def mapper(rec):
    """(Dict) -> Iterator[Dict]

    Map one source log record to at most one typed output record.

    A record is kept only when its "actionid" is "0" or "1" and its
    "placementid" parses to an integer greater than 1. The filters are
    checked before any other field is converted, so fields of records that
    are going to be dropped are never parsed.

    Output keys: timestamp, yandexuid, userid, user_id_type, placementid,
    adid, actionid, geo_zone, is_mobile, rtb_stlm_price. Missing or empty
    source values become None.
    """
    action_id = rec.get("actionid")
    if action_id not in {"0", "1"}:
        return

    placementid = get_maybe_int(rec.get("placementid"))
    # Explicit None check: comparing None with an int is only defined in
    # Python 2 and raises TypeError in Python 3.
    if placementid is None or placementid <= 1:
        return

    new_rec = dict()
    new_rec["timestamp"] = datetime2unixtimestamp(rec.get("timestamp"))
    new_rec["yandexuid"] = get_maybe_uint(rec.get("yandexuid"))
    new_rec["userid"] = get_maybe_uint(rec.get("userid"))
    # 1 - yandexuid, 2 - CryptaID, 3 - AwapsID, 0 - not applicable.
    new_rec["user_id_type"] = get_maybe_int(rec.get("user_id_type"))
    # Placement number.
    new_rec["placementid"] = placementid
    new_rec["adid"] = get_maybe_int(rec.get("adid"))
    # action_id is known to be "0" or "1" here, so this is a plain flag.
    new_rec["actionid"] = action_id == "1"
    # Geo zone determined from the user's IP address ([geo_zone] int).
    # Vlad: or taken from the tune.yandex.ru cookie.
    new_rec["geo_zone"] = get_maybe_int(rec.get("geo_zone"))

    # "parameterstr" is an "&"-separated list of key=value pairs.
    parsed_parameterstr = parse_dsv(rec.get("parameterstr"), "&", "=")
    is_mobile = parsed_parameterstr.get("isMobile")
    if is_mobile is None:
        new_rec["is_mobile"] = None
    else:
        new_rec["is_mobile"] = is_mobile in {1, "1", "True", "true", "t", "T", True}
    new_rec["rtb_stlm_price"] = get_maybe_int(rec.get("rtb_stlm_price"))

    yield new_rec


if __name__ == "__main__":

    from email.mime.text import MIMEText
    from subprocess import Popen, PIPE
    import os
    import yt.wrapper as yt

    # --- Job settings -----------------------------------------------------
    # Number of daily tables kept in TARGET_DIR / considered in SOURCE_DIR.
    KEEPED_DAYS = 1000
    OPERATION_WEIGHT = 0.5
    OPERATION_TITLE = "Adidstat (daily update)"
    # Both directories carry a trailing slash; table names are appended to them.
    SOURCE_DIR = "//statbox/awaps-log/"
    TARGET_DIR = "//home/vipplanners/adidstat/"
    YT_PROXY = "hahn.yt.yandex.net"

    # The YT token is the first line of the token file.
    with open("/home/n-bar/.yt/token", "r") as token_file:
        YT_TOKEN = token_file.readline().strip()

    # --- YT client configuration -------------------------------------------
    yt.config.set_proxy(YT_PROXY)
    yt.config.http.TOKEN = YT_TOKEN
    yt.config.PYTHON_DO_NOT_USE_PYC = True
    print("host %s" % (yt.config.http.PROXY,))

    # --- Target table schema -------------------------------------------------
    # (column name, column type, column group or None when no group is set)
    schema_columns = [
        ('timestamp', 'uint64', "a"),
        ('yandexuid', 'uint64', "a"),
        ('userid', 'uint64', None),
        ('user_id_type', 'int64', None),
        ('placementid', 'int64', "a"),
        ('adid', 'int64', None),
        ('actionid', 'boolean', "a"),
        ('geo_zone', 'int64', None),
        ('is_mobile', 'boolean', None),
        ('rtb_stlm_price', 'int64', None),
    ]
    schema = yt.yson.YsonList()
    for column_name, column_type, column_group in schema_columns:
        column_spec = {'name': column_name, 'type': column_type}
        if column_group is not None:
            column_spec["group"] = column_group
        schema.append(column_spec)
    schema.attributes["strict"] = True
    
    
    def send_notification(from_name, send_to, subject, msg):
        """(String, String, String, String) -> bool

        E-mail a plain-text message through the local /usr/sbin/sendmail.

        from_name, send_to and subject become the From, To and Subject
        headers; msg is the message body.

        Returns True when sendmail exits with status 0, False otherwise.
        """
        message = MIMEText(msg)
        message["From"] = from_name
        message["To"] = send_to
        message["Subject"] = subject

        # sendmail flags: -t takes the recipients from the message headers,
        # -oi stops a line with a single "." from ending the input.
        sendmail = Popen(["/usr/sbin/sendmail", "-t", "-oi"], stdin=PIPE)
        sendmail.communicate(message.as_string())

        # communicate() waits for the process, so returncode is set here.
        return sendmail.returncode == 0
    

    def clean_account_folder():
        """() -> bool

        Remove the tables in TARGET_DIR that fall outside the last
        KEEPED_DAYS entries (in sorted name order), then print the removed
        names and e-mail a notification. Nothing is printed or sent when
        there is nothing to remove.

        Always returns True; errors from yt propagate to the caller.
        """
        # TARGET_DIR[:-1] drops the trailing slash before listing.
        existing = sorted(yt.list(TARGET_DIR[:-1]))
        # NOTE: relies on KEEPED_DAYS > 0; with 0 the slice [:-0] is empty
        # and nothing would be removed.
        removed_list = existing[:-KEEPED_DAYS]

        if removed_list:
            # Explicit loop: the removals are side effects and must run
            # eagerly, which a lazy map() would not guarantee.
            for table_name in removed_list:
                yt.remove(TARGET_DIR + table_name)
            print("remove: %s" % removed_list)
            send_notification("adidstat", "n-bar@yandex-team.ru", "remove: success", "remove: %s" % removed_list)

        return True


    def get_next_table():
        """() -> List[String]

        Return the names of source tables that still have to be processed,
        in sorted order.

        Candidates are the KEEPED_DAYS entries of SOURCE_DIR (sorted by
        name) that precede the last DELAY_DAYS entries; names that already
        exist in TARGET_DIR are excluded. The planned list is printed when
        it is not empty.
        """
        # (KEEP_DAYS, DELAY_DAYS): 7, 2 by default
        KEEP_DAYS, DELAY_DAYS = KEEPED_DAYS, 2

        # [:-1] drops the trailing slash of the directory constants.
        available = sorted(yt.list(SOURCE_DIR[:-1]))
        candidates = available[-DELAY_DAYS - KEEP_DAYS:-DELAY_DAYS]

        # A set gives O(1) membership tests; the order of the target names
        # is irrelevant here.
        already_done = set(yt.list(TARGET_DIR[:-1]))
        source_list = [name for name in candidates if name not in already_done]

        if source_list:
            print("planned: %s " % source_list)

        return source_list
    
    # Source columns requested from the input table; these are the fields
    # the mapper reads.
    col = ["timestamp", "yandexuid", "userid", "user_id_type", "actionid",
           "parameterstr", "placementid", "adid", "geo_zone", "rtb_stlm_price"]

    def clean_or_notify():
        """Run clean_account_folder(); on failure e-mail the error instead of aborting."""
        try:
            clean_account_folder()
        except Exception as err:
            send_notification("adidstat", "n-bar@yandex-team.ru", "Clean: error", "something went wrong:\n%s" % err)

    # Process pending tables one at a time until none are left.
    while True:

        clean_or_notify()

        pending = get_next_table()
        if not pending:
            break

        # Take the first pending table in sorted order; the pending list is
        # recomputed on the next iteration.
        date = pending[0]
        source_table = yt.TablePath(SOURCE_DIR + date, columns=col)
        target_table = yt.TablePath(TARGET_DIR + date)

        # Create, map, sort and merge inside a single transaction.
        with yt.Transaction():

            yt.create_table(target_table,
                            recursive=True,
                            attributes={"optimize_for": "scan",
                                        "erasure_codec": "lrc_12_2_2",
                                        "compression_codec": "lz4",  # lz4 by default
                                        "schema": schema})
            print("%s - created" % target_table)

            # Run transformation
            yt.run_map(mapper,
                       source_table,
                       target_table,
                       spec={"weight": OPERATION_WEIGHT,
                             "title": OPERATION_TITLE,
                             "max_failed_job_count": 25,
                             "data_size_per_job": 2 * 1024 * 1024 * 1024,  # N GB
                             "data_size_per_map_job": 2 * 1024 * 1024 * 1024,  # N GB
                             "map_selectivity_factor": 0.2
                            })
            print("%s - mapped" % date)

            yt.run_sort(target_table,
                        sort_by=["placementid", "adid", "geo_zone", "actionid", "is_mobile", "rtb_stlm_price"],
                        spec={"weight": OPERATION_WEIGHT, "title": OPERATION_TITLE}
                       )
            print("%s - sorted" % date)

            # Merge the table into itself with combine_chunks enabled.
            yt.run_merge(target_table,
                         target_table,
                         spec={"weight": OPERATION_WEIGHT, "title": OPERATION_TITLE, "combine_chunks": True}
                        )
            print("%s - merged" % date)

            # The row count is informational only; failing to read it must
            # not fail the run. Catch Exception rather than everything so
            # KeyboardInterrupt / SystemExit still propagate.
            try:
                row_count = yt.get_attribute(target_table, "row_count")
            except Exception:
                row_count = None

            send_notification("adidstat", "n-bar@yandex-team.ru", "MR: success. %s" % TARGET_DIR, "%s is finished, rows: %s" % (target_table, row_count))

    # Final cleanup pass after the last table has been processed.
    clean_or_notify()
