#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import ast
import collections
import datetime
from itertools import repeat, izip
import multiprocessing
import urlparse

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    clusters,
    Record,
)
import nile
import pandas as pd
import yt.wrapper as yt
import logging
# Report errors only; every record carries a timestamp, the source file name,
# the line number and the level name in front of the message.
logging.basicConfig(
    level=logging.ERROR,
    format=(
        '[%(asctime)s] %(filename)s[LINE:%(lineno)d] '
        '%(levelname)-8s %(message)s'
    ),
)

# Block names, one per line of the local 'b_list' file. The text is split on
# '\n' without any filtering, so empty lines are kept as empty strings.
with open('b_list', 'r') as b_list:
    BLAND_BLOCKS = b_list.read().split('\n')

# Categorical features that matrixnet_prepare expands into 0/1 indicators.
# Each entry is [feature name, list of the values that get their own column].
ONE_HOT_LIST = [
    ['day_time', ['night', 'evening', 'afternoon', 'morning', 'early_morning']],
    ['week_day', list(map(str, range(1, 8)))],
]


def matrixnet_prepare(features_dict, path, label):
    """Serialize one sample into a tab-separated line.

    The line starts with ``label``, ``path`` and the constant ``1`` and is
    followed by one value per name in ``FEATURES``, in that order.

    ``features_dict`` is updated in place: a ``block_<name>`` indicator is
    added for every entry of ``BLAND_BLOCKS`` and a ``<key>_<value>``
    indicator for every key/value pair listed in ``ONE_HOT_LIST``.

    :param features_dict: mapping of feature name to value; it has to
        contain every key named in ``ONE_HOT_LIST``.
    :param path: value compared against each entry of ``BLAND_BLOCKS``.
    :param label: value written as the first column.
    :return: the tab-separated line as a string.
    :raises KeyError: if a ``ONE_HOT_LIST`` key is missing from
        ``features_dict``.
    """
    # 1 for the block equal to this sample's path, 0 for every other block.
    for block_name in BLAND_BLOCKS:
        features_dict['block_' + block_name] = int(path == block_name)

    # One 0/1 indicator per allowed value of each categorical feature.
    for name, categories in ONE_HOT_LIST:
        for category in categories:
            indicator = '{}_{}'.format(name, category)
            features_dict[indicator] = int(features_dict[name] == category)

    columns = ['{}\t{}\t{}'.format(label, path, 1)]
    for feature_name in FEATURES:
        # A feature absent from the dict is written as 0, an explicit None as -100.
        value = features_dict.get(feature_name, 0)
        if value is None:
            value = -100
        columns.append('{}'.format(value))
    return '\t'.join(columns)


def label_threshold(label, r, p0, p1):
    """Tell whether ``r`` is within the threshold chosen by ``label``.

    :param label: a label equal to ``0`` selects ``p0``; any other value
        selects ``p1``.
    :param r: value compared with the selected threshold.
    :param p0: threshold used when ``label == 0``.
    :param p1: threshold used otherwise.
    :return: ``True`` when ``r`` is less than or equal to the selected
        threshold, ``False`` otherwise.
    """
    threshold = p0 if label == 0 else p1
    return bool(r <= threshold)


def argument_parser():
    """Parse the command-line options of the script.

    :return: ``argparse.Namespace`` with the attributes ``pool``, ``itable``,
        ``rtable``, ``label`` and ``randomhash`` (parsed as ``str``) and
        ``prob0``, ``prob1`` (parsed as ``float``). An option that was not
        passed on the command line is ``None``.
    """
    # (flag, destination attribute, value type, help text)
    options = (
        ('-p', 'pool', str, 'you hahn pool'),
        ('-i', 'itable', str, 'input table'),
        ('-r', 'rtable', str, 'result table'),
        ('-l', 'label', str, 'label name'),
        ('-rh', 'randomhash', str, 'column for random hash'),
        ('-p0', 'prob0', float, 'probabilistic threshold for label 0'),
        ('-p1', 'prob1', float, 'probabilistic threshold for label 1'),
    )

    parser = argparse.ArgumentParser(description='Get parameters')
    for flag, dest, value_type, help_text in options:
        parser.add_argument(flag, dest=dest, type=value_type, help=help_text)
    return parser.parse_args()


# Names of the features that matrixnet_prepare writes to each output line,
# read one per line from the local 'f_list' file. Lines starting with '#' are
# treated as comments. Empty lines (for example the one produced by a trailing
# newline at the end of the file) are skipped: indexing x[0] on them used to
# raise IndexError at import time.
with open('f_list', 'r') as f_list:
    FEATURES = [
        x for x in f_list.read().split('\n')
        if x and not x.startswith('#')
    ]


def main():
    """Build and run the job that converts the input table into the result table.

    Stages, in order:

    1. add ``random_p`` (a value in [0, 1) derived from the hash of the
       ``-rh`` column) and the two constant threshold columns to every row;
    2. keep only the rows for which ``label_threshold`` returns ``True``;
    3. project the surviving rows to ``key`` / ``subkey`` / ``value``, where
       ``value`` is produced by ``matrixnet_prepare``;
    4. write the result to the ``-r`` table and run the job.
    """
    args = argument_parser()
    label_name = args.label

    cluster = clusters.Hahn(pool=args.pool)
    job = cluster.job('HOME MATRIXNET PREPARE: {} feat'.format(len(FEATURES)))

    # Stage 1: attach the sampling value and both thresholds to each row.
    with_sampling_columns = job.table(args.itable).project(
        ne.all(),
        random_p=ne.custom(
            lambda x: (hash(x) % 1000000) / 1000000.,
            args.randomhash,
        ),
        p_tresh0=ne.const(args.prob0),
        p_tresh1=ne.const(args.prob1),
    )

    # Stage 2: label-dependent sampling.
    sampled = with_sampling_columns.filter(
        nf.custom(
            label_threshold,
            label_name,
            'random_p',
            'p_tresh0',
            'p_tresh1',
        )
    )

    # Stage 3: build the output columns.
    prepared = sampled.project(
        key=ne.custom(
            lambda reqid, path, clicks, realshow: '{}_{}_{}_{}'.format(
                reqid, path, clicks, realshow
            ),
            'reqid',
            'path',
            'true_num_clicks',
            'realshow',
        ),
        subkey='subkey',
        value=ne.custom(
            matrixnet_prepare,
            'features_dict',
            'path',
            label_name,
        ),
    )

    # Stage 4: register the output table and execute the job.
    prepared.put(args.rtable)
    job.run()

# Run the job only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

