import yt.wrapper as yt
from nile.api.v1 import (
    clusters,
    extractors as ne,
    filters as nf,
    aggregators as na
)
import urlparse


def argument_parser(argv=None):
    """Parse the command-line options of the script.

    Recognised options:
        -p   string stored as ``pool`` (optional)
        -o   path to the output file, stored as ``out_file`` (required)
        -ts  integer timestamp of process start, stored as ``timestamp``
             (required)
        -d   integer number of days between today and the needed table,
             stored as ``day_delta`` (required)

    ``-o``, ``-ts`` and ``-d`` are marked as required because the script
    cannot produce a result without them; rejecting the command line here
    reports the problem immediately instead of failing later with an
    unrelated traceback.

    Args:
        argv: Optional list of argument strings. When ``None`` (default),
            ``sys.argv[1:]`` is parsed, which is the standard argparse
            behaviour.

    Returns:
        argparse.Namespace with attributes ``pool``, ``out_file``,
        ``timestamp`` and ``day_delta``.

    Raises:
        SystemExit: raised by argparse when a required option is missing or
            a value cannot be converted to the declared type.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Get parameters')
    parser.add_argument(
        '-p',
        dest='pool',
        type=str,
        help='you hahn pool',
    )
    parser.add_argument(
        '-o',
        dest='out_file',
        type=str,
        required=True,
        help='path to output file',
    )
    parser.add_argument(
        '-ts',
        dest='timestamp',
        type=int,
        required=True,
        help='timestamp of process start',
    )
    parser.add_argument(
        '-d',
        dest='day_delta',
        type=int,
        required=True,
        help='difference in days between today and needed table',
    )

    return parser.parse_args(argv)


def get_needed_date(timestamp, day_delta):
    """Return the date ``day_delta`` days before ``timestamp`` as ``YYYYMMDD``.

    Args:
        timestamp: POSIX timestamp in seconds. It is converted with
            ``datetime.fromtimestamp``, i.e. interpreted in the local time
            zone of the machine running the script.
        day_delta: Number of days to subtract from that moment.

    Returns:
        The resulting calendar date formatted as a ``'%Y%m%d'`` string.
    """
    from datetime import datetime, timedelta

    start_moment = datetime.fromtimestamp(timestamp)
    shifted_day = (start_moment - timedelta(days=day_delta)).date()
    return shifted_day.strftime('%Y%m%d')


def utm_filter(full_request):
    """Tell whether ``full_request`` carries a ``utm_trend`` query parameter.

    The string is parsed with ``urlparse.parse_qs`` using its defaults, so a
    parameter with a blank value (``utm_trend=``) is dropped by the parser
    and does not count as present.

    NOTE(review): ``parse_qs`` is applied to the whole string. If
    ``full_request`` includes a path or a ``?`` before the query part, the
    first parameter's key will contain that prefix and will not match --
    confirm the format of this column.

    Args:
        full_request: Query string to inspect; may be empty or ``None``.

    Returns:
        ``True`` if ``utm_trend`` is among the parsed parameter names,
        ``False`` otherwise (including for empty or ``None`` input).
    """
    # An absent value cannot contain the parameter; checking it first also
    # keeps parse_qs from failing on None under Python 2.
    if not full_request:
        return False
    return "utm_trend" in urlparse.parse_qs(full_request)


if __name__ == "__main__":
    # Script entry point: aggregate click statistics for requests that carry
    # a utm_trend parameter in one daily table and dump them to a TSV file.

    # Enable the YT pickling option `enable_modules_compatibility_filter`.
    yt.config['pickling']['enable_modules_compatibility_filter'] = True

    args = argument_parser()
    pool = args.pool
    out_file = args.out_file
    timestamp = args.timestamp
    day_delta = args.day_delta

    # Values this large are treated as milliseconds and reduced to seconds.
    # Floor division keeps the timestamp an integer on any Python version,
    # so the value written to the output file never gains a fractional part.
    if timestamp > 100000000000:
        timestamp //= 1000

    # NOTE(review): the value of -p (described as a pool in its help text) is
    # used as the YT proxy here -- confirm that this is intended.
    yt.config.set_proxy(pool)

    # Daily tables are named by date in YYYYMMDD form.
    table_name = get_needed_date(timestamp, day_delta)

    input_table = '//home/search-functionality/mt_squeeze/v31/{}'.format(table_name)
    tmp_table = '//home/freshness/terminutz/FRESHNESS-2907/{}'.format(table_name)

    cluster = clusters.Hahn()
    job = cluster.job()

    # Keep only rows accepted by utm_filter, derive one flag per row for each
    # metric, then collapse the whole table into a single row of totals.
    job.table(input_table).filter(
        nf.custom(utm_filter, "full_request")
    ).project(
        long_click=ne.custom(lambda t: t >= 120, 'dwell_time_on_service'),
        short_click=ne.custom(lambda t: t < 30, 'dwell_time_on_service'),
        rerequested=ne.custom(lambda req, nq: req and nq != "", 'queryRerequested', 'next_query'),
        closed=ne.custom(lambda req, nq: req and nq == "", 'queryRerequested', 'next_query')
    ).aggregate(
        all_click=na.count(),
        long_click=na.sum('long_click'),
        short_click=na.sum('short_click'),
        rerequested=na.sum('rerequested'),
        closed=na.sum('closed')
    ).put(tmp_table)

    job.run()

    # Order of the lines written to the output file.
    metric_names = ('all_click', 'long_click', 'short_click', 'rerequested', 'closed')

    try:
        df = cluster.read(tmp_table).as_dataframe()
        with open(out_file, 'w') as out_f:
            # One line per metric: name, value from the first row, timestamp.
            for metric in metric_names:
                out_f.write('{}\t{}\t{}\n'.format(metric, df[metric][0], timestamp))
    finally:
        # The temporary table exists once the job has run; remove it even if
        # reading it or writing the output file failed.
        yt.remove(tmp_table)

