import yt.wrapper as yt
import sys
from hashlib import md5
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    clusters,
    Record,
    files
)


def add_hash(qid):
    """Return the hexadecimal MD5 digest of *qid*.

    Byte strings are hashed as-is and text is encoded as UTF-8. Any other
    value (for example an integer id) is converted with ``str()`` first.
    Hash constructors accept only bytes on Python 3, so the explicit
    encoding keeps the function usable there while producing the same
    digests as ``md5(str(qid))`` did on Python 2.

    Args:
        qid: Value to hash.

    Returns:
        The 32-character lowercase hexadecimal MD5 digest.
    """
    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3.
    if not isinstance(qid, (bytes, type(u''))):
        qid = str(qid)
    # On Python 2 ``str()`` already yields bytes; only text needs encoding.
    if not isinstance(qid, bytes):
        qid = qid.encode('utf-8')
    return md5(qid).hexdigest()


def shuffle_pool(src_dir, dst_dir, token):
    """Copy a pool to *dst_dir* with its features re-ordered by hashed key.

    The ``queries`` table is copied unchanged. The ``features`` table is
    rewritten by a Nile job that adds a temporary ``qid_md5`` column (the
    MD5 hex digest of each row's ``key`` field), sorts by that column and
    drops it again before writing the result.

    Args:
        src_dir: Directory holding the source ``features`` and ``queries``
            tables.
        dst_dir: Directory that receives the copies; created (recursively)
            when it does not exist yet.
        token: Token passed to the Nile cluster object.

    Returns:
        Path of the written features table (``<dst_dir>/features``).
    """
    path_template = '{}/{}'
    queries_src = path_template.format(src_dir, 'queries')
    queries_dst = path_template.format(dst_dir, 'queries')
    features_src = path_template.format(src_dir, 'features')
    features_dst = path_template.format(dst_dir, 'features')

    destination_missing = not yt.exists(dst_dir)
    if destination_missing:
        yt.mkdir(dst_dir, recursive=True)

    # Queries need no processing, so they are copied as-is.
    yt.copy(queries_src, queries_dst)

    hahn = clusters.Hahn(token=token)
    cluster = hahn.env(templates=dict(job_root=dst_dir))
    job = cluster.job()

    # Attach the hash of ``key`` as a helper column used only for ordering.
    with_hash = job.table(features_src).project(
        ne.all(),
        qid_md5=ne.custom(add_hash, 'key'),
    )
    ordered = with_hash.sort('qid_md5')
    # Remove the helper column so the output keeps the source columns only.
    without_hash = ordered.project(ne.all(exclude=['qid_md5']))
    without_hash.put(features_dst)

    job.run()

    return features_dst


def main(*args):
    """Entry point: shuffle the pool described by the first input.

    Exactly six positional arguments are expected; any other count makes
    the unpacking below raise. Only the first (``params``) and the fourth
    (``token``) are used. ``params[0]`` must be a mapping with ``src_dir``
    and ``dst_dir`` keys.
    NOTE(review): the remaining arguments are presumably supplied by the
    hosting runner -- confirm against the caller.

    Returns:
        A one-element list with a dict naming the cluster (``'hahn'``) and
        the path of the resulting features table.
    """
    params, _in2, _in3, token, _any_param, _html_file = args

    options = params[0]
    src_dir = options['src_dir']
    dst_dir = options['dst_dir']

    # Point the YT client at the cluster before any table operation runs.
    yt_settings = {"proxy": {"url": "hahn.yt.yandex.net"}, "token": token}
    yt.update_config(yt_settings)

    result_table = shuffle_pool(src_dir, dst_dir, token)
    output = {'cluster': 'hahn', 'table': result_table}
    return [output]
