#-*- coding: UTF-8 -*-
import nile
import argparse
import time
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    files as nfi,
    clusters,
    Record
)
from qb2.api.v1 import (
    extractors as se,
    filters as sf
)
from copy import deepcopy
import urllib
from datetime import datetime as dt, timedelta
import os
import sys
import codecs
import json
from random import random
import hashlib
import requests
import urlparse

def main():
    """Copy a pool into a new directory, padding each feature row with zeros.

    Command-line arguments (all required):
      --pool_info          path to a local JSON file whose 'path' key holds
                           the source pool directory on the cluster
      --pool_prefix        parsed but not used by this function
      --new_factor_slices  string written as the value of the single record
                           in the '<output_directory>/factor_slices' table
      --zero_count         number of tab-separated '0' fields appended to the
                           'value' column of every row of the 'features' table
      --output_directory   destination directory on the cluster; created if
                           it does not exist

    The 'queries' and 'ratings' tables are copied unchanged (replacing any
    existing copies); 'features' is rewritten with the padded values and
    sorted by ('key', 'subkey').
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--pool_info', type=str, required=True)
    parser.add_argument('--pool_prefix', type=str, required=True)
    parser.add_argument('--new_factor_slices', type=str, required=True)
    parser.add_argument('--zero_count', type=int, required=True)
    parser.add_argument('--output_directory', type=str, required=True)
    args = parser.parse_args()

    # Read the pool description inside a context manager so the file handle
    # is closed deterministically instead of being leaked.
    with open(args.pool_info, 'r') as pool_info_file:
        pool_directory = json.load(pool_info_file)['path'] + "/"
    # Parenthesised single-argument form prints the same text on Python 2.
    print(pool_directory)

    cluster = clusters.yt.Hahn().env(parallel_operations_limit=10,
                                     yt_spec_defaults=dict(
                                         pool_trees=["physical"],
                                         tentative_pool_trees=["cloud"]
                                     ),
                                     templates=dict(
                                         tmp_root='//tmp',
                                         title='CalcOfflinePool'
                                     ))

    if not cluster.driver.exists(args.output_directory):
        cluster.driver.mkdir(args.output_directory)

    output_directory = args.output_directory + "/"

    new_factor_slices = [Record(key="1", subkey="", value=args.new_factor_slices)]
    cluster.driver.write(output_directory + "factor_slices", new_factor_slices)

    # Replace any previous copies of the tables that are carried over as-is.
    for suffix in ['queries', 'ratings']:
        if cluster.driver.exists(output_directory + suffix):
            cluster.driver.remove(output_directory + suffix)
        cluster.driver.copy(pool_directory + suffix, output_directory + suffix)

    # One '\t0' per requested zero. Building the suffix this way yields an
    # empty string for zero_count <= 0, so no stray trailing tab (an empty
    # extra field) is appended when there is nothing to add.
    zero_suffix = '\t0' * args.zero_count

    job = cluster.job()
    job.table(pool_directory + "features") \
       .project('key', 'subkey',
                value=ne.custom(lambda x: x + zero_suffix, 'value')) \
       .sort('key', 'subkey') \
       .put(output_directory + "features")
    job.run()

# Run the pool-building job only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
