#!/usr/bin/env python
# -*- coding: utf-8 -*-
# from __future__ import unicode_literals
from __future__ import division
import sys
import os
import argparse
import json

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    clusters,
    Record
)

from nile.utils.misc import coerce_path
import getpass


class GetHRPath(object):
    """Callable that turns a path (or path template) into a '//'-prefixed string.

    The path is passed through ``coerce_path`` and evaluated with the
    templates of the cluster's environment supplied as keyword arguments.
    """

    def __init__(self, cluster):
        """Store the cluster whose ``environment.templates`` are used later."""
        self.cluster = cluster

    def __call__(self, path):
        """Return ``path`` evaluated against the cluster templates as a str.

        The result always starts with ``'//'``: the prefix is added when the
        evaluated path does not already have it.
        """
        coerced = coerce_path(path)
        resolved = str(coerced.eval(**self.cluster.environment.templates))
        if resolved.startswith('//'):
            return resolved
        return '//' + resolved


# Module-level placeholder. main() binds its GetHRPath instance to a local
# variable of the same name (there is no `global` declaration), so this
# module-level binding stays None.
get_hr_path = None
# Login name of the current user; main() uses it as the default for --pool.
username = getpass.getuser()


def main():
    """Read a table from the cluster and dump it into two local JSON files.

    Command-line options:
        --cluster        name of the attribute of ``nile.api.v1.clusters``
                         to instantiate (default ``Hahn``).
        --in             with ``--local``: the table path itself; without
                         ``--local``: path to a JSON file whose ``'table'``
                         key holds the table path.
        --local          see ``--in``.
        --toloka         output file receiving a list of
                         ``{'url': <canon_url>}`` objects, one per record.
        --data_for_join  output file receiving a mapping from ``canon_url``
                         to the full record.
        --pool           pool passed to the cluster (defaults to the current
                         user name).

    The process exits with status 1 if the resolved input table does not
    exist on the cluster.

    Raises:
        KeyError: if the ``YT_TOKEN`` environment variable is not set, or if
            a record has no ``canon_url`` field.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--cluster', default='Hahn')
    parser.add_argument('--in', default='home/videolog/sbr/ybro_video_plays')
    parser.add_argument('--local', action='store_true')
    parser.add_argument('--data_for_join', default='data_for_join.json')
    parser.add_argument('--toloka', default='toloka.json')
    parser.add_argument(
        '--pool', default=username
    )
    args = parser.parse_args()

    cluster = getattr(clusters, args.cluster)(
        pool=args.pool, token=os.environ['YT_TOKEN']
    ).env(
        parallel_operations_limit=10
    )
    get_hr_path = GetHRPath(cluster)

    # 'in' is a Python keyword, so the parsed value is only reachable
    # through getattr().
    in_ = getattr(args, 'in')
    if not args.local:
        # Without --local the argument names a JSON file that points at the
        # table; close the file as soon as it has been parsed.
        with open(in_) as table_spec:
            in_ = json.load(table_spec)['table']

    in_ = get_hr_path(in_)

    if not cluster.driver.exists(in_):
        sys.stderr.write('Input table {} does not exist.\n'.format(
            in_
        ))
        sys.exit(1)

    records = [vars(x) for x in cluster.read(in_)]

    toloka = [
        {'url': x['canon_url']} for x in records
    ]

    # Records sharing a canon_url collapse to the last one read.
    data_for_join = {
        x['canon_url']: x for x in records
    }

    # Context managers guarantee the output is flushed and the handles are
    # closed even if serialization fails part-way through.
    with open(args.toloka, 'w') as toloka_file:
        json.dump(toloka, toloka_file, indent=2)
    with open(args.data_for_join, 'w') as join_file:
        json.dump(data_for_join, join_file, indent=2)



# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
