#!/usr/bin/env python2
import yt.wrapper as yt
import argparse, json, sys, urllib2

def get_hitman_process_status(job_id, token):
    """Return the status of a Hitman execution, or None if it cannot be read.

    Sends an authenticated request to the Hitman execution endpoint for
    ``job_id`` and extracts the ``status`` field from the JSON response.

    Args:
        job_id: Execution identifier; interpolated into the request URL.
        token: OAuth token sent in the ``Authorization`` header.

    Returns:
        The value of the ``status`` field of the response, or None when the
        request fails, the body is not valid JSON, or the decoded body is not
        an object with a ``status`` key.
    """
    url = 'https://hitman.yandex-team.ru/api/v1/execution/{}'.format(job_id)
    req = urllib2.Request(url)
    req.add_header('Content-Type', 'application/json')
    req.add_header('Authorization', 'OAuth {}'.format(token))

    try:
        resp = urllib2.urlopen(req)
        try:
            data = json.load(resp)
        finally:
            # Release the connection even if the body fails to parse.
            resp.close()
    except Exception as err:
        # The lookup is deliberately best-effort: any failure is reported as
        # "status unknown" (None). Unlike a bare ``except``, this does not
        # swallow KeyboardInterrupt/SystemExit, and the cause is logged
        # (the token is never written to the log).
        print >>sys.stderr, "Failed to get hitman status for {}: {!r}".format(job_id, err)
        return None

    # Only a JSON object can carry a 'status' field; anything else is unknown.
    if isinstance(data, dict):
        return data.get('status')
    return None


def create_argument_parser():
    """Build the command-line parser for this script.

    Every option is mandatory. ``--rows-cutoff`` is converted to ``int``;
    the remaining options are registered without a type conversion.

    Returns:
        A configured ``argparse.ArgumentParser``.
    """
    # (flag, extra add_argument keywords), listed in registration order.
    option_specs = (
        ('--input-directory', {}),
        ('--output-directory', {}),
        ('--rows-cutoff', {'type': int}),
        ('--cluster', {}),
        ('--output-tables', {}),
        ('--error-tables', {}),
        ('--output-yt-path', {}),
        ('--hitman-token', {}),
    )

    cli = argparse.ArgumentParser()
    for flag, extra in option_specs:
        cli.add_argument(flag, required=True, **extra)
    return cli


def _parse_table_name(table_name):
    """Split an input table name into ``(priority, job_id, check_hitman)``.

    Handled forms:
      * ``<priority>___<job_id>`` (anything after a second ``___`` is ignored);
      * a name without ``___`` is taken as the job id with priority 0.
    When the job id part ends with ``NOHITCHECK`` the Hitman status check is
    disabled and the id is the text before the first ``_`` of that part.

    Raises:
        ValueError: if the priority or the job id is not an integer.
    """
    if '___' in table_name:
        priority_str, job_id_str = table_name.split('___')[:2]
    else:
        priority_str, job_id_str = '0', table_name

    check_hitman = not job_id_str.endswith('NOHITCHECK')
    if not check_hitman:
        job_id_str = job_id_str.split('_')[0]
    return int(priority_str), int(job_id_str), check_hitman


def _write_json_file(path, payload, indent=None):
    """Write ``payload`` to ``path`` as UTF-8 JSON followed by a newline.

    ``ensure_ascii=False`` keeps non-ASCII characters readable in the file
    while leaving JSON escapes (``\\"``, ``\\\\``, ``\\n``) intact, so the
    output always stays valid JSON.
    """
    text = json.dumps(payload, indent=indent, ensure_ascii=False)
    if isinstance(text, unicode):
        text = text.encode('utf8')
    with open(path, 'w') as out:
        out.write(text + '\n')


def main():
    """Move input tables to the output directory until the row cutoff is hit.

    Tables in ``--input-directory`` are ordered by priority (higher first) and
    then by job id (lower first). A table is moved when its name disables the
    Hitman check or when the Hitman execution for its job id is ``RUNNING``;
    otherwise it is recorded as an error. Processing stops once the moved
    tables contain at least ``--rows-cutoff`` rows in total.

    Three JSON files are written: the moved tables (``--output-tables``), the
    skipped tables with their status (``--error-tables``) and the cluster /
    output directory pair (``--output-yt-path``).
    """
    args = create_argument_parser().parse_args()
    yt.config.set_proxy(args.cluster)

    total_rows = 0
    moved_tables = []
    error_tables = []

    meta_info = []
    for tbl in yt.list(args.input_directory):
        in_path = '{}/{}'.format(args.input_directory, tbl)
        out_path = '{}/{}'.format(args.output_directory, tbl)
        try:
            priority, job_id, check_hitman = _parse_table_name(tbl)
            row_count = yt.get_attribute(in_path, 'row_count')
        except Exception as err:
            # Tables with unparsable names or unreadable attributes are
            # skipped; the reason is logged so the skip is diagnosable.
            print >>sys.stderr, "Error while handling", in_path, repr(err)
            continue
        meta_info.append((priority, job_id, check_hitman, in_path, out_path, row_count))

    # Higher priority is better, then lower job_id is better.
    meta_info.sort(key=lambda x: (x[0], -x[1]), reverse=True)

    if not yt.exists(args.output_directory):
        yt.mkdir(args.output_directory, recursive=True)

    for priority, job_id, check_hitman, in_tbl, out_tbl, row_count in meta_info:
        if check_hitman:
            # A None result (status could not be determined) is handled by the
            # error branch below, the same as any non-RUNNING status.
            process_status = get_hitman_process_status(job_id, args.hitman_token)
        else:
            process_status = 'RUNNING'

        if process_status == 'RUNNING':
            print >>sys.stderr, "Moving {} to {}".format(in_tbl, out_tbl)
            yt.move(in_tbl, out_tbl)
            moved_tables.append({
                'sourceTable': in_tbl,
                'destinationTable': out_tbl,
            })

            total_rows += row_count
            if total_rows >= args.rows_cutoff:
                break
        else:
            print >>sys.stderr, "The process that created {} is no longer awaiting for it, status {}".format(in_tbl, process_status)
            # str() on a non-ASCII unicode status would raise; encode instead.
            if isinstance(process_status, unicode):
                status_text = process_status.encode('utf8')
            else:
                status_text = str(process_status)
            error_tables.append({
                'sourceTable': in_tbl,
                'status': status_text,
            })

    _write_json_file(args.output_tables, moved_tables, indent=4)
    _write_json_file(args.error_tables, error_tables, indent=4)
    _write_json_file(
        args.output_yt_path,
        [{'cluster': args.cluster, 'table': args.output_directory}],
    )

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
