# -*- coding: utf-8 -*-

import argparse

import yt.wrapper as yt
from yt.wrapper.ypath import ypath_dirname

from map_reduce import run_map_reduce
from prepare_json import prepare_json
from prepare_leveldb import prepare_leveldb
from remove_stale_tables import remove_stale_tables


def _build_parser():
    """Build the command-line parser for the two supported commands."""
    parser = argparse.ArgumentParser(
        description='MR-job for urls by show counters',
    )

    parser.add_argument(
        'command',
        choices=['run-map-reduce', 'prepare-result'],
    )
    parser.add_argument(
        '--input-table',
        dest='input_tables',
        type=str,
        action='append',
    )
    parser.add_argument(
        '--output-table',
        dest='output_table',
        type=str,
        required=True,
    )
    parser.add_argument(
        '--output-absolute',
        dest='output_absolute',
        type=str,
    )
    parser.add_argument(
        '--output-relative',
        dest='output_relative',
        type=str,
    )
    parser.add_argument(
        '--output-queries',
        dest='output_queries',
        type=str,
    )
    parser.add_argument(
        '--max-urls',
        dest='max_urls',
        type=int,
        default=10,
    )
    parser.add_argument(
        '--max-urls-per-tld',
        dest='max_urls_per_tld',
        type=int,
        default=10,
    )
    parser.add_argument(
        '--tlds-coverage',
        dest='tlds_coverage',
        action='store_true',
        default=False,
    )
    parser.add_argument(
        '--data-format',
        dest='data_format',
        type=str,
        choices=[
            'json',
            'leveldb',
        ],
    )
    parser.add_argument(
        '--baobab-counters-path',
        dest='baobab_counters_path',
        type=str
    )
    parser.add_argument(
        '--debug',
        dest='debug_mode',
        action='store_true',
    )

    return parser


def main():
    """Parse the command line and run the requested command.

    ``run-map-reduce`` removes stale tables next to the output table (unless
    ``--debug`` is given), starts the map-reduce operation and prints its id
    to stdout.

    ``prepare-result`` merges the output table in place and passes it to the
    JSON or LevelDB preparer selected by ``--data-format``.

    Exits through ``parser.error`` (status 2) when ``prepare-result`` is
    requested without ``--data-format``.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # --data-format is optional at the parser level because run-map-reduce
    # does not use it. Check it here, before any YT work, so a missing value
    # is a usage error rather than a KeyError raised after the merge has run.
    if args.command == 'prepare-result' and args.data_format is None:
        parser.error('--data-format is required for the prepare-result command')

    yt.config['pool'] = 'yframe'
    yt.config['tabular_data_format'] = 'json'
    yt.config['pickling']['use_local_python_in_jobs'] = True
    yt.config['table_writer'] = {'max_row_weight': 128 * 1024 * 1024}

    if args.command == 'run-map-reduce':
        if not args.debug_mode:
            remove_stale_tables(ypath_dirname(args.output_table))

        baobab_counters_list = get_baobab_counters_list(args.baobab_counters_path)
        operation = run_map_reduce(
            args.max_urls,
            args.max_urls_per_tld,
            args.tlds_coverage,
            args.input_tables,
            args.output_table,
            baobab_counters_list,
        )

        # Print the operation id, which is picked up by the task's main process.
        # The prefix literal ends with a space and is joined to the id with one
        # more space, so the emitted line is the same as the two-item print
        # statement this replaces.
        print('%s %s' % ('yt_operation_for_urls_by_show_counters ', operation.id))

    elif args.command == 'prepare-result':
        # Source and destination are the same table: merged in place.
        yt.run_merge(args.output_table, args.output_table, spec={'combine_chunks': True})

        prepare = {
            'json': prepare_json,
            'leveldb': prepare_leveldb,
        }[args.data_format]

        prepare(
            args.output_table,
            args.output_absolute,
            args.output_relative,
            args.output_queries,
        )


def get_baobab_counters_list(file_path):
    """Read counter names from a text file, one per line.

    Each line is stripped of leading and trailing whitespace. Blank lines are
    kept as empty strings.

    :param file_path: path to the file, or None when no file was given.
    :return: list of stripped lines in file order; empty list for None.
    """
    if file_path is None:
        return []

    with open(file_path) as source:
        return [raw_line.strip() for raw_line in source]
