import argparse
from urlparse import urlparse
from datetime import datetime
from yt import wrapper as yw
from yt.logger import LOGGER as yt_logger


def parse_time(time_str):
    """Convert a ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` string into a naive datetime.

    The trailing ``Z`` is matched literally by the format, so the returned
    object carries no tzinfo.
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%S.%fZ'
    parsed = datetime.strptime(time_str, timestamp_format)
    return parsed


def modification_time(node):
    """Return the node's ``modification_time`` attribute parsed by ``parse_time``.

    ``node`` must expose an ``attributes`` mapping that contains the
    ``modification_time`` key.
    """
    raw_timestamp = node.attributes['modification_time']
    return parse_time(raw_timestamp)


def request_parser(row):  # mapper
    """Turn one source row into a ``{query, handle, request_params}`` row.

    Reads ``row['corrected_query']`` and ``row['full_request']``. Nothing is
    yielded when the corrected query is empty or when the request path
    contains a colon.

    Yields:
        A dict with:
          * ``query`` - the corrected query, unchanged;
          * ``handle`` - the URL path of ``full_request`` without trailing
            slashes;
          * ``request_params`` - the URL query string with ``json_dump=1``
            appended.
    """
    query = row['corrected_query']
    if not query:
        return
    request_parts = urlparse(row['full_request'])
    handle = request_parts.path.rstrip('/')
    if ':' in handle:
        return
    original_params = request_parts.query
    # Add the separator only when there is something to separate: a request
    # without a query string must not produce a stray leading '&'.
    if original_params:
        request_params = '&'.join((original_params, 'json_dump=1'))
    else:
        request_params = 'json_dump=1'
    yield {'query': query, 'handle': handle, 'request_params': request_params}


def handles_lister(key, _input_row_iterator):  # reducer
    """Emit a single ``{'handle': ...}`` row for the reduce key.

    The grouped rows themselves are never read; only ``key['handle']`` is used.
    """
    handle = key['handle']
    yield dict(handle=handle)


class Counter:  # reducer
    """Reducer that counts the rows of each ``(handle, query)`` group.

    The instance is built with the list of known handles; the position of a
    handle in that list is the index passed to ``yw.create_table_switch``
    before the result row is emitted.
    """

    def __init__(self, handles_list):
        """Remember the position of every handle in ``handles_list``.

        If a handle occurs more than once, its last position wins.
        """
        self._handles_indexes = {handle: index for index, handle in enumerate(handles_list)}

    def __call__(self, key, input_row_iterator):
        """Yield a table switch followed by one aggregated row for the group.

        Args:
            key: mapping with the ``query`` and ``handle`` of the group.
            input_row_iterator: iterator over the rows of the group; it must
                contain at least one row.

        Yields:
            The object returned by ``yw.create_table_switch(handle_index)``,
            then a dict with ``query``, ``handle``, ``count`` (number of rows
            in the group) and ``request_params`` (taken from the first row).

        Raises:
            KeyError: if the handle was not in the list given to ``__init__``.
        """
        query = key['query']
        handle = key['handle']
        # The builtin next() works on every iterator, unlike the
        # Python-2-only ``.next()`` method.
        request_params = next(input_row_iterator)['request_params']
        # One row was already consumed above; count it plus the rest.
        count = 1 + sum(1 for _ in input_row_iterator)
        handle_index = self._handles_indexes[handle]
        yield yw.create_table_switch(handle_index)
        yield {'query': query, 'handle': handle, 'count': count, 'request_params': request_params}


def main(args):
    """Build per-handle query-count tables from the last source table.

    Steps performed:
      1. List ``args.source`` and take the last (in sorted order) node that is
         a table and has no locks.
      2. Return early if ``args.symlink`` exists and already resolves to the
         output directory ``<args.dest>/<source table name>``.
      3. Map the source table with ``request_parser``, collect the distinct
         handles, and reduce with ``Counter`` into one output table per
         handle, each then sorted by ``count``.
      4. If ``args.symlink`` is set, point it at the new output directory and
         afterwards remove the directory it resolved to before.

    Args:
        args: namespace with ``source``, ``dest`` and ``symlink`` attributes.

    Raises:
        IndexError: if ``args.source`` contains no unlocked tables.
    """
    yw.config['pickling']['python_binary'] = '/skynet/python/bin/python'
    source_dir_content = yw.list(args.source, absolute=True, attributes=['locks', 'modification_time', 'type'])
    candidates = sorted(node for node in source_dir_content
                        if not node.attributes['locks'] and node.attributes['type'] == 'table')
    if not candidates:
        # Same exception type the bare ``[-1]`` lookup used to raise, but with
        # a message that names the actual problem.
        raise IndexError('No unlocked tables found in {}'.format(args.source))
    source_table = candidates[-1]
    yt_logger.info('Source table: {}'.format(source_table))
    date = yw.ypath_split(source_table)[1]
    output_dir = yw.ypath_join(args.dest, date)
    have_prepared_queries = args.symlink and yw.exists(args.symlink)
    if have_prepared_queries and yw.get_attribute(args.symlink, 'path') == output_dir:
        yt_logger.info('Source table has not changed since last run')
        return
    yw.create('map_node', output_dir, recursive=True, ignore_existing=True)
    with yw.TempTable() as raw_table, yw.TempTable() as table_of_handles:
        yw.run_map(request_parser, source_table, raw_table)
        yw.run_sort(raw_table, sort_by=['handle', 'query'])
        yw.run_reduce(handles_lister, raw_table, table_of_handles, reduce_by=['handle'])
        # A sorted list gives a deterministic handle -> output table index
        # mapping instead of relying on the iteration order of a set.
        handles = sorted(set(row['handle'] for row in yw.read_table(table_of_handles)))
        # NOTE(review): an empty handle (request path '/') and handles that
        # differ only in '/' vs '-' map to the same table name here - confirm
        # such handles cannot occur in the source data.
        output_tables = [yw.ypath_join(output_dir, handle.lstrip('/').replace('/', '-')) for handle in handles]
        yw.run_reduce(Counter(handles), raw_table, output_tables, reduce_by=['handle', 'query'])
        for output_table in output_tables:
            yw.run_sort(output_table, sort_by=['count'])
        # Resolve the previously published directory before the link moves.
        previous_dir = yw.get_attribute(args.symlink, 'path') if have_prepared_queries else None
        if args.symlink:
            yw.link(output_dir, args.symlink, force=True)
        # Drop the old data only after the link points at the new directory,
        # so the link never dangles and a failed relink loses nothing.
        if previous_dir is not None:
            yw.remove(previous_dir, recursive=True)


if __name__ == '__main__':
    # Command-line entry point: declare the options, parse them, run main().
    cli = argparse.ArgumentParser(description='Build table with requests for SoY')
    mandatory_options = (
        ('--source', 'Cypress dir with request tables'),
        ('--dest', 'Cypress dir where to put results'),
    )
    for flag, description in mandatory_options:
        cli.add_argument(flag, required=True, help=description)
    cli.add_argument('--symlink', help='Symlink to update')
    cli_args = cli.parse_args()
    main(cli_args)
