#!/usr/local/bin/python
# -*- coding: utf-8

import yt.wrapper as yt
import argparse
import json
import re
from datetime import date, datetime, timedelta
import logging

# Matches voice answers in which the assistant states that it has no answer or
# that nothing was found (Russian phrases). Some alternatives start from the
# second letter of the first word -- presumably so that the match does not
# depend on the capitalisation of that letter; TODO confirm.
NO_ANSWER = re.compile(ur'(ростите\,? я не знаю что ответить|меня нет ответа на такой запрос|не могу на это ответить|пока не умею отвечать на такие запросы|звините\,? у меня нет хорошего ответа|ничего найти не получилось| не получилось ничего найти| я ничего не нашла| ничего не нашлось)')
# Matches scenario (intent) names whose rows get_data_for_toloka() leaves out
# of the Toloka table.
NOT_ALLOWED_SCENARIOS = re.compile(r'personal_assistant\.(scenarios\.(video_play|music_play|image_what_is_this|music_what_is_playing|repeat)|general_conversation\.general_conversation)')

def parse_args():
    """Parse the command-line options of the script.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``. Only
        ``mr_server`` and ``yt_pool`` have defaults; every other option is
        ``None`` when it is not passed.
    """
    parser = argparse.ArgumentParser(add_help=True, description='Suggest metrics calc')

    # Options that carry a default value: (flag, default, help text).
    defaulted_options = (
        ('--mr_server', 'hahn', 'MR server (hahn, banach, ...)'),
        ('--yt_pool', 'robot-suggestor-dev', 'YT pool'),
    )
    for flag, default_value, help_text in defaulted_options:
        parser.add_argument(flag, default=default_value, help=help_text)

    # Options without a default: (flag, help text).
    plain_options = (
        ('--input_table', 'path to table with facts'),
        ('--error_table', 'path to table with vins errors'),
        ('--basket_table', 'path to table with basket queries'),
        ('--timestamp', 'timestamp for metrics in Razladki and Stat'),
        ('--output_prefix', 'path to table with VINS answers'),
        ('--output_for_toloka', 'path to table with prepared request, responses for toloka'),
        ('--output_scenarios', 'path to table with request, responses, scenarios'),
    )
    for flag, help_text in plain_options:
        parser.add_argument(flag, help=help_text)

    return parser.parse_args()


def get_requests_with_scenarios(row):
    """Mapper: extract the utterance, voice answer, scenario and answer source.

    ``row['postdata']`` and ``row['FetchedResult']`` are parsed as JSON. The
    first ASR hypothesis' utterance is taken as the query text and the output
    speech text as the voice answer. The scenario is the ``intent`` found in
    the response meta, and the answer source is derived from the
    ``search_results`` slot (factoid source, ``'object'`` or ``'calculator'``).

    Parsing is best-effort: any structural problem (missing key, empty ASR
    result list, malformed JSON, unexpected value type) is logged and
    whatever was extracted before the failure is kept.

    Args:
        row: mapping with the ``postdata`` and ``FetchedResult`` JSON strings.

    Yields:
        At most one dict with the keys ``query``, ``voice_answer``,
        ``scenario`` and ``answer_source``. Nothing is yielded when the
        utterance or the voice answer could not be extracted.
    """
    utterance = None
    voice_answer = None
    scenario = None
    answer_source = None
    try:
        query = json.loads(row['postdata'])
        utterance = query['request']['event']['asr_result'][0]['utterance']
        response = json.loads(row['FetchedResult'])
        voice_answer = response['voice_response']['output_speech']['text']
        for param in response['response']['meta']:
            if u'intent' in param:
                scenario = param[u'intent']
            if 'form' not in param:
                continue
            for slot in param['form']['slots']:
                if slot['slot'] != 'search_results' or slot['value'] is None:
                    continue
                search_results = slot['value']
                if 'factoid' in search_results:
                    answer_source = search_results['factoid']['source']
                elif 'object' in search_results:
                    answer_source = 'object'
                elif 'calculator' in search_results:
                    answer_source = 'calculator'
                else:
                    answer_source = None
    except (KeyError, IndexError, TypeError, ValueError):
        # IndexError: empty ``asr_result`` list; ValueError: malformed JSON.
        # Neither should abort the whole map operation because of one row.
        logging.error('Failed to parse some queries or response')
    if utterance is None or voice_answer is None:
        return
    yield {'query': utterance.replace('\n', ''),
           'voice_answer': voice_answer.replace('\n', ''),
           'scenario': scenario,
           'answer_source': answer_source}


def join_scenarios_with_basket(key, rows):
    """Reducer: join answer rows with basket rows that share a reduce key.

    Rows containing a ``cnt`` field are treated as basket rows; all other
    rows are treated as answer rows carrying ``query``, ``scenario``,
    ``voice_answer`` and ``answer_source``. When several rows of one kind are
    present, the last one wins.

    Args:
        key: the reduce key (not used by the function).
        rows: iterable of the rows grouped under ``key``.

    Yields:
        One joined dict when a basket row and an answer row with non-``None``
        ``scenario`` and ``query`` were both seen; otherwise nothing.
        ``search_fixed`` is ``None`` if no basket row carried that field.
    """
    basket_cnt = None
    search_flag = None
    # (query, scenario, voice_answer, answer_source) of the last answer row.
    answer = None
    for record in rows:
        if 'cnt' not in record:
            answer = (record['query'], record['scenario'],
                      record['voice_answer'], record['answer_source'])
            continue
        basket_cnt = record['cnt']
        if 'search_fixed' in record:
            search_flag = record['search_fixed']

    if answer is None or basket_cnt is None:
        return
    query_text, scenario_name, voice_text, source = answer
    if scenario_name is None or query_text is None:
        return
    yield {
        'scenario': scenario_name,
        'query': query_text,
        'voice_answer': voice_text,
        'answer_source': source,
        'search_fixed': search_flag,
        'cnt': basket_cnt,
    }


def get_data_for_toloka(row):
    """Mapper: keep only the rows that should go to the Toloka table.

    A row is dropped when its scenario matches ``NOT_ALLOWED_SCENARIOS``,
    when its voice answer matches ``NO_ANSWER``, or when ``search_fixed`` is
    not equal to 1.

    Args:
        row: mapping with ``scenario``, ``voice_answer``, ``search_fixed``
            and ``query``.

    Yields:
        For a kept row, one dict with ``reqid`` (the query text) and
        ``voice_answer``, both with newline characters removed.
    """
    if NOT_ALLOWED_SCENARIOS.search(row['scenario']):
        return
    if NO_ANSWER.search(row['voice_answer']):
        return
    if row['search_fixed'] != 1:
        return
    query_text = row['query'].replace('\n', '')
    answer_text = row['voice_answer'].replace('\n', '')
    yield {'reqid': query_text, 'voice_answer': answer_text}



def errors_part_check(input_table, error_table):
    """Compute the share of error rows among all fetched rows.

    Args:
        input_table: path of the table with successful results.
        error_table: path of the table with errors.

    Returns:
        A tuple ``(errors_part, errors)`` where ``errors_part`` is
        ``errors / (results + errors)`` as a float and ``errors`` is the row
        count of ``error_table`` as an int. When both tables are empty the
        share is reported as ``0.0`` instead of raising ZeroDivisionError.
    """
    results = yt.row_count(input_table)
    errors = yt.row_count(error_table)
    total = float(results) + float(errors)
    if total == 0:
        # No rows at all: there are no errors to report a share of.
        return 0.0, int(errors)
    return float(errors) / total, int(errors)

if __name__ == '__main__':
    # Script entry point: build the scenarios table by joining parsed answers
    # with the basket, then fill the Toloka table if the error share is low.
    args = parse_args()
    output_scenarios = args.output_scenarios
    output_for_toloka = args.output_for_toloka

    # Only the first ten characters of the timestamp are parsed -- presumably
    # so that longer (e.g. millisecond) values are read as seconds; TODO confirm.
    metrics_date = datetime.fromtimestamp(int(args.timestamp[:10])).strftime('%Y-%m-%d')
    print(metrics_date)

    error_share, error_count = errors_part_check(args.input_table, args.error_table)

    # Make sure both output tables exist before any operation writes to them.
    for table_path in (output_scenarios, output_for_toloka):
        if not yt.exists(table_path):
            yt.create("table", path=table_path, recursive=True, ignore_existing=True, attributes=None)

    with yt.TempTable('//home/suggest-dev/galamaj/tmp', prefix='scenarios') as tmp_scenarios:
        yt.run_map(get_requests_with_scenarios, args.input_table, tmp_scenarios)
        yt.run_sort(tmp_scenarios, sort_by='query')
        with yt.TempTable('//home/suggest-dev/galamaj/tmp', prefix='basket') as tmp_basket:
            # The basket is copied to a temporary table and the copy is sorted.
            yt.copy(args.basket_table, tmp_basket, force=True)
            yt.run_sort(tmp_basket, sort_by='query')
            yt.run_reduce(join_scenarios_with_basket, [tmp_scenarios, tmp_basket],
                          output_scenarios, reduce_by=['query'])
            if error_share < 0.003:
                yt.run_map(get_data_for_toloka, output_scenarios, output_for_toloka)
            else:
                # Too many errors: skip the Toloka map step and only make sure
                # the output table exists.
                logging.error('Too much errors %s', error_count)
                yt.create("table", path=output_for_toloka, recursive=True, ignore_existing=True, attributes=None)


