#!/usr/local/bin/python
# -*- coding: utf-8

import yt.wrapper as yt
import argparse
import json
import re
from datetime import date, datetime, timedelta

# Matches canned "no answer" phrases (in Russian) anywhere in a string, e.g.
# "sorry, I don't know what to answer", "I can't answer that",
# "nothing could be found".  It is searched against the raw (unparsed)
# row['response'] text by the mappers below, and matching rows are dropped.
# NOTE(review): the first letter of some phrases is omitted ("ростите",
# "звините") -- presumably so the match does not depend on capitalisation;
# confirm before editing the pattern.
NO_ANSWER = re.compile(r'(ростите\,? я не знаю что ответить|меня нет ответа на такой запрос|не могу на это ответить|пока не умею отвечать на такие запросы|звините\,? у меня нет хорошего ответа|ничего найти не получилось| не получилось ничего найти| я ничего не нашла| ничего не нашлось)')
# Matches the intent names whose rows are excluded by the mappers below:
# personal_assistant.scenarios.{video_play, music_play, get_news,
# image_what_is_this, music_what_is_playing} and
# personal_assistant.general_conversation.general_conversation.
# Like NO_ANSWER, it is searched against the raw row['response'] text.
NOT_ALLOWED_SCENARIOS = re.compile(r'personal_assistant\.(scenarios\.(video_play|music_play|get_news|image_what_is_this|music_what_is_playing)|general_conversation\.general_conversation)')


def parse_args():
    """Parse the command-line options of this script.

    Returns:
        argparse.Namespace with the attributes ``mr_server``, ``yt_pool``,
        ``input_table``, ``timestamp``, ``output_prefix``,
        ``output_for_toloka`` and ``output_scenarios``.  Only the first two
        have default values; the others are None when not supplied.
    """
    cli = argparse.ArgumentParser(add_help=True, description='Suggest metrics calc')

    # Options that fall back to a default value.
    cli.add_argument('--mr_server', default='hahn', help='MR server (hahn, banach, ...)')
    cli.add_argument('--yt_pool', default='robot-suggestor-dev', help='YT pool')

    # Options without a default, registered in the order shown by --help.
    plain_options = (
        ('--input_table', 'path to table with facts'),
        ('--timestamp', 'timestamp for metrics in Razladki and Stat'),
        ('--output_prefix', 'path to table with VINS answers'),
        ('--output_for_toloka', 'path to table with prepared request, responses for toloka'),
        ('--output_scenarios', 'path to table with request, responses, scenarios'),
    )
    for flag, description in plain_options:
        cli.add_argument(flag, help=description)

    return cli.parse_args()


def test_parse(row):
    """Yield the recognised utterance of a row together with its raw response.

    Args:
        row: mapping that provides
            row['request']['request']['event']['asr_result'][0]['utterance']
            and row['response'].  Nothing is caught here, so a missing key
            or an empty ``asr_result`` list propagates as an exception.

    Yields:
        A single dict with ``query`` (the utterance with newline characters
        removed) and ``response`` (row['response'], passed through as is).
    """
    event = row['request']['request']['event']
    utterance = event['asr_result'][0]['utterance']

    record = {}
    record['query'] = utterance.replace('\n', '')
    record['response'] = row['response']
    yield record


def prepare_data_for_toloka(row):
    """Map one input row to a (request, voice answer) record for Toloka.

    A row is skipped (nothing is yielded) when:
      * its raw ``response`` text matches NO_ANSWER or NOT_ALLOWED_SCENARIOS;
      * the utterance or the voice answer cannot be extracted;
      * either of them is None.

    Args:
        row: mapping with ``response`` (a JSON string) and the nested path
            row['request']['request']['event']['asr_result'][0]['utterance'].

    Yields:
        At most one dict with ``reqid`` (the utterance) and ``voice_answer``
        (response['voice_response']['output_speech']['text']), both with
        newline characters removed.
    """
    raw_response = row['response']
    # Both filters run on the raw JSON text, before it is parsed.
    if NO_ANSWER.search(raw_response) or NOT_ALLOWED_SCENARIOS.search(raw_response):
        return

    try:
        reqid = row['request']['request']['event']['asr_result'][0]['utterance']
        response = json.loads(raw_response)
        voice_answer = response['voice_response']['output_speech']['text']
    except (KeyError, IndexError, TypeError, ValueError):
        # Malformed rows are skipped rather than aborting the whole map job:
        # KeyError   - a field is missing,
        # IndexError - ``asr_result`` is an empty list,
        # TypeError  - an intermediate value is None or not a container,
        # ValueError - ``response`` is not valid JSON.
        return

    if reqid is None or voice_answer is None:
        return

    yield {'reqid': reqid.replace('\n', ''),
           'voice_answer': voice_answer.replace('\n', '')}


def get_requests_with_scenarios_total(row):
    """Map one input row to a record with query, answer, scenario and source.

    Unlike the filtered mappers in this file, no NO_ANSWER or
    NOT_ALLOWED_SCENARIOS filtering is applied here.

    Args:
        row: mapping with ``response`` (a JSON string) and the nested path
            row['request']['request']['event']['asr_result'][0]['utterance'].

    Yields:
        At most one dict with:
          * ``query`` - the utterance, newline characters removed;
          * ``voice_answer`` - response['voice_response']['output_speech']['text'],
            newline characters removed;
          * ``scenario`` - the ``intent`` of the last entry of
            response['response']['meta'] that has one, or None if no entry
            carries an intent;
          * ``answer_source`` - derived from the last non-null
            ``search_results`` slot: the factoid source, ``'object'``,
            ``'calculator'``, or None.
        Nothing is yielded when the row is malformed or when the utterance
        or voice answer is None.
    """
    # Initialised up front so that a meta list without any ``intent`` entry
    # yields scenario=None instead of hitting an unbound local at the yield.
    scenario = None
    answer_source = None
    try:
        reqid = row['request']['request']['event']['asr_result'][0]['utterance']
        response = json.loads(row['response'])
        voice_answer = response['voice_response']['output_speech']['text']
        for param in response['response']['meta']:
            if u'intent' in param:
                scenario = param[u'intent']
            if 'form' not in param:
                continue
            for slot in param['form']['slots']:
                if slot['slot'] != 'search_results':
                    continue
                value = slot['value']
                if value is None:
                    # A null search result leaves any earlier source intact.
                    continue
                if 'factoid' in value:
                    answer_source = value['factoid']['source']
                elif 'object' in value:
                    answer_source = 'object'
                elif 'calculator' in value:
                    answer_source = 'calculator'
                else:
                    answer_source = None
    except (KeyError, IndexError, TypeError, ValueError):
        # Malformed rows are skipped rather than aborting the whole map job:
        # KeyError   - a field is missing,
        # IndexError - ``asr_result`` is an empty list,
        # TypeError  - an intermediate value is None or not a container,
        # ValueError - ``response`` is not valid JSON.
        return

    if reqid is None or voice_answer is None:
        return

    yield {'query': reqid.replace('\n', ''),
           'voice_answer': voice_answer.replace('\n', ''),
           'scenario': scenario,
           'answer_source': answer_source}


def get_requests_with_scenarios(row):
    """Map one input row to a (query, voice answer, scenario) record.

    A row is skipped (nothing is yielded) when:
      * its raw ``response`` text matches NO_ANSWER or NOT_ALLOWED_SCENARIOS;
      * the utterance, voice answer or meta list cannot be extracted;
      * the utterance or the voice answer is None.

    Args:
        row: mapping with ``response`` (a JSON string) and the nested path
            row['request']['request']['event']['asr_result'][0]['utterance'].

    Yields:
        At most one dict with ``query`` and ``voice_answer`` (newline
        characters removed) and ``scenario`` - the ``intent`` of the last
        entry of response['response']['meta'] that has one, or None if no
        entry carries an intent.
    """
    raw_response = row['response']
    # Both filters run on the raw JSON text, before it is parsed.
    if NO_ANSWER.search(raw_response) or NOT_ALLOWED_SCENARIOS.search(raw_response):
        return

    # Initialised up front so that a meta list without any ``intent`` entry
    # yields scenario=None instead of hitting an unbound local at the yield.
    scenario = None
    try:
        reqid = row['request']['request']['event']['asr_result'][0]['utterance']
        response = json.loads(raw_response)
        voice_answer = response['voice_response']['output_speech']['text']
        for param in response['response']['meta']:
            if u'intent' in param:
                scenario = param[u'intent']
    except (KeyError, IndexError, TypeError, ValueError):
        # Malformed rows are skipped rather than aborting the whole map job:
        # KeyError   - a field is missing,
        # IndexError - ``asr_result`` is an empty list,
        # TypeError  - an intermediate value is None or not a container,
        # ValueError - ``response`` is not valid JSON.
        return

    if reqid is None or voice_answer is None:
        return

    yield {'query': reqid.replace('\n', ''),
           'voice_answer': voice_answer.replace('\n', ''),
           'scenario': scenario}


if __name__ == '__main__':
    # Script entry point: runs three YT map operations over the input table,
    #   1. prepare_data_for_toloka           -> --output_for_toloka
    #   2. get_requests_with_scenarios       -> --output_scenarios
    #   3. get_requests_with_scenarios_total -> <output_prefix>/vins_results/<date>
    # NOTE(review): --mr_server and --yt_pool are parsed but not applied
    # anywhere in this block; presumably the YT cluster and pool come from
    # the environment -- confirm.
    args = parse_args()

    # Every option below is used unconditionally, so report missing ones up
    # front instead of failing later with an opaque TypeError on None.
    required = ('input_table', 'timestamp', 'output_prefix',
                'output_for_toloka', 'output_scenarios')
    missing = ['--' + name for name in required if getattr(args, name) is None]
    if missing:
        raise SystemExit('Missing required arguments: ' + ', '.join(missing))

    input_table = args.input_table
    output_for_toloka = args.output_for_toloka
    output_scenarios = args.output_scenarios
    output_prefix = args.output_prefix
    timestamp = args.timestamp

    # Only the first 10 characters of the timestamp are used, so a
    # millisecond timestamp is reduced to whole seconds.  The result is named
    # ``run_date`` so that it does not shadow ``date`` imported from datetime.
    run_date = datetime.fromtimestamp(int(timestamp[:10])).strftime('%Y-%m-%d')
    # Single-argument call form prints the same under Python 2 and Python 3.
    print(run_date)

    yt.run_map(prepare_data_for_toloka, input_table, output_for_toloka)
    yt.run_map(get_requests_with_scenarios, input_table, output_scenarios)

    output_total = output_prefix + '/vins_results/' + run_date
    if not yt.exists(output_total):
        yt.create("table", path=output_total, recursive=True,
                  ignore_existing=True, attributes=None)
    yt.run_map(get_requests_with_scenarios_total, input_table, output_total)

