#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import json
import sys
import base64
import yt.wrapper as yt
import datetime

# Lookup consulted by pick_src: keys are the utterance texts to keep, and the
# matching value is emitted as the "class" field of the output row.
# NOTE(review): the dict is empty here, so pick_src filters out every record
# unless it is populated elsewhere -- confirm where it is meant to be filled.
QLIST = {}

def HandleOptions():
    """Build the command-line parser for this script.

    The only option is ``--server`` (stored as ``server``), the address of
    the cluster proxy; it is optional and defaults to
    ``hahn.yt.yandex.net:80``.

    Returns:
        The configured ``argparse.ArgumentParser``; the caller is expected
        to invoke ``parse_args()`` on it.
    """
    cli = argparse.ArgumentParser(
        description="Tool for collecting data from uppersearch and middlesearch",
    )
    cli.add_argument(
        "--server",
        dest="server",
        default="hahn.yt.yandex.net:80",
        required=False,
        help="mapreduce server",
    )
    return cli


def pick_src(rec):
    """Mapper: emit one row for a dialog record that passes all filters.

    A record is kept only when every condition below holds:

    * its ``utterance_text`` is a key of the module-level ``QLIST``;
    * ``request.app_info.app_id`` equals ``ru.yandex.quasar.services``;
    * the string form of ``form_name`` contains
      ``personal_assistant.scenarios.search``;
    * ``response.meta`` is empty;
    * ``form.slots`` is non-empty.

    For a kept record, every ``search_results`` slot whose factoid source
    mentions ``sport`` is logged to stderr and sets the ``sport`` flag.

    Args:
        rec: One input row as a dict. ``response`` and ``form`` are indexed
            directly, so a row lacking them raises ``KeyError``.

    Yields:
        At most one dict with keys ``query`` (the utterance text), ``class``
        (the ``QLIST`` value for that utterance) and ``sport`` (bool).
    """
    query = rec.get("utterance_text")
    if query not in QLIST:
        return

    # Walk request -> app_info -> app_id only as far as the keys exist.
    app_id = None
    if "request" in rec and "app_info" in rec["request"] and "app_id" in rec["request"]["app_info"]:
        app_id = rec["request"]["app_info"]["app_id"]
    if app_id != "ru.yandex.quasar.services":
        return

    if "personal_assistant.scenarios.search" not in str(rec.get("form_name")):
        return
    # Records that carry any response meta entries are skipped.
    if rec["response"]["meta"]:
        return
    slots = rec["form"]["slots"]
    if not slots:
        return

    sport = False
    for slot in slots:
        if slot.get("slot") != "search_results" if hasattr(slot, "get") else True:
            continue
        value = slot.get("value")
        if value is None or "factoid" not in value:
            continue
        factoid = value["factoid"]
        if "source" not in factoid:
            continue
        source_text = str(factoid["source"])
        if "sport" in source_text:
            # sys.stderr.write works on both Python 2 and 3, unlike the
            # Python 2-only ``print >> sys.stderr`` statement.
            sys.stderr.write(source_text + "\n")
            sport = True

    yield {"query": query, "class": QLIST[query], "sport": sport}


def main():
    """Parse the command line and run the pick_src map over the log tables.

    The proxy URL for the yt client comes from ``--server``. Input is two
    seven-day runs of daily dialog-log tables, starting 2018-12-10 and
    2019-01-21; output goes to a fixed destination table.
    """
    args = HandleOptions().parse_args()
    yt.update_config({"proxy": {"url": args.server}})

    logs_prefix = "//home/voice/vins/logs/dialogs/"
    # The week starting 2019-01-14 is deliberately not part of the input.
    week_starts = (
        datetime.datetime(2018, 12, 10),
        datetime.datetime(2019, 1, 21),
    )
    # Day offset is the outer loop so the two weeks stay interleaved
    # day by day in the table list.
    tables = [
        logs_prefix + (start + datetime.timedelta(day)).strftime("%Y-%m-%d")
        for day in range(7)
        for start in week_starts
    ]

    yt.run_map(
        pick_src,
        source_table=tables,
        destination_table="//home/freshness/staff/itajn/randomtest/stationstat",
    )


# Run the job only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
