import json
import urllib
import requests
from bs4 import BeautifulSoup
import codecs
import argparse
import logging
from serpparser.tagger import SerpTagger
from serpparser.serp_parser_common import SerpMetadata

# Module-level logger named after this module. The code visible here does not
# attach handlers or set a level, so output depends on the caller's logging setup.
logger = logging.getLogger(__name__)


def get_tags(tagger, url, meta):
    """Download the page at *url* and return the tags the tagger finds in it.

    The downloaded markup is parsed with BeautifulSoup's ``html.parser`` and
    passed, together with *meta*, to ``tagger.parse_serp_tags``; that call's
    result is returned unchanged.

    This is a best-effort helper: any failure while downloading, parsing or
    tagging is logged (with the traceback) and an empty dict is returned, so a
    single bad page does not abort the caller.

    :param tagger: object providing ``parse_serp_tags(soup, meta)``.
    :param url: address of the HTML page to download.
    :param meta: metadata object forwarded to the tagger as-is.
    :return: whatever ``tagger.parse_serp_tags`` returns, or ``{}`` on failure.
    """
    try:
        # ``urllib.urlopen`` exists only on Python 2; on Python 3 the
        # equivalent function lives in ``urllib.request``.  Without this
        # fallback every call fails with AttributeError on Python 3 and the
        # function silently returns {} for every page.
        opener = getattr(urllib, "urlopen", None)
        if opener is None:
            from urllib.request import urlopen as opener

        response = opener(url)
        try:
            raw_html = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        soup = BeautifulSoup(raw_html, "html.parser")
        return tagger.parse_serp_tags(soup, meta)
    except Exception:
        # Lazy %-formatting: the logging module does the interpolation, so a
        # non-ASCII url cannot raise from inside this handler (str.format with
        # a unicode argument can on Python 2).
        logger.warning("can't parse html %s", url, exc_info=True)
        return {}


def argument_parser():
    """Parse the command line and return the resulting namespace.

    Options are declared in a table of ``(flags, add_argument keywords)`` and
    registered in that order.  Only ``-ui`` is mandatory; the system ids,
    system types and output file have defaults, the rest default to ``None``.

    :return: ``argparse.Namespace`` with attributes ``data``, ``bt_json``,
        ``ui``, ``sbs_ticket``, ``yandex_sys_id``, ``google_sys_id``,
        ``sys_type_1``, ``sys_type_2`` and ``full_table``.
    """
    option_table = (
        (("-d",), dict(dest="data", help="data in json format")),
        (("-bt",), dict(dest="bt_json", help="path to BT results json file")),
        (("-ui",), dict(dest="ui", help="touch or desktop", required=True)),
        (("-t",), dict(dest="sbs_ticket", help="id of sbs ticket")),
        (("--y_sys_id",), dict(dest="yandex_sys_id", default="0")),
        (("--g_sys_id",), dict(dest="google_sys_id", default="1")),
        (("--sys_type_1",), dict(dest="sys_type_1", default="yandex")),
        (("--sys_type_2",), dict(dest="sys_type_2", default="google")),
        (("--out_file1",), dict(dest="full_table", default="result.tsv")),
    )

    cli = argparse.ArgumentParser(description='Get parameters')
    for flags, keywords in option_table:
        cli.add_argument(*flags, **keywords)
    return cli.parse_args()


def _load_bt_scores(bt_path, sys_id_1, sys_id_2):
    """Read per-query Bradley-Terry scores for the (sys_id_1, sys_id_2) pair.

    The file is expected to hold, under
    ``[0]['sbs_metric_results'][0]['sbs_metric_values']['separate_query_results']``,
    a list of entries keyed by ``sys-id-1`` / ``sys-id-2``, each with a
    ``query-results`` list.  Records in that list are consumed in consecutive
    pairs (presumably one record per system -- confirm against the producer):
    the score stored for the first record's ``query`` is its ``exp-score``
    divided by the sum of both records' ``exp-score``.

    :param bt_path: path to the BT results json file, or ``None``.
    :param sys_id_1: id matched against ``sys-id-1``.
    :param sys_id_2: id matched against ``sys-id-2``.
    :return: dict ``query -> score``; empty when no path was given, the file
        cannot be read, or it has no entry for the requested pair.
    """
    if bt_path is None:
        return {}

    try:
        with open(bt_path, "r") as bt_file:
            pair_results = json.load(bt_file)[0]['sbs_metric_results'][0]['sbs_metric_values'][
                'separate_query_results']
    except Exception:
        # BT scores are optional: carry on without them, but leave a trace
        # instead of hiding the failure.
        logger.warning("can't load BT results from %s", bt_path, exc_info=True)
        return {}

    # Stay empty when the pair is absent; otherwise the pair records
    # themselves would be mistaken for query records below.
    query_results = []
    for pair in pair_results:
        if pair["sys-id-1"] == sys_id_1 and pair["sys-id-2"] == sys_id_2:
            query_results = pair['query-results']
            break

    scores = {}
    # Walk records two at a time; a trailing unpaired record is ignored.
    for first, second in zip(query_results[0::2], query_results[1::2]):
        total = first['exp-score'] + second['exp-score']
        if total:
            # float() keeps the ratio fractional for integer scores on Python 2.
            scores[first['query']] = float(first['exp-score']) / total
    return scores


def _load_experiment(data_path, ticket):
    """Return ``(experiment data, key holding a system's HTML url)``.

    Data comes from the local json file *data_path* when given (url key
    ``html-url``), otherwise from the SBS API for *ticket* (url key
    ``html_url``).

    :raises ValueError: when neither a data file nor a ticket is provided.
    """
    if data_path is not None:
        with open(data_path, "r") as data_file:
            return json.load(data_file), "html-url"

    if ticket is None:
        raise ValueError("either -d (data file) or -t (SBS ticket) must be provided")

    # NOTE(review): verify=False disables TLS certificate verification and no
    # timeout is set; both are kept from the original -- confirm they are intended.
    response = requests.get("https://sbs.yandex-team.ru/api/experiment/{}".format(ticket),
                            headers={"Content-Type": "application/json"},
                            verify=False
                            )
    # Fail with a clear HTTP error instead of a confusing json/KeyError later.
    response.raise_for_status()
    return json.loads(response.content), "html_url"


def _count_wins(comparisons, sys_id_1, sys_id_2):
    """Sum the wins of both systems over comparisons between them, in either left/right order."""
    wins_1 = 0
    wins_2 = 0
    for comparison in comparisons:
        left = comparison['left']
        right = comparison['right']
        if left['sys_id'] == sys_id_1 and right['sys_id'] == sys_id_2:
            wins_1 += left['wins']
            wins_2 += right['wins']
        if right['sys_id'] == sys_id_1 and left['sys_id'] == sys_id_2:
            wins_1 += right['wins']
            wins_2 += left['wins']
    return wins_1, wins_2


def _system_url_and_tags(tagger, systems, sys_id, html_name, meta):
    """Return ``(html url, tags)`` for *sys_id*, or ``("", {})`` when no url is recorded."""
    if sys_id in systems and html_name in systems[sys_id]:
        url = systems[sys_id][html_name]
        return url, get_tags(tagger, url, meta)
    return "", {}


def _has_top_wizard(wizards, positions):
    """Tell whether any wizard other than "misspell" has a position below 5."""
    return any(
        wizard != "misspell" and pos is not None and pos < 5
        for wizard, pos in zip(wizards, positions)
    )


def main():
    """Export per-query SBS results and wizard tags to a TSV file.

    For every query of the experiment (read from the ``-d`` json file or
    fetched from the SBS API by ticket) one row is written to the output file
    with: query text, region, wins of both systems, the first system's share
    of wins, its Bradley-Terry score (0.0 when unknown), both HTML urls, the
    position of every wizard for both systems, and a ``snippets`` column that
    is ``0`` when no wizard except "misspell" has a position below 5 on either
    side and ``None`` otherwise.
    """
    args = argument_parser()
    ui = args.ui
    sys_id_1 = args.yandex_sys_id
    sys_id_2 = args.google_sys_id

    yandex_meta = SerpMetadata("yandex", ui)
    google_meta = SerpMetadata("google", ui)
    tagger = SerpTagger()

    # Each side uses its default metadata unless its type is explicitly swapped.
    sys_meta_1 = google_meta if args.sys_type_1 == "google" else yandex_meta
    sys_meta_2 = yandex_meta if args.sys_type_2 == "yandex" else google_meta

    bt_dict = _load_bt_scores(args.bt_json, sys_id_1, sys_id_2)
    data, html_name = _load_experiment(args.data, args.sbs_ticket)

    wizards = ["fact", "geo", "video", "entity", "images", "bno", "misspell",
               "collections", "market", "chats", "snippet_image", "recommend"]
    columns = ["query", "region", "yandex_wins", "google_wins", "majority_vote", "bradley_terry", "yandex_html",
               "google_html"] + ['yandex_' + w for w in wizards] + ['google_' + w for w in wizards] + ["snippets"]

    with codecs.open(args.full_table, "w", encoding="utf-8") as f:
        f.write("\t".join(columns) + '\n')

        for query in data['results']['queries']:
            yandex_wins, google_wins = _count_wins(query['comparisons'], sys_id_1, sys_id_2)
            total_wins = yandex_wins + google_wins
            # Share of wins of the first system; left unnormalized when there are no votes.
            mean_yandex_wins = float(yandex_wins)
            if total_wins != 0:
                mean_yandex_wins /= total_wins

            text = query['query']['text']
            region = query['query']['region']
            bradley_terry = bt_dict.get(text, 0.0)

            systems = query['systems']
            yandex_url, yandex_tags = _system_url_and_tags(tagger, systems, sys_id_1, html_name, sys_meta_1)
            google_url, google_tags = _system_url_and_tags(tagger, systems, sys_id_2, html_name, sys_meta_2)

            yandex_positions = [yandex_tags.get("{}_{}".format(args.sys_type_1, w)) for w in wizards]
            google_positions = [google_tags.get("{}_{}".format(args.sys_type_2, w)) for w in wizards]
            is_snippet = not (_has_top_wizard(wizards, yandex_positions)
                              or _has_top_wizard(wizards, google_positions))

            cells = [text, region, yandex_wins, google_wins, mean_yandex_wins,
                     bradley_terry, yandex_url, google_url]
            cells.extend(yandex_positions)
            cells.extend(google_positions)
            cells.append(0 if is_snippet else None)
            # Missing positions are rendered as the literal "None", as before.
            f.write(u"\t".join(u"{}".format(cell) for cell in cells) + u"\n")

            logger.info(u"Processed query: %s", text)


# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
