import requests
import json
import argparse

import urllib
from bs4 import BeautifulSoup
import numpy as np
import tqdm
import scipy.stats


def get_achievment_number(html):
    """Return how many verified-result markers a page contains.

    Args:
        html: Despite the name, this is the URL of the page to download
            (it is passed straight to ``urlopen``), not HTML markup.

    Returns:
        The number of elements found by BeautifulSoup whose ``class``
        attribute matches ``organic__verified``.
    """
    # Imported locally so the block works on both interpreter lines:
    # ``urllib.urlopen`` only exists on Python 2, while Python 3 moved it to
    # ``urllib.request``.
    import contextlib
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    # ``closing`` guarantees the connection is released even if ``read``
    # fails; the Python 2 response object is not a context manager itself.
    with contextlib.closing(urlopen(html)) as response:
        page = response.read()

    soup = BeautifulSoup(page, 'html.parser')
    return len(soup.find_all(attrs={'class': 'organic__verified'}))


def process_one_ticket(data):
    """Split the queries of one experiment into low/high achievement groups.

    For every query in ``data['results']['queries']`` the win rate of system
    ``'0'`` against system ``'1'`` is computed over all comparisons between
    the two, whichever side each system was shown on. The page of system
    ``'0'`` is then inspected with ``get_achievment_number``: queries with at
    least five verified results go to the "high" group, the rest to the
    "low" group.

    Side effects:
        Appends one ``<query text>\\t<region>`` line per processed query to
        ``high_achievment_count.tsv`` or ``low_achievment_count.tsv`` in the
        current working directory (UTF-8 encoded).

    Args:
        data: Parsed experiment response; must provide
            ``data['results']['queries']``, each query carrying
            ``comparisons``, ``systems`` and ``query`` entries.

    Returns:
        A ``(low_res, high_res)`` tuple of lists with the win rates of the
        queries assigned to each group.
    """
    # Local import keeps this block self-contained. ``io.open`` gives the
    # same text-mode, explicit-encoding behaviour on Python 2 and Python 3.
    import io

    low_res = []
    high_res = []
    with io.open("low_achievment_count.tsv", 'a', encoding='utf-8') as lf, \
            io.open("high_achievment_count.tsv", 'a', encoding='utf-8') as hf:
        for query in tqdm.tqdm(data['results']['queries']):
            wins_count = 0
            total_count = 0
            for comparison in query['comparisons']:
                left = comparison['left']
                right = comparison['right']
                sides = (left['sys_id'], right['sys_id'])
                if sides == ('0', '1'):
                    wins_count += left['wins']
                elif sides == ('1', '0'):
                    wins_count += right['wins']
                else:
                    # Comparison does not involve the 0-vs-1 pair.
                    continue
                total_count += left['wins'] + right['wins']

            if total_count == 0:
                # No votes between systems '0' and '1': the win rate is
                # undefined, so skip the query instead of dividing by zero.
                continue

            win_rate = 1.0 * wins_count / total_count
            count = get_achievment_number(query['systems']['0']['html_url'])

            # Write text, not pre-encoded bytes: formatting bytes into a str
            # on Python 3 would emit the ``b'...'`` repr into the file.
            # Assumes the query text is a unicode string, as JSON parsing
            # produces.
            line = u"{}\t{}\n".format(query['query']['text'],
                                      query['query']['region'])
            if count >= 5:
                high_res.append(win_rate)
                hf.write(line)
            else:
                low_res.append(win_rate)
                lf.write(line)
    return low_res, high_res


def main():
    """Download the listed experiments and compare low/high achievement win rates.

    Command-line arguments:
        -i  Path to a text file with one experiment ticket id per line.
        -t  OAuth token sent in the ``Authorization`` header.

    The two TSV files appended to by ``process_one_ticket`` are truncated
    first, then every ticket is fetched and processed. Finally the mean win
    rate of each group and the result of Welch's t-test
    (``equal_var=False``) between the groups are printed.
    """
    parser = argparse.ArgumentParser(description='Get parameters')
    parser.add_argument('-i', dest='input', type=str, required=True)
    parser.add_argument('-t', dest='token', type=str, required=True)
    args = parser.parse_args()

    # Skip blank lines (e.g. a trailing newline) so that no request is made
    # for an empty ticket id.
    with open(args.input, 'r') as f:
        sbs_tickets = [line.strip() for line in f if line.strip()]

    # Start from empty output files; process_one_ticket opens them in
    # append mode for every ticket.
    for path in ('low_achievment_count.tsv', 'high_achievment_count.tsv'):
        with open(path, 'w'):
            pass

    low_res, high_res = [], []
    for ticket in sbs_tickets:
        # NOTE(review): verify=False disables TLS certificate checking while
        # an OAuth token is being sent. Left unchanged because the host may
        # rely on an internal CA; prefer passing the CA bundle path instead.
        r = requests.get("https://sbs.yandex-team.ru/api/experiment/{}".format(ticket),
                         headers={"Content-Type": "application/json",
                                  "Authorization": "OAuth {}".format(args.token)},
                         verify=False
                         )
        # Fail with a clear HTTP error instead of an obscure KeyError or
        # JSON error when the API rejects the request.
        r.raise_for_status()
        data = r.json()
        cur_l_r, cur_h_r = process_one_ticket(data)
        low_res += cur_l_r
        high_res += cur_h_r

    print("Mean for low achievement: {}".format(np.mean(low_res)))
    print("Mean for high achievement: {}".format(np.mean(high_res)))
    print("Hypothesis testing: {}".format(scipy.stats.ttest_ind(low_res, high_res, equal_var=False)))


if __name__ == "__main__":
    main()


