# -*- coding: utf-8 -*-

import argparse
import collections
import datetime
import json
import logging
import os
import re
import time
from collections import Counter
from getpass import getuser
from math import sqrt

import nile
import pandas as pd
import requests
from nile.api.v1 import (filters as nf, aggregators as na, extractors as ne, clusters, Record)
from scipy.stats import norm

# Row attributes copied through by the mapper and used as group-by keys.
PROPS = ["browser", "country", "distr_obj", "os", "product", "referer"]
# Per-row event counters; they are summed per group and accumulated for the plots.
EVENTTYPES = ["show", "close", "click", "trueinstall"]

def make_selecter(client, sub_client, testids):
    '''Build a mapper that keeps the rows of one client/sub_client and tags them with a testid.

    The returned generator function takes an iterable of rows and, for every
    row whose "client" and "sub_client" equal the given values and whose
    "testids" field contains at least one of ``testids``, yields a Record with
    the PROPS and EVENTTYPES fields (missing ones become ""), the first
    matching testid as "testid" and the integer "score" (-100500 when the
    score is missing or empty).
    '''
    def select(rows):
        fields = PROPS + EVENTTYPES
        for row in rows:
            if row.get('client') != client or row.get('sub_client') != sub_client:
                continue
            row_testids = row.get('testids') or ''
            matches = [candidate for candidate in testids if candidate in row_testids]
            if not matches:
                continue
            out = {field: row.get(field, "") for field in fields}
            out['testid'] = matches[0]
            out['score'] = int(row.get('score') or '-100500')
            yield Record(**out)
    return select

def event_cumsum(df):
    '''Replace the event counters with running totals, highest score first.

    The frame is sorted in place by "score" in descending order, then every
    column named in EVENTTYPES is overwritten with its cumulative sum in that
    order. The same (mutated) frame is returned.
    '''
    df.sort_values(by="score", ascending=False, inplace=True)
    df[EVENTTYPES] = df[EVENTTYPES].cumsum()
    return df

def plot_pic(df, name = "", upload = False, prop=None):
    '''Plot cumulative click/close/trueinstall counts against cumulative shows.

    One row of three axes is drawn per distinct value of ``prop`` (a single
    row when ``prop`` is not given); every axis holds one line per testid.
    Counts are accumulated from the highest score downwards (event_cumsum).
    Under the "close" axis a z-interval and a z-test for the
    trueinstall/show ratio of the first two testids (in sorted order) are
    written; this is skipped when fewer than two testids are present.

    df     -- DataFrame with "testid", "score", the four event columns and,
              when slicing, the ``prop`` column.
    name   -- figure title and, when uploading, the PNG file name stem.
    upload -- when true, save "<name>.png", upload it with load_to_jing and
              return the URL it reports.
    prop   -- optional column name to slice the figure by.

    Returns the uploaded URL, or None when ``upload`` is false.
    '''
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import seaborn as sns

    props = df[prop].unique() if prop else ' '
    nx = len(props) if prop else 1
    f, axys = plt.subplots(nx, 3, figsize=(21, 7*nx), squeeze=False)
    if nx > 2:
        plt.subplots_adjust(bottom=0, top=0.97)
    f.suptitle(name, fontsize=22)

    # Taken from the whole frame (not the slice) so that a testid keeps the
    # same colour in every row; computed once because it never changes.
    testids = df['testid'].unique()
    testids.sort()
    palette = sns.color_palette("Set2", len(testids))
    event_columns = ['show', 'click', 'close', 'trueinstall']

    for axs, one in zip(axys, props):
        df_tmp = df[df[prop] == one] if prop else df
        for line, ax in zip(['click', 'close', 'trueinstall'], axs):
            lines = []
            ax.set_title(line, fontsize=22)
            if line == 'click':
                ax.set_ylabel(one, fontsize=22)
            m = []  # shows per testid (at least 1 to keep the ratio defined)
            p = []  # trueinstall / show ratio per testid
            for testid, color in zip(testids, palette):
                df_tmp2 = df_tmp[df_tmp['testid'] == testid]
                df_tmp2 = df_tmp2[df_tmp2['click'] + df_tmp2['trueinstall'] + df_tmp2['close'] + df_tmp2['show'] > 0]
                m.append(max(df_tmp2['show'].sum(), 1))
                p.append(df_tmp2['trueinstall'].sum() * 1. / m[-1])
                df_tmp2 = df_tmp2.groupby(['score'], as_index = False).agg({column: 'sum' for column in event_columns})
                df_tmp2 = event_cumsum(df_tmp2)
                line_type, = ax.plot(df_tmp2['show'], df_tmp2[line], color=color, lw=4, label = testid)
                lines.append(line_type)

            ax.legend(handles=lines, fontsize=14, markerscale=3, loc=4)
            # The statistics compare exactly two samples; with a single
            # testid there is nothing to compare (and m[1] would not exist).
            if line == 'close' and len(m) >= 2:
                n_max = max(m[0], m[1])
                mes = "    ".join([z_interval_proportions(m[0], m[1], p[0], p[1]),
                    z_test_proportions(m[0], m[1], p[0], p[1]),
                    "if n1 and n2 = max(n1, n2), then: {}".format(z_test_proportions(n_max, n_max, p[0]*m[0]/n_max, p[1]*m[1]/n_max))])
                ax.set_xlabel(mes)
    if upload:
        filename = "{}.png".format(name)
        f.savefig(filename)
        # Release the figure: pyplot keeps every figure alive until closed,
        # and this function is called once per slice.
        plt.close(f)
        return load_to_jing(filename)
    return None

def load_to_jing(filename):
    '''Upload a local file to the jing.yandex-team.ru files API.

    The file is POSTed as multipart field "file" to the current user's
    namespace, authorised with the token from get_oauth().

    Returns the "url" field of the JSON response (None when it is absent).
    '''
    url = "https://jing.yandex-team.ru/api/v1/files/{}".format(getuser())
    headers = {"Authorization" : "OAuth {}".format(get_oauth())}
    # Open inside a context manager so the handle is closed once the
    # request has been sent (it used to be left open).
    with open(filename, 'rb') as payload:
        r = requests.post(url, headers=headers, files={'file': (filename, payload)})
    return json.loads(r.text).get('url')

def get_oauth():
    '''Return the OAuth token stored in /home/<current user>/.oauth, trailing whitespace removed.'''
    token_path = '/home/{}/.oauth'.format(getuser())
    with open(token_path) as token_file:
        token = token_file.read()
    return token.rstrip()

def post_in_ticket(messages, ticket):
    '''Post one comment with a "((url label))" line per entry of ``messages``.

    NOTE: a second post_in_ticket(ticket, title, messages) is defined later
    in this file and rebinds the name, so this version is never the one
    that gets called once the module has finished loading.
    '''
    headers = {
        "Authorization": "OAuth {}".format(get_oauth()),
        "Content-Type": "application/json; charset: UTF-8",
    }
    links = ['(({} {}))'.format(link, label) for label, link in messages.items()]
    payload = {"text": "\n".join(links), "summonees": ["zaringleb"]}
    endpoint = "https://st-api.yandex-team.ru/v2/issues/{ticket}/comments".format(ticket=ticket)
    requests.post(endpoint, headers=headers, data=json.dumps(payload))

def make_table(client, sub_client, testids, dates):
    '''Run the nile job that selects and aggregates event rows for the experiment.

    Rows of the daily tables between dates[0] and dates[-1] are filtered by
    make_selecter(client, sub_client, testids), grouped by PROPS plus
    "score" and "testid", and each EVENTTYPES column is summed. The result
    is written to "<job_root>/grouped" and the table handle is returned.
    '''
    user = getuser()
    # NOTE: the job root has no per-run suffix, so every run of the same
    # user writes to the same place.
    job_root = '//tmp/' + user + '/curve_testid'
    date_range = "".join(["{", dates[0], "..", dates[-1], "}"])
    cluster = clusters.Hahn(pool='search-research_{}'.format(user)).env(
        templates=dict(job_root=job_root, date=date_range))

    job = cluster.job()
    group_keys = PROPS + ['score', 'testid']
    sums = {event: na.sum(event) for event in EVENTTYPES}

    source = job.table("//home/atom/zaringleb/eventtype_tables/table_@date")
    selected = source.map(make_selecter(client, sub_client, testids), intensity='cpu')
    grouped = selected.groupby(*group_keys).aggregate(intensity='cpu', **sums)
    table = grouped.put('$job_root/grouped')

    job.run()
    return table

def process(client, sub_client, testids, dates, ticket=None, slice_by=None, post=None):
    '''Build the aggregated table, plot the curves and optionally post the links.

    A "total" picture is always produced; for every comma-separated column
    in ``slice_by`` an additional "by_<column>" picture is produced. All
    label/URL pairs are printed, and when ``post`` is true they are sent to
    ``ticket`` via post_in_ticket.
    '''
    df = make_table(client, sub_client, testids, dates).read().as_dataframe()
    print('len(df): {}'.format(len(df)))

    first_date, last_date = dates[0], dates[-1]
    # NOTE(review): the sliced pictures reuse this same name, so every
    # plot_pic call below saves to the same "<name>.png" file.
    plot_name = '{}_{}_{}_{}'.format(client, sub_client, first_date, last_date)
    mes = {'total': plot_pic(df, name=plot_name, upload=True)}

    #url = plot_pic(df[df['product'] != 'shortcut'], name='noshortcut_{}_{}_{}_{}'.format(client, sub_client, dates[0], dates[-1]), upload=True)
    #mes['noshortcut'] = url
    #url = plot_pic(df[df['product'] == 'shortcut'], name='shortcut_{}_{}_{}_{}'.format(client, sub_client, dates[0], dates[-1]), upload=True)
    #mes['shortcut'] = url

    slice_props = slice_by.split(",") if slice_by else []
    for prop in slice_props:
        mes['by_{}'.format(prop)] = plot_pic(df, name=plot_name, upload=True, prop=prop)

    for label, link in mes.items():
        print("{}: {}".format(label, link))

    if post:
        post_in_ticket(ticket, 'curves, dates: {}-{}'.format(first_date, last_date), mes)

def post_in_ticket(ticket, title='', messages=None):
    '''Post a comment to a tracker ticket listing the produced pictures.

    ticket   -- issue key the comment is added to.
    title    -- first line of the comment.
    messages -- mapping label -> URL; each entry becomes a "((url label))"
                line. Defaults to no entries.

    A failed request is reported on stdout; nothing is returned or raised.
    '''
    if messages is None:
        messages = {}
    headers = {
        "Authorization": "OAuth {}".format(get_oauth()),
        # Media-type parameters are written "name=value"; the previous
        # "charset: UTF-8" was not a valid charset parameter.
        "Content-Type": "application/json; charset=UTF-8",
    }
    text = "\n".join([title] + ['(({} {}))'.format(messages[key], key) for key in messages])
    ticket_data = { "text": text, "summonees" : ["zaringleb"]}
    url = "https://st-api.yandex-team.ru/v2/issues/{ticket}/comments".format(ticket=ticket)
    r = requests.post(url, headers=headers, data=json.dumps(ticket_data))
    if r.status_code >= 400:
        # Posting is best-effort, but a silent failure is easy to miss.
        print('Unable post comment to {}: HTTP {}'.format(ticket, r.status_code))

class Testid():
    '''Download, parse and store information about testid

    After construction the instance carries ``testid``, the downloaded
    ``config`` and, for the handlers REPORT, GATEWAY and ATOM, the derived
    ``client`` and ``sub_client``.
    '''
    def __init__(self, testid):
        self.testid = testid
        self.get_testid_info()

    def get_testid_info(self):
        '''Fetch the testid description and parse its first params entry.'''
        url = 'http://ab.yandex-team.ru/api/testid?type=ABT&form=full&id='
        req = requests.get(url + self.testid)
        # "params" is itself a JSON string holding a list; only its first
        # element is used.
        self.config = json.loads(req.json()[0]['params'])[0]
        self.parse_testid()

    def parse_testid(self):
        '''Set client/sub_client according to the handler named in the config.'''
        handler = self.config['HANDLER']
        if handler == 'REPORT':
            self.client = 'distr_serp'
            self.parse_report()
        elif handler in ['GATEWAY', 'ATOM']:
            # CONDITION looks like "SESSION_atom_client == '<client>'":
            # cut the prefix and the closing quote.
            self.client = self.config['CONDITION'][len("SESSION_atom_client == '"): -1]
            self.parse_context()

    def parse_context(self):
        '''Derive sub_client from the handler context.

        promolib without an "atom.params.relev" entry has the empty
        sub_client; every other case is resolved by parse_relev().
        '''
        context = self.config['CONTEXT'][self.config['HANDLER']]
        if 'atom.params.relev' not in context and self.client == 'promolib':
            self.sub_client = ""
            # Stop here: without this return the code went on to read the
            # missing 'atom.params.relev' key and raised KeyError.
            return
        self.parse_relev()

    def parse_report(self):
        '''Pick the sub_client whose "atom_<sub_client>" occurs anywhere in the config.

        The last matching entry of CLIENTS_FROM_CONF[client] wins; None when
        nothing matches.
        '''
        self.sub_client = None
        config_text = str(self.config)
        for sub_client in CLIENTS_FROM_CONF[self.client]:
            if ("atom_" + sub_client) in config_text:
                self.sub_client = sub_client

    def parse_relev(self):
        '''Pick the sub_client whose "atom_<sub_client>" occurs in the first relev entry.

        The last matching entry of CLIENTS_FROM_CONF[client] wins; None when
        nothing matches. Raises KeyError when the context has no
        "atom.params.relev" entry.
        '''
        relev = self.config['CONTEXT'][self.config['HANDLER']]['atom.params.relev'][0]
        self.sub_client = None
        for sub_client in CLIENTS_FROM_CONF[self.client]:
            if ("atom_" + sub_client) in relev:
                self.sub_client = sub_client

class Experiment():
    '''Contain information about experiment

    Built from a task description with the keys "ticket", "authorities",
    "author" and "testids"; process_exp() then resolves the testids, the
    client, the sub_client, the host and the experiment dates.
    '''
    def __init__(self, task_info):
        self.ticket = task_info['ticket']
        self.authorities = task_info['authorities']
        self.author = task_info['author']
        self.str_testids = task_info['testids']

    def process_exp(self):
        '''Download every testid and derive client, sub_client, host and dates.'''
        self.testids = [Testid(str(testid)) for testid in self.str_testids]
        self.client = max([one.client for one in self.testids])
        self.sub_client = max([one.sub_client for one in self.testids])
        self.host = self.get_host_by_client()
        self.get_ticket_info()

    def get_host_by_client(self):
        '''Look up "distr_obj" for (client, sub_client) in clients_hosts.csv.

        Returns "_total_" when the lookup fails for any reason.
        '''
        # NOTE(review): PATH is not defined in the visible part of this file;
        # if it is missing, the NameError is swallowed by the handler below
        # and "_total_" is returned.
        try:
            df_clients_hosts = pd.read_csv(PATH + 'clients_hosts.csv', sep=',')
            hosts = df_clients_hosts.loc[(df_clients_hosts['client'] == self.client)&(df_clients_hosts['sub_client'] == self.sub_client), 'distr_obj']
            return hosts.values[0]
        except Exception:
            print('Unable get host by client')
            return "_total_"

    def get_ticket_info(self):
        '''Fetch the ticket and store the dates found in its description in self.dates.'''
        oauth = get_oauth()
        headers = {"Authorization": "OAuth {}".format(oauth)}
        r = requests.get("https://st-api.yandex-team.ru/v2/issues/{ticket}".format(ticket=self.ticket), headers=headers)
        q = json.loads(r.content)
        # A ticket without a description simply has no dates to parse.
        self.dates = self.get_dates(q.get('description') or u'')

    def get_dates(self, description):
        '''Extract the "dd.mm.yyyy-dd.mm.yyyy" range that follows the dates label.

        description -- ticket description, as text or as UTF-8 encoded bytes.

        Returns a (begin_date, end_date) tuple of datetime.date, or None
        (after printing a message) when no valid range is found.
        '''
        pattern = u'Даты проведения: ' + r'(\d\d\.\d\d\.\d\d\d\d)-(\d\d\.\d\d\.\d\d\d\d)'
        try:
            if isinstance(description, bytes):
                # Match on text so that pattern and subject have the same type.
                description = description.decode('utf-8')
            found = re.search(pattern, description)
            if found is not None:
                begin_date, end_date = [
                    datetime.datetime.strptime(part, '%d.%m.%Y').date()
                    for part in found.groups()]
                return begin_date, end_date
        except (TypeError, ValueError):
            # TypeError: description is not a string; ValueError: undecodable
            # bytes or an impossible calendar date. Reported below.
            pass
        print('Unable parse dates')
        return None

    def ticket_in_past_tickets(self):
        '''Tell whether this ticket is already listed in tickets.txt.'''
        with open(PATH + 'tickets.txt') as f:
            tickets = [ticket.strip() for ticket in f]
        return self.ticket in tickets

    def put_in_past_tickets(self):
        '''Append this ticket to tickets.txt.'''
        with open(PATH + 'tickets.txt', 'a') as f:
            f.write(self.ticket + "\n")

def get_task(ticket):
    '''Fetch the AB task for ``ticket`` and keep only the fields Experiment needs.

    Returns a dict with the keys "ticket", "testids", "authorities" and
    "author" taken from the JSON response.
    '''
    response = requests.get('http://ab.yandex-team.ru/api/task/{}'.format(ticket))
    info = response.json()
    wanted = ('ticket', 'testids', 'authorities', 'author')
    return dict((field, info[field]) for field in wanted)

def get_clients_from_conf():
    '''Download the config and map every lower-cased client to its lower-cased sub clients.

    A client without a "sub" section maps to [""]. The result is a
    defaultdict(list), so unknown clients read as an empty list.
    '''
    conf = requests.get('http://pecheny.haze.yandex.net:8049/viewconfig').json()
    mapping = collections.defaultdict(list)
    for client_name, client_conf in conf['clients'].items():
        key = client_name.lower()
        if 'sub' not in client_conf:
            mapping[key].append("")
            continue
        for sub_name in client_conf['sub']:
            mapping[key].append(sub_name.lower())
    return mapping

def parse_arguments():
    '''Parse and validate the command line.

    Valid invocations give --dates and either --ticket with --parse, or
    --client, --sub_client (may be empty) and --testids. --post and --parse
    both require --ticket.

    Returns the argparse namespace, or None after printing a hint when the
    combination of arguments is not usable.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--log_to_stdout', action='store_true')
    parser.add_argument('--client', action='store')
    parser.add_argument('--sub_client', action='store')
    parser.add_argument('--testids', action='store')
    parser.add_argument('--ticket', action='store')
    parser.add_argument('--post', action='store_true')
    parser.add_argument('--parse', action='store_true')
    parser.add_argument('--slice_by', action='store')
    parser.add_argument('--dates', action='store')
    args = parser.parse_args()

    if not args.ticket and (args.parse or args.post):
        print("specify ticket, example: --ticket EXPERIMENTS-9109")
        return None
    if not (args.ticket and args.parse):
        # Nothing will be derived from the ticket, so everything must be
        # given explicitly; an empty sub_client ('') is a legal value.
        if not (args.client and args.sub_client is not None and args.testids):
            print("specify client, sub_client and testids or ticket and parse")
            return None
    if not args.dates:
        print("specify dates, example: --dates 2016-12-17,2016-12-19")
        # Dates are mandatory; returning the namespace here used to lead to
        # a crash later when the missing value was split.
        return None
    return args

def z_test_proportions(n1, n2, p1, p2):
    '''Normal-approximation probability that the larger proportion is not really larger.

    n1, n2 -- sample sizes.
    p1, p2 -- observed proportions in the respective samples.

    The difference (larger - smaller) is modelled as normal with the
    unpooled variance p1*(1-p1)/n1 + p2*(1-p2)/n2, and the mass at or below
    zero is reported. Returns the formatted message.
    '''
    if p1 < p2:
        # Reorder the samples as a whole: each proportion must stay paired
        # with its own sample size in the variance term.
        n1, n2, p1, p2 = n2, n1, p2, p1
    t = norm.cdf(0, loc=(p1-p2), scale=sqrt(p1*(1-p1)/n1 + p2*(1-p2)/n2))
    return  "P(max(p1, p2)==min(p1, p2)) = {:.2%}".format(t)

def z_interval_proportions(n1, n2, p1, p2, alpha=0.05):
    '''Two-sided normal-approximation confidence interval for p1 - p2.

    n1, n2 -- sample sizes.
    p1, p2 -- observed proportions in the respective samples.
    alpha  -- total probability left outside the interval (both tails).

    Returns the formatted message with the interval bounds.
    '''
    # A symmetric two-sided interval leaves alpha/2 in each tail, so the
    # critical value is the (1 - alpha/2) quantile.
    z = norm.ppf(1 - alpha / 2.)*sqrt(p1*(1-p1)/n1 + p2*(1-p2)/n2)
    return "Confidence interval, for alpha={}, is: ({:.7f}; {:.7f})".format(alpha, (p1-p2) - z, (p1-p2) + z)

def main():
    '''Command-line entry point: resolve the experiment parameters and build the curves.

    With --parse the client, sub_client and testids are derived from the
    ticket; explicitly given --client, --sub_client and --testids override
    the derived values. Does nothing when the arguments are not usable.
    '''
    global CLIENTS_FROM_CONF

    args = parse_arguments()
    if args is None or not args.dates:
        # parse_arguments has already printed what is missing.
        return

    client = sub_client = testids = None
    if args.parse:
        # Testid parsing reads this module-level mapping.
        CLIENTS_FROM_CONF = get_clients_from_conf()
        task = get_task(args.ticket)
        exp = Experiment(task)
        exp.process_exp()
        client = exp.client
        sub_client = exp.sub_client
        testids = [one.testid for one in exp.testids]
    if args.client:
        client = args.client
    # An explicit empty sub_client ('') is a legal value and must not be
    # dropped by a truthiness check.
    if args.sub_client is not None:
        sub_client = args.sub_client
    if args.testids:
        testids = args.testids.split(",")
    print("client: {}\nsub_client: {}\ntestids: {}".format(client, sub_client, testids))
    process(client, sub_client, testids, args.dates.split(","), ticket=args.ticket, slice_by=args.slice_by, post=args.post)

# Run the command-line workflow only when executed as a script, not on import.
if __name__ == '__main__':
    main()
