#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This script is executed automatically via
# http://jenkinstat.haze.yandex.net:8080/view/proj.switch/job/proj.switch.ru.bandits-monitoring-ext.daily/
#

from __future__ import division
from itertools import groupby
from operator import itemgetter
from scipy.stats import norm
import datetime
import dateutil.parser
import math
import os
import pandas as pd
import re
import requests
import sys

from email_helper import send_email

sys.path.append('../../stat_reports/extention_dashboard')
from reports_config import get_metrica_oauth_token

# Bunker project names to monitor; main() processes them in this order.
PROJECTS = [
    'promo-switch-ru',
    'promo-switch-ru-browser',
]
# Report recipients: whitespace-separated addresses from the SEND_TO
# environment variable (empty list when the variable is unset).
SEND_TO = os.environ.get('SEND_TO', '').split()
# Significance level passed to wilson_ci (0.01 -> 99% confidence interval).
ALPHA = 0.01
# True when JENKINS_HOME is unset or empty, i.e. the script is presumably
# being run by hand rather than by the Jenkins job.
DEV = not os.environ.get('JENKINS_HOME')
# Projects for which download_stats sends an empty filter list instead of
# the filters configured in bunker.
DISABLE_FILTERS = [
    # 'promo-switch-ru-browser'
]

if DEV:
    # These modules are only needed for local runs, so import them lazily.
    import getpass
    import requests_cache

    # Local runs mail the report to the current OS user only.
    user = getpass.getuser()
    SEND_TO = [user + '@yandex-team.ru']
    # Cache HTTP responses for an hour to avoid re-downloading while iterating,
    # and purge entries that have already expired.
    requests_cache.core.install_cache(expire_after=datetime.timedelta(hours=1))
    requests_cache.core.remove_expired_responses()


def wilson_ci(m, n, alpha):
    """
    Wilson score confidence interval for a binomial proportion.

    m     -- number of successes
    n     -- number of trials (n == 0 raises ZeroDivisionError)
    alpha -- significance level; the interval has confidence 1 - alpha

    Returns a (lower_bound, upper_bound) tuple.
    math.sqrt raises ValueError if the term under the root is negative,
    which can happen when m > n.
    """
    z = norm.ppf(1 - alpha / 2)
    z_sq = z * z
    p = float(m) / n

    shifted_center = p + z_sq / (2 * n)
    radius = z * math.sqrt((p * (1 - p) + z_sq / (4 * n)) / n)
    scale = 1 + z_sq / n

    lower = (shifted_center - radius) / scale
    upper = (shifted_center + radius) / scale
    return lower, upper


def cat_bunker_node(node):
    """
    Fetch a bunker node via the `cat` endpoint and return its JSON body.

    node -- node path, sent as the `node` query parameter

    An HTTP error status is raised by `raise_for_status`.
    """
    query = {'node': node}
    response = requests.get(
        'http://bunker-api-dot.yandex.net/v1/cat',
        params=query,
    )
    response.raise_for_status()
    return response.json()


def transform_record(r):
    """
    Transform record acquired from metrika.
    Returns None if record has invalid format.

    The record is read as:
      r['dimensions'][0..3]['name'] -- date, start URL path, utm source,
                                       utm medium (the order requested by
                                       download_stats)
      r['metrics'][0..1]            -- visits, clicks

    The path must look like '<landing>/<variant digits>/'; both utm values
    must be non-empty. Anything else yields None.
    """
    try:
        dimensions = r['dimensions']
        path = dimensions[1]['name']

        m = re.match(r'(.+)/(\d+)/$', path)
        if not m:
            return None

        landing, variant = m.groups()
        utm_source = dimensions[2]['name']
        utm_medium = dimensions[3]['name']
        if not utm_source or not utm_medium:
            return None

        return {
            'date': dimensions[0]['name'],
            'landing': landing,
            'utm_source': utm_source,
            'utm_medium': utm_medium,
            'variant': variant,
            'visits': int(r['metrics'][0]),
            'clicks': int(r['metrics'][1]),
        }
    except (TypeError, KeyError, IndexError, ValueError):
        # TypeError:  a value is None (e.g. path is None, metric is None)
        # KeyError:   'dimensions' / 'metrics' / 'name' is missing
        # IndexError: fewer dimensions or metrics than expected
        # ValueError: a metric cannot be converted to int
        # All of these mean "invalid format", which the contract maps to None.
        return None


def download_metrika_analytics2(project):
    """
    Get analytics parameters from bunker, with a special case for
    'promo-switch-ru-browser'.

    For that project the 'bandit' section is read from the
    '<project>/settings' node and stored under the 'bandit' key of the
    metrika config. Every other project is delegated to
    download_metrika_analytics unchanged.
    """
    if project == 'promo-switch-ru-browser':
        metrika = cat_bunker_node(project + '/analytics/default')['metrika']
        settings = cat_bunker_node(project + '/settings')
        metrika['bandit'] = settings['bandit']
        return metrika

    return download_metrika_analytics(project)


def download_metrika_analytics(project):
    """
    Return the 'metrika' section of the '<project>/analytics/default'
    bunker node.
    """
    analytics = cat_bunker_node(project + '/analytics/default')
    return analytics['metrika']


def download_stats(project, date1, date2):
    """
    Download statistics from metrika using config from bunker.

    project      -- bunker project name
    date1, date2 -- start and end dates, passed to the API as-is

    Returns (records, url):
      records -- list of dicts produced by transform_record; rows it
                 rejects (None) are dropped
      url     -- the final request URL, including the query string

    An HTTP error status is raised by `raise_for_status`.
    """
    metrika_config = download_metrika_analytics2(project)
    ids = metrika_config['id']
    goal_install = 'ym:s:goal{}reaches'.format(
        metrika_config['goals']['install']
    )
    filters = metrika_config['bandit']['filters']
    if project in DISABLE_FILTERS:
        filters = []

    r = requests.get(
        'https://api-metrika.yandex.ru/stat/v1/data',
        params={
            'date1': [date1],
            'date2': [date2],
            'dimensions':
                'ym:s:date,ym:s:startURLPath,ym:s:UTMSource,ym:s:UTMMedium',
            # Do not use plain text token.
            # NOTE(review): requests omits parameters whose value is None, so
            # no token is sent at all. This module imports
            # get_metrica_oauth_token but never calls it -- confirm whether
            # it is meant to be used here. Also note that anything placed in
            # params ends up in r.url, which is returned to the caller.
            'oauth_token': None,
            'ids': ids,
            'metrics': ['ym:s:visits', goal_install],
            'filters': filters,
            'accuracy': ['1'],
            'limit': 100000,
        }
    )
    r.raise_for_status()
    metrika_records = r.json()['data']

    # Build a real list: the caller sorts the records, and filter()/map()
    # only return lists on Python 2.
    transformed = (transform_record(row) for row in metrika_records)
    records = [rec for rec in transformed if rec]
    return records, r.url


def nan2none(x):
    """
    Map pandas "missing" scalars (as detected by pd.isnull) to None;
    return any other value unchanged.
    """
    return None if pd.isnull(x) else x


def get_dates(days=14, date_max=None):
    """
    Build the reporting date range.

    days     -- distance in days between the two returned dates
    date_max -- end date as a string understood by dateutil.parser.parse;
                when None, yesterday (local time) is used

    Returns (date_min, date_max) formatted as 'YYYY-MM-DD' strings.
    """
    if date_max is not None:
        last_day = dateutil.parser.parse(date_max)
    else:
        one_day = datetime.timedelta(days=1)
        last_day = datetime.datetime.now().date() - one_day

    first_day = last_day - datetime.timedelta(days=days)
    fmt = '%Y-%m-%d'
    return first_day.strftime(fmt), last_day.strftime(fmt)


def download_landing_names(project):
    """
    List the children of the '<project>/landings' bunker node via the
    `ls` endpoint and return the decoded JSON body.

    An HTTP error status is raised by `raise_for_status`.
    """
    query = {'node': project + '/landings'}
    response = requests.get(
        'http://bunker-api-dot.yandex.net/v1/ls',
        params=query,
    )
    response.raise_for_status()
    return response.json()


def get_list_of_landings(project):
    """
    Return (name, fullName) pairs for the landings of a project.

    Entries are skipped when 'isDeleted' is truthy or when the key is
    absent (a missing flag is treated as deleted).
    """
    return [
        (entry['name'], entry['fullName'])
        for entry in download_landing_names(project)
        if not entry.get('isDeleted', True)
    ]


def get_variant_weights(node):
    """
    Return the weights of a landing's variants, keyed by the variant's
    position in the node's 'variants' list converted to a string
    ('0', '1', ...).
    """
    variants = cat_bunker_node(node)['variants']
    return {
        str(position): variant['weight']
        for position, variant in enumerate(variants)
    }


def get_arms_weights(project):
    """
    Get weights for each arm of each landing.

    Result format:
    result['fines']['0'] -> int

    The outer key is the landing's short name; the variant weights are
    fetched from the landing's full node name.
    """
    return {
        name: get_variant_weights(full_name)
        for name, full_name in get_list_of_landings(project)
    }


def process_group(x):
    """
    Mark the leader and the outsiders inside one group of rows.

    Adds three columns to `x` in place and returns it:
      is_leader    -- 1 for rows whose lower_bound equals the group maximum
      is_outsider  -- 1 for rows whose upper_bound is below that maximum
      has_outsider -- True on every row if the group has any outsider
    """
    best_lower = x['lower_bound'].max()
    outsider_flags = (x['upper_bound'] < best_lower).astype(int)

    x['is_leader'] = (x['lower_bound'] == best_lower).astype(int)
    x['is_outsider'] = outsider_flags
    x['has_outsider'] = outsider_flags.any()
    return x


def leader_anywhere(x):
    """
    Add an `is_leader_anywhere` column to `x` in place and return it:
    1 on every row if any row of the group has a truthy `is_leader`,
    otherwise 0.
    """
    group_has_leader = x['is_leader'].any()
    x['is_leader_anywhere'] = int(group_has_leader)
    return x


def df_to_excel_with_custom_header(df, filename, header):
    """
    Saves dataframe to excel file with header at cell A1

    df       -- dataframe to write; its column row starts on the second
                sheet row (startrow=1), the index is not written
    filename -- path of the .xlsx file to create
    header   -- text written in bold into cell A1
    """
    # add_format() and worksheets() below are xlsxwriter workbook methods,
    # so ask for that engine explicitly instead of relying on the default.
    # The context manager saves and closes the file even if writing fails
    # part-way, and avoids ExcelWriter.save(), which newer pandas removed.
    with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
        df.to_excel(writer, startrow=1, index=False)
        bold = writer.book.add_format({'bold': True})
        sheet = writer.book.worksheets()[0]
        sheet.write('A1', header, bold)


def render_brief(df_html, project, now, metrika_url):
    """
    Render email body as html.

    Reads the template from 'brief.htt' (relative to the current working
    directory) and fills its `str.format` placeholders df_html, project,
    now and metrika_url with the arguments of the same name.
    """
    fields = {
        'df_html': df_html,
        'project': project,
        'now': now,
        'metrika_url': metrika_url,
    }
    with open('brief.htt') as template_file:
        template = template_file.read()
    return template.format(**fields)


def process_project(project):
    """
    Main processing function.

    # 0
    - Download arms weights from bunker
    - Download clicks and visits from metrika.
    # 1
    - Sum all clicks and visits for all days.
    - Calculate confidence intervals
    # 2
    - Keep records with nonzero weights.
    - Group them by (landing, source_medium) and
      find leader and outsiders
    - Keep only groups that have outsiders
    # 3
    - Report only (landing, variant) that is not a leader
      in any (landing, source_medium) group.
    # 4
    - Make full report (full.xlsx) and brief report (email contents)
    - Send emails

    Returns None. Nothing is written or sent when there are no records or
    no group has an outsider; the email is sent only when at least one row
    is to be reported.
    """
    # 0
    dates = get_dates()
    weights = get_arms_weights(project)
    records, metrika_url = download_stats(project, *dates)

    # 1
    # sorted() works for any iterable, so this step does not rely on
    # download_stats returning a list. groupby below needs the records
    # ordered by its key.
    records = sorted(records, key=itemgetter(
        'landing', 'utm_source', 'utm_medium', 'variant', 'date',
    ))
    if not records:
        return

    ctr_records = []
    for key, group in groupby(records, key=itemgetter(
        'landing', 'utm_source', 'utm_medium', 'variant',
    )):
        landing, utm_source, utm_medium, variant = key

        rows = list(group)
        clicks = sum(row['clicks'] for row in rows)
        visits = sum(row['visits'] for row in rows)

        if visits:
            ctr = clicks / visits
            try:
                bounds = wilson_ci(clicks, visits, ALPHA)
            except ValueError:
                bounds = None, None
        else:
            # No visits: the rate and its interval are undefined. Keep the
            # row with empty values instead of dividing by zero.
            ctr = None
            bounds = None, None

        # Weights are keyed by the last path component of the landing.
        short_name = landing.split('/')[-1]
        weight = weights.get(short_name, {}).get(variant)

        ctr_records.append((
            landing,
            utm_source,
            utm_medium,
            variant,
            clicks,
            visits,
            ctr,
            bounds[0],
            bounds[1],
            weight,
        ))

    df = pd.DataFrame(
        ctr_records,
        columns=[
            'landing',
            'utm_source',
            'utm_medium',
            'variant',
            'clicks',
            'visits',
            'ctr',
            'lower_bound',
            'upper_bound',
            'weight',
        ]
    )
    # 2
    # Rows whose weight is unknown (None) are kept; only explicit zeros go.
    df = df.loc[df.weight != 0]
    if df.empty:
        return
    # Group keys are passed as a list: a tuple may be read as one single key.
    # group_keys=False keeps the original row index on the result.
    df = df.groupby(
        by=['landing', 'utm_source', 'utm_medium'], group_keys=False,
    ).apply(process_group)
    df = df.loc[df.has_outsider]
    if df.empty:
        return
    df = df.drop(['weight', 'has_outsider'], axis=1)
    # 3
    df = df.groupby(
        by=['landing', 'variant'], group_keys=False,
    ).apply(leader_anywhere)
    # Both flags are 0/1 integers; compare explicitly rather than relying on
    # bitwise negation of integers.
    df['to_be_reported'] = (
        (df.is_outsider == 1) & (df.is_leader_anywhere == 0)
    ).astype(int)

    # 4
    header = 'dates: {}..{}, alpha: {}'.format(dates[0], dates[1], ALPHA)
    df_to_excel_with_custom_header(df, 'full.xlsx', header)

    brief = df.loc[df.to_be_reported == 1]
    if brief.empty:
        return

    df_html = brief.to_html(
        columns=[
            'landing',
            'variant',
            'utm_source',
            'utm_medium',
            'clicks',
            'visits',
            'ctr',
            'lower_bound',
            'upper_bound',
        ],
        index=False,
    )

    html = render_brief(
        df_html,
        project,
        datetime.datetime.now().strftime('%Y-%m-%d'),
        metrika_url,
    )

    send_email(
        'bandits_monitoring',
        SEND_TO,
        'Bandits Monitoring',
        html,
        ['full.xlsx'],
    )


def main():
    """
    Run process_project for every entry of PROJECTS, in order.
    An exception raised for one project stops the remaining ones.
    """
    for project_name in PROJECTS:
        process_project(project_name)


if __name__ == '__main__':
    main()
