#!/usr/bin/python
# -*- coding: UTF-8 -*-

from __future__ import division

import pandas as pd
import urlparse
from collections import defaultdict
from nile.api.v1 import (
    clusters
)
import yt.wrapper as yt

# Module-level yt.wrapper configuration, applied as a side effect of import.
# NOTE(review): the access token below is hard-coded in source control; it
# should be rotated and supplied from outside the repository (environment or
# a secrets store) -- TODO confirm the preferred mechanism with the owners.
yt.config['token'] = 'AVImKQMAAAO3VDAg54V4QjeI8eaNZJSQfA'
# Point yt.wrapper at the hahn.yt.yandex.net proxy.
yt.config.set_proxy('hahn.yt.yandex.net')


class Atomhelper(object):
    """Collect per-stage atom request/answer statistics for toloka tasks.

    On construction the atom log table for ``date`` is filtered down to
    toloka requests and loaded into ``self.df``.  ``fill_stages`` /
    ``fill_stages_source`` then populate ``self.stages`` and
    ``calc_metrics`` / ``calc_metrics_source`` turn it into flat metrics.

    ``print`` is always called with a single parenthesized argument so the
    output is identical with and without ``print_function``.
    """

    def __init__(self, date, product, url_pattern, delay=False):
        """Store the run parameters and load the filtered atom log.

        :param date: date string used in the log and output table paths.
        :param product: product name matched against the referer's
            ``product`` parameter.
        :param url_pattern: stored on the instance; not used by this class.
        :param delay: when true, a separate ``..._delay`` output table is used.
        """
        self.username = 'chikachoff'
        self.date = date
        self.product = product
        self.url_pattern = url_pattern
        self.humanized_table = "//logs/atomfront-reqans-log/1d/{}".format(
            self.date)
        self.noshows = []
        self.noshow_table = '//home/atom/chikachoff/noshow/{0}/{1}'.format(
            self.product, self.date)
        self.delay = delay
        self.df = self.filter_humanized()
        self.stages = defaultdict(dict)

    @staticmethod
    def _filter(records):
        """Yield ``distr_portal`` records whose requestUrl carries the
        url-encoded ``source=toloka`` marker."""
        for rec in records:
            if rec.get('client', '') != 'distr_portal':
                continue
            if 'yandex.ru%2F%3Fsource%3Dtoloka%26amp%3Bdate' in \
                    rec.get('rest', {}).get('requestUrl', ''):
                yield rec

    def filter_humanized(self):
        """
        Get atom requests with 'source=toloka' in referer and
        put them in dataframe

        The filtered table is reused when it already exists; otherwise it is
        built from ``self.humanized_table`` with a map job.  The result is
        stored in ``self.df`` and returned.
        """
        print('Filtering atom-log, putting to DataFrame...')
        out_table = '//home/atom/chikachoff/atom_log_toloka_{}'.format(
            self.date)
        if self.delay:
            # str.join takes ONE iterable; passing two positional arguments
            # raised TypeError for every delayed run.
            out_table = '_'.join((out_table, 'delay'))
        cluster = clusters.Hahn(
            pool='search-research_{}'.format(self.username),
            # Reuse the token configured at module level rather than keeping
            # a second copy of the literal here.
            token=yt.config['token']
        ).env(
            templates=dict(
                job_root='home/atom/humanized'
            )
        )
        if yt.exists(out_table):
            result = cluster.job().table(out_table)
        else:
            job = cluster.job()
            atomlog = job.table(self.humanized_table)
            result = atomlog.map(self._filter).put(out_table)
            job.run()
        self.df = pd.concat([result.read().as_dataframe()])
        print('Filter atom-log done.')
        return self.df

    @staticmethod
    def get_referer(rest):
        """Parse rest and return referer dict

        The ``referer`` query parameter of ``rest['requestUrl']`` is itself
        parsed as a query string and the first value of every key is kept.
        Returns ``{}`` when the url or its ``referer`` parameter is missing,
        which is what the ``if not referer`` guards in the callers expect.
        """
        request_url = rest.get('requestUrl') or ''
        query = urlparse.parse_qs(urlparse.urlsplit(request_url).query)
        referer_values = query.get('referer')
        if not referer_values:
            return {}
        parsed = urlparse.parse_qs(referer_values[0])
        return {key: values[0] for key, values in parsed.items()}

    @staticmethod
    def get_stage(referer):
        """Return the stage number: the last character of ``referer['id']``."""
        return int(referer['id'][-1])

    @staticmethod
    def get_id(referer):
        """Return the task id: the part of ``referer['id']`` before the
        first underscore."""
        return int(referer['id'].split('_')[0])

    @staticmethod
    def get_toloka_answer(rest):
        """Return the last answer named ``'toloka'`` in ``rest['answers']``,
        or ``{}`` when there is none (or no answers at all)."""
        toloka_answer = {}
        for answer in rest.get('answers') or ():
            if answer.get('name') == 'toloka':
                toloka_answer = answer
        return toloka_answer

    @staticmethod
    def _matching_banner_ids(answer, bannerid):
        """Return the ``bannerId`` of every doc in ``answer`` containing
        ``bannerid``; docs without a ``bannerId`` are skipped."""
        if not answer:
            return []
        matches = []
        for doc in answer.get('docs') or ():
            doc_banner = doc.get('bannerId')
            if doc_banner is not None and bannerid in doc_banner:
                matches.append(doc_banner)
        return matches

    @staticmethod
    def get_show(answer, bannerid):
        """Return True when any doc of ``answer`` has a ``bannerId``
        containing ``bannerid``."""
        return bool(Atomhelper._matching_banner_ids(answer, bannerid))

    def write_debug_log_for_noshows(self):
        """Write the collected no-show records to ``self.noshow_table``,
        creating the table first when it does not exist."""
        print('Writing debug log for noshows ...')
        if not yt.exists(self.noshow_table):
            yt.create_table(self.noshow_table, recursive=True)
        yt.write_table(self.noshow_table, self.noshows)
        print('...done')

    @staticmethod
    def _accepted_ids_by_stage(toloka_stages):
        """Map each toloka stage id to the set of its accepted task ids.

        Built once per call so the per-row membership test is a set lookup
        instead of rebuilding a list for every log row.
        """
        accepted = {}
        for toloka_stage in toloka_stages:
            accepted.setdefault(toloka_stage.get('id'), set()).update(
                toloka_stage['indices'])
        return accepted

    def _iter_product_requests(self):
        """Yield ``(rest, stage, task_id)`` for every row of ``self.df``
        whose referer parses and belongs to ``self.product``."""
        if 'rest' not in self.df.columns:
            return
        # Iterate the column directly: no assumption about the frame index.
        for rest in self.df['rest']:
            referer = self.get_referer(rest)
            if not referer:
                continue
            stage = self.get_stage(referer)
            _id = self.get_id(referer)
            if referer.get('product') != self.product:
                continue
            yield rest, stage, _id

    def fill_stages(self, toloka_stages, bannerid):
        """Fill ``self.stages[stage][task_id]`` with a shown/not-shown flag.

        :param toloka_stages: iterable of dicts with ``'id'`` (stage number)
            and ``'indices'`` (accepted task ids).
        :param bannerid: substring looked up in each doc's ``bannerId``.
        :returns: ``self.stages``.  Accepted tasks without a show are also
            appended to ``self.noshows``.
        """
        print('Filling atom stages...')
        accepted = self._accepted_ids_by_stage(toloka_stages)
        for rest, stage, _id in self._iter_product_requests():
            toloka_answer = self.get_toloka_answer(rest)
            has_show = self.get_show(toloka_answer, bannerid)
            # Check if this id is in accepted toloka results
            if _id not in accepted.get(stage, ()):
                continue
            self.stages[stage][_id] = has_show
            # Log no-shows
            if not has_show:
                self.noshows.append(rest)
        print('...atom stages filled')
        return self.stages

    def calc_metrics(self, data):
        """Write per-stage request/answer counts into ``data[0]``.

        For stages 0..3 the keys ``atom_req_<stage>`` and
        ``atom_ans_<stage>`` are set; a summary line is printed for every
        stage.  Returns ``data``.
        """
        print('Calculating atom log metrics...')
        if len(self.stages) < 1:
            print('Got no data in self.stages.')
        print('Atom stages: {}'.format(self.stages))
        for stage_idx, stage_data in self.stages.items():
            atom_answers_count = sum(
                1 for shown in stage_data.values() if shown)
            atom_requests_count = len(stage_data)
            if stage_idx in (0, 1, 2, 3):
                # The '{}' placeholder was missing, so every stage used to
                # overwrite the same 'atom_req_' / 'atom_ans_' key.
                # data[0]['atom_req_0'] ... data[0]['atom_req_3']
                data[0]['atom_req_{}'.format(stage_idx)] = atom_requests_count
                # data[0]['atom_ans_0'] ... data[0]['atom_ans_3']
                data[0]['atom_ans_{}'.format(stage_idx)] = atom_answers_count
            # An empty stage would otherwise divide by zero.
            if atom_requests_count:
                rate = round(atom_answers_count / atom_requests_count, 4)
            else:
                rate = 0.0
            print('Stage:{0}\tRequests:{1}\tAnswers:{2}\tRate:{3}'.format(
                stage_idx,
                atom_requests_count,
                atom_answers_count,
                rate))
        print('...atom log metrics ok')
        return data

    def calc_delay_metrics(self, data):
        """Run ``calc_metrics`` on ``data`` (a single dict) and return only
        its ``atom_req_3`` / ``atom_ans_3`` entries."""
        data = self.calc_metrics([data])[0]
        return {k: v for k, v in data.items()
                if k in ('atom_req_3', 'atom_ans_3')}

    def source_from_bannerid(self, bannerid):
        """Return the banner id's last ``_``-separated part when it is one
        of apps/cookie/products, otherwise ``'all'``."""
        source = bannerid.split('_')[-1]
        if source not in ('apps', 'cookie', 'products'):
            source = 'all'
        return source

    def get_show_source(self, answer, bannerid):
        """Return a per-source shown flag dict for ``answer``.

        Every doc whose ``bannerId`` contains ``bannerid`` sets the flag of
        the source derived from that doc's ``bannerId``.
        """
        show = {'all': False, 'apps': False, 'cookie': False,
                'products': False}
        for doc_banner in self._matching_banner_ids(answer, bannerid):
            show[self.source_from_bannerid(doc_banner)] = True
        return show

    def fill_stages_source(self, toloka_stages, bannerid):
        '''
        Same as fill_stages, but stores a per-source flag dict per task.

        returns:
        self.stages = {
            0: {0: {'all': True, 'apps': False, 'cookie': False, 'products': True},
                ...
                100: {'all': True, 'apps': True, 'cookie': False, 'products': True}},
            1: {0: {'all': True, 'apps': False, 'cookie': True, 'products': False},
                ...
                100: {'all': True, 'apps': True, 'cookie': False, 'products': True}}
            }
        }
        '''
        print('Filling atom _source_ stages...')
        accepted = self._accepted_ids_by_stage(toloka_stages)
        for rest, stage, _id in self._iter_product_requests():
            toloka_answer = self.get_toloka_answer(rest)
            show = self.get_show_source(toloka_answer, bannerid)
            # Check if this id is in accepted toloka results
            if _id not in accepted.get(stage, ()):
                continue
            self.stages[stage][_id] = show
            # Log no-shows
            if not show.get('all'):
                self.noshows.append(rest)
        print('...atom stages filled')
        return self.stages

    def calc_metrics_source(self, data):
        """Aggregate per-source show flags into ``data[0]``.

        For every stage this sets ``atom_req_<stage>``, ``atom_ans_<stage>``
        (the ``all`` source) and ``atom_ans_<source>_<stage>`` for the other
        sources.  Returns ``data``.
        """
        atom_aggregated = {}
        for stage, ans_stats in self.stages.items():
            stage_metrics = defaultdict(int)
            stage_metrics['atom_req'] = len(ans_stats)
            for show in ans_stats.values():
                for source, shown in show.items():
                    # 'all' contributes to the plain 'atom_ans' counter.
                    suffix = '_{}'.format(source).replace('_all', '')
                    stage_metrics['atom_ans{}'.format(suffix)] += int(shown)
            atom_aggregated[stage] = stage_metrics
        flat_metrics = {
            '{}_{}'.format(metric, stage): value
            for stage, stage_metrics in atom_aggregated.items()
            for metric, value in stage_metrics.items()
        }
        data[0].update(flat_metrics)
        return data

    def calc_delay_metrics_source(self, data):
        """Run ``calc_metrics_source`` on ``data`` (a single dict) and return
        only the entries whose key ends with ``_3``."""
        data = self.calc_metrics_source([data])[0]
        return {k: v for k, v in data.items()
                if k.endswith('_3')}
