#!/usr/bin/python
# -*- coding: UTF-8 -*-

import json
import nile
import pandas as pd
import shlex
import subprocess
import urlparse
from collections import defaultdict, namedtuple
from nile.api.v1 import (
    clusters,
    Record
)


class Atomhelper(object):
    """Match atom front log requests against accepted Toloka tasks.

    Typical flow: ``filter_humanized()`` extracts the Toloka-originated
    requests of one day into a DataFrame, ``fill_stages()`` records for every
    accepted task whether the expected banner was returned, and
    ``calc_metrics()`` turns that into request/answer counters.

    The code is written so that it runs unchanged on Python 2 (the file's
    native dialect) and also parses on Python 3: ``print`` is always called
    with a single parenthesised argument, which behaves identically in both.
    """

    # SECURITY: this OAuth token is committed to source control. It is kept
    # only as a backward-compatible default; it should be rotated and passed
    # in through the ``token`` constructor argument instead.
    _DEFAULT_TOKEN = 'AVImKQMAAAO3VDAg54V4QjeI8eaNZJSQfA'

    def __init__(self, date, product, url_pattern,
                 username='chikachoff', token=None):
        """Store the run parameters.

        :param date: day to process; used in table and file names.
        :param product: only requests whose referer carries this ``product``
            value are taken into account by ``fill_stages``.
        :param url_pattern: stored as-is; not used inside this class.
        :param username: suffix of the cluster pool name.
        :param token: cluster token; falls back to the legacy built-in one.
        """
        self.username = username
        self.token = token if token is not None else self._DEFAULT_TOKEN
        self.date = date
        self.product = product
        self.url_pattern = url_pattern
        self.humanized_table = "//logs/atomfront-reqans-log/1d/{}".format(self.date)
        self.df = None
        # ``rest`` payloads of accepted tasks for which no banner was shown.
        self.noshows = []
        # stage -> {task id -> bool "banner was shown"}
        self.stages = defaultdict(dict)

    @staticmethod
    def _filter(records):
        """Mapper: keep ``distr_portal`` records that came from Toloka.

        A record is kept when its ``rest.requestUrl`` contains the
        URL-encoded ``yandex.ru/?source=toloka&amp;date`` referer marker.
        Records without ``rest`` or ``requestUrl`` are dropped instead of
        failing the whole job.
        """
        marker = 'yandex.ru%2F%3Fsource%3Dtoloka%26amp%3Bdate'
        for rec in records:
            if rec.get('client', '') != 'distr_portal':
                continue
            rest = rec.get('rest') or {}
            if marker in (rest.get('requestUrl') or ''):
                yield rec

    def filter_humanized(self):
        """
        Get atom requests with 'source=toloka' in referer and
        put them in dataframe

        Runs a cluster job over ``self.humanized_table``, stores the filtered
        records in ``//home/atom/chikachoff/atom_log_toloka_<date>``, reads
        them back and returns them as a DataFrame (also kept in ``self.df``).
        """
        print('Filtering atom-log, putting to DataFrame...')
        cluster = clusters.Hahn(
            pool='search-research_{}'.format(self.username),
            token=self.token
        ).env(
            templates=dict(
                job_root='home/atom/humanized'
            )
        )
        job = cluster.job()
        filtered = job.table(self.humanized_table).map(self._filter)
        result = filtered.put(
            "//home/atom/chikachoff/atom_log_toloka_{}".format(self.date)
        )
        job.run()
        self.df = pd.concat([result.read().as_dataframe()])
        print('Filter atom-log done.')
        return self.df

    @staticmethod
    def get_referer(rest):
        """Parse rest and return referer dict

        The ``referer`` query parameter of ``rest['requestUrl']`` is itself
        parsed as a query string and flattened to ``{name: first value}``.
        Returns an empty dict when the URL or its ``referer`` parameter is
        missing, which is what ``fill_stages`` checks for.
        """
        # Local import keeps the method usable on both Python 2 and 3.
        try:
            from urlparse import parse_qs, urlsplit
        except ImportError:
            from urllib.parse import parse_qs, urlsplit

        request_url = rest.get('requestUrl') if rest else None
        if not request_url:
            return {}
        referer_values = parse_qs(urlsplit(request_url).query).get('referer')
        if not referer_values:
            return {}
        # The referer uses HTML-escaped separators ("&amp;"). Older parse_qs
        # versions hid that by also splitting on ";"; newer ones split on "&"
        # only, so normalise the separator explicitly.
        referer_string = referer_values[0].replace('&amp;', '&')
        return dict(
            (key, values[0])
            for key, values in parse_qs(referer_string).items()
        )

    @staticmethod
    def get_stage(referer):
        """Return the stage number: the last character of ``referer['id']``.

        NOTE(review): only a single trailing digit is read, so stages above 9
        would be truncated - confirm the id format with the producer.
        """
        return int(referer['id'][-1])

    @staticmethod
    def get_id(referer):
        """Return the task id: the part of ``referer['id']`` before ``_``."""
        return int(referer['id'].split('_')[0])

    @staticmethod
    def get_toloka_answer(rest):
        """Return the answer named ``toloka`` from ``rest['answers']``.

        When several answers match, the last one wins. Returns an empty dict
        when there is no such answer or no ``answers`` at all.
        """
        toloka_answer = {}
        for answer in rest.get('answers') or []:
            if answer.get('name') == 'toloka':
                toloka_answer = answer
        return toloka_answer

    @staticmethod
    def get_show(answer, bannerid):
        """Tell whether ``answer['docs']`` contains a doc with ``bannerid``."""
        if not answer:
            return False
        return any(
            doc.get('bannerId') == bannerid
            for doc in answer.get('docs') or []
        )

    def fill_stages(self, toloka_stages, bannerid, atom_df):
        """Fill ``self.stages`` from the filtered atom log.

        :param toloka_stages: iterable of dicts with ``id`` (stage number) and
            ``indices`` (task ids accepted on that stage).
        :param bannerid: banner id that counts as a "show".
        :param atom_df: DataFrame with a ``rest`` column, as returned by
            ``filter_humanized``.
        :return: ``self.stages`` - ``{stage: {task id: shown}}``.

        Rows of other products, rows without a referer and rows whose task is
        not accepted on their stage are ignored. Rows that cannot be parsed
        are skipped and counted instead of dropping into a debugger.
        Accepted tasks without a show are appended to ``self.noshows``.
        """
        print('Starting fill atom stages...')
        self.df = atom_df

        # Build the accepted-id lookup once instead of once per log row.
        accepted = {}
        for toloka_stage in toloka_stages:
            accepted.setdefault(toloka_stage.get('id'), set()).update(
                toloka_stage.get('indices') or ()
            )

        # Iterate the column positionally: label-based ``.loc[idx]`` breaks
        # as soon as the frame's index is not a unique 0..n-1 range.
        if atom_df is None or len(atom_df) == 0:
            rests = []
        else:
            rests = atom_df['rest']

        skipped = 0
        for rest in rests:
            if not hasattr(rest, 'get'):
                # Missing payload (None / NaN) - nothing to parse.
                skipped += 1
                continue
            referer = self.get_referer(rest)
            if not referer:
                continue
            if referer.get('product') != self.product:
                continue
            try:
                stage = self.get_stage(referer)
                _id = self.get_id(referer)
            except (KeyError, IndexError, ValueError):
                # No ``id`` in the referer, or it is not "<task>_<stage>".
                skipped += 1
                continue

            has_show = self.get_show(self.get_toloka_answer(rest), bannerid)
            # Check if this id is in accepted toloka results
            if _id not in accepted.get(stage, ()):
                continue
            self.stages[stage][_id] = has_show
            # Log no-shows
            if not has_show:
                self.noshows.append(rest)

        if skipped:
            print('Skipped {} malformed atom-log rows'.format(skipped))
        print('Atom Stages filled')
        return self.stages

    def write_debug_log_for_noshows(self):
        """Dump ``self.noshows`` as JSON lines to ``no_shows_<date>``.

        The file is created in the current working directory; the printed
        message names a fixed host path and is kept verbatim.
        """
        print('Writing debug log for noshows ...')
        with open('no_shows_{}'.format(self.date), 'w') as fp:
            for elem in self.noshows:
                fp.write(json.dumps(elem) + '\n')
        print('''
        Debug log for no shows written to:
        /home/chikachoff/atom/recall/no_shows_{} at pecheny.haze.yandex.net
        '''.format(self.date))

    @staticmethod
    def _answer_rate(answers_count, requests_count):
        """Return answers/requests rounded to 2 digits (0.0 if no requests)."""
        if not requests_count:
            return 0.0
        # float() forces true division; Python 2 "/" on ints truncates.
        return round(answers_count / float(requests_count), 2)

    def calc_metrics(self, data):
        """Write per-stage request/answer counters into ``data[0]``.

        For stages 0, 1 and 2 the keys ``atom_req_<stage>`` and
        ``atom_ans_<stage>`` are set; every stage is also printed together
        with its answer rate.

        :param data: list whose first element is a dict to update.
        :return: the same ``data`` object.
        """
        print('Calculating atom log metrics')
        if len(self.stages) < 1:
            print('Got no data in self.stages.')
        print('Atom stages:')
        print(self.stages)
        for stage_idx, stage_data in self.stages.items():
            atom_answers_count = sum(1 for shown in stage_data.values() if shown)
            atom_requests_count = len(stage_data)
            if stage_idx in (0, 1, 2):
                data[0]['atom_req_%d' % stage_idx] = atom_requests_count
                data[0]['atom_ans_%d' % stage_idx] = atom_answers_count
            print('Stage: {stage_idx} \t \
                   RequestCount: {atom_requests_count} \t \
                   AnswerCount: {atom_answers_count} \t \
                   AnswerRate: {AnswerRate}'.format(
                stage_idx=stage_idx,
                atom_requests_count=atom_requests_count,
                atom_answers_count=atom_answers_count,
                AnswerRate=self._answer_rate(atom_answers_count,
                                             atom_requests_count)
            ))
        print('Atomlog metrics:\tOK')
        return data
