#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import sys
import os
import codecs
import argparse
from mrdef import defaults
from mapreducelib import MapReduce, Record
from collections import defaultdict, Counter
from putils import utf8ify, deutf8ify, make_logger, parsevars
import base64
import re
import pdb
import psutil
import scarab.main
import datetime
import requests
import subprocess
import shutil
import urllib
import json
# Python 2/3 compatibility shim: when the ``basestring`` builtin is missing
# (NameError), alias it to ``str`` so isinstance checks below keep working.
try:
    basestring
except NameError:
    basestring = str


def get_alltables():
    """Return the sorted names of the atomfront answer-log tables.

    Tables are listed via ``MapReduce.getTablesInfo`` under
    ``statbox/atomfront/atomfront-answer-log/*``; only names that consist of
    exactly four '/'-separated components are kept.
    """
    infos = MapReduce.getTablesInfo(
        'statbox/atomfront/atomfront-answer-log/*'
    )
    names = []
    for info in infos:
        if len(info.name.split('/')) == 4:
            names.append(info.name)
    names.sort()
    return names

# Base URL of the atom-admin API; get_candidates() appends
# 'version/production' to it.
URL = 'http://atom-admin.n.yandex-team.ru/atom/api/v1/'
# NOTE(review): this module-level name is not referenced anywhere in the
# visible source -- presumably a leftover; confirm before removing.
i = 0


def get_token():
    """Read the API token from ``.atom_token`` in the current directory.

    Trailing whitespace (including the final newline) is stripped.
    """
    with codecs.open('.atom_token', 'r') as token_file:
        contents = token_file.read()
    return contents.rstrip()


def safediv(x, y):
    """Return ``x / y``, or 0 when the division raises ZeroDivisionError."""
    try:
        quotient = x / y
    except ZeroDivisionError:
        quotient = 0
    return quotient

# HTTP headers passed to the atom-admin request in get_candidates(); main()
# rebinds this to a dict carrying the 'Authorization: Token ...' header.
HEADERS = {}


def get_candidates(raw=False):
    """Download the production candidate lists and index them by candidate id.

    Steps, as performed below:

    1. Ask the atom-admin API (``URL + 'version/production'``) for the
       ``task_id`` of the production version.
    2. Ask Sandbox for the ``PERS_ATOM_CANDIDATES`` resource of that task and
       turn its first HTTP link into an rsync link (the ``:<port>`` part is
       replaced by ``/sandbox-tasks``).
    3. ``rsync`` the resource into the current directory and read every
       ``*.json`` file found under ``atom_candidates``.
    4. Make the downloaded tree writable and delete it.

    :param raw: when true, return the concatenation of all candidate objects
        from all JSON files instead of the id -> product mapping.
    :returns: ``dict`` mapping the last '/'-separated component of each
        candidate's ``internal-url`` to its ``__product`` (both default to
        ``''`` when missing), or a ``list`` of candidate objects if ``raw``.
    """
    req = requests.get(
        URL + 'version/production', headers=HEADERS)
    task_id = json.loads(req.content.decode('utf8'))['task_id']
    sb_req = requests.get(
        'https://sandbox.yandex-team.ru:443/api/v1.0/resource?'
        'type=PERS_ATOM_CANDIDATES&task_id={}&limit=1'
        .format(task_id))
    sb_obj = json.loads(sb_req.content.decode('utf8'))
    rslink = sb_obj['items'][0]['http']['links'][0].replace(
        'http://', 'rsync://')
    rslink = re.sub(r':[0-9]+', '/sandbox-tasks', rslink)
    # The rsync exit status is not checked; if the download failed, the
    # cleanup below raises because 'atom_candidates' does not exist.
    subprocess.call(['rsync', '-r', rslink, '.'])
    result = {}
    allcands = []
    try:
        for dirpath, _dirnames, filenames in os.walk('atom_candidates'):
            for filename in filenames:
                if not filename.endswith('.json'):
                    continue
                # Close each file deterministically instead of relying on
                # garbage collection of an anonymous file object.
                with open(os.path.join(dirpath, filename)) as json_file:
                    obj = json.load(json_file)
                if raw:
                    allcands.extend(obj)
                for cand in obj:
                    candidate_id = cand.get('internal-url', '').split('/')[-1]
                    result[candidate_id] = cand.get('__product', '')
    finally:
        # Always remove the downloaded tree, even when a JSON file is broken.
        # chmod first so rmtree is able to delete the synced files.
        subprocess.call(['chmod', '-R', '+w', 'atom_candidates'])
        shutil.rmtree('atom_candidates')
    if raw:
        return allcands
    return result

# class AtomfrontMap(object):
#     def __init__(self, timestamp, products):
#         self.timestamp = timestamp
#         self.products = products

#     def __call__(self, rec):
#         rec = deutf8ify(rec)
#         data = rec.value.split('\\t')[-1]
#         decoded = base64.b64decode(data)
#         parsed = scarab.main.deserialize_event_from_str(decoded)
#         client = parsed.client
#         try:
#             for answer in parsed.answer:
#                 subclient = answer.name
#                 if len(answer.docs) > 0:
#                     score = round(answer.docs[0].score, 5)
#                     if answer.docs[0].banner_id in self.products:
#                         product = self.products[answer.docs[0].banner_id]
#                         yield utf8ify(Record(
#                             '{}/{}_{}'.format(client, subclient, product),
#                             str(self.timestamp),
#                             str(score)
#                         ))
#         except AttributeError:
#             pass


class AtomfrontMap(object):
    """Map step: turn raw answer-log records into per-client counter records.

    Every input record value is split into lines; the last field of each line
    is base64-decoded and deserialized with
    ``scarab.main.deserialize_event_from_str``. For every answer of every
    event the mapper emits ``show``, ``warnings``, ``warnings_<prefix>`` and
    ``rerank_success`` records keyed by ``<client>/<answer name>``, and, when
    known, by that key suffixed with ``_<product>`` or ``_<list name>``.
    """

    def __init__(self, timestamp, products):
        """Store the table timestamp and the banner id -> product mapping."""
        self.timestamp = timestamp
        self.products = products

    def _record(self, key, subkey, value):
        """Build a single utf8-encoded output record."""
        return utf8ify(Record(key, subkey, value))

    def _fanout(self, clientkey, listname, subkey, value):
        """Yield the record for ``clientkey`` and, if ``listname`` is set,
        the same record for the ``<clientkey>_<listname>`` key."""
        yield self._record(clientkey, subkey, value)
        if listname:
            yield self._record(clientkey + '_' + listname, subkey, value)

    def _listname(self, answer):
        """Return the answer's 'stored-candidate-list' value, or ''."""
        try:
            return answer.aux_info.props.json.get(
                'stored-candidate-list', ''
            )
        except Exception:
            # Best effort: any malformed or missing aux info means "no list".
            return ''

    def _answer_records(self, client, answer):
        """Yield all output records for one answer, in a fixed order:
        show, warnings_<prefix>, warnings, rerank_success."""
        clientkey = '{}/{}'.format(client, answer.name)
        listname = self._listname(answer)

        # --- show / noshow -------------------------------------------------
        # NOTE(review): the try also covers parsevars() and record building,
        # so an IndexError/AttributeError raised after the first 'show'='1'
        # record was yielded additionally emits the 'show'='0' records.
        # This mirrors the long-standing behaviour; confirm it is intended.
        try:
            obj = answer.docs[0].source_aux.json
            yield self._record(clientkey, 'show', '1')
            product = parsevars(obj['counters']['show']['vars'])['product']
            yield self._record(clientkey + '_' + product, 'show', '1')
            if listname:
                yield self._record(clientkey + '_' + listname, 'show', '1')
        except (IndexError, AttributeError):
            for record in self._fanout(clientkey, listname, 'show', '0'):
                yield record
            try:
                candidate = answer.aux_info.candidate_infos[0]
                product = self.products[candidate.banner_id]
                yield self._record(clientkey + '_' + product, 'show', '0')
            except Exception:
                # Best effort: the per-product record is skipped when the
                # first candidate or its product is unknown. Catching
                # Exception (not a bare except) lets GeneratorExit raised at
                # the yield above propagate, so closing the generator works.
                pass

        # --- warnings ------------------------------------------------------
        warnings = len(answer.warnings)
        if warnings > 0:
            # Only the first five characters of the first warning are used
            # as the subkey suffix.
            subkey = 'warnings_{}'.format(answer.warnings[0][:5])
            for record in self._fanout(clientkey, listname, subkey, '1'):
                yield record
        for record in self._fanout(clientkey, listname, 'warnings',
                                   str(warnings)):
            yield record

        # --- rerank --------------------------------------------------------
        # Identity checks on purpose: anything that is neither the True nor
        # the False singleton (e.g. None) is reported as '0'.
        rerank_success = answer.rerank_success
        if rerank_success is True:
            rerank_value = '1'
        elif rerank_success is False:
            rerank_value = '-1'
        else:
            rerank_value = '0'
        for record in self._fanout(clientkey, listname, 'rerank_success',
                                   rerank_value):
            yield record

    def __call__(self, rec):
        """Yield counter records for every answer found in ``rec``.

        The whole record is decoded and deserialized before the first output
        record is produced.
        """
        rec = deutf8ify(rec)
        # NOTE(review): '\\t' is the two-character sequence backslash + 't',
        # not a tab -- presumably fields arrive with escaped tabs; confirm.
        data = [line.split('\\t')[-1] for line in rec.value.split('\n')]
        decoded = [base64.b64decode(chunk) for chunk in data if chunk]
        events = [scarab.main.deserialize_event_from_str(blob)
                  for blob in decoded if blob]
        for event in events:
            client = event.client
            if not hasattr(event, 'answer'):
                continue
            for answer in event.answer:
                for record in self._answer_records(client, answer):
                    yield record


def atomfront_reduce(key, recs):
    """Reduce step: aggregate the mapper's counter records for one key.

    The records are tallied into ``subkey -> Counter(int(value))`` and the
    following records are yielded (values rendered with ``format``):

    * if any 'show' record exists: ``show`` (value 1 count), ``noshow``
      (value 0 count) and ``showshare`` = show / (show + noshow);
    * if any 'rerank_success' record exists: ``rerank`` (1), ``norerank``
      (0), ``rerankfalse`` (-1) and the shares of 1 and of -1 among the
      records whose value is 1 or -1;
    * if any 'warnings' record exists: one ``warnings_<n>`` record per
      distinct warning count ``n`` and ``warningsrate``, the share of
      records with ``n > 0``;
    * for every other subkey starting with 'warnings': the count of value 1.

    All ratios go through ``safediv`` so an empty denominator yields 0
    instead of raising, consistently for every share.
    """
    key = deutf8ify(key)
    counts = defaultdict(Counter)
    for rec in recs:
        rec = deutf8ify(rec)
        counts[rec.subkey][int(rec.value)] += 1

    def emit(subkey, value):
        # One output record for this key; the value is stringified.
        return utf8ify(Record(key, subkey, format(value)))

    if 'show' in counts:
        shown = counts['show'][1]
        not_shown = counts['show'][0]
        yield emit('show', shown)
        yield emit('noshow', not_shown)
        yield emit('showshare', safediv(shown, not_shown + shown))

    if 'rerank_success' in counts:
        rerank = counts['rerank_success']
        succeeded, skipped, failed = rerank[1], rerank[0], rerank[-1]
        # Shares are taken over attempted reranks only (value 1 or -1).
        attempted = failed + succeeded
        yield emit('rerank', succeeded)
        yield emit('norerank', skipped)
        yield emit('rerankfalse', failed)
        yield emit('rerankshare', safediv(succeeded, attempted))
        yield emit('rerankfalseshare', safediv(failed, attempted))

    if 'warnings' in counts:
        histogram = counts['warnings']
        for n_warnings in histogram:
            yield emit('warnings_{}'.format(n_warnings),
                       histogram[n_warnings])
        with_warnings = sum(histogram[n] for n in histogram if n > 0)
        yield emit('warningsrate',
                   safediv(with_warnings, sum(histogram.values())))

    # Per-warning-type subkeys produced by the mapper ('warnings_<prefix>').
    typed = [subkey for subkey in counts
             if subkey.startswith('warnings') and subkey != 'warnings']
    for subkey in typed:
        yield emit(subkey, counts[subkey][1])


def reqlist(*listnames):
    """Build the URL-quoted ``qd_struct_keys`` JSON for the given list names.

    The JSON document has the shape
    ``{"atom-candidates": {"<listname>": null, ...}}`` and is percent-encoded
    with ``quote``.

    Works on both Python 2 and Python 3: ``quote`` lives in ``urllib`` on
    Python 2 and in ``urllib.parse`` on Python 3, and only Python 2 returns a
    byte string that needs decoding.

    :param listnames: candidate list names to request.
    :returns: the percent-encoded JSON as a text string.
    """
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3
    payload = json.dumps({
        'atom-candidates': {listname: None for listname in listnames},
    })
    quoted = quote(payload.encode('utf8'))
    if isinstance(quoted, bytes):
        # Python 2: quote() returned a byte string; hand back text.
        quoted = quoted.decode('utf8')
    return quoted


# URL template of the querysearch-atom request; the '{}' placeholder takes
# the qd_struct_keys value. In the visible source only the commented-out
# get_unique_candidates() below references `skeleton` and `badlists`.
skeleton = ('http://querysearch-atom.search.yandex.net/yandsearch'
            '?ms=querysearch:json:3'
            '&rearr=qd_struct_keys={}'
            '&waitall=da'
            '&timeout=1000000')
# Candidate list names that get_unique_candidates() skipped.
badlists = {'news', 'service_block_ru'}


# def get_unique_candidates(logger=None, debug=False):
#     import requests
#     import arrow
#     req = requests.get(skeleton.format(reqlist('all_keys')))
#     allkeysjson = json.loads(req.content)
#     allkeys = allkeysjson['Data'][0]['Value']
#     data = []
#     for key in allkeys:
#         req1 = requests.get(skeleton.format(reqlist(key)))
#         cands = json.loads(req1.content)
#         cands1 = cands['Data'][:1]
#         data.extend(cands1)
#     if debug:
#         with open('candidates_{}.json'.format(
#                 arrow.now().timestamp), 'w') as f:
#             f.write(json.dumps(data, indent=4, ensure_ascii=False)
#                     .encode('utf8', errors='replace'))
#     keysset = defaultdict(lambda: set())
#     result = {}
#     for delem in [x for x in data if 'Key' in x]:
#         identifier = 'internal-url'
#         listname = delem['Key'][0]
#         if listname in badlists:
#             continue
#         for elem in delem['Value']:
#             try:
#                 candidate_id = elem[identifier].split('/')[-1]
#             except (IndexError, KeyError):
#                 continue
#             keysset[candidate_id].add(listname)
#             if elem.get('__product'):
#                 result[candidate_id] = elem['__product']
#     return result


# def atomfront_reduce(key, recs):
#     key = deutf8ify(key)
#     counter = Counter()
#     for rec in recs:
#         rec = deutf8ify(rec)
#         counter[float(rec.value)] += 1
#     counter.pop(1, None)
#     for q in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
#         yield utf8ify(Record(
#             key,
#             format(q),
#             format(counter_quantile(counter, q))
#         ))


def get_timestamp(t):
    """Extract the timestamp part of a table name.

    ``t`` is either the name itself (a string) or an object exposing it as
    ``.name``. The result is the text before the first '-' in the last
    '/'-separated component, returned as a string.
    """
    name = t if isinstance(t, basestring) else t.name
    basename = name.rsplit('/', 1)[-1]
    return basename.partition('-')[0]


def get_srctables(lastts, alltables=None):
    """Select the tables that still have to be processed.

    A table qualifies when its timestamp is strictly greater than ``lastts``
    and falls on a minute divisible by 15 (local time). The last qualifying
    table is always dropped from the result.

    :param lastts: timestamp (anything ``int()`` accepts) of the last
        processed table.
    :param alltables: tables to choose from; fetched with ``get_alltables``
        when empty or omitted.
    """
    if not alltables:
        alltables = get_alltables()
    fresh = []
    for table in alltables:
        if not int(get_timestamp(table)) > int(lastts):
            continue
        moment = datetime.datetime.fromtimestamp(int(get_timestamp(table)))
        if moment.minute % 15 == 0:
            fresh.append(table)
    return fresh[:-1]


def push_to_razladki(data, overwrite=False):
    """POST the data points to the razladki launcher as JSON.

    The body is ``{'data': data, 'override': overwrite}``. The HTTP response
    is not inspected, so a rejected upload goes unnoticed by the caller.
    """
    endpoint = ('http://launcher.razladki.yandex-team.ru/'
                'save_new_data_json/SearchPortalDistribution')
    payload = {'data': data, 'override': overwrite}
    requests.post(endpoint, json=payload)


def get_lastts():
    """Return the timestamp of the last processed table as a string.

    The value is read verbatim from the ``atomfront_5m_an_lastts`` file that
    lives next to this script. When the file cannot be read or decoded, the
    epoch timestamp of "30 minutes ago" (local time) is returned instead.
    """
    state_path = '{}/atomfront_5m_an_lastts'.format(
        os.path.dirname(os.path.abspath(__file__))
    )
    try:
        with codecs.open(state_path, 'r', 'utf8') as f:
            return f.read()
    except (IOError, OSError, UnicodeError):
        # Missing, unreadable or undecodable state file: start half an hour
        # back. Only I/O and decoding problems are handled here so that
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        import time
        fallback = datetime.datetime.now() - datetime.timedelta(minutes=30)
        # time.mktime is the documented, portable equivalent of the
        # platform-specific strftime('%s').
        return str(int(time.mktime(fallback.timetuple())))


def set_lastts(ts):
    """Persist ``ts`` as the timestamp of the last processed table.

    The value overwrites the ``atomfront_5m_an_lastts`` file located next to
    this script.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    state_path = '{}/atomfront_5m_an_lastts'.format(script_dir)
    with codecs.open(state_path, 'w', 'utf8') as state_file:
        state_file.write(ts)


def _cli_datetime_to_epoch(text):
    """Convert a ``YYYYmmddHHMM`` command-line value to epoch seconds.

    The value is interpreted in local time; ``time.mktime`` is the portable
    equivalent of the platform-specific ``strftime('%s')``.
    """
    import time
    parsed = datetime.datetime.strptime(text, '%Y%m%d%H%M')
    return int(time.mktime(parsed.timetuple()))


def main():
    """Command-line entry point.

    With both ``--datetimefrom`` and ``--datetimeto`` (format
    ``YYYYmmddHHMM``) every table whose timestamp lies in that closed range
    is processed. Otherwise tables newer than the stored "last timestamp" are
    processed one at a time, and the stored timestamp advances after each.

    Unless ``--nolock`` is given, the lock file is checked and taken first.
    The lock file is set to 'free' when processing ends, also on failure.
    """
    global HEADERS
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--nolock', action='store_true')
    parser.add_argument('--persistent', action='store_true')
    parser.add_argument('--datetimefrom')
    parser.add_argument('--datetimeto')
    # Parse arguments before touching the token file so that --help and
    # argument errors work without credentials.
    args = parser.parse_args()
    HEADERS = {'Authorization': 'Token {}'.format(get_token())}
    logger = make_logger(os.path.abspath(__file__), debug=args.debug)
    defaults()
    MapReduce.useDefaults(username='personalization')
    if not args.nolock:
        # May call sys.exit(0); deliberately outside the try below so a
        # refused start never resets somebody else's lock.
        check_if_locked(os.path.abspath('atomlog_an_lock.txt'), logger)
    try:
        products = get_candidates()
        if args.datetimefrom:
            datetimefrom = _cli_datetime_to_epoch(args.datetimefrom)
        if args.datetimeto:
            datetimeto = _cli_datetime_to_epoch(args.datetimeto)
        if args.datetimefrom and args.datetimeto:
            # Backfill mode: reprocess an explicit closed time range.
            srctables = [
                x for x in get_alltables()
                if datetimefrom <= int(get_timestamp(x)) <= datetimeto
            ]
            for table in srctables:
                process_table(table, overwrite=True, logger=logger,
                              products=products,
                              persistent=args.persistent)
        else:
            if args.datetimefrom or args.datetimeto:
                logger.info('Only one of --datetimefrom/--datetimeto was '
                            'given; ignoring it and processing new tables')
            # Incremental mode: re-list after every table so the stored
            # timestamp decides what is still pending.
            srctables = get_srctables(get_lastts())
            while srctables:
                current = srctables[0]
                done = process_table(current, overwrite=True, logger=logger,
                                     products=products,
                                     persistent=args.persistent)
                if done:
                    set_lastts(get_timestamp(current))
                srctables = get_srctables(get_lastts())
            logger.info("No new data. Latest counted date is {}"
                        .format(get_lastts()))
    finally:
        # NOTE(review): as before, the lock file is reset even when --nolock
        # was given; confirm that is intended.
        with codecs.open('atomlog_an_lock.txt', 'w', 'utf8') as f:
            f.write('free')


def check_if_locked(lock, logger):
    """Exit if another instance holds the lock, otherwise take the lock.

    The job counts as locked when the lock file contains 'locked' AND more
    than one running process matches this script (the current process is
    presumably one of the matches). A process matches when its command line
    contains this script's file name and neither 'nolock' nor 'mapreduce'.
    A 'locked' file with no other matching process is treated as stale.

    :param lock: path of the lock file; a missing file means "not locked".
    :param logger: logger used to report the refusal.
    :raises SystemExit: (exit code 0) when the job is locked.
    """
    import errno
    try:
        with codecs.open(lock, 'r', 'utf8') as f:
            contents = f.read().rstrip()
    except (IOError, OSError) as err:
        if err.errno != errno.ENOENT:
            raise
        # First run: no lock file yet, so nothing can be holding it.
        contents = ''

    locked = False
    if 'locked' in contents:
        script_name = os.path.basename(__file__)
        matching = 0
        for proc in psutil.process_iter():
            try:
                cmdline = ' '.join(proc.cmdline())
            except psutil.Error:
                # The process exited or is not accessible; it cannot be
                # identified as an instance of this script, so skip it.
                continue
            if (script_name in cmdline and
                    'nolock' not in cmdline and
                    'mapreduce' not in cmdline):
                matching += 1
        locked = matching > 1

    if locked:
        logger.info('Process is locked, exiting...')
        sys.exit(0)
    with codecs.open(lock, 'w', 'utf8') as f:
        f.write('locked at {}'.format(datetime.datetime.now()))


def process_table(table, overwrite=False, logger=None, products=None,
                  persistent=False):
    """Run map + reduce over one log table and push the result to razladki.

    :param table: source table (a name, or an object with ``.name``).
    :param overwrite: forwarded to ``push_to_razladki`` as the override flag.
    :param logger: logger for progress messages; a standard ``logging``
        logger for this module is used when omitted.
    :param products: banner id -> product mapping handed to the mapper;
        an empty mapping is used when omitted or empty.
    :param persistent: keep the temporary map/reduce tables when true.
    :returns: always ``True``.
    """
    if logger is None:
        # The default used to crash at the first logger.info() call.
        import logging
        logger = logging.getLogger(__name__)
    if not products:
        products = {}
    timestamp = get_timestamp(table)
    atomfront_map = AtomfrontMap(timestamp, products)
    tmp1 = 'tmp/pers/atomfront{}_an_map'.format(timestamp)
    tmp2 = 'tmp/pers/atomfront{}_an_reduce'.format(timestamp)
    started = datetime.datetime.now()
    logger.info('mapping from {} to {}'.format(table, tmp1))
    MapReduce.runMap(atomfront_map,
                     srcTable=table,
                     dstTable=tmp1)
    logger.info('reducing from {} to {}'.format(tmp1, tmp2))
    MapReduce.runReduce(atomfront_reduce,
                        srcTable=tmp1,
                        dstTable=tmp2)
    logger.info('getting records from {}'.format(tmp2))
    # NOTE(review): presumably getSample() returns every record of the
    # reduce output; confirm it does not truncate.
    records = list(MapReduce.getSample(tmp2))
    logger.info('pushing to razladki')
    data = []
    for rec in records:
        rec = deutf8ify(rec)
        data.append({
            'param': 'atomlog_{}_{}'.format(rec.key, rec.subkey),
            'ts': timestamp,
            'value': rec.value,
        })
    push_to_razladki(data, overwrite=overwrite)
    if not persistent:
        MapReduce.dropTable(tmp1)
        MapReduce.dropTable(tmp2)
    elapsed = datetime.datetime.now() - started
    # Reported in minutes.
    logger.info('total time: {}'.format(elapsed.total_seconds() / 60))
    return True

# Run the job only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
