#!/usr/bin/env python
#! -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
from __future__ import print_function
import sys
import os
import re
import codecs
import logging
import toml
import pdb
import argparse
import traceback
import mapreducelib
import threading
import urllib
import json
from time import sleep
try:
    import thread
except ImportError:
    import _thread as thread
from mapreducelib import MapReduce, Record
import urlparse
from collections import defaultdict, Counter, namedtuple
import datetime as dt
from datetime import datetime as dtdt
from pecheny.mrdef import defaults
from pecheny.moncommons import push_to_razladki

# PATH = ['12.1620.705', '12.1620.486']

# Click paths handed to FirstMap, each mapped to the event name it stands for.
PATH = {
    '12.1620.705': 'install',
    '12.1620.486': 'cancel',
}

# Key of the per-table event counter: element, event type and language.
Parameter = namedtuple(
    'Parameter',
    ['element', 'eventtype', 'lang'],
)

# Key of the stat-report counters.  The field order matters: push_to_stat
# builds Ident instances positionally.
Ident = namedtuple(
    'Ident',
    ['browser', 'device', 'lang', 'candidate', 'product'],
)


def deutf8ify(rec):
    """Return ``rec`` with its byte strings decoded from UTF-8.

    A ``SubkeyedRecord`` is rebuilt as a new ``Record`` whose key, subkey
    and value are unicode; a plain byte ``str`` is decoded; anything else
    is handed back untouched.  Undecodable bytes are replaced instead of
    raising.
    """
    def _as_unicode(raw):
        if isinstance(raw, unicode):
            return raw
        return raw.decode('utf8', errors='replace')

    if isinstance(rec, mapreducelib.SubkeyedRecord):
        fields = (rec.key, rec.subkey, rec.value)
        return Record(*[_as_unicode(field) for field in fields])
    if isinstance(rec, str):
        return rec.decode('utf8', errors='replace')
    return rec


def utf8ify(rec):
    """Return ``rec`` with its unicode strings encoded to UTF-8 bytes.

    A ``SubkeyedRecord`` is modified in place (key, subkey and value are
    each encoded when they are unicode) and the same object is returned.
    A bare unicode string is returned encoded; any other object is
    returned unchanged.
    """
    if isinstance(rec, mapreducelib.SubkeyedRecord):
        for attr in ('key', 'subkey', 'value'):
            current = getattr(rec, attr)
            if isinstance(current, unicode):
                setattr(rec, attr, current.encode('utf8'))
        return rec
    if isinstance(rec, unicode):
        return rec.encode('utf8')
    return rec


def tryint(string):
    """Convert ``string`` to ``int``, returning -1 when it is not convertible.

    Only the errors ``int()`` raises for unsuitable input are treated as
    "not a number"; ``KeyboardInterrupt`` and ``SystemExit`` propagate
    instead of being silently turned into -1.
    """
    try:
        return int(string)
    except (TypeError, ValueError, OverflowError):
        return -1


def dttots(dt_):
    """Return the whole seconds between 1970-01-01 00:00 and ``dt_``.

    ``dt_`` is a naive ``datetime``; the fractional part of the difference
    is dropped by ``int()``.
    """
    epoch = dtdt(1970, 1, 1)
    elapsed = dt_ - epoch
    return int(elapsed.total_seconds())


def parsevars(vars):
    """Parse a comma-separated ``key=value`` list into a defaulting dict.

    * ``key=value`` chunks are stored under ``key``; one leading ``-`` is
      stripped from the key and everything after the first ``=`` is kept
      as the value.
    * Keys starting with ``clid`` are not stored individually: their
      values are appended, in order, to the list under ``'clids'``.
    * A chunk without ``=`` is stored as-is with the value ``'SINGLE'``.

    The returned ``defaultdict`` yields ``''`` for missing keys.
    """
    parsed = {'clids': []}
    for chunk in vars.split(','):
        name, sep, payload = chunk.partition('=')
        if not sep:
            parsed[chunk] = 'SINGLE'
            continue
        if name.startswith('-'):
            name = name[1:]
        if name.startswith('clid'):
            parsed['clids'].append(payload)
        else:
            parsed[name] = payload
    return defaultdict(lambda: '', parsed)


def parseparams(value):
    """Parse a tab-separated ``key=value`` record into a defaulting dict.

    Each field is split at its first ``=``; a field without ``=`` is stored
    under itself with the value ``'SINGLE'``.  The returned ``defaultdict``
    yields ``''`` for missing keys.
    """
    parsed = {}
    for field in value.split('\t'):
        name, sep, payload = field.partition('=')
        if sep:
            parsed[name] = payload
        else:
            parsed[field] = 'SINGLE'
    return defaultdict(lambda: '', parsed)


def tabulate(*args):
    """Join the ``format()`` rendering of every argument with tabs."""
    cells = [format(arg) for arg in args]
    return '\t'.join(cells)


def ntabulate(*args):
    """Return the arguments as one tab-separated line ending in a newline."""
    row = tabulate(*args)
    return row + '\n'


def gettld(url):
    """Return the last dot-separated label of the URL's host, without port.

    A URL that contains no ``//`` gets ``http://`` prepended so that the
    host ends up in the parsed network location.
    """
    if '//' not in url:
        url = 'http://' + url
    host = urlparse.urlparse(url).netloc
    last_label = host.rpartition('.')[2]
    return last_label.partition(':')[0]

# Authentication headers and endpoint for the stat upload in push_to_stat.
# NOTE(review): the robot credentials are committed in source.  They can now
# be overridden through the environment; the literals stay only as a
# backward-compatible fallback and should be rotated and removed.
HEADERS = {
    'StatRobotUser': os.environ.get('STAT_ROBOT_USER', 'robot_pecheny'),
    'StatRobotPassword': os.environ.get('STAT_ROBOT_PASSWORD', 'OoGh1Adahy'),
}
URL = 'https://upload.stat.yandex-team.ru/_api/report/data'


def push_to_stat(event_dict, ts, logger):
    """Upload per-identity counters and their roll-ups to the stat report.

    Args:
        event_dict: mapping from ``Ident`` to a counter-like object indexed
            by ``'shows'``, ``'installs'`` and ``'cancels'``; the values
            must support ``Counter`` addition.
        ts: timestamp of the source table; rendered (via ``arrow``) with
            minute precision as the ``fielddate`` column.
        logger: logger that receives the generated TSV and the response
            body.

    For every identity one row is emitted as-is, and its counters are also
    added to every combination in which at least one of the identity's
    fields is replaced by ``'_total_'``.
    """
    # itertools is not imported at module level, so import it here next to
    # the other function-local imports.
    import itertools
    import arrow
    import requests

    fd = arrow.get(ts).strftime('%Y-%m-%d %H:%M:00')

    def _row(ident, counts):
        return ntabulate(fd, ident.product, ident.browser, ident.device,
                         ident.lang, ident.candidate,
                         counts['shows'], counts['installs'],
                         counts['cancels'])

    rows = [ntabulate('fielddate', 'product', 'browser',
                      'device', 'lang', 'candidate',
                      'shows', 'installs', 'cancels')]
    total_dict = defaultdict(Counter)
    for ident, counts in event_dict.items():
        rows.append(_row(ident, counts))
        # Same order as the Ident fields:
        # 'browser', 'device', 'lang', 'candidate', 'product'
        combos = itertools.product(
            (ident.browser, '_total_'),
            (ident.device, '_total_'),
            (ident.lang, '_total_'),
            (ident.candidate, '_total_'),
            (ident.product, '_total_'),
        )
        # The first combination is the identity itself (no '_total_'
        # field) and has already been emitted above, so skip it.
        for comb in itertools.islice(combos, 1, None):
            total_dict[Ident(*comb)] += counts
    for ident, counts in total_dict.items():
        rows.append(_row(ident, counts))

    tsv_data = ''.join(rows)
    logger.info(tsv_data)
    data = {
        "name": "Distribution/Others/AtomBanners",
        "scale": "i",
        "_append_mode": 1,
        "tsv_data": tsv_data,
    }
    r = requests.post(URL, headers=HEADERS, data=data)
    logger.info(r.text)


def reqlist(*listnames):
    """Build the URL-quoted JSON selector for the given candidate lists.

    The JSON object has a single ``'atom-candidates'`` key whose value maps
    every requested list name to ``null``.
    """
    wanted = {name: None for name in listnames}
    payload = json.dumps({'atom-candidates': wanted})
    quoted = urllib.quote(payload.encode('utf8'))
    return quoted.decode('utf8')

# URL template for the candidate service; '{}' receives the output of
# reqlist().
skeleton = (
    'http://querysearch.search.yandex.net/yandsearch'
    '?ms=querysearch:json:3'
    '&rearr=qd_struct_keys={}'
    '&waitall=da'
    '&timeout=1000000'
)
# Lists that get_unique_candidates skips entirely.
badlists = {
    'news',
    'service_block_ru',
}
# Lists whose candidate id is read from 'internal-url' instead of 'url'.
inturllists = {
    'banana/1127_filtered',
}


def get_unique_candidates(logger, debug=False):
    """Fetch all candidate lists and map candidate ids to grouping keys.

    Two requests are made: the first asks for the ``all_keys`` list to
    learn the names of the available lists, the second asks for all of
    those lists at once.

    Args:
        logger: logger used to report elements whose URL cannot be parsed.
        debug: when true, the raw list data is also dumped to
            ``candidates_<timestamp>.json`` in the current directory.

    Returns:
        dict mapping candidate id to the element's ``'grouping-key'``.  The
        candidate id is the second ``/``-separated piece of the element's
        ``'url'`` (``'internal-url'`` for lists named in ``inturllists``).
        Lists in ``badlists`` and elements without a ``'grouping-key'`` are
        left out; when an id occurs more than once the last occurrence
        wins.
    """
    import requests
    import arrow

    keys_response = requests.get(skeleton.format(reqlist('all_keys')))
    allkeys = json.loads(keys_response.content)['Data'][0]['Value']
    lists_response = requests.get(skeleton.format(reqlist(*allkeys)))
    # Only the first len(allkeys) entries are taken as the requested lists.
    data = json.loads(lists_response.content)['Data'][:len(allkeys)]
    if debug:
        dump_name = 'candidates_{}.json'.format(arrow.now().timestamp)
        with open(dump_name, 'w') as f:
            f.write(json.dumps(data, indent=4, ensure_ascii=False)
                    .encode('utf8'))

    # NOTE: the previous version also collected, per candidate id, the set
    # of lists it occurred in, for a duplicate check that was never enabled.
    # That unused bookkeeping has been removed.
    result = {}
    for delem in data:
        listname = delem['Key'][0]
        if listname in badlists:
            continue
        identifier = 'internal-url' if listname in inturllists else 'url'
        for elem in delem['Value']:
            try:
                candidate_id = elem[identifier].split('/')[1]
            except (IndexError, KeyError):
                logger.error('Bad internal url in list {}: {}'.format(
                    listname, elem))
                continue
            if 'grouping-key' in elem:
                result[candidate_id] = elem['grouping-key']
    return result


class FirstMap(object):
    """Map callable that extracts portal-ads TECH events from log records.

    For a record whose parsed ``type`` is ``'TECH'`` and whose ``path`` is
    one of the keys of ``TECH_ELEMENTS``, one output record is yielded with
    the input key and subkey and a tab-separated value of: element name,
    the ``eventtype`` var, the ``dom-region`` parameter and the
    ``bannerid`` var.  Every other record produces no output.

    Earlier revisions also derived a language from the referer / request
    URL and matched CLICK / REQUEST / BLOCKSTAT records; none of that
    reached the output any more, so the dead per-record work and the
    commented-out branches were dropped (see version control for them).
    """

    # TECH event path -> element name written as the first output column.
    TECH_ELEMENTS = {
        'tech.portal-ads.default_search': 'default_search',
        'tech.portal-ads.smart-banner': 'smart-banner',
        'tech.portal-ads.distr_stripe': 'distr_stripe',
    }

    def __init__(self, PATH):
        # Stored for interface compatibility; the current mapping logic
        # does not read it.
        self.PATH = PATH

    def __call__(self, rec):
        rec = deutf8ify(rec)
        params = parseparams(rec.value)
        if params['type'] != 'TECH':
            return
        element = self.TECH_ELEMENTS.get(params['path'])
        if element is None:
            return
        # Parse the vars only for records that are actually emitted.
        pvars = parsevars(params['vars'])
        yield utf8ify(
            Record(
                rec.key,
                rec.subkey,
                tabulate(element,
                         pvars['eventtype'],
                         params['dom-region'],
                         pvars['bannerid'])
            ))


def get_lastts():
    """Read the stored timestamp from ``fastlogs_last_timestamp``.

    The file is looked up in the current working directory and its whole
    content is parsed as an integer.
    """
    with open('fastlogs_last_timestamp') as state_file:
        return int(state_file.read())


def set_lastts(ts):
    """Overwrite ``fastlogs_last_timestamp`` with ``format(ts)``.

    The file is created or truncated in the current working directory.
    """
    with open('fastlogs_last_timestamp', 'w') as state_file:
        text = format(ts)
        state_file.write(text)


def tstable(table):
    """Return the last ``/``-separated path component of ``table`` as an int.

    The conversion goes through ``tryint``, so a non-numeric component
    gives -1.
    """
    last_component = table.rsplit('/', 1)[-1]
    return tryint(last_component)


def get_srctables(lb=None, ub=None, alltables=None):
    """Select the tables whose timestamp lies in ``(lb, ub]``.

    Args:
        lb: exclusive lower bound; a falsy value means "use the stored
            last timestamp" (``get_lastts()``).
        ub: inclusive upper bound; a falsy value means 9999999999.
        alltables: table names to filter; a falsy value means "list them
            now" (``get_alltables()``).

    Returns:
        list of matching table names, in the order of ``alltables``.
    """
    tables = alltables if alltables else get_alltables()
    lower = lb if lb else get_lastts()
    upper = ub if ub else 9999999999
    return [name for name in tables if lower < tstable(name) <= upper]


def get_alltables():
    """Return the sorted names of the fast-logs session tables.

    Only tables matching ``fast_logs/user_sessions/*`` whose name ends in
    ``'0'`` are kept.
    """
    infos = MapReduce.getTablesInfo('fast_logs/user_sessions/*')
    return sorted(info.name for info in infos if info.name.endswith('0'))


def main():
    """Entry point: process fast-logs tables and push the derived metrics.

    Without both ``--datetimefrom`` and ``--datetimeto`` every table newer
    than the stored last timestamp is processed, oldest first; the stored
    timestamp is advanced after each table that was processed
    successfully, and the script exits with status 0 once nothing is left.

    With both options the tables whose timestamp lies in ``(from, to]`` are
    processed and the stored timestamp is left untouched.  The bounds are
    integer timestamps when ``--timestamp`` is given, otherwise
    ``YYYYMMDDHHmm`` strings interpreted in the Europe/Moscow zone.
    """
    global __file__                         # to fix stupid
    __file__ = os.path.abspath(__file__)    # __file__ handling
    _file_ = os.path.basename(__file__)     # in python 2
    import arrow

    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--savecands', action='store_true')
    parser.add_argument('--config', default=None)
    parser.add_argument('--datetimefrom', default=None)
    parser.add_argument('--datetimeto', default=None)
    parser.add_argument('--timestamp', action='store_true')
    args = parser.parse_args()
    start = dttots(dtdt.now())
    script_name = _file_[:-3]   # file name without its 3-character suffix

    logger = logging.getLogger(script_name)
    formatter = logging.Formatter('%(asctime)s | %(message)s')
    ch = logging.StreamHandler()
    logger.setLevel(logging.DEBUG)
    # The console only shows everything in debug mode; the log file below
    # always receives everything.
    ch.setLevel(logging.DEBUG if args.debug else logging.CRITICAL)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    fh = logging.FileHandler('{}/logs/{}-{}.log'.format(
        os.path.dirname(__file__), script_name, start),
        encoding='utf8')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # load config: basic.toml is read from the invocation directory, the
    # other files from the script's directory (note the chdir in between).
    with open('basic.toml', 'r') as f:
        config = toml.loads(f.read())
    os.chdir(os.path.dirname(__file__))
    with open('distribution.toml', 'r') as f:
        config.update(toml.loads(f.read()))
    if args.config is None:
        override_path = script_name + '.toml'
    else:
        override_path = args.config
    with open(override_path) as f:
        config.update(toml.loads(f.read()))

    candidates = get_unique_candidates(logger, debug=args.savecands)
    if args.savecands:
        with open('candidate_to_product_{}.json'.format(
                arrow.now().timestamp), 'w') as f:
            f.write(json.dumps(candidates, indent=4, ensure_ascii=False))

    defaults()
    MapReduce.useDefaults(server=config['mr_server'])
    alltables = get_alltables()
    if not (args.datetimefrom and args.datetimeto):
        # Reuse the listing fetched above for the first pass; later passes
        # re-list so that tables appearing meanwhile are picked up.
        srctables = get_srctables(lb=get_lastts(), alltables=alltables)
        # NOTE(review): if process_date keeps returning False for the first
        # pending table, the stored timestamp never advances and this loop
        # re-processes that table indefinitely -- confirm this is intended.
        while srctables:
            current = srctables[0]
            processed_ok = process_date(current, logger, config, candidates)
            processed_ts = tstable(current)
            if processed_ok and processed_ts > get_lastts():
                set_lastts(processed_ts)
            srctables = get_srctables()
        # Report the timestamp as stored now, not the one read at start-up.
        logger.info("No new data. Latest counted ts is {}"
                    .format(get_lastts()))
        sys.exit(0)
    else:
        def _bound(raw):
            # Convert one command-line bound to an integer timestamp.
            if args.timestamp:
                return int(raw)
            return arrow.get(raw, 'YYYYMMDDHHmm',
                             tzinfo='Europe/Moscow').timestamp

        lb = _bound(args.datetimefrom)
        ub = _bound(args.datetimeto)
        for srctable in get_srctables(lb=lb, ub=ub, alltables=alltables):
            process_date(srctable, logger, config, candidates)

# Raw event type -> name of the counter it is folded into.
event_dict = {
    raw_event: bucket
    for bucket, raw_events in (
        ('cancels', ('close_stripe', 'closestripe', 'close-stripe',
                     'close', 'cancel')),
        ('shows', ('show',)),
        ('installs', ('download', 'install')),
    )
    for raw_event in raw_events
}


def _push_rate(config, logger, desc, numerator, denominator, ts):
    """Push ``numerator / denominator`` to razladki as ``desc``, best effort.

    Any failure, including a zero denominator, is logged with its traceback
    and swallowed so that the remaining metrics are still pushed.
    """
    try:
        rvalue = numerator / float(denominator)
        logger.info('Pushing to razladki: {}, {}'
                    .format(desc, rvalue))
        push_to_razladki(config, desc, rvalue, ts=ts)
    except Exception:
        logger.info(traceback.format_exc())


def process_date(srctable, logger, config, candidates):
    """Map one source table, count its events and push the metrics.

    Args:
        srctable: source table name; its last ``/``-separated component is
            the integer timestamp of the table.
        logger: logger for progress and error messages.
        config: configuration passed through to ``push_to_razladki``.
        candidates: mapping from banner id to product; only events whose
            banner id is in it contribute to the stat report.

    Returns:
        True when the mapped table had data and was pushed, False when it
        is still empty after mapping.
    """
    logger.info('Source table is {}'.format(srctable))
    ts = int(srctable.split('/')[-1])
    dsttable = 'tmp/pers/set_installs_fastlogs_{}'.format(ts)
    first_map = FirstMap(PATH)
    if len(list(MapReduce.getSample(dsttable, count=1))) != 1:
        # The destination is empty: (re)run the map, retrying until it
        # succeeds.  Only ordinary errors are retried so that Ctrl-C and
        # SystemExit can still stop the script.
        while True:
            try:
                logger.info('Mapping from {} to {}'.format(
                    srctable, dsttable))
                MapReduce.runMap(first_map, srcTable=srctable,
                                 dstTable=dsttable)
                break
            except Exception:
                logger.error(traceback.format_exc())

    if len(list(MapReduce.getSample(dsttable, count=1))) != 1:
        logger.critical('No installs/shows/cancels at {} or something went wrong.'
                        .format(ts))
        return False

    counter = Counter()
    ident_dict = defaultdict(Counter)
    for rec in MapReduce.getSample(dsttable, count=None):
        fields = deutf8ify(rec).value.split('\t')
        parameter = Parameter(
            element=fields[0],
            eventtype=fields[1],
            lang=fields[2])
        bannerid = fields[3]
        counter[parameter] += 1
        if parameter.eventtype in event_dict and bannerid in candidates:
            ident = Ident(
                browser='unknown',
                device='unknown',
                lang=parameter.lang,
                candidate=bannerid,
                product=candidates[bannerid])
            ident_dict[ident][event_dict[parameter.eventtype]] += 1
    # Push the per-identity counters built above; the module-level
    # event_dict is only the event-name mapping, not the data to upload.
    push_to_stat(ident_dict, ts, logger)

    for param, value in counter.items():
        desc = '{lang}_{elem}_set_{event}s_fastlogs'.format(
            lang=param.lang,
            elem=param.element,
            event=param.eventtype)
        logger.info('Pushing to razladki: {}, {}'
                    .format(desc, value))
        push_to_razladki(config, desc, value, ts=ts)
        if param.eventtype in {'click', 'install', 'cancel', 'close'}:
            # Rate of this event relative to shows of the same element/lang.
            rdesc = '{lang}_{elem}_set_{event}srate_fastlogs'.format(
                lang=param.lang,
                elem=param.element,
                event=param.eventtype)
            _push_rate(config, logger, rdesc, value,
                       counter[param._replace(eventtype='show')], ts)
        click_param = param._replace(eventtype='click')
        if param.eventtype == 'close' and click_param in counter:
            # Closes relative to clicks of the same element/lang.
            rdesc = '{lang}_{elem}_set_closeclickrate_fastlogs'.format(
                lang=param.lang,
                elem=param.element)
            _push_rate(config, logger, rdesc, value,
                       counter[click_param], ts)
    return True


# Run the pipeline only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
