#!/usr/bin/env python
#! -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
from __future__ import print_function
import sys
import os
import re
import codecs
import logging
import toml
import argparse
import traceback
import mapreducelib
import threading
from time import sleep
try:
    import thread
except ImportError:
    import _thread as thread
from mapreducelib import MapReduce, Record
import urlparse
from collections import defaultdict, Counter, namedtuple
import datetime as dt
from datetime import datetime as dtdt

# PATH = ['12.1620.705', '12.1620.486']

# Click paths counted for the 'wiz_chrome' element, mapped to the event name
# that FirstMap emits for a CLICK record carrying that path.
PATH = {'12.1620.705': 'install',
        '12.1620.486': 'cancel'}


def deutf8ify(rec):
    """Return *rec* with its byte strings decoded from UTF-8 to unicode.

    A ``mapreducelib.SubkeyedRecord`` is rebuilt as a new ``Record`` whose
    key, subkey and value are unicode; a plain byte string is decoded;
    anything else is returned unchanged.  Undecodable bytes are replaced
    rather than raising.
    """
    def _as_text(chunk):
        # Already-decoded fields pass through untouched.
        if isinstance(chunk, unicode):
            return chunk
        return chunk.decode('utf8', errors='replace')

    if isinstance(rec, mapreducelib.SubkeyedRecord):
        return Record(_as_text(rec.key),
                      _as_text(rec.subkey),
                      _as_text(rec.value))
    if isinstance(rec, str):
        return rec.decode('utf8', errors='replace')
    return rec


def utf8ify(rec):
    """Return *rec* with its unicode strings encoded to UTF-8 bytes.

    A ``mapreducelib.SubkeyedRecord`` is modified in place (key, subkey and
    value are each encoded when they are unicode) and the same object is
    returned; a plain unicode string is encoded; anything else is returned
    unchanged.
    """
    if isinstance(rec, mapreducelib.SubkeyedRecord):
        for field in ('key', 'subkey', 'value'):
            current = getattr(rec, field)
            if isinstance(current, unicode):
                setattr(rec, field, current.encode('utf8'))
        return rec
    if isinstance(rec, unicode):
        return rec.encode('utf8')
    return rec


def tryint(string):
    """Return ``int(string)``, or -1 when the value cannot be converted.

    Only conversion failures are turned into -1; unrelated exceptions such
    as ``KeyboardInterrupt`` or ``SystemExit`` propagate to the caller.
    """
    try:
        return int(string)
    except (TypeError, ValueError, OverflowError):
        return -1


def parsevars(vars):
    """Parse a comma-separated ``key=value`` list into a defaulting dict.

    * A leading '-' is stripped from keys.
    * Values of keys starting with 'clid' are collected, in order, in the
      list stored under 'clids'.
    * An item without '=' is stored under its own text with the value
      'SINGLE'.
    * Only the first '=' separates key from value.

    The result is a ``defaultdict`` that yields '' for missing keys.
    """
    parsed = {'clids': []}
    for item in vars.split(','):
        name, eq, rest = item.partition('=')
        if not eq:
            parsed[item] = 'SINGLE'
            continue
        if name.startswith('-'):
            name = name[1:]
        if name.startswith('clid'):
            parsed['clids'].append(rest)
        else:
            parsed[name] = rest
    return defaultdict(lambda: '', parsed)


def parseparams(value, sep='\t'):
    """Parse *sep*-separated ``key=value`` fields into a defaulting dict.

    Only the first '=' of a field separates key from value; a field without
    '=' is stored under its own text with the value 'SINGLE'.  The result
    is a ``defaultdict`` that yields '' for missing keys.
    """
    fields = {}
    for chunk in value.split(sep):
        name, eq, rest = chunk.partition('=')
        if eq:
            fields[name] = rest
        else:
            fields[chunk] = 'SINGLE'
    return defaultdict(lambda: '', fields)


def tabulate(*args):
    """Join the ``format()``-ed arguments with tab characters."""
    cells = (format(cell) for cell in args)
    return '\t'.join(cells)


def gettld(url):
    """Return the last dot-separated label of the host part of *url*.

    'http://' is prepended when *url* contains no '//' so that the host is
    parsed as a network location.  Anything from the first ':' of the last
    label on (e.g. a port) is dropped.  Returns '' when no host is found.
    """
    if '//' not in url:
        url = 'http://' + url
    host = urlparse.urlparse(url).netloc
    last_label = host.rsplit('.', 1)[-1]
    return last_label.partition(':')[0]

# def getvalue(string,val,d="\t"):
#     rv=""
#     tabs=string.split(d)
#     for k in tabs:
#         if k[0:(len(val)+1)]==val+"=":
#             rv=k[(len(val)+1):]
#     return rv


class FirstMap(object):
    """Turn the raw log records of one key into install/show/cancel events.

    Instances are callables with a ``(key, recs)`` signature.  Every yielded
    record keeps the key and subkey of the log record it was derived from;
    its value is a tab-separated ``element, eventtype, lang, source`` line:

    * 'atom' records are yielded as soon as the triggering log record is
      seen (``source`` is the literal 'atom');
    * click-like records are buffered until all of ``recs`` has been read,
      because their ``source`` ('wizard', 'stripe', 'wizardstripe' or 'bk')
      depends on which reqids issued wizard/stripe REQUESTs anywhere in
      ``recs``.
    """

    # SERP stripe click paths.  Dots are escaped so that they only match a
    # literal '.' separator.
    # NOTE(review): '|' binds weaker than the anchors, so each pattern reads
    # "starts with the first form OR ends with the second form" - confirm
    # that this is intended.
    _STRIPE_INSTALL_PATH = (
        r'^707\.(1651|1652|1870|1873|1879|1878)\.[0-9]+\.494|707\.1432\.494$')
    # NOTE(review): the first alternative is identical to the install one
    # and the install branch is tested first, so only paths ending in
    # '707.1432.1780' can reach 'close'; the first suffix was presumably
    # meant to differ - confirm.
    _STRIPE_CLOSE_PATH = (
        r'^707\.(1651|1652|1870|1873|1879|1878)\.[0-9]+\.494|707\.1432\.1780$')

    def __init__(self, PATH):
        # Mapping of click path -> event name for the 'wiz_chrome' element.
        self.PATH = PATH

    def __call__(self, key, recs):
        """Yield utf8-encoded event records for the log records in *recs*.

        ``key`` itself is not used; the key of every output record is taken
        from the input record it was derived from.
        """
        # lang is not reset per record: a record whose type is neither
        # CLICK/TECH nor REQUEST reuses the value left by the previous one.
        # NOTE(review): confirm this carry-over is wanted.
        lang = ''
        clicks = []  # (reqid, Record) pairs still waiting for their source
        wizardreqids = set()
        stripereqids = set()
        for rec_ in recs:
            rec = deutf8ify(rec_)
            params = parseparams(rec.value)
            reqid = params['reqid']
            rec_type = params['type']
            path = params['path']
            if rec_type in {'CLICK', 'TECH'}:
                lang = gettld(params['referer'])
            elif rec_type == 'REQUEST':
                lang = gettld(params['full-request'])
            if not lang:
                lang = params['dom-region']

            # (element, eventtype) of a buffered click-like event, if any.
            click_event = None
            if rec_type == 'CLICK' and path in self.PATH:
                click_event = ('wiz_chrome', self.PATH[path])
            elif (rec_type == 'CLICK'
                  and re.search(self._STRIPE_INSTALL_PATH, path)):
                click_event = ('distr_stripe_serp', 'install')
            elif rec_type == 'BLOCKSTAT' and '/stripe' in rec.value:
                click_event = ('distr_stripe_serp', 'show')
            elif (rec_type == 'CLICK'
                  and re.search(self._STRIPE_CLOSE_PATH, path)):
                click_event = ('distr_stripe_serp', 'close')
            elif (rec_type == 'REQUEST'
                  and 'default_search_wizard' in rec.value):
                wizardreqids.add(reqid)
                yield utf8ify(
                    Record(
                        rec.key,
                        rec.subkey,
                        tabulate('wiz_chrome', 'show', lang, 'atom')
                    ))
            elif (rec_type == 'REQUEST'
                  and 'distr_stripe_wizard' in rec.value):
                stripereqids.add(reqid)
            elif (rec_type == 'TECH'
                  and path == 'tech.portal-ads.distr_stripe'):
                event_vars = parsevars(params['vars'])
                yield utf8ify(
                    Record(
                        rec.key,
                        rec.subkey,
                        tabulate('distr_stripe',
                                 event_vars['eventtype'],
                                 params['dom-region'],
                                 'atom')
                    ))

            if click_event is not None:
                element, eventtype = click_event
                clicks.append((
                    reqid,
                    Record(rec.key,
                           rec.subkey,
                           tabulate(element, eventtype, lang))))

        # All requests are known now, so each buffered click can be
        # attributed to the wizard and/or stripe request that shares its
        # reqid; 'bk' is used when neither matches.
        for reqid, click in clicks:
            source = ''
            if reqid in wizardreqids:
                source = 'wizard'
            if reqid in stripereqids:
                source += 'stripe'
            if not source:
                source = 'bk'
            click.value += '\t{}'.format(source)
            yield utf8ify(click)


def _load_toml(path):
    """Return the parsed contents of the TOML file at *path*."""
    with open(path, 'r') as f:
        return toml.loads(f.read())


def _read_last_timestamp(path):
    """Return the integer stored in *path*, or 0 if it cannot be read/parsed."""
    try:
        with open(path) as f:
            return int(f.read())
    except (IOError, OSError, ValueError):
        return 0


def main():
    """Process every not-yet-counted source table and push its counters.

    Steps:
      1. set up console and file logging (``--debug`` lowers the console
         threshold from CRITICAL to DEBUG);
      2. merge ``basic.toml``, ``distribution.toml`` and either
         ``<script name>.toml`` or the file given by ``--config``;
      3. for each 'fast_logs/user_sessions/*' table whose name ends in '0'
         and whose trailing timestamp is newer than the one stored in
         ``fastlogs_last_timestamp``: run ``FirstMap`` over it, count the
         resulting (element, eventtype, lang, source) tuples, push each
         count with ``push_to_razladki`` and store the table's timestamp.
    """
    global __file__                         # to fix stupid
    __file__ = os.path.abspath(__file__)    # __file__ handling
    _file_ = os.path.basename(__file__)     # in python 2
    script_dir = os.path.dirname(__file__)
    # Strip the extension whatever its length (.py, .pyc, ...).
    script_name = os.path.splitext(_file_)[0]

    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--config', default=None)
    args = parser.parse_args()
    # Seconds since 1970-01-01 of the naive local clock; only used to give
    # each run its own log file name.
    start = int((dtdt.now() - dtdt(1970, 1, 1)).total_seconds())

    logger = logging.getLogger(script_name)
    formatter = logging.Formatter('%(asctime)s | %(message)s')
    ch = logging.StreamHandler()
    logger.setLevel(logging.DEBUG)
    ch.setLevel(logging.DEBUG if args.debug else logging.CRITICAL)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    fh = logging.FileHandler('{}/logs/{}-{}.log'.format(
        script_dir, script_name, start),
        encoding='utf8')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # load config
    # NOTE(review): basic.toml is resolved against the caller's working
    # directory because it is read before the chdir below - confirm that
    # this is intended.
    config = _load_toml('basic.toml')
    os.chdir(script_dir)
    config.update(_load_toml('distribution.toml'))
    if args.config is None:
        config.update(_load_toml(script_name + '.toml'))
    else:
        config.update(_load_toml(args.config))

    from pecheny.mrdef import defaults
    from pecheny.moncommons import push_to_razladki

    defaults()
    MapReduce.useDefaults(server=config['mr_server'])
    alltables = MapReduce.getTablesInfo('fast_logs/user_sessions/*')
    alltables = [x.name for x in alltables if x.name.endswith('0')]
    lastts = _read_last_timestamp('fastlogs_last_timestamp')
    srctables = sorted([x for x in alltables
                        if tryint(x.split('/')[-1]) > lastts],
                       key=lambda x: int(x.split('/')[-1]))
    if not srctables:
        logger.info("No new data. Latest counted ts is {}"
                    .format(lastts))
        sys.exit(0)

    Parameter = namedtuple('Parameter',
                           ['element', 'eventtype', 'lang', 'source'])
    retry_pause = 5  # seconds to wait before re-running a failed map
    for srctable in srctables:
        logger.info('Source table is {}'.format(srctable))
        ts = int(srctable.split('/')[-1])
        dsttable = 'tmp/pers/set_installs_fastlogs_{}'.format(ts)
        first_map = FirstMap(PATH)
        # NOTE(review): FirstMap.__call__ takes (key, recs); confirm that
        # runMap invokes it with grouped records rather than one record.
        # Retry until the operation succeeds.  Only Exception is caught so
        # that Ctrl-C / SystemExit can still stop the script, and the pause
        # keeps a persistent failure from turning into a busy loop.
        while True:
            try:
                MapReduce.runMap(first_map, srcTable=srctable,
                                 dstTable=dsttable)
            except Exception:
                logger.error(traceback.format_exc())
                sleep(retry_pause)
            else:
                break

        if len(list(MapReduce.getSample(dsttable, count=1))) == 1:
            counter = Counter()
            for rec in MapReduce.getSample(dsttable, count=None):
                fields = deutf8ify(rec).value.split('\t')
                if len(fields) >= 4:
                    # The first four columns are element, eventtype, lang
                    # and source, in that order.
                    counter[Parameter(*fields[:4])] += 1

            for param in counter:
                desc = '{lang}_{elem}_{source}_{event}s_fastlogs'.format(
                    lang=param.lang,
                    elem=param.element,
                    event=param.eventtype,
                    source=param.source)
                value = counter[param]
                logger.info('Pushing to razladki: {}, {}'
                            .format(desc, value))
                push_to_razladki(config, desc, value, ts=ts)

            # Remember the newest processed table so that the next run
            # skips it.
            with open('fastlogs_last_timestamp', 'w') as f:
                f.write('{}'.format(ts))
        else:
            logger.critical('No installs/shows/cancels at {} or something went wrong.'
                            .format(ts))


# Run the job only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
