#!/usr/bin/env python
#! -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
from __future__ import print_function
import sys
import os
import codecs
import logging
import toml
import pdb
import argparse
import traceback
import mapreducelib
import threading
from time import sleep
try:
    import thread
except ImportError:
    import _thread as thread
from mapreducelib import MapReduce, Record
import urlparse
from collections import defaultdict, Counter
import datetime as dt
from datetime import datetime as dtdt

# NOTE(review): not referenced anywhere in the visible part of this file;
# the meaning of the dotted identifier is not evident from here -- confirm
# whether it is still needed.
PATH = ['12.1620.705']


def cdquit(fn_name):
    '''
    Report on stderr that ``fn_name`` ran too long, then interrupt the
    main thread (which surfaces there as KeyboardInterrupt).
    '''
    message = '{0} took too long'.format(fn_name)
    # stderr is unbuffered in Python 2 ...
    print(message, file=sys.stderr)
    # ... but is likely buffered in Python 3, so push the text out now.
    sys.stderr.flush()
    thread.interrupt_main()


def exit_after(s):
    '''
    use as decorator to exit process if
    function takes longer than s seconds

    A ``threading.Timer`` is armed before the wrapped function is called
    and cancelled once it returns or raises.  If the timer fires first it
    calls ``cdquit`` with the wrapped function's name.

    :param s: timeout in seconds, passed to ``threading.Timer``.
    :return: a decorator; the decorated function returns whatever the
        wrapped function returns and propagates its exceptions.
    '''
    import functools

    def outer(fn):
        @functools.wraps(fn)  # keep the wrapped function's name/docstring
        def inner(*args, **kwargs):
            # ``args`` must be a sequence holding the single argument.
            # Passing the bare name string would be unpacked character by
            # character and make the timer thread fail with TypeError.
            timer = threading.Timer(s, cdquit, args=[fn.__name__])
            timer.start()
            try:
                return fn(*args, **kwargs)
            finally:
                # Always disarm the timer, even if fn raised.
                timer.cancel()
        return inner
    return outer


def deutf8ify(rec):
    '''
    Decode UTF-8 byte strings to unicode.

    For a ``mapreducelib.SubkeyedRecord`` a new ``Record`` is returned
    whose key, subkey and value are unicode; fields that are already
    unicode are passed through.  A plain byte string is decoded.  Any
    other object is returned unchanged.  Undecodable bytes are replaced
    rather than raising.
    '''
    def _as_text(field):
        # Leave unicode alone, decode everything else.
        if isinstance(field, unicode):
            return field
        return field.decode('utf8', errors='replace')

    if not isinstance(rec, mapreducelib.SubkeyedRecord):
        if isinstance(rec, str):
            return rec.decode('utf8', errors='replace')
        return rec

    # Read all three fields first, then decode them in order.
    raw_fields = (rec.key, rec.subkey, rec.value)
    return Record(*[_as_text(field) for field in raw_fields])


def utf8ify(rec):
    '''
    Encode unicode to UTF-8 byte strings.

    For a ``mapreducelib.SubkeyedRecord`` the key, subkey and value are
    encoded in place (only those that are unicode) and the same record
    object is returned.  A plain unicode string is returned encoded.  Any
    other object is returned unchanged.
    '''
    if isinstance(rec, mapreducelib.SubkeyedRecord):
        for attr in ('key', 'subkey', 'value'):
            current = getattr(rec, attr)
            if isinstance(current, unicode):
                setattr(rec, attr, current.encode('utf8'))
        return rec
    if isinstance(rec, unicode):
        return rec.encode('utf8')
    return rec


def tryint(string):
    '''
    Convert ``string`` to int, returning -1 when it cannot be converted.

    Only conversion failures are handled; KeyboardInterrupt and SystemExit
    are no longer swallowed as they were by a bare ``except``.
    '''
    try:
        return int(string)
    except (TypeError, ValueError, OverflowError):
        return -1


class FirstMap(object):
    '''
    Callable map step: for each input record whose value has
    ``type=REQUEST`` and contains ``default_search_wizard``, yield a
    record with the same key/subkey and ``<query>\\t<dom-region>`` as value.
    '''

    def parseparams(self, value):
        '''
        Parse a tab-separated ``name=value`` string.

        Fields without ``=`` are stored with the value ``'SINGLE'``; for
        fields with several ``=`` everything after the first one is the
        value.  The result is a defaultdict that yields ``''`` for
        missing names.
        '''
        parsed = {}
        for field in value.split('\t'):
            name, separator, remainder = field.partition('=')
            if separator:
                parsed[name] = remainder
            else:
                parsed[field] = 'SINGLE'
        return defaultdict(lambda: '', parsed)

    def __call__(self, rec):
        '''Generator: yield at most one UTF-8 encoded record per input.'''
        # NOTE(review): this local import is not used inside this method;
        # kept as in the original -- confirm whether it is still needed.
        from collections import defaultdict
        record = deutf8ify(rec)
        params = self.parseparams(record.value)
        is_request = params['type'] == 'REQUEST'
        if not is_request or 'default_search_wizard' not in record.value:
            return
        payload = params['query'] + '\t' + params['dom-region']
        yield utf8ify(Record(record.key, record.subkey, payload))

# @exit_after(1500)


def main():
    '''
    Process every fast-log table newer than the stored timestamp.

    Steps, in order:
      * parse ``--debug`` / ``--config`` and set up console + file logging;
      * load ``basic.toml`` from the current directory, then change to the
        script directory and merge ``distribution.toml`` and either
        ``<script>.toml`` or the file given by ``--config``;
      * list ``fast_logs/user_sessions/*`` tables whose name ends in ``0``
        and whose last path component is an integer greater than the value
        stored in ``fastlogs_shows_last_timestamp`` (0 if unreadable);
      * for each such table run ``FirstMap`` into a temporary table,
        count records by the last tab-separated field of the value and
        push each count via ``push_to_razladki``; then store the table
        timestamp in ``fastlogs_shows_last_timestamp``.

    Exits with status 0 when there are no new tables.
    '''
    global __file__                         # to fix stupid
    __file__ = os.path.abspath(__file__)    # __file__ handling
    _file_ = os.path.basename(__file__)     # in python 2
    script_dir = os.path.dirname(__file__)
    script_name = _file_[:-3]               # drop the 3-char '.py' suffix

    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--config', default=None)
    args = parser.parse_args()
    # Seconds since 1970-01-01 (naive local time); used in the log file name.
    start = int((dtdt.now() - dtdt(1970, 1, 1)).total_seconds())

    logger = logging.getLogger(script_name)
    formatter = logging.Formatter('%(asctime)s | %(message)s')
    ch = logging.StreamHandler()
    logger.setLevel(logging.DEBUG)
    # Console shows everything with --debug, otherwise only CRITICAL.
    if args.debug:
        ch.setLevel(logging.DEBUG)
    else:
        ch.setLevel(logging.CRITICAL)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    fh = logging.FileHandler('{}/logs/{}-{}.log'.format(
        script_dir, script_name, start),
        encoding='utf8')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # load config
    # basic.toml is read relative to the caller's working directory,
    # everything after the chdir relative to the script directory.
    with open('basic.toml', 'r') as f:
        config = toml.loads(f.read())
    os.chdir(script_dir)
    with open('distribution.toml', 'r') as f:
        config.update(toml.loads(f.read()))
    if args.config is None:
        config_path = script_name + '.toml'
    else:
        config_path = args.config
    with open(config_path) as f:
        config.update(toml.loads(f.read()))

    from pecheny.mrdef import defaults
    from pecheny.moncommons import push_to_razladki

    defaults()
    MapReduce.useDefaults(server=config['mr_server'])
    alltables = MapReduce.getTablesInfo('fast_logs/user_sessions/*')
    alltables = [x.name for x in alltables if x.name.endswith('0')]

    # A missing or malformed checkpoint file means "start from zero".
    try:
        with open('fastlogs_shows_last_timestamp') as f:
            lastts = int(f.read())
    except (IOError, OSError, ValueError):
        lastts = 0

    srctables = sorted([x for x in alltables
                        if tryint(x.split('/')[-1]) > lastts],
                       key=lambda x: int(x.split('/')[-1]))
    if not srctables:
        logger.info("No new data. Latest counted ts is {}"
                    .format(lastts))
        sys.exit(0)

    for srctable in srctables:
        logger.info('Source table is {}'.format(srctable))
        ts = int(srctable.split('/')[-1])
        dsttable = 'tmp/pers/set_shows_fastlogs_{}'.format(ts)
        first_map = FirstMap()
        # pdb.set_trace()
        # Retry the map step until it succeeds.  Only Exception is caught
        # so that KeyboardInterrupt / SystemExit can still stop the loop.
        while True:
            try:
                MapReduce.runMap(first_map, srcTable=srctable,
                                 dstTable=dsttable)
                break
            except Exception:
                logger.error(traceback.format_exc())

        # Probe with a single-record sample before reading the full table.
        if len(list(MapReduce.getSample(dsttable, count=1))) == 1:
            regions = Counter()
            for rec in MapReduce.getSample(dsttable, count=None):
                rec1 = deutf8ify(rec)
                # logger.info('{}\t{}\t{}'
                #             .format(rec1.key, rec1.subkey, rec1.value))
                # Region is the last tab-separated field of the value.
                regions[rec1.value.split('\t')[-1]] += 1

            for region in regions:
                logger.info('Pushing to razladki: {}, {}'
                            .format(region, regions[region]))
                push_to_razladki(config,
                                 '{}_wiz_chrome_set_shows_fastlogs'.format(
                                     region),
                                 regions[region],
                                 ts=ts)
            # Record progress only after the table's counts were pushed.
            with open('fastlogs_shows_last_timestamp', 'w') as f:
                f.write('{}'.format(ts))
        else:
            logger.critical('No shows at {} or something went wrong.'
                            .format(ts))


# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
