#!/usr/bin/env python
#! -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import sys
import os
import copy
import subprocess
import datetime
import codecs
import argparse
import mapreducelib
import datetime
import logging
import requests
import gzip
import pdb
import re
import traceback
import math
from mapreducelib import MapReduce, Record
from collections import defaultdict, Counter


def deutf8ify(rec):
    """Return ``rec`` with byte strings decoded from UTF-8 to unicode.

    A ``mapreducelib.SubkeyedRecord`` yields a new ``Record`` built from the
    decoded key, subkey and value; a plain ``str`` is decoded directly; any
    other object is returned untouched. Undecodable bytes are replaced
    rather than raising.
    """
    def as_unicode(field):
        # Fields that are already unicode are passed through as they are.
        if isinstance(field, unicode):
            return field
        return field.decode('utf8', errors='replace')

    if isinstance(rec, mapreducelib.SubkeyedRecord):
        fields = (rec.key, rec.subkey, rec.value)
        return Record(*[as_unicode(field) for field in fields])
    if isinstance(rec, str):
        return rec.decode('utf8', errors='replace')
    return rec


def utf8ify(rec):
    """Encode unicode text in ``rec`` to UTF-8 byte strings.

    A ``mapreducelib.SubkeyedRecord`` is modified in place (key, subkey and
    value are each re-assigned when they are unicode) and the same object is
    returned. A bare unicode string is returned encoded; anything else is
    returned unchanged.
    """
    if isinstance(rec, mapreducelib.SubkeyedRecord):
        for attr in ('key', 'subkey', 'value'):
            current = getattr(rec, attr)
            if isinstance(current, unicode):
                setattr(rec, attr, current.encode('utf8'))
        return rec
    if isinstance(rec, unicode):
        return rec.encode('utf8')
    return rec


def run_reduce(*args, **kwargs):
    """Call ``MapReduce.runReduce`` with friendlier ``src``/``dst`` keywords.

    Keyword arguments:
        src: source table name, or a list of names (required).
        dst: destination table name, or a list of names (required).
        logger: optional logger; when truthy, the operation is logged at
            INFO level. It is never forwarded to ``MapReduce.runReduce``.

    ``src``/``dst`` are translated to ``srcTable``/``dstTable`` (or the
    plural ``srcTables``/``dstTables`` when a list is given). Positional
    arguments and every remaining keyword argument are forwarded unchanged;
    an explicitly passed keyword overrides the translated one.

    Raises:
        KeyError: if ``src`` or ``dst`` is missing.
    """
    # Always remove the logger so a falsy value is not forwarded downstream.
    logger = kwargs.pop('logger', None)
    src = kwargs.pop('src')
    dst = kwargs.pop('dst')
    if logger:
        logger.info('Reducing from {} to {}'.format(src, dst))

    nargs = {}
    nargs['srcTables' if isinstance(src, list) else 'srcTable'] = src
    nargs['dstTables' if isinstance(dst, list) else 'dstTable'] = dst
    # Copy the remaining options in one step; popping from kwargs while
    # iterating over it raises RuntimeError.
    nargs.update(kwargs)
    MapReduce.runReduce(*args, **nargs)


def run_map(*args, **kwargs):
    """Call ``MapReduce.runMap`` with friendlier ``src``/``dst`` keywords.

    Keyword arguments:
        src: source table name, or a list of names (required).
        dst: destination table name, or a list of names (required).
        logger: optional logger; when truthy, the operation is logged at
            INFO level. It is never forwarded to the MapReduce call.

    ``src``/``dst`` are translated to ``srcTable``/``dstTable`` (or the
    plural ``srcTables``/``dstTables`` when a list is given). Positional
    arguments and every remaining keyword argument are forwarded unchanged;
    an explicitly passed keyword overrides the translated one.

    Raises:
        KeyError: if ``src`` or ``dst`` is missing.
    """
    # Always remove the logger so a falsy value is not forwarded downstream.
    logger = kwargs.pop('logger', None)
    src = kwargs.pop('src')
    dst = kwargs.pop('dst')
    if logger:
        logger.info('Mapping from {} to {}'.format(src, dst))

    nargs = {}
    nargs['srcTables' if isinstance(src, list) else 'srcTable'] = src
    nargs['dstTables' if isinstance(dst, list) else 'dstTable'] = dst
    # Copy the remaining options in one step; popping from kwargs while
    # iterating over it raises RuntimeError.
    nargs.update(kwargs)
    # NOTE(review): this previously called MapReduce.runReduce, which looks
    # like a copy-paste slip. Assumes MapReduce.runMap accepts the same
    # keyword interface as runReduce -- confirm against mapreducelib.
    MapReduce.runMap(*args, **nargs)


def tryint(string):
    """Convert ``string`` to ``int``, returning -1 when that is impossible.

    Only conversion failures are absorbed (wrong type, malformed text,
    infinite floats); KeyboardInterrupt and SystemExit propagate normally.
    """
    try:
        return int(string)
    except (TypeError, ValueError, OverflowError):
        return -1


def tabulate(*args):
    """Join the ``format()`` rendering of every argument with tab characters."""
    cells = [format(arg) for arg in args]
    return '\t'.join(cells)


def parseparams(value):
    """Parse a tab-separated ``name=value`` string into a mapping.

    Each tab-delimited chunk is split on its first ``=``; everything after
    it (including further ``=`` signs) becomes the value. A chunk without
    ``=`` is stored under its own text with the value ``'SINGLE'``.

    Returns a ``defaultdict`` that yields ``''`` for unknown names.
    """
    parsed = {}
    for chunk in value.split('\t'):
        name, separator, rest = chunk.partition('=')
        if separator:
            parsed[name] = rest
        else:
            parsed[chunk] = 'SINGLE'
    return defaultdict(lambda: '', parsed)


def parsevars(_vars):
    """Parse a comma-separated ``name=value`` string into a mapping.

    Each chunk is split on its first ``=``. One leading ``-`` is removed
    from the name. Values whose name starts with ``clid`` are collected, in
    order, in the list stored under ``'clids'``; other names map directly to
    their value. A chunk without ``=`` is stored under its own text with the
    value ``'SINGLE'``.

    Returns a ``defaultdict`` that yields ``''`` for unknown names.
    """
    parsed = {'clids': []}
    for chunk in _vars.split(','):
        name, separator, rest = chunk.partition('=')
        if not separator:
            parsed[chunk] = 'SINGLE'
            continue
        if name.startswith('-'):
            name = name[1:]
        if name.startswith('clid'):
            parsed['clids'].append(rest)
        else:
            parsed[name] = rest
    return defaultdict(lambda: '', parsed)


def make_test_record(s, subkey=True):
    """Build a ``Record`` from a tab-separated line.

    The first column becomes the key, the second the subkey and all
    remaining columns (re-joined with tabs) the value. When ``subkey`` is
    falsy nothing is built and ``None`` is returned.
    """
    if not subkey:
        return None
    columns = s.split('\t')
    key = columns[0]
    sub = columns[1]
    value = '\t'.join(columns[2:])
    return Record(key, sub, value)


def trylist(someshit):
    """Materialise ``someshit`` as a list, or return ``[]`` if that fails.

    Any ordinary exception raised while iterating (including ``None`` or
    another non-iterable being passed in) results in an empty list.
    KeyboardInterrupt and SystemExit are not swallowed.
    """
    try:
        return list(someshit)
    except Exception:
        return []


def safediv(x, y):
    """Return ``x / y``, or ``0`` when the division raises ZeroDivisionError."""
    try:
        quotient = x / y
    except ZeroDivisionError:
        return 0
    return quotient


def mr_grep(src, dst, pattern, field='v', user='tmp', server='sakura'):
    """Run the external ``mr_grep-dev`` binary from ``src`` into ``dst``.

    Args:
        src: source table passed to the tool.
        dst: destination table, passed after ``-d``.
        pattern: pattern passed as the argument of the field flag.
        field: name of the flag that precedes ``pattern`` (``-v`` by default).
        user: value exported to the child as ``MR_USER``.
        server: server name passed after ``-s``.

    The tool's exit status is not checked or returned.
    """
    # os.environ.copy() gives a plain dict. A deepcopy of os.environ is
    # still an environ wrapper, so assigning to it would call putenv() and
    # leak MR_USER into this process and every later child.
    env = os.environ.copy()
    env['MR_USER'] = user
    command = [
        '/Berkanavt/mapreduce/bin/mr_grep-dev',
        '-s', server,
        '-{}'.format(field), pattern,
        src,
        '-d', dst,
    ]
    subprocess.call(command, env=env)


def mr_read(src, suffix='.tsv', user='tmp', head=None, server='sakura'):
    """Dump table ``src`` into a local file via the ``mapreduce-dev`` binary.

    The output file is created in the current directory and named after the
    last ``/``-separated component of ``src`` plus ``suffix``.

    Args:
        src: table to read.
        suffix: extension appended to the output file name.
        user: value exported to the child as ``MR_USER``.
        head: when truthy, only the first ``head`` lines are kept (the
            reader's output is piped through ``head -n``).
        server: server name passed after ``-server``.

    Exit statuses of the child processes are not checked or returned.
    """
    # Plain dict copy: assigning into a deepcopy of os.environ would call
    # putenv() and modify the real process environment.
    env = os.environ.copy()
    env['MR_USER'] = user
    read_args = [
        '/Berkanavt/mapreduce/bin/mapreduce-dev',
        '-server',
        server,
        '-subkey',
        '-read',
        src
    ]
    outname = src.split('/')[-1] + suffix
    # The context manager guarantees the output handle is flushed and closed.
    with open(outname, 'wb') as out:
        if not head:
            subprocess.call(read_args, env=env, stdout=out)
            return
        read_ps = subprocess.Popen(read_args, env=env,
                                   stdout=subprocess.PIPE)
        try:
            # The pipe end, not the Popen object itself, is what `head`
            # must read from.
            subprocess.call(['head', '-n', str(head)],
                            stdin=read_ps.stdout, stdout=out)
        finally:
            # `head` is done: close our end of the pipe, stop the reader if
            # it is still producing output, and reap it to avoid a zombie.
            read_ps.stdout.close()
            if read_ps.poll() is None:
                read_ps.terminate()
            read_ps.wait()


def dttots(str_):
    """Convert a compact local-time date string to a Unix timestamp.

    Args:
        str_: ``YYYYmmdd`` (8 characters or fewer) or ``YYYYmmddHHMM``.

    Returns:
        Integer seconds since the epoch, interpreting the value as local
        time.

    Raises:
        ValueError: if ``str_`` does not match the expected format.
    """
    import time  # local import: `time` is not imported at module level

    fmt = '%Y%m%d%H%M' if len(str_) > 8 else '%Y%m%d'
    parsed = datetime.datetime.strptime(str_, fmt)
    # time.mktime is the documented, portable way to get local-time epoch
    # seconds; strftime('%s') only works where the C library supports it.
    return int(time.mktime(parsed.timetuple()))


def wrap_yt_output(dct, key):
    """Turn a flat dict into a UTF-8 encoded ``Record``.

    The entry stored under ``key`` is removed from ``dct`` (the caller's
    dict is modified) and becomes the record key. The remaining entries are
    rendered as ``name=value`` pairs, ordered by name and joined with tabs,
    to form the record value. The subkey is always empty.
    """
    rec_key = format(dct.pop(key))
    pairs = []
    for name in sorted(dct):
        pairs.append('='.join([format(name), format(dct[name])]))
    record = Record(rec_key, '', '\t'.join(pairs))
    return utf8ify(record)


def reduce_wrapper(func, records):
    """Emulate a reduce step locally over an iterable of records.

    Consecutive records sharing the same ``key`` are grouped, and
    ``func(key, group)`` is called once per group. Whatever each call
    yields is collected through ``trylist`` (so a reducer that returns
    nothing contributes nothing).

    Args:
        func: reducer called as ``func(key, list_of_records)``.
        records: iterable of objects exposing a ``key`` attribute, already
            ordered so that equal keys are adjacent.

    Returns:
        A flat list of everything the reducer produced. ``func`` is not
        called at all when ``records`` is empty.
    """
    # A unique sentinel marks "no group started yet", so falsy keys such as
    # '' are grouped like any other key.
    unset = object()
    prevkey = unset
    result = []
    buff = []
    for rec in records:
        if prevkey is not unset and rec.key != prevkey:
            result.extend(trylist(func(prevkey, buff)))
            buff = []
        buff.append(rec)
        prevkey = rec.key
    # Flush the trailing group, if there was any input.
    if buff:
        result.extend(trylist(func(prevkey, buff)))
    return result


def map_wrapper(func, records):
    """Emulate a map step locally.

    Calls ``func`` on every record in order and returns one flat list with
    all the items each call produced.
    """
    return [produced for rec in records for produced in func(rec)]


def reduce_wrapper_file(func, filename, outfilename):
    """Emulate a reduce step locally, reading from and writing to files.

    Every line of ``filename`` (opened through ``open_wrapper``) is turned
    into a record with ``make_test_record``. Consecutive records sharing a
    key are grouped and passed to ``func(key, group)``. Each record the
    reducer produces is written to ``outfilename`` as one tab-separated
    ``key, subkey, value`` line (UTF-8) and also collected in memory.

    Args:
        func: reducer called as ``func(key, list_of_records)``; must return
            an iterable of objects with ``key``, ``subkey`` and ``value``.
        filename: input path.
        outfilename: output path, overwritten.

    Returns:
        The list of all records produced by the reducer. ``func`` is not
        called at all when the input has no lines.

    Parsing and writing errors propagate to the caller instead of dropping
    into an interactive debugger.
    """
    # A unique sentinel marks "no group started yet", so falsy keys such as
    # '' are grouped like any other key.
    unset = object()
    prevkey = unset
    result = []
    buff = []

    def flush(key, group, sink):
        # Run the reducer on one finished group and persist its output.
        for produced in func(key, group):
            sink.write(
                tabulate(produced.key, produced.subkey, produced.value))
            sink.write('\n')
            result.append(produced)

    with codecs.open(outfilename, 'w', 'utf8') as fw:
        with open_wrapper(filename) as f:
            for line in f:
                # NOTE(review): lines are passed on as read, so the trailing
                # newline stays inside the record value -- confirm that the
                # reducers expect this.
                rec = make_test_record(line)
                if prevkey is not unset and rec.key != prevkey:
                    flush(prevkey, buff, fw)
                    buff = []
                buff.append(rec)
                prevkey = rec.key
        # Flush the trailing group, if there was any input.
        if buff:
            flush(prevkey, buff, fw)
    return result


def open_wrapper(filename):
    """Open a text file for reading as UTF-8, transparently handling gzip.

    Files whose name ends in ``.gz`` or ``.gzip`` are decompressed on the
    fly. Both kinds of file yield decoded text with undecodable bytes
    replaced, so callers can treat the result uniformly.

    Returns:
        A readable, iterable stream usable as a context manager.
    """
    if filename.endswith(('.gz', '.gzip')):
        # gzip.open() yields raw bytes; wrap it in a UTF-8 reader so the
        # gzip branch returns the same kind of text as the plain branch.
        return codecs.getreader('utf8')(gzip.open(filename),
                                        errors='replace')
    return codecs.open(filename, 'r', 'utf8', errors='replace')


def records_to_str(records):
    """Render records as text: one tab-separated ``key, subkey, value`` row
    per record, rows joined with newlines (no trailing newline)."""
    rows = []
    for rec in records:
        rows.append('\t'.join((rec.key, rec.subkey, rec.value)))
    return '\n'.join(rows)


def timeround(moment, seconds=1800):
    """Round a moment down to a multiple of ``seconds`` since the epoch.

    Args:
        moment: a ``datetime.datetime`` (interpreted as local time) or
            anything ``int()`` accepts as a Unix timestamp.
        seconds: bucket size in seconds (30 minutes by default).

    Returns:
        A local-time ``datetime.datetime`` at the start of the bucket.
    """
    import time  # local import: `time` is not imported at module level

    if isinstance(moment, datetime.datetime):
        # time.mktime is the documented, portable way to get local-time
        # epoch seconds; strftime('%s') depends on the C library.
        moment = time.mktime(moment.timetuple())
    moment = int(moment)
    return datetime.datetime.fromtimestamp(moment // seconds * seconds)


def make_logger(_file, debug=False):
    """Create a logger for a script, logging to console and to a file.

    The logger is named after the script's base name (without extension).
    The process working directory is changed to the script's directory, a
    ``logs`` sub-directory is created there if missing, and a new
    timestamped ``<name>_<YYYYmmdd_HHMMSS>.log`` file receives every message
    at DEBUG level and above. The console handler shows DEBUG and above when
    ``debug`` is true, otherwise only CRITICAL.

    Args:
        _file: path of the calling script (typically ``__file__``); may be
            relative or a bare file name.
        debug: whether the console handler should be verbose.

    Returns:
        The configured ``logging.Logger``. Each call adds a fresh pair of
        handlers to the logger.
    """
    # abspath() makes a bare file name resolve to the current directory;
    # with a plain dirname() it would be '', breaking chdir and pointing
    # the log directory at '/logs'.
    directory = os.path.dirname(os.path.abspath(_file))
    basename = os.path.splitext(os.path.basename(_file))[0]
    logger = logging.getLogger(basename)
    os.chdir(directory)
    log_dir = os.path.join(directory, 'logs')
    if not os.path.isdir(log_dir):
        os.mkdir(log_dir)
    formatter = logging.Formatter('%(asctime)s | %(message)s')
    logger.setLevel(logging.DEBUG)

    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG if debug else logging.CRITICAL)
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    log_path = os.path.join(log_dir, '{}_{}.log'.format(basename, stamp))
    fh = logging.FileHandler(log_path, encoding='utf8')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    return logger


def parse_to_tabs(filename, enc='utf8'):
    """Read a tab-separated text file into a list of column lists.

    The file is decoded with ``enc`` and split on newline characters; empty
    lines are dropped and every remaining line is split on tabs.
    """
    with codecs.open(filename, 'r', enc) as source:
        content = source.read()
    rows = []
    for line in content.split('\n'):
        if line:
            rows.append(line.split('\t'))
    return rows


def push_to_razladki(data,
                     project='SearchPortalDistribution',
                     maxfailures=5, logger=None, debug=False,
                     override=False):
    """POST data points to the razladki service, retrying on failure.

    Args:
        data: a dict or a list of dicts. Any dict lacking a ``'ts'`` entry
            gets the current Unix time added in place.
        project: project name appended to the service URL.
        maxfailures: maximum number of failed attempts (exceptions or
            unexpected HTTP statuses) before giving up.
        logger: optional logger that receives a warning for every failed
            attempt.
        debug: accepted for backward compatibility; not used.
        override: forwarded to the service in the JSON payload.

    Returns:
        The last response received, or ``None`` if no request ever
        completed. HTTP 200 and 409 both end the retry loop.
    """
    import time  # local import: `time` is not imported at module level

    razladki = ('http://launcher.razladki.yandex-team.ru/'
                'save_new_data_json/') + project
    now = int(time.time())
    if not isinstance(data, list):
        data = [data]
    for x in data:
        if 'ts' not in x:
            x['ts'] = now
    req = None
    failures = 0
    while failures < maxfailures:
        try:
            req = requests.post(razladki, json={'data': data,
                                                'override': override})
        except Exception:
            if logger is not None:
                logger.warning(traceback.format_exc())
            failures += 1
            continue
        if req.status_code in (200, 409):
            break
        # An unexpected status must count as a failure too, otherwise a
        # persistently failing server makes this loop spin forever.
        failures += 1
        if logger is not None:
            logger.warning(
                'razladki responded with HTTP {}'.format(req.status_code))
    return req


def table_exists(table):
    """Return True when ``MapReduce.getTableInfo`` reports at least one
    record for ``table``."""
    info = MapReduce.getTableInfo(table)
    return info.records > 0


def date_from_table(table):
    """Extract the first 8-digit ``YYYYmmdd`` date from a table name.

    Returns:
        A ``datetime.date``, or ``None`` when ``table`` is not a string,
        contains no run of 8 digits, or the digits are not a valid date.
    """
    try:
        match = re.search(r'[0-9]{8}', table)
    except TypeError:
        # Not a string (for example None): nothing to extract.
        return None
    if match is None:
        return None
    try:
        return date_from_string(match.group(0))
    except ValueError:
        # Eight digits that do not form a real calendar date.
        return None


def date_from_string(string):
    """Parse ``YYYYmmdd`` or ``YYYY-mm-dd`` text into a ``datetime.date``.

    Dashes are removed first; the first eight remaining characters are read
    as year, month and day. Invalid input raises ``ValueError``.
    """
    digits = string.replace('-', '')
    year = int(digits[:4])
    month = int(digits[4:6])
    day = int(digits[6:8])
    return datetime.date(year, month, day)


def choose_interval(num, intervals):
    """Return the first interval whose inclusive bounds contain ``num``.

    ``intervals`` is any iterable of ``(low, high)`` pairs. All intervals
    are examined; ``IndexError`` is raised when none of them matches.
    """
    matching = []
    for candidate in intervals:
        if not num >= candidate[0]:
            continue
        if num <= candidate[1]:
            matching.append(candidate)
    return matching[0]


def counter_quantile(counter, quantile):
    """Compute a quantile of the multiset described by ``counter``.

    The counter is treated as a sorted sequence in which every key is
    repeated ``counter[key]`` times. The quantile position is
    ``(n - 1) * quantile``; when it falls between two elements their mean
    is returned.

    Args:
        counter: mapping of comparable values to non-negative counts. Keys
            whose count is not positive are ignored.
        quantile: fraction in ``[0, 1]``.

    Returns:
        ``0`` for an empty counter; the element itself when the position is
        exact (including a counter holding a single observation); otherwise
        the float mean of the two neighbouring elements.

    Raises:
        IndexError: if ``quantile`` points outside the data.
    """
    keys = sorted(k for k in counter if counter[k] > 0)
    total = sum(counter[k] for k in keys)
    if total <= 0:
        return 0
    target = (total - 1) * quantile
    lower = int(math.floor(target))
    upper = int(math.ceil(target))
    if lower < 0 or upper > total - 1:
        raise IndexError('quantile {} is outside [0, 1]'.format(quantile))

    def value_at(position):
        # Walk the cumulative counts until they cover the requested index.
        covered = 0
        for k in keys:
            covered += counter[k]
            if position < covered:
                return k

    if lower == upper:
        return value_at(lower)
    return (value_at(lower) + value_at(upper)) / 2.0


def load_lines(filename):
    """Return the non-empty lines of a UTF-8 text file, split on newlines."""
    with codecs.open(filename, 'r', 'utf8') as source:
        content = source.read()
    kept = []
    for line in content.split('\n'):
        if line:
            kept.append(line)
    return kept


def process_stat_data(data, D, complextable=False):
    """Copy integer measures from report rows into the nested mapping ``D``.

    For every row, ``name`` is the element and product override values
    joined with ``_``. Only fields of the form ``<prefix>^<measure>`` are
    considered, and only when their value converts to ``int``:

    * ``complextable=True``: the date comes from the row's ``fielddate``
      entry (its first whitespace-separated token, ``YYYY-mm-dd``); the
      field prefix is ignored.
    * ``complextable=False``: the date comes from the field prefix, which
      must look like ``dd.mm.YYYY``; other fields are skipped.

    The result is stored as ``D[name][date][measure] = value``.

    Args:
        data: iterable of dict rows.
        D: destination mapping, updated in place. It must create missing
            ``D[name]`` and ``D[name][date]`` levels on access (presumably a
            nested defaultdict -- verify against callers).
        complextable: selects where the date is read from.
    """
    for item in data:
        name = (item['element_override_by_dictionary'] + '_' +
                item['product_override_by_dictionary'])
        row_date = None
        if complextable:
            row_date = datetime.datetime.strptime(
                item['fielddate'].split()[0], '%Y-%m-%d')
        for field in item:
            parts = field.split('^')
            if len(parts) != 2:
                continue
            prefix, measure = parts
            if complextable:
                date = row_date
            else:
                if len(prefix.split('.')) != 3:
                    continue
                date = datetime.datetime.strptime(prefix, '%d.%m.%Y')
            # Only the conversion is best-effort: non-integer cells are
            # skipped, while errors storing into D are no longer hidden.
            try:
                value = int(item[field])
            except (TypeError, ValueError, OverflowError):
                continue
            D[name][date][measure] = value


def main():
    """Placeholder entry point; intentionally does nothing."""


if __name__ == "__main__":
    main()
