#!/usr/bin/env python
#! -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import sys
import os
import copy
import subprocess
import datetime
import codecs
import argparse
import datetime
import json
import time
import logging
try:
    import requests
except:
    pass
import gzip
import pdb
import re
import traceback
import math
import urllib
import getpass
import urlparse
from collections import defaultdict, Counter
try:
   from nile.api.v1 import statface as ns
except:
   pass


# Operation spec preset: 10000 for each of the job-count settings plus a
# per-user pool named "search-research_<login>", where <login> comes from
# getpass.getuser() at import time.
SPEC10k = {
    "job_count": 10000,
    "map_job_count": 10000,
    "reduce_job_count": 10000,
    "pool": "search-research_{}".format(getpass.getuser()),
}

# Path to a root CA certificate file (by its name, the Yandex internal root
# CA). NOTE(review): not referenced by the code visible here -- presumably
# intended for TLS verification; confirm against callers.
CERT = '/usr/local/share/ca-certificates/YandexInternalRootCA.crt'


class Moscow(datetime.tzinfo):
    """Fixed-offset ``tzinfo``: always UTC+3 with a zero DST adjustment."""

    _NAME = "Europe/Moscow"
    _UTC_OFFSET = datetime.timedelta(hours=3)
    _DST_OFFSET = datetime.timedelta(0)

    def __repr__(self):
        return "Europe/Moscow (UTC+3)"

    def tzname(self, dt):
        """Return the zone name regardless of ``dt``."""
        return self._NAME

    def utcoffset(self, dt):
        """Return the constant three-hour offset from UTC."""
        return self._UTC_OFFSET

    def dst(self, dt):
        """Return a zero DST adjustment."""
        return self._DST_OFFSET


def tryint(string):
    """Convert ``string`` to ``int``, returning -1 when conversion fails.

    Only the errors ``int()`` raises for unsuitable input are handled
    (wrong type, malformed text, infinite float); anything else, including
    ``KeyboardInterrupt``, propagates to the caller.
    """
    try:
        return int(string)
    except (TypeError, ValueError, OverflowError):
        return -1


def apply_replacements(s, pairs):
    """Apply a series of ``str.replace`` substitutions to ``s`` in order.

    ``pairs`` is either a dict mapping old -> new, or an iterable of
    indexable ``(old, new)`` items. Each replacement sees the output of the
    previous one. Returns the resulting string.
    """
    substitutions = pairs.items() if isinstance(pairs, dict) else pairs
    for substitution in substitutions:
        old, new = substitution[0], substitution[1]
        s = s.replace(old, new)
    return s


def optionalize_schema(qt, schema):
    """Return a copy of ``schema`` with every value wrapped in ``qt.Optional``.

    ``qt`` is any object exposing a subscriptable ``Optional`` attribute;
    ``schema`` maps field names to types.
    """
    return dict(
        (field, qt.Optional[kind]) for field, kind in schema.items()
    )


# Host prefixes that get_host() strips from the front of a network location.
bad_prefices = ('www.', 'm.')


def get_host(url, strip_tld=False):
    """Return the network location of ``url`` without leading www./m. parts.

    Prefixes from ``bad_prefices`` are removed repeatedly until none is left
    at the start. With ``strip_tld`` the last dot-separated label is dropped
    as well (a host without dots becomes an empty string).
    """
    host = urlparse.urlparse(url).netloc
    stripped_something = True
    while stripped_something:
        stripped_something = False
        for prefix in bad_prefices:
            if host.startswith(prefix):
                host = host[len(prefix):]
                stripped_something = True
    if not strip_tld:
        return host
    # Everything before the last dot; empty when there is no dot at all.
    return host.rpartition('.')[0]


def yql_run(query, token, maxtries=30, title=None):
    """Submit an SQL query to the YQL HTTP API and wait for it to finish.

    The operation is created with a POST request and then polled; after each
    poll that still reports PENDING or RUNNING the function sleeps 60 seconds,
    for at most ``maxtries`` polls. Progress is printed; if the last seen
    status is not COMPLETED a message is written to stderr. Returns None.

    NOTE(review): running out of polls while the operation is still RUNNING
    is reported with the same "failed" message.
    """
    in_progress = {'PENDING', 'RUNNING'}
    yql_headers = {
        'Content-Type': 'application/json',
        'Authorization': 'OAuth {}'.format(token)
    }
    payload = {
        'content': query,
        'action': 'RUN',
        'type': 'SQL'
    }
    if title:
        payload['title'] = '{} | YQL'.format(title)

    response = requests.post(
        'https://yql.yandex.net/api/v2/operations',
        json=payload,
        headers=yql_headers
    )
    id_ = response.json()['id']
    print('running query {}'.format(id_))
    status = response.json()['status']

    polls = 0
    while status in in_progress and polls < maxtries:
        response = requests.get(
            'https://yql.yandex.net/api/v2/operations/{}'.format(id_),
            headers=yql_headers
        )
        status = response.json()['status']
        print('operation status is {}'.format(status))
        if status in in_progress:
            # Still running: wait before the next poll. A finished status
            # falls through and ends the loop via its condition.
            time.sleep(60)
            polls += 1

    if status != 'COMPLETED':
        sys.stderr.write(
            'operation {} failed: {}'.format(id_, response.content)
        )


def tabulate(*args):
    """Join the ``format()``-ed positional arguments with tab characters."""
    cells = [format(arg) for arg in args]
    return '\t'.join(cells)


# def yt_wrapper_nile_compat(func):
#    def func_wrapper(*args, **kwargs):
#        for rec in func(*args, **kwargs):
#            return Record(**rec)
#    return func_wrapper

def ntabulate(*args):
    """Return ``tabulate(*args)`` terminated with a newline."""
    line = tabulate(*args)
    return line + '\n'


def sane_dir(obj):
    """Return ``dir(obj)`` without names that start with an underscore."""
    public = []
    for attr in dir(obj):
        if attr.startswith('_'):
            continue
        public.append(attr)
    return public


def get_stat_headers():
    """Build the Stat robot auth headers from the environment.

    Reads ``STAT_LOGIN`` and ``STAT_TOKEN`` from ``os.environ``; a missing
    variable raises ``KeyError``.
    """
    env = os.environ
    login = env['STAT_LOGIN']
    password = env['STAT_TOKEN']
    return {'StatRobotUser': login, 'StatRobotPassword': password}


def get_dates_from_stat(
    headers, report, dimensions=None, add_cgi=None
):
    """Fetch the dates available for a daily-scale Stat report.

    Requests the ``available_dates`` API for ``report`` using ``headers``
    (TLS verification is disabled) and converts every returned entry with
    ``yt_get_date_from_table``. ``dimensions`` and ``add_cgi`` are accepted
    but not used by the active code path.
    """
    endpoint = (
        'https://upload.stat.yandex-team.ru/_api/report/available_dates'
        '?name={}&scale=d'
    )
    response = requests.get(
        endpoint.format(report), headers=headers, verify=False
    )
    available = response.json()['available_dates']
    return [yt_get_date_from_table(entry) for entry in available]
    # Disabled alternative implementation, kept as a comment:
    # dim_totals = '&'.join(
    #     '{}=_total_'.format(x) for x in dimensions
    # )
    # url = 'https://upload.stat.yandex-team.ru/{}?{}{}&_type=json'.format(
    #     report, dim_totals, add_cgi
    # )
    # req = requests.get(
    #     url, headers=headers, verify=False
    # )
    # print('parsing response')

    # try:
    #     values = sorted(
    #         req.json()['values'], key=lambda x: x['fielddate'], reverse=True
    #     )
    #     last_date = yt_get_date_from_table(
    #         values[0]['fielddate'].split(' ')[0]
    #     )
    # except:
    #     raise Exception(repr(req.json()))
    # print('last date: {}'.format(last_date))
    # return last_date


def parseparams(value, pairsep='\t', kvsep='='):
    """Parse ``key<kvsep>value`` items separated by ``pairsep``.

    The value is everything after the first ``kvsep``. Items without a
    ``kvsep`` are stored under their own text with the value ``'SINGLE'``.
    Returns a ``defaultdict`` that yields ``''`` for unknown keys.
    """
    parsed = {}
    for item in value.split(pairsep):
        key, found, rest = item.partition(kvsep)
        if found:
            parsed[key] = rest
        else:
            parsed[item] = 'SINGLE'
    return defaultdict(lambda: '', parsed)


def parse_cgi(url, prepend=None, extractfirst=True):
    """Parse the query string of ``url`` into a dict.

    ``prepend`` (if truthy) is put in front of ``url`` before parsing.
    With ``extractfirst`` each parameter maps to its first value, otherwise
    to the full list of values. A URL that ``urlparse`` rejects with
    ``ValueError`` yields an empty dict.

    NOTE(review): ``b'{}{}'.format`` exists only on Python 2 byte strings.
    """
    if prepend:
        url = b'{}{}'.format(prepend, url)
    try:
        parts = urlparse.urlparse(url)
    except ValueError:
        return {}
    params = {}
    for name, values in urlparse.parse_qs(parts.query).items():
        params[name] = values[0] if extractfirst else values
    return params


def parsevars(_vars, sep=','):
    """Parse a ``sep``-separated list of ``key=value`` items.

    One leading ``-`` is dropped from each key. Values whose key starts with
    ``clid`` are collected, in order, into the ``'clids'`` list; other keys
    map to their value (everything after the first ``=``). Items without
    ``=`` are stored as ``item -> 'SINGLE'``. Returns a ``defaultdict`` that
    yields ``''`` for unknown keys.
    """
    parsed = {'clids': []}
    for item in _vars.split(sep):
        key, found, value = item.partition('=')
        if not found:
            parsed[item] = 'SINGLE'
            continue
        if key.startswith('-'):
            key = key[1:]
        if key.startswith('clid'):
            parsed['clids'].append(value)
        else:
            parsed[key] = value
    return defaultdict(lambda: '', parsed)


def good_dump(x, fn):
    """Write ``x`` to file ``fn`` as pretty-printed UTF-8 JSON.

    Output uses a two-space indent, sorted keys and unescaped non-ASCII
    characters. The file is closed (and therefore flushed) before returning,
    even when serialization fails.
    """
    with codecs.open(fn, 'w', 'utf8') as out:
        json.dump(x, out, indent=2, ensure_ascii=False, sort_keys=True)


def make_tskv(dct):
    """Serialize ``dct`` as tab-separated ``key=value`` pairs.

    Keys and values are passed through ``tskv_prepare`` first.
    """
    fields = []
    for key, value in dct.items():
        fields.append(
            '{}={}'.format(tskv_prepare(key), tskv_prepare(value))
        )
    return '\t'.join(fields)


def tskv_prepare(s):
    """Convert ``s`` to unicode and double every backslash in it."""
    text = ensure_unicode(s)
    return text.replace('\\', '\\\\')


def rtmr_ts_to_ts(rts):
    """Convert an inverted timestamp to a local naive ``datetime``.

    ``rts`` (int or numeric string) is subtracted from 2**63 - 1 and the
    difference is interpreted as seconds since the epoch.
    """
    max_int64 = 9223372036854775807
    seconds = max_int64 - int(rts)
    return datetime.datetime.fromtimestamp(seconds)


def ensure_unicode(s, enc='utf8', nobksl=False):
    """Return ``s`` as a unicode string.

    Unicode input is returned unchanged; anything else is decoded with
    ``enc``, replacing undecodable bytes. ``nobksl`` is accepted but unused.
    """
    if isinstance(s, unicode):
        return s
    return s.decode(enc, errors='replace')


def nda_shorten(url):
    """Request a short link for ``url`` from nda.ya.ru.

    Unicode input is encoded to UTF-8 before being percent-quoted into the
    request. Returns the response body decoded as UTF-8.
    """
    raw_url = url.encode('utf8') if isinstance(url, unicode) else url
    request_url = 'http://nda.ya.ru/--?url={}'.format(urllib.quote(raw_url))
    response = requests.get(request_url)
    return response.content.decode('utf8')


def parse_cookies(text):
    """Parse a ``'; '``-separated cookie string into a name -> value dict.

    The value is everything after the first ``=``; an item without ``=``
    maps to an empty string. Later duplicates overwrite earlier ones.
    """
    cookies = {}
    for item in text.split('; '):
        name, _, value = item.partition('=')
        cookies[name] = value
    return cookies


def parse_yp(yp):
    """Parse a ``#``-separated list of ``expires.name.value`` items.

    Returns ``{name: {'expires': int, 'value': str}}``. Only the third
    dot-separated field is taken as the value; items with fewer than three
    fields or a non-integer first field raise.
    """
    parsed = {}
    for item in yp.split('#'):
        fields = item.split('.')
        entry = {
            'expires': int(fields[0]),
            'value': fields[2]
        }
        parsed[fields[1]] = entry
    return parsed


def trylist(someshit):
    """Materialize an iterable into a list, returning ``[]`` on failure.

    Any ``Exception`` raised while building the list (non-iterable argument,
    error inside a generator) yields an empty list. ``KeyboardInterrupt`` and
    ``SystemExit`` are not swallowed.
    """
    try:
        return list(someshit)
    except Exception:
        return []


def safediv(x, y):
    """Return ``x / y``, or 0 when the division raises ZeroDivisionError."""
    try:
        quotient = x / y
    except ZeroDivisionError:
        quotient = 0
    return quotient


def dttots(str_):
    """Convert a compact date string to a Unix timestamp (local time).

    Strings longer than 8 characters are parsed as ``%Y%m%d%H%M``, shorter
    ones as ``%Y%m%d``. The naive result is interpreted in the local time
    zone via ``time.mktime`` instead of the platform-specific ``'%s'``
    strftime directive, so it also works where that directive is missing.
    """
    fmt = '%Y%m%d%H%M' if len(str_) > 8 else '%Y%m%d'
    moment = datetime.datetime.strptime(str_, fmt)
    return int(time.mktime(moment.timetuple()))


def make_test_record(line):
    """Placeholder: ignores ``line`` and returns None."""
    return None


def from_bytes(rec):
    """Decode every ``str`` value of ``rec`` from UTF-8, in place.

    Undecodable bytes are replaced. The same (mutated) mapping is returned.
    NOTE(review): relies on Python 2 semantics where ``str`` is a byte
    string with a ``decode`` method.
    """
    for field in rec:
        raw = rec[field]
        if not isinstance(raw, str):
            continue
        rec[field] = raw.decode('utf8', errors='replace')
    return rec


class DummyLogger(object):
    """Logger stand-in whose level methods all funnel into ``action``.

    ``action`` does nothing here, so every message is discarded.
    """

    def action(self, s):
        """Discard the message."""
        pass

    def debug(self, s):
        self.action(s)

    def info(self, s):
        self.action(s)

    def warning(self, s):
        self.action(s)

    def error(self, s):
        self.action(s)


class PrintLogger(object):
    """Logger stand-in that prints every message, whatever its level."""

    def action(self, s):
        """Print the message."""
        print(s)

    def debug(self, s):
        self.action(s)

    def info(self, s):
        self.action(s)

    def warning(self, s):
        self.action(s)

    def error(self, s):
        self.action(s)


def yt_config_set_defaults(yt, logger=None, pytz_off=False):
    """Populate ``yt.config`` with this project's defaults.

    Sets the proxy URL, the default pool (from ``SPEC10k``), the
    ``ignore_existing`` spec flag, a ``YsonFormat`` tabular format and
    ``yamr_mode.create_recursive``. When the interpreter version string
    mentions Anaconda a pickling module filter is installed; ``pytz_off``
    installs a filter that rejects modules whose name contains ``pytz``.
    If ``logger`` is given, its handlers are assigned to the ``'Yt'`` logger.

    NOTE(review): the ``pytz_off`` filter replaces the Anaconda one instead
    of combining with it -- confirm that this is intended.
    """
    exceptions = {'ujson', 'Crypto.Cipher._Blowfish'}

    def _anaconda_filter(module):
        # Accept modules that have a __file__, are not a '.so' listed in
        # `exceptions`, and are not hashlib-related.
        if not hasattr(module, '__file__'):
            return False
        if (module.__file__.endswith('.so') and
                getattr(module, '__name__', '') in exceptions):
            return False
        return 'hashlib' not in getattr(module, '__name__', '')

    def _no_pytz_filter(module):
        return 'pytz' not in getattr(module, '__name__', '')

    yt.config['proxy']['url'] = 'hahn.yt.yandex.net'
    yt.config['spec_defaults']['pool'] = SPEC10k['pool']
    yt.config['spec_defaults']['ignore_existing'] = True
    yt.config['tabular_data_format'] = yt.YsonFormat()
    if 'Anaconda' in sys.version:
        yt.config['pickling']['module_filter'] = _anaconda_filter
    if pytz_off:
        yt.config['pickling']['module_filter'] = _no_pytz_filter
    yt.config['yamr_mode']['create_recursive'] = True
    if logger:
        logging.getLogger('Yt').handlers = logger.handlers


def reduce_wrapper(func, records):
    """Emulate a reduce stage over records already ordered by ``key``.

    Consecutive records sharing the same ``rec.key`` form a group;
    ``func(key, group)`` is called once per group and whatever it yields is
    collected (via ``trylist``) into the returned list.

    The pending group is tracked by the buffer itself rather than by a
    truthy "previous key", so records with an empty/falsy key get their own
    group, and ``func`` is not called at all when ``records`` is empty.
    """
    result = []
    buff = []
    prevkey = None
    for rec in records:
        # A non-empty buffer means prevkey holds the key of a real group.
        if buff and rec.key != prevkey:
            result.extend(trylist(func(prevkey, buff)))
            buff = []
        buff.append(rec)
        prevkey = rec.key
    if buff:
        result.extend(trylist(func(prevkey, buff)))
    return result


def map_wrapper(func, records):
    """Emulate a map stage: call ``func`` on each record and flatten.

    Returns one list with everything yielded by ``func(rec)`` for every
    record, in input order.
    """
    return [
        emitted
        for rec in records
        for emitted in func(rec)
    ]


def stringify_dict_keys(dct):
    """Return a new dict with every key of ``dct`` converted via ``str``."""
    return dict((str(key), value) for key, value in dct.items())


def reduce_wrapper_file(func, filename, outfilename):
    """Run a reducer over the lines of a file and write its output as TSV.

    Each line of ``filename`` (opened with ``open_wrapper``) is turned into a
    record by ``make_test_record``. Consecutive records with the same
    ``rec.key`` are buffered; when the key changes, ``func(prevkey, buff)``
    is called and every record it yields is written to ``outfilename`` as a
    ``key<TAB>subkey<TAB>value`` line and appended to the returned list.

    This is an interactive debugging helper: failures while building a
    record or writing output drop into ``pdb`` instead of raising.

    NOTE(review): ``make_test_record`` as defined in this file returns None,
    so ``rec.key`` would fail -- confirm it is meant to be replaced.
    """
    # An empty prevkey means "no group seen yet" and never triggers a flush.
    prevkey = ''
    result = []
    buff = []
    with codecs.open(outfilename, 'w', 'utf8') as fw:
        with open_wrapper(filename) as f:
            for line in f:
                try:
                    rec = make_test_record(line)
                except:
                    # Stop in the debugger; execution then continues with
                    # whatever `rec` was bound to before (if anything).
                    pdb.set_trace()
                # Key changed: reduce the buffered group and start a new one.
                if rec.key != prevkey and prevkey:
                    for recx in func(prevkey, buff):
                        try:
                            fw.write(
                                tabulate(recx.key, recx.subkey, recx.value))
                            fw.write('\n')
                        except:
                            pdb.set_trace()
                        # The record is collected even if writing it failed.
                        result.append(recx)
                    buff = []
                buff.append(rec)
                prevkey = rec.key
        # Flush the last group after the input file has been closed; this
        # also runs (with an empty buffer) when the input had no lines.
        for recx in func(prevkey, buff):
            fw.write(tabulate(recx.key, recx.subkey, recx.value))
            fw.write('\n')
            result.append(recx)
    return result


def open_wrapper(filename):
    """Open ``filename`` for reading, using gzip for .gz/.gzip files.

    Gzip files are opened with ``gzip.open`` defaults; any other file is
    opened as UTF-8 text with undecodable bytes replaced.
    """
    is_gzipped = filename.endswith(('.gz', '.gzip'))
    if not is_gzipped:
        return codecs.open(filename, 'r', 'utf8', errors='replace')
    return gzip.open(filename)


def records_to_str(records):
    """Render records as newline-separated ``key<TAB>subkey<TAB>value``."""
    lines = []
    for rec in records:
        lines.append('\t'.join((rec.key, rec.subkey, rec.value)))
    return '\n'.join(lines)


def timeround(moment, seconds=1800):
    """Round a moment down to a multiple of ``seconds`` since the epoch.

    ``moment`` is a ``datetime`` (converted through its time tuple with
    ``time.mktime``) or anything ``int()`` accepts as a Unix timestamp.
    Returns a local naive ``datetime``.

    ``time.mktime`` replaces the platform-specific ``'%s'`` strftime
    directive, which is not available everywhere.
    """
    if isinstance(moment, datetime.datetime):
        moment = time.mktime(moment.timetuple())
    moment = int(moment)
    return datetime.datetime.fromtimestamp(moment // seconds * seconds)


def make_logger(_file, debug=False):
    """Create a logger for a script with console and per-run file output.

    The logger is named after the script's basename (without extension).
    As a side effect the process changes its working directory to the
    script's directory, and a ``logs`` subdirectory is created there if
    missing. The console handler shows DEBUG and above when ``debug`` is
    true and only CRITICAL otherwise; the file handler always records DEBUG
    and above into ``logs/<name>_<timestamp>_pid_<pid>.log``.

    ``_file`` is made absolute first, so a bare ``script.py`` (whose
    dirname is empty) resolves to the current directory instead of failing
    in ``os.chdir('')``.

    NOTE(review): every call adds two more handlers to the same named
    logger, so calling this repeatedly duplicates output.
    """
    directory = os.path.dirname(os.path.abspath(_file))
    basename = os.path.splitext(os.path.basename(_file))[0]
    logger = logging.getLogger(basename)
    os.chdir(directory)
    log_dir = '{}/logs'.format(directory)
    if not os.path.isdir(log_dir):
        os.mkdir(log_dir)
    formatter = logging.Formatter('%(asctime)s | %(message)s')

    ch = logging.StreamHandler()
    logger.setLevel(logging.DEBUG)
    ch.setLevel(logging.DEBUG if debug else logging.CRITICAL)
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    fh = logging.FileHandler(
        '{}/logs/{}_{}_pid_{}.log'.format(
            directory,
            basename,
            datetime.datetime.now().strftime('%Y%m%d_%H%M%S'),
            os.getpid()
        ),
        encoding='utf8'
    )
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    return logger


def parse_to_tabs(filename, enc='utf8'):
    """Read a tab-separated file into a list of field lists.

    The file is decoded with ``enc`` and split on ``'\\n'``; empty lines are
    skipped and every remaining line is split on tabs.
    """
    with codecs.open(filename, 'r', enc) as f:
        content = f.read()
    rows = []
    for line in content.split('\n'):
        if line:
            rows.append(line.split('\t'))
    return rows


def date_range(from_, to_):
    """List every day between two dates, inclusive of both ends.

    String arguments are parsed as ``%Y-%m-%d``. The result runs from
    ``from_`` towards ``to_``, so it is descending when ``to_`` is earlier
    than ``from_``.
    """
    def _as_date(value):
        if isinstance(value, basestring):
            return datetime.datetime.strptime(value, '%Y-%m-%d').date()
        return value

    from_ = _as_date(from_)
    to_ = _as_date(to_)
    one_day = datetime.timedelta(days=1)
    current = min([from_, to_])
    last = max([from_, to_])
    days = []
    while current <= last:
        days.append(current)
        current += one_day
    if to_ < from_:
        days.reverse()
    return days


def yt_date_to_ts(string_):
    """Convert a ``%Y-%m-%d`` date string to a Unix timestamp.

    The date is taken as local midnight. ``time.mktime`` is used instead of
    the platform-specific ``'%s'`` strftime directive for portability.
    """
    moment = datetime.datetime.strptime(string_, '%Y-%m-%d')
    return int(time.mktime(moment.timetuple()))


# Matches the first YYYY-MM-DD looking fragment in a string.
re_date = re.compile(r'[0-9]{4}-[0-9]{2}-[0-9]{2}')


def yt_get_date_from_table(table):
    """Extract the first ``YYYY-MM-DD`` date found in ``table``.

    Returns a ``datetime.date``, or None when the string contains no such
    fragment. A fragment that is not a valid calendar date raises
    ``ValueError``.
    """
    found = re_date.search(table)
    if found is None:
        return None
    parsed = datetime.datetime.strptime(found.group(0), '%Y-%m-%d')
    return parsed.date()


def send_sms(recipients, message):
    """Send ``message`` to ``recipients`` through the golem SMS endpoint.

    ``recipients`` is an iterable of strings joined with commas into the
    ``resps`` parameter. Unicode text is encoded to UTF-8 before being
    percent-quoted, the same way ``nda_shorten`` prepares its argument;
    quoting the unicode object directly fails on non-ASCII characters.
    The HTTP response is not inspected. Returns None.
    """
    if isinstance(message, unicode):
        message = message.encode('utf8')
    url = ('https://golem.yandex-team.ru/api/sms/send.sbml?resps={}&msg={}'
           .format(','.join(recipients), urllib.quote(message)))
    requests.get(url)


def get_yt_exists(yt):
    """Build a predicate telling whether a table exists and has rows.

    The returned function gives True only when ``yt.exists(table)`` is
    truthy and the table's ``row_count`` attribute is truthy.
    """
    def yt_exists(table):
        return bool(
            yt.exists(table) and yt.get_attribute(table, 'row_count')
        )
    return yt_exists


class StatPusher(object):
    """Publishes data to a Statface report.

    Defaults for ``report``, ``scale``, ``replace_mask``, ``remote_publish``
    and ``async_mode`` given to the constructor are used by ``push`` whenever
    the corresponding ``push`` argument is not supplied.
    """

    def __init__(
        self, cluster, client=None,
        username_var=b'STAT_LOGIN',
        password_var=b'STAT_TOKEN',
        report=None, scale=None, replace_mask=None,
        remote_publish=None, async_mode=None
    ):
        """Remember the cluster/driver and create a client if none is given.

        Without ``client`` a ``StatfaceClient`` is built from the environment
        variables named by ``username_var`` and ``password_var`` (a missing
        variable raises ``KeyError``).
        """
        self.cluster = cluster
        self.driver = get_driver(self.cluster)
        if client:
            self.client = client
        else:
            self.client = ns.StatfaceClient(
                proxy=b'upload.stat.yandex-team.ru',
                username=os.environ[username_var],
                password=os.environ[password_var]
            )
        self.report = report
        self.scale = scale
        self.remote_publish = remote_publish
        self.replace_mask = replace_mask
        self.async_mode = async_mode

    def push(
        self, data,
        report=None, replace_mask=None, scale=None, remote_publish=False,
        async_mode=False
    ):
        """Publish ``data`` to the report.

        ``data`` is either the records themselves or a table path (string).
        For a local publish a table path is first read through the driver
        and converted to dicts; for ``remote_publish`` the path is handed
        over as-is.

        ``scale`` falls back to the constructor's value and finally to
        ``'daily'``. It used to default to ``'daily'`` directly, which made
        the constructor's ``scale`` unreachable.
        """
        scale = scale or self.scale or 'daily'
        report = report or self.report
        remote_publish = remote_publish or self.remote_publish
        async_mode = async_mode or self.async_mode
        if replace_mask is None:
            replace_mask = self.replace_mask
        if not remote_publish and isinstance(data, basestring):
            data = [x.to_dict() for x in self.driver.read(data)]
        rep = ns.StatfaceReport().path(
            report
        ).client(
            self.client
        ).scale(
            scale
        )

        if replace_mask:
            rep = rep.replace_mask(replace_mask)

        if remote_publish:
            rep.remote_publish(
                proxy='hahn',
                table_path=data,
                async_mode=async_mode,
                upload_config=False
            )
        else:
            rep.data(
                data
            ).publish()


def push_to_razladki(data,
                     project='SearchPortalDistribution',
                     maxfailures=5, logger=None, debug=False,
                     override=False):
    """POST data points to the razladki launcher, retrying on failure.

    ``data`` is a dict or a list of dicts; items lacking a ``'ts'`` key get
    the current Unix time (the dicts are modified in place). The request is
    repeated until the response status is 200 or 409, or until
    ``maxfailures`` attempts have failed. Both raised exceptions and other
    response statuses count as failures -- previously only exceptions did,
    so a persistent error status retried forever.

    Returns the last response received, or None if no request ever
    completed. ``debug`` is accepted but unused.
    """
    razladki = ('http://launcher.razladki.yandex-team.ru/'
                'save_new_data_json/') + project
    now = int(time.time())
    if not isinstance(data, list):
        data = [data]
    for x in data:
        if 'ts' not in x:
            x['ts'] = now
    req = None
    failures = 0
    while failures < maxfailures:
        try:
            req = requests.post(razladki, json={'data': data,
                                                'override': override})
        except Exception:
            if logger is not None:
                logger.warning(traceback.format_exc())
            failures += 1
            continue
        if req.status_code in {200, 409}:
            break
        failures += 1
    return req


def date_from_table(table):
    """Extract a date from the first run of eight digits in ``table``.

    Returns a ``datetime.date`` (built by ``date_from_string``) or None when
    there is no such run, the digits are not a valid date, or ``table`` is
    not a string. Other errors are no longer swallowed.
    """
    try:
        datepart = re.search(r'[0-9]{8}', table).group(0)
        return date_from_string(datepart)
    except (AttributeError, TypeError, ValueError):
        # AttributeError: no match (search returned None);
        # TypeError: non-string input; ValueError: impossible date.
        return None


def date_from_string(string):
    """Build a ``datetime.date`` from ``YYYYMMDD`` or ``YYYY-MM-DD`` text.

    Dashes are removed first; the first eight remaining characters are read
    as year, month and day.
    """
    digits = string.replace('-', '')
    year = int(digits[:4])
    month = int(digits[4:6])
    day = int(digits[6:8])
    return datetime.date(year, month, day)


def choose_interval(num, intervals):
    """Return the first interval whose inclusive bounds contain ``num``.

    Each interval is indexable, with the lower bound at index 0 and the
    upper bound at index 1. Raises ``IndexError`` when nothing matches.
    """
    matching = []
    for interval in intervals:
        if num >= interval[0] and num <= interval[1]:
            matching.append(interval)
    return matching[0]


def counter_quantile(counter, quantile):
    """Compute a quantile of the values described by a value -> count map.

    Keys with a non-zero count are laid out in sorted order over the index
    range ``0 .. total - 1``. The target position is
    ``(total - 1) * quantile``: an integral position returns the key found
    there, otherwise the result is the mean of the keys at the floor and
    ceiling positions.

    NOTE(review): when the counts sum to 1 or less the function returns 0
    rather than the single key -- confirm this is intended.
    """
    # Maps (first_index, last_index) of each key's run to the key itself.
    spans = {}
    first = 0
    for key in sorted(k for k in counter if counter[k] != 0):
        last = first + counter[key] - 1
        spans[(first, last)] = key
        first = last + 1

    last_index = sum(counter.values()) - 1
    if last_index <= 0:
        return 0

    target = last_index * quantile
    if int(target) == target:
        return spans[choose_interval(target, spans)]
    below = spans[choose_interval(math.floor(target), spans)]
    above = spans[choose_interval(math.ceil(target), spans)]
    return (below + above) / 2.0


def load_lines(filename):
    """Return the non-empty lines of a UTF-8 text file.

    The content is split on ``'\\n'`` only, so other line-ending characters
    stay inside the returned strings.
    """
    with codecs.open(filename, 'r', 'utf8') as f:
        content = f.read()
    return [line for line in content.split('\n') if line]


def process_stat_data(data, D, complextable=False):
    """Collect integer measures from Stat rows into ``D[name][date][measure]``.

    ``name`` is the row's element and product overrides joined with ``_``.
    Only fields of the form ``<prefix>^<measure>`` are considered:

    * with ``complextable`` the date is the date part of the row's
      ``fielddate`` (``%Y-%m-%d``) and the prefix is ignored;
    * otherwise the prefix itself must be a ``%d.%m.%Y`` date (three
      dot-separated parts).

    Values that cannot be converted to ``int`` (or stored in ``D``) are
    skipped. ``D`` is modified in place and must create missing nested
    levels on access; nothing is returned.
    """
    for item in data:
        name = (item['element_override_by_dictionary'] + '_' +
                item['product_override_by_dictionary'])
        row_date = None
        if complextable:
            row_date = datetime.datetime.strptime(
                item['fielddate'].split()[0], '%Y-%m-%d'
            )
        for field in item:
            parts = field.split('^')
            if len(parts) != 2:
                continue
            prefix, measure = parts
            if complextable:
                date = row_date
            else:
                if len(prefix.split('.')) != 3:
                    continue
                date = datetime.datetime.strptime(prefix, '%d.%m.%Y')
            try:
                # int() is evaluated before D is touched, so a bad value
                # leaves D unchanged.
                D[name][date][measure] = int(item[field])
            except Exception:
                continue


def get_cluster(clusters, args):
    """Build a configured cluster object from options and the environment.

    ``args`` is a dict or an object accepted by ``vars()`` (for example an
    argparse namespace). Recognized keys: ``pool``, ``no_yql``, ``proxy``,
    ``title``, ``job_root`` and ``templates``.

    ``clusters.yt`` is used when ``no_yql`` is set, otherwise
    ``clusters.yql`` (which also receives ``YQL_TOKEN``). The constructor is
    the attribute named by ``proxy`` or, if absent, by the first dot-label of
    ``YT_PROXY`` in title case. ``YT_TOKEN`` is always required. The created
    object is returned after ``.env(...)`` sets templates and pool trees.
    """
    options = args if isinstance(args, dict) else vars(args)

    kwargs = {'token': os.environ['YT_TOKEN']}
    pool = options.get('pool')
    if pool:
        kwargs['pool'] = options['pool']

    if options.get('no_yql'):
        backend = clusters.yt
    else:
        backend = clusters.yql
        kwargs['yql_token'] = os.environ['YQL_TOKEN']

    proxy = options.get('proxy')
    if not proxy:
        proxy = os.environ['YT_PROXY'].split('.')[0].title()

    templates = {'title': options.get('title') or 'default_title'}
    if options.get('job_root'):
        templates['job_root'] = options['job_root']
    if options.get('templates'):
        templates.update(options['templates'])

    factory = getattr(backend, proxy)
    return factory(**kwargs).env(
        templates=templates,
        yt_spec_defaults=dict(
            pool_trees=["physical"],
            tentative_pool_trees=["cloud"]
        ),
    )


def get_driver(cluster):
    """Return ``cluster.driver.yt_driver`` if present, else ``cluster.driver``."""
    try:
        driver = cluster.driver.yt_driver
    except AttributeError:
        driver = cluster.driver
    return driver


def get_rc(cluster, table):
    """Return the ``row_count`` attribute of ``table`` (default 0).

    The attribute is read through the ``client`` of the cluster's driver.
    """
    client = get_driver(cluster).client
    return client.get_attribute(table, 'row_count', 0)


def tmp(s):
    """Return ``s`` with the ``_tmp`` suffix appended."""
    suffix = '_tmp'
    return s + suffix


def remove_tmp_(cluster, *tables):
    """Remove the ``_tmp`` counterpart of each given table.

    Best effort: a failure for one table is printed and the loop moves on
    to the next table.
    """
    for table in tables:
        try:
            target = tmp(table)
            get_driver(cluster).remove(target)
            print('removed {}'.format(target))
        except Exception as e:
            print('didn\'t remove {}: {}'.format(table, e))


def move_tmp_(cluster, *tables):
    """Move each table's ``_tmp`` counterpart over the table itself.

    Uses a forced move through the driver's client. Best effort: a failure
    for one table is printed and the loop moves on to the next table.
    """
    for table in tables:
        try:
            client = get_driver(cluster).client
            client.move(tmp(table), table, force=True)
            print('moved {} -> {}'.format(tmp(table), table))
        except Exception as e:
            print('didn\'t move {} -> {}: {}'.format(tmp(table), table, e))


def main():
    """Script entry point; currently does nothing."""
    return None


if __name__ == "__main__":
    main()
