#!/usr/bin/env python
#! -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import sys
import time
import json
import os
import re
import codecs
import datetime
import requests
import argparse
import subprocess
import shlex
import logging
import pdb
import toml
from collections import Counter
from pecheny.moncommons import push_to_razladki

# Pre-compiled extractors for tab-separated "key=value" log records.
# Plain raw strings are used instead of the Python-2-only ``ur''`` prefix:
# with ``unicode_literals`` enabled at the top of the file they are still
# unicode patterns on Python 2, and they also parse on Python 3.

# Value of the ``url=`` field: everything up to the next tab or end of string.
re_url = re.compile(r'(?<=url=).*?(?=(\t|$))')
# Value of the ``login-hash=`` field: a run of lowercase hex digits.
re_lh = re.compile(r'(?<=login-hash=)[0-9a-f]+')


def current_timestamp():
    """Return the current Unix timestamp as an integer number of seconds.

    The value comes from ``time.time()`` so that it lives on the same clock
    as other ``time.time()`` readings in this file.  Subtracting the epoch
    from the naive local ``datetime.now()`` would shift the result by the
    machine's UTC offset.
    """
    return int(time.time())

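# NOTE: legacy local implementation, kept for reference only; the
# push_to_razladki called in main() is imported from pecheny.moncommons.
# def push_to_razladki(params, desc, value):
#     data = {desc: value}
#     req = None
#     while req is None or req.status_code != 200:
#         req = requests.post(params['razladki'], data=data)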


def get_param(s, compiled_regex):
    """Return the first match of ``compiled_regex`` in ``s``.

    Args:
        s: text to search.
        compiled_regex: a compiled ``re`` pattern.

    Returns:
        The full text of the first match (``group(0)``), or an empty string
        when the pattern does not occur in ``s``.
    """
    # Search once and reuse the match object instead of scanning twice.
    match = compiled_regex.search(s)
    return match.group(0) if match else ''


def main():
    """Sample users, check each one for related entries, report the null rate.

    Steps:

    1. Parse command-line options and set up console and file logging.
    2. Load configuration from ``basic.toml``, ``video.toml`` and an
       optional ``--config`` file (later files override earlier ones).
    3. Poll the fetch URL until ``args.uids`` distinct users were seen or
       one hour has passed.  Only responses containing ``related_src=top``
       are used.  The user id is the login hash when the response carries
       one, otherwise the key of the first entry.
    4. For every new user, query the check URL and classify the answer as
       ``null``, ``related`` or ``nonnull``, counted separately for logins
       and for the remaining (yandexuid) users.
    5. Log the totals and, unless ``--debug`` is set, push the share of
       users without related entries via ``push_to_razladki``.

    Config keys read: ``logins``, ``fetch_table``, ``fetch_table_staff``,
    ``fetch_req_json``, ``rtmr``, ``check_req``, ``rtmr_check``,
    ``check_table`` and, optionally, ``suffix``.
    """

    global __file__                         # to fix
    global _file_                           # stupid __file__
    __file__ = os.path.abspath(__file__)    # handling
    _file_ = os.path.basename(__file__)     # in python 2

    start = current_timestamp()
    # The polling limit is tracked on the time.time() clock directly, so it
    # cannot be skewed by however current_timestamp() derives its value.
    deadline = time.time() + 3600

    script_dir = os.path.dirname(__file__)
    script_name = _file_[:-3]               # file name without ".py"

    parser = argparse.ArgumentParser()
    parser.add_argument('--uids', '-u', type=int, default=None)
    parser.add_argument('--ystaff', '-y', action='store_true')
    parser.add_argument('--debug', '-d', action='store_true')
    parser.add_argument('--config', '-c', default=None)
    args = parser.parse_args()

    # set up logging: everything goes to the log file, the console only
    # shows messages in debug mode (CRITICAL is never emitted below).
    logger = logging.getLogger(script_name)
    formatter = logging.Formatter('%(asctime)s | %(message)s')
    logger.setLevel(logging.DEBUG)

    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG if args.debug else logging.CRITICAL)
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    fh = logging.FileHandler(
        '{}/logs/{}-{}.log'.format(script_dir, script_name, start),
        encoding='utf8')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # NOTE(review): basic.toml is read from the invocation directory, i.e.
    # before the chdir below, while video.toml is read from the script
    # directory - confirm this asymmetry is intended.
    with open('basic.toml', 'r') as f:
        params = toml.loads(f.read())
    os.chdir(script_dir)
    with open('video.toml', 'r') as f:
        params.update(toml.loads(f.read()))
    if args.config is not None:
        with open(args.config, 'r') as f:
            params.update(toml.loads(f.read()))

    if args.uids is None:
        args.uids = params['logins'][script_name]
        if args.ystaff:
            # NOTE(review): STAFF_FACTOR is not defined anywhere in this
            # file, so this branch raises NameError unless the name is
            # provided from elsewhere - confirm the intended value.
            args.uids = int(args.uids / STAFF_FACTOR)

    uids = set()
    nulls = set()
    yandexuids = Counter()
    logins = Counter()

    table = (params['fetch_table_staff']
             if args.ystaff else params['fetch_table'])

    req_text = params['fetch_req_json'].format(
        params['rtmr'], table)

    previous = ''
    while len(uids) < args.uids and time.time() < deadline:

        # Reset per response so a value from a previous user is never
        # reused when the current response has no matching entry.
        threshold = None

        try:
            req = requests.get(req_text, timeout=1)
            text = req.content.decode('utf8', errors='replace')
            if text == '' or text == previous:
                continue
            if 'related_src=top' not in text:
                continue

            payload = json.loads(text)
            try:
                entries = payload['Entries']
                u = entries[0]['Key']
            except KeyError:
                logger.info('error on json {}'.format(payload)[:100])
                # Only drop into the debugger on interactive debug runs.
                if args.debug:
                    pdb.set_trace()
                continue

            # The SubKey of the earliest entry carrying the marker becomes
            # the cut-off for this user's related entries.
            entries = sorted(entries, key=lambda x: int(x['SubKey']))
            for entry in entries:
                if 'related_src=top' in entry['Value']:
                    threshold = int(entry['SubKey'])
                    break

        except Exception:
            # Best effort: any failed or malformed fetch is skipped.
            # KeyboardInterrupt / SystemExit are deliberately not caught,
            # so the loop can still be interrupted.
            continue
        previous = text

        if threshold is None:
            # The marker was in the raw text but in no entry's Value.
            continue

        is_login = False
        login_hash = get_param(text, re_lh)
        if login_hash:
            u = login_hash
            is_login = True
        if u == '':
            continue

        if u in uids:
            logger.info('uid {} already in uids'.format(u))
            continue

        uids.add(u)
        counter = logins if is_login else yandexuids
        check_req = params['check_req'].format(
            params['rtmr_check'],
            params['check_table'][script_name] +
            ('.logins' if is_login else ''),
            u)
        logger.info('checking uid {}: {}'.format(
            u, check_req))
        try:
            req = requests.get(check_req, timeout=1)
            check_text = req.content.decode('utf8', errors='replace')
        except Exception:
            if args.debug:
                pdb.set_trace()
            continue

        if check_text in ('', 'null\n', 'null'):
            nulls.add(u)
            bucket = 'null'
        else:
            bucket = 'nonnull'
            if 'related' in check_text:
                # The first entry's Value is itself a JSON document holding
                # a list of records.
                records = json.loads(
                    json.loads(check_text)['Entries'][0]['Value'])
                matching = [
                    x for x in records if int(x['ts']) < threshold
                    and 'related' in x and len(x['related']) > 0
                ]
                if matching:
                    bucket = 'related'
                    logger.info('user {}, related {}'.format(u, matching))
        counter[bucket] += 1

        logger.info('added uid {}, ts {}'.format(u, threshold))

    total = sum(yandexuids.values()) + sum(logins.values())
    related = yandexuids['related'] + logins['related']
    if total > 0:

        logger.info('yandexuids total: {}'.format(sum(yandexuids.values())))
        logger.info(yandexuids.most_common())
        logger.info('logins total: {}'.format(sum(logins.values())))
        logger.info(logins.most_common())
        logger.info('total: {}'.format(
            total))
        logger.info('null users: {}'.format(nulls))
        # Share of checked users for whom no related entry was found.
        nrate = (total - related) / total
        logger.info('null users rate: {:.2}'.format(nrate))

        if not args.debug:
            desc = 'video_related_null'
            if args.ystaff:
                desc += '_ystaff'
            if 'suffix' in params:
                desc += params['suffix']
            push_to_razladki(params, desc, nrate)

# Run the monitoring job only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
