#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import os
import sys
import re
import pdb
import logging
from collections import Counter
import hashlib
import requests
import urllib2
import argparse
import traceback
import codecs
import json
import datetime
import time
import toml
import urlparse
from pecheny.moncommons import *

# Field extractors for tab-separated ``key=value`` log lines. Each pattern
# matches the text that follows ``key=`` (lookbehind) and lazily stops right
# before the next tab or at ``$`` (lookahead), so ``group(0)`` is the bare
# value without the key or the delimiter.
re_url = re.compile(ur'(?<=url=).*?(?=(\t|$))')
re_query = re.compile(r'(?<=query=).*?(?=(\t|$))')
re_qid = re.compile(ur'(?<=reqid=).*?(?=(\t|$))')
re_lh = re.compile(ur'(?<=login-hash=).*?(?=(\t|$))')
# Matches one character from the Latin or Cyrillic ranges listed in the class.
isletter = re.compile(ur'[A-Za-zА-Яа-я]')
# Base name of the running script; main() overwrites this placeholder.
_file_ = ''


class Testcase(object):
    """Plain record describing one query that has to be verified.

    Attributes are stored exactly as passed to the constructor:
    ``id`` (identifier the check request is built from), ``query``
    (the value looked up in the check response) and ``debugtext``
    (kept only so it can be written to the log).
    """

    def __init__(self, id, query, debugtext):
        # Stored as given; no validation or conversion is performed.
        self.id, self.query, self.debugtext = id, query, debugtext


def get_param(s, compiled_regex):
    """Return the first match of ``compiled_regex`` in ``s``.

    :param s: text to search.
    :param compiled_regex: compiled pattern object (anything exposing
        ``search``).
    :returns: ``group(0)`` of the first match, or an empty unicode string
        when the pattern does not match at all. An empty result therefore
        does not distinguish "no match" from "matched an empty value".
    """
    # Search once and reuse the match object instead of scanning ``s`` twice.
    match = compiled_regex.search(s)
    if match is None:
        return u''
    return match.group(0)


def pairs_from_testcases(testcases):
    """Collect the distinct ``(id, query)`` tuples of the given test cases.

    :param testcases: iterable of objects with ``id`` and ``query``
        attributes.
    :returns: a set of ``(id, query)`` tuples.
    """
    pairs = set()
    for case in testcases:
        pairs.add((case.id, case.query))
    return pairs


def get_samereqid(lines, reqid_):
    """Select the REQUEST lines that carry the given request id.

    :param lines: iterable of log lines.
    :param reqid_: value the ``reqid=`` field must be equal to.
    :returns: set of lines whose ``reqid`` field equals ``reqid_`` and that
        contain the tab-delimited ``type=REQUEST`` field.
    """
    request_marker = u'\ttype=REQUEST\t'
    matching = set()
    for entry in lines:
        if get_param(entry, re_qid) != reqid_:
            continue
        if request_marker in entry:
            matching.add(entry)
    return matching


def percent_decoded_url(string_):
    """Return the ``url=`` field of a log line with percent-escapes decoded.

    The field value is encoded to UTF-8 bytes, unquoted with
    ``urllib2.unquote`` and decoded back from UTF-8.

    :param string_: log line to read the ``url=`` field from.
    :returns: the decoded URL; empty when the line has no ``url=`` field.
    """
    raw_url = get_param(string_, re_url)
    url_bytes = raw_url.encode('utf8')
    return urllib2.unquote(url_bytes).decode('utf8')


def good_line(line, params):
    """Tell whether a log line is usable as a source of a test case.

    A line qualifies when it contains ``type=REQUEST`` and
    ``service=video.yandex``, has a non-empty ``url=`` field and passes
    ``not_porn`` (a helper that is not defined in this file).

    :param line: log line to inspect.
    :param params: accepted for interface compatibility; not used here.
    :returns: ``True`` if every condition holds, ``False`` otherwise.
    """
    if 'type=REQUEST' not in line:
        return False
    if 'service=video.yandex' not in line:
        return False
    if not get_param(line, re_url):
        return False
    if not not_porn(line):
        return False
    return True


def get_all_logins(lines):
    """Collect the distinct ``login-hash=`` values found in ``lines``.

    :param lines: iterable of log lines.
    :returns: set with the first ``login-hash`` value of every line that
        has one. A present-but-empty value is kept as an empty string.
    """
    logins = set()
    for line in lines:
        # One search per line; the match object is reused for the value.
        match = re_lh.search(line)
        if match is not None:
            logins.add(match.group(0))
    return logins


def make_check_req(params, id):
    """Build the request string used to look up one id in the check table.

    :param params: configuration mapping; ``check_req`` is the format
        template, ``rtmr_check`` its first argument and ``check_table`` a
        mapping keyed by the script name.
    :param id: value substituted as the third template argument.
    :returns: the formatted request string.
    """
    template = params['check_req']
    host = params['rtmr_check']
    # The key is the script's base name minus its last three characters
    # (presumably the ".py" extension -- confirm against the config file).
    table = params['check_table'][_file_[:-3]]
    return template.format(host, table, id)


def md5(string_):
    """Return the hexadecimal MD5 digest of ``string_``.

    :param string_: byte string, or text that is encoded as UTF-8 before
        hashing.
    :returns: the 32-character hex digest.
    """
    # hashlib works on bytes. Text is encoded explicitly so that non-ASCII
    # input hashes instead of raising (implicit ASCII encoding in Python 2,
    # TypeError in Python 3). ASCII-only input yields the same digest as
    # before.
    if not isinstance(string_, bytes):
        string_ = string_.encode('utf8')
    return hashlib.md5(string_).hexdigest()


def current_timestamp():
    """Return the current Unix time as whole seconds.

    Uses ``time.time()`` rather than subtracting the epoch from a naive
    local ``datetime.now()``: the latter is shifted by the local UTC offset
    and jumps on DST changes, which distorts elapsed-time arithmetic.

    :returns: seconds since the epoch, truncated to ``int``.
    """
    return int(time.time())


def fail(failures, reason, text):
    """Record one failure under ``reason`` and hand ``text`` back.

    :param failures: counter-like mapping that is updated in place.
    :param reason: key whose count is increased by one.
    :param text: returned unchanged, so the call can be used directly on the
        right-hand side of an assignment.
    :returns: ``text``.
    """
    failures[reason] = failures[reason] + 1
    return text

# def push_to_razladki(params, desc, value):
#     data = {desc: value}
#     req = None
#     while req is None or req.status_code != 200:
#         req = requests.post(params['razladki'], data=data)


def main():
    """Run the found-rate check from start to finish.

    Steps, in order:

    1. Parse the command line and set up console and file logging.
    2. Load ``basic.toml`` from the current working directory, then change
       into the script's directory and load ``video.toml`` and the optional
       ``--config`` file; keys from later files override earlier ones.
    3. Poll the fetch URL until enough distinct (login, query) test cases
       are collected or the failure budget is used up.
    4. For every test case request the check URL and look for the query in
       ``Entries[0]['Value']`` of the JSON answer.
    5. Log the found rate and, unless ``--debug`` is given, pass it to
       ``push_to_razladki`` (not defined in this file; presumably provided
       by the star import -- confirm).

    The process exits with status 1 once it has been running for more than
    an hour.
    """

    global __file__                         # to fix
    global _file_                           # stupid __file__
    __file__ = os.path.abspath(__file__)    # handling
    _file_ = os.path.basename(__file__)     # in python 2

    parser = argparse.ArgumentParser()
    parser.add_argument('--logins', '-l', default=None, type=int)
    parser.add_argument('--ystaff', '-y', action='store_true')
    parser.add_argument('--debug', '-d', action='store_true')
    parser.add_argument('--config', '-c', default=None)
    args = parser.parse_args()

    start = current_timestamp()
    # Hard limit, in seconds, on the total running time of the script.
    TIME_LIMIT = 3600

    # set up logging: everything goes to the log file, the console only
    # shows messages when --debug is given
    logger = logging.getLogger(_file_[:-3])
    formatter = logging.Formatter('%(asctime)s | %(message)s')
    ch = logging.StreamHandler()
    logger.setLevel(logging.DEBUG)
    if args.debug:
        ch.setLevel(logging.DEBUG)
    else:
        ch.setLevel(logging.CRITICAL)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    fh = logging.FileHandler('{}/logs/{}-{}.log'.format(
        os.path.dirname(__file__), _file_[:-3], start),
        encoding='utf8')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # NOTE(review): basic.toml is read relative to the caller's working
    # directory, the other two files relative to the script's directory.
    with open('basic.toml', 'r') as f:
        params = toml.loads(f.read())
    os.chdir(os.path.dirname(__file__))
    with open('video.toml', 'r') as f:
        params.update(toml.loads(f.read()))
    if args.config is not None:
        with open(args.config, 'r') as f:
            params.update(toml.loads(f.read()))

    if args.logins is None:
        args.logins = params['logins'][_file_[:-3]]
        if args.ystaff:
            # The configured default is cut to a third for the staff table.
            args.logins = int(args.logins / 3)
    if args.ystaff:
        fetch_table = params['fetch_table_staff']
    else:
        fetch_table = params['fetch_table']

    fetch_req = params['fetch_req'].format(
        params['rtmr'], fetch_table)

    testcases = []
    # (id, query) pairs already present in ``testcases``; kept alongside the
    # list so the duplicate check does not rebuild a set for every line.
    seen_pairs = set()
    prevtext = u''
    failures = Counter()

    # Failure budget per requested test case.
    QFAIL = 1000
    if args.ystaff:
        QFAIL *= 10

    session = requests.Session()

    # Phase 1: collect test cases from the fetch table.
    while (len(testcases) < args.logins
            and sum(failures.values()) < QFAIL * args.logins):
        # Checked outside of any try block so that the exit cannot be
        # swallowed by the error handling below.
        if current_timestamp() - start > TIME_LIMIT:
            sys.exit(1)
        try:
            req = session.get(fetch_req, timeout=1)
            text = req.content.decode('utf8', errors='replace')
            exit_status = req.status_code
        except Exception:
            prevtext = fail(failures, 'other error', '')
            logger.info(traceback.format_exc())
            continue
        if exit_status != 200:
            prevtext = fail(failures, 'request failed', '')
            continue
        if text == prevtext:
            prevtext = fail(failures, 'same line', '')
            continue

        if not re_lh.search(text):
            prevtext = fail(failures, 'non-login', text)
            continue
        lines = [x for x in re.split(r'\n+', text) if x != u'']
        logins = get_all_logins(lines)
        if len(logins) > 1:
            prevtext = fail(failures, 'more than one login', text)
            continue
        login = next(iter(logins))
        for line in lines:
            if not good_line(line, params):
                prevtext = fail(failures, 'bad line', text)
                continue
            pair = (login, get_param(line, re_query))
            if pair in seen_pairs:
                continue
            seen_pairs.add(pair)
            testcase = Testcase(id=pair[0],
                                query=pair[1],
                                debugtext=line)
            testcases.append(testcase)
            logger.info('added {}, [{}]'
                        .format(testcase.id, testcase.query))
            prevtext = text

    mid = current_timestamp()
    logger.info('failure detalization: {}'.format(failures))
    logger.info('{} uid-url pairs retrieved from table {} '
                'in {} seconds with {} failed requests.'
                .format(len(testcases),
                        fetch_table,
                        mid - start,
                        failures['request failed']))

    failures2 = Counter()
    found = 0
    notfound = 0

    # Phase 2: look every collected test case up in the check table. A test
    # case stays at the head of the list until its request succeeds.
    while testcases and sum(failures2.values()) < 100:
        if current_timestamp() - start > TIME_LIMIT:
            sys.exit(1)
        testcase = testcases[0]
        try:
            check_req = make_check_req(params, testcase.id)
            req = requests.get(check_req, timeout=1)
            text = req.content.decode('utf8', errors='replace')
            exit_status = req.status_code
        except Exception:
            fail(failures2, 'other error', '')
            logger.info(traceback.format_exc())
            continue
        if exit_status != 200:
            fail(failures2, 'request failed', text)
            time.sleep(1)
            continue

        try:
            thisfound = False
            if text != '':
                try:
                    response_json = json.loads(text)
                except ValueError:
                    fail(failures2, 'bad json', text)
                    logger.info('bad json "{}"'
                                .format(text.rstrip()))
                else:
                    # Only inspect a response that was parsed just now;
                    # never reuse the result of an earlier test case.
                    try:
                        thisfound = (
                            testcase.query
                            in response_json['Entries'][0]['Value'])
                    except (KeyError, IndexError, TypeError):
                        fail(failures2, 'bad json', text)
            if thisfound:
                found += 1
            else:
                logger.info('not found: {}, [{}], logline {}'.format(
                    testcase.id,
                    testcase.query,
                    testcase.debugtext))
                notfound += 1
        except Exception:
            # Best effort: an unexpected problem with one test case must
            # not stop the remaining checks.
            logger.info(traceback.format_exc())
            fail(failures2, 'other error', text)
        testcases.pop(0)

    try:
        foundrate = found / (found + notfound)
    except ZeroDivisionError:
        # Nothing was checked at all.
        foundrate = 0

    logger.info('{} queries found in {} ({:.1%}), '
                '{} queries not found, {} failed requests.'
                .format(found,
                        params['check_table'][_file_[:-3]],
                        foundrate,
                        notfound,
                        failures2['request failed']))
    desc = 'video_requests_foundrate'
    if args.ystaff:
        desc += '_ystaff'
    if 'suffix' in params:
        desc += params['suffix']
    if not args.debug:
        push_to_razladki(params, desc, foundrate)

# Run the check only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
