#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
import sys
import os
import re
import time
import codecs
import pdb
import argparse
import datetime
import logging
import requests
import toml
import json
import StringIO
from collections import defaultdict

# Matches the text `"related"`, an optional space, `:[` and then one character
# that is not `]` -- i.e. a "related" list that is not immediately closed.
# get_uids() only keeps responses in which this pattern is found.
re_lated = re.compile(r'"related" ?:\[[^\]]')


def merge_dicts(d1, d2):
    """Append every value held in the lists of ``d2`` to the lists of ``d1``.

    ``d1`` is modified in place and nothing is returned.  Values are added
    through ``d1[key]``, so ``d1`` must either already contain the key or
    create missing entries on lookup (as a ``defaultdict(list)`` does).
    Keys of ``d2`` whose list is empty leave ``d1`` untouched.
    """
    for key, values in d2.items():
        for value in values:
            d1[key].append(value)


def current_timestamp():
    """Return the current Unix time as a whole number of seconds.

    Uses ``time.time()`` rather than subtracting the epoch from the naive
    local ``datetime.now()``: the latter is shifted by the local UTC offset
    and jumps when that offset changes (e.g. at a DST switch), which distorts
    elapsed-time checks built on differences of two timestamps.

    NOTE: this name is defined a second time further down in this module;
    the later definition is the one that is in effect at call time.
    """
    return int(time.time())


def flatten_dict(dict_):
    """Gather the leaf values of a nested dict, grouped by their own key.

    The structure is walked recursively:

    * a dict value is descended into;
    * for a list value, every member that is a dict is descended into and
      every other member is ignored;
    * any other value is appended to the list stored under its key.

    Values that appear under the same key at different nesting levels end up
    in the same list.  Returns a ``defaultdict`` mapping key -> list of values.
    """
    flat = defaultdict(list)
    for key, value in dict_.items():
        if isinstance(value, dict):
            children = [value]
        elif isinstance(value, list):
            children = [member for member in value
                        if isinstance(member, dict)]
        else:
            # Plain leaf: record it under its own key and move on.
            flat[key].append(value)
            continue
        for child in children:
            merge_dicts(flat, flatten_dict(child))
    return flat


def current_timestamp():
    """Return the current Unix time as a whole number of seconds.

    ``time.time()`` is used instead of ``datetime.now() - datetime(1970, 1, 1)``
    because the naive local ``now()`` makes the result depend on the local UTC
    offset and jump by an hour at DST changes; callers subtract two of these
    values to measure elapsed seconds, so such jumps would skew the result.
    """
    return int(time.time())


def get_uids(params):
    """Poll the fetch URL and collect uids from responses that match ``re_lated``.

    Args:
        params: configuration mapping.  Reads ``fetch_req`` (a format string
            that is filled with ``params['rtmr']`` and ``params['table']``)
            and ``uids`` (the number of distinct uids to collect).

    Returns:
        A set of uid strings.  It can hold fewer than ``params['uids']``
        entries when the 3600-second time budget runs out first.

    The logger name is derived from the module-level ``_file_``, which is
    assigned in ``main()``; calling this function before that raises
    ``NameError``.
    """
    logger = logging.getLogger(_file_[:-3])
    started = current_timestamp()
    uids = set()
    previous = ''

    fetch_req = params['fetch_req'].format(params['rtmr'],
                                           params['table'])

    logger.info('began fetchin\' {} uids from {}'.format(
        params['uids'], fetch_req))
    while (len(uids) < params['uids']
           and current_timestamp() - started < 3600):
        try:
            req = requests.get(fetch_req, timeout=1)
        except requests.RequestException as exc:
            # With a one-second timeout, transient failures are expected;
            # keep polling instead of aborting the whole collection.  The
            # pause avoids a hot retry loop while the endpoint is down.
            logger.warning('fetch from %s failed: %s', fetch_req, exc)
            time.sleep(1)
            continue
        text = req.content.decode('utf8', errors='replace')
        if text == '' or text == previous:
            continue
        if not re_lated.search(text):
            # Remember the rejected response so an identical one is skipped
            # without running the regex again.
            previous = text
            continue
        # The uid is the first tab-separated field minus its first character.
        uids.add(text.split('\t')[0][1:])
    return uids

# main


def main():
    """Collect uids, check each one for recommendations and write a report.

    Steps:
      1. parse the command line and load the TOML configuration;
      2. set up console and file logging;
      3. collect uids with ``get_uids``;
      4. request the check URL for every uid and file the uid under
         ``recommendations``, ``no recommendations`` or ``error``;
      5. for uids with recommendations, gather the ``base_html_href`` and
         ``base_img_href`` values from the ``&json_dump=1`` variant of the URL;
      6. write the result to ``logs/recs-<timestamp><url>.log``.
    """
    global __file__                         # to fix
    global _file_                           # stupid __file__
    __file__ = os.path.abspath(__file__)    # handling
    _file_ = os.path.basename(__file__)     # in python 2

    script_dir = os.path.dirname(__file__)
    script_name = _file_[:-3]   # file name without its last three characters

    parser = argparse.ArgumentParser()
    parser.add_argument('--logins', '-l', type=int, default=None)
    parser.add_argument('--url', default='default')
    parser.add_argument('--ystaff', '-y', action='store_true')
    parser.add_argument('--debug', '-d', action='store_true')
    parser.add_argument('--config', '-c', default=None)
    args = parser.parse_args()

    # NOTE: basic.toml is opened before the chdir below, so it is looked up in
    # the caller's working directory; video.toml and a relative --config path
    # are resolved against the script's own directory.  Later files override
    # keys of earlier ones.
    with open('basic.toml', 'r') as f:
        params = toml.loads(f.read())
    os.chdir(script_dir)
    with open('video.toml', 'r') as f:
        params.update(toml.loads(f.read()))
    if args.config is not None:
        with open(args.config, 'r') as f:
            params.update(toml.loads(f.read()))

    # Used as a suffix for both the log file and the report file.
    start = current_timestamp()

    # Set up logging: the file handler records everything, the console
    # handler only shows output when --debug is given (CRITICAL otherwise).
    logger = logging.getLogger(script_name)
    formatter = logging.Formatter('%(asctime)s | %(message)s')
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG if args.debug else logging.CRITICAL)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    fh = logging.FileHandler('{}/logs/{}-{}.log'.format(
        script_dir, script_name, start),
        encoding='utf8')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # Fill in the derived settings that get_uids() and the check loop read.
    params['table'] = (params['recent_videos_staff'] if args.ystaff
                       else params['recent_videos'])
    if args.logins is None:
        args.logins = params['logins'][script_name]
    params['morda_check_url'] = params['morda']['urls'][args.url]
    params['uids'] = args.logins

    logger.info('gonna get some uids')
    uids = get_uids(params)
    logger.info('wow')

    check_url = params['morda_check_url']
    results = defaultdict(list)
    base_html_href = []
    base_img_href = []

    for uid in uids:
        try:
            req = requests.get(check_url.format(uid))
        except requests.RequestException as exc:
            # One unreachable page must not discard the results gathered so
            # far; record the uid as an error and carry on.
            logger.warning('check request for %s failed: %s', uid, exc)
            results['error'].append(uid)
            continue
        if req.status_code != 200:
            results['error'].append(uid)
            continue
        text = req.content.decode('utf8', errors='replace')
        if 'Рекомендации' not in text:
            results['no recommendations'].append(uid)
            continue
        results['recommendations'].append(uid)

        try:
            r1 = requests.get((check_url + '&json_dump=1').format(uid))
            j1 = json.loads(r1.content)
        except (requests.RequestException, ValueError) as exc:
            # The uid is already classified; only its hrefs are missing.
            logger.warning('json dump for %s unavailable: %s', uid, exc)
            continue
        j1_f = flatten_dict(j1)
        base_html_href.extend(j1_f['base_html_href'])
        base_img_href.extend(j1_f['base_img_href'])

    # Build the report: one "<status>:" section listing its uids, followed by
    # the href pairs (zip stops at the shorter of the two lists).
    chunks = []
    for status in results:
        chunks.append('{}:\n{}\n'.format(status, '\n'.join(results[status])))
        chunks.append('\n')
    chunks.append('base_html_href\tbase_img_href:\n{}'.format(
        '\n'.join('{}\t{}'.format(html_href, img_href)
                  for html_href, img_href
                  in zip(base_html_href, base_img_href))))
    chunks.append('\n')

    url_suffix = '' if args.url == 'default' else args.url
    report_path = 'logs/recs-{}{}.log'.format(start, url_suffix)
    with codecs.open(report_path, 'w', 'utf8') as f:
        f.write(''.join(chunks))

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
