#!/usr/bin/env python
#! -*- coding: utf-8 -*-
from __future__ import unicode_literals
from __future__ import division
from __future__ import print_function
import sys
import os
import re
import arrow
import requests
from tqdm import tqdm
import time
import codecs
import contextlib
import itertools
import logging
import toml
import pdb
import argparse
import tempfile
import traceback
import subprocess
import smtplib
import mapreducelib
import threading
import urllib
import json
from time import sleep
try:
    import thread
except ImportError:
    import _thread as thread
import urlparse
from collections import defaultdict, Counter, namedtuple
import datetime as dt
from datetime import datetime as dtdt

# Maps a dotted path id to the action name it is counted as.
# NOTE(review): presumably these are click-log counter paths -- confirm.
PATH = {
    '12.1620.705': 'install',
    '12.1620.486': 'cancel',
}

# Lightweight record types.
Parameter = namedtuple('Parameter', ['element', 'eventtype', 'lang'])
Ident = namedtuple('Ident', [
    'product',
    'service',
    'lang',
    'browser',
    'description',
    'element',
    'candidate',
])

browser_dict = {}

# Settings for the notification e-mails sent by send_email().
SERVER = "localhost"
RECIPIENTS = ['riddle', 'pecheny']
SUBJECT = "AtomBanners stat uploader"
FROM = "stat-uploader@monitoring1.haze.yandex.net"


def send_email(recipients, message):
    """Send *message* by e-mail through the SMTP server named by SERVER.

    recipients -- iterable of logins; '@yandex-team.ru' is appended to each
    message    -- text used as the body of the mail

    The sender and subject come from the module-level FROM and SUBJECT.
    Errors raised by ``sendmail`` propagate to the caller, but the SMTP
    connection is always shut down first.
    """
    recipients = ['{}@yandex-team.ru'.format(rec) for rec in recipients]
    body = """\
From: {}
To: {}
Subject: {}

{}
""".format(FROM, ", ".join(recipients), SUBJECT, message).encode('utf8')
    server = smtplib.SMTP(SERVER)
    try:
        server.sendmail(FROM, recipients, body)
    finally:
        # Always release the connection. If the server has already dropped
        # it, QUIT cannot be sent; ignore that so it does not mask an error
        # raised by sendmail above.
        try:
            server.quit()
        except smtplib.SMTPServerDisconnected:
            pass


@contextlib.contextmanager
def make_temp_file(**kwargs):
    """Context manager yielding a temporary file that is deleted on exit.

    Keyword arguments are passed straight to ``tempfile.mkstemp``. The value
    yielded is mkstemp's ``(fd, path)`` tuple. The descriptor is NOT closed
    here; that remains the caller's responsibility. The file at ``path`` is
    removed when the block exits, including when it exits with an exception.
    """
    temp_file = tempfile.mkstemp(**kwargs)
    try:
        yield temp_file
    finally:
        # Runs on both normal and exceptional exit so no file is left behind.
        os.remove(temp_file[1])


def deutf8ify(rec):
    """Return *rec* with UTF-8 byte strings decoded to unicode.

    For a ``mapreducelib.SubkeyedRecord`` a new record is built from the
    decoded key, subkey and value. For a plain byte string the decoded text
    is returned. Anything else is returned unchanged. Undecodable bytes are
    replaced rather than raising.
    """
    if isinstance(rec, mapreducelib.SubkeyedRecord):
        key, subkey, value = rec.key, rec.subkey, rec.value
        if not isinstance(key, unicode):
            key = key.decode('utf8', errors='replace')
        if not isinstance(subkey, unicode):
            subkey = subkey.decode('utf8', errors='replace')
        if not isinstance(value, unicode):
            value = value.decode('utf8', errors='replace')
        # The original called a bare ``Record`` that is never defined or
        # imported in this module, so this branch raised NameError.
        # NOTE(review): assumes mapreducelib exposes ``Record`` and that it
        # accepts (key, subkey, value) -- confirm against the library.
        return mapreducelib.Record(key, subkey, value)
    if isinstance(rec, str):
        return rec.decode('utf8', errors='replace')
    return rec


def utf8ify(rec):
    """Encode unicode text in *rec* to UTF-8 byte strings.

    A ``mapreducelib.SubkeyedRecord`` is modified in place (its key, subkey
    and value attributes are re-assigned when they are unicode) and returned.
    A unicode string is returned encoded. Any other value is returned as is.
    """
    if isinstance(rec, mapreducelib.SubkeyedRecord):
        for attr in ('key', 'subkey', 'value'):
            current = getattr(rec, attr)
            if isinstance(current, unicode):
                setattr(rec, attr, current.encode('utf8'))
        return rec
    if isinstance(rec, unicode):
        return rec.encode('utf8')
    return rec


def tryint(string):
    """Convert *string* to an int, returning -1 when it cannot be converted.

    Only conversion failures are swallowed; KeyboardInterrupt, SystemExit and
    unrelated errors are no longer hidden by a bare ``except``.
    """
    try:
        return int(string)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type (e.g. None); ValueError: malformed
        # text; OverflowError: float infinity.
        return -1


def dttots(dt_):
    """Return whole seconds between 1970-01-01 00:00 and the naive *dt_*."""
    epoch = dtdt(1970, 1, 1)
    elapsed = dt_ - epoch
    return int(elapsed.total_seconds())


def parsevars(vars):
    """Parse a comma-separated ``key=value`` string into a defaultdict.

    * A leading '-' on a key is dropped.
    * Values of keys starting with 'clid' are collected, in order, into the
      list stored under 'clids'.
    * An item without '=' is stored with the value 'SINGLE'.
    * Everything after the first '=' belongs to the value.

    Missing keys read from the result yield ''.
    """
    parsed = {'clids': []}
    for chunk in vars.split(','):
        name, eq, val = chunk.partition('=')
        if not eq:
            parsed[chunk] = 'SINGLE'
            continue
        if name.startswith('-'):
            name = name[1:]
        if name.startswith('clid'):
            parsed['clids'].append(val)
        else:
            parsed[name] = val
    return defaultdict(lambda: '', parsed)


def parseparams(value):
    """Parse a tab-separated ``key=value`` string into a defaultdict.

    The text after the first '=' of a field is its value; a field without
    '=' gets the value 'SINGLE'. Missing keys read from the result yield ''.
    """
    pairs = []
    for field in value.split('\t'):
        head, eq, tail = field.partition('=')
        if eq:
            pairs.append((head, tail))
        else:
            pairs.append((field, 'SINGLE'))
    return defaultdict(lambda: '', pairs)


def tabulate(*args):
    """Join the ``format()`` rendering of every argument with tabs."""
    cells = [format(arg) for arg in args]
    return '\t'.join(cells)


def ntabulate(*args):
    """Like tabulate(), with a newline appended to the resulting row."""
    row = tabulate(*args)
    return row + '\n'


def gettld(url):
    """Return the last dot-separated label of the host in *url*.

    'http://' is prepended when *url* contains no '//', so bare host names
    are accepted. A ':port' suffix on the last label is cut off.
    """
    if '//' not in url:
        url = 'http://' + url
    netloc = urlparse.urlparse(url).netloc
    last_label = netloc.rsplit('.', 1)[-1]
    return last_label.partition(':')[0]


def format_ident(fd, ident, ident_dict, ident2=None):
    """Render one newline-terminated, tab-separated report row.

    fd         -- value placed in the first column
    ident      -- object whose product/service/lang/browser/description/
                  element/candidate attributes fill the next columns
    ident_dict -- mapping from an ident to a mapping holding 'shows',
                  'installs', 'cancels' and 'clicks'
    ident2     -- key to look up in ident_dict; falls back to *ident* when
                  falsy
    """
    lookup_key = ident2 if ident2 else ident
    columns = [
        fd,
        ident.product,
        ident.service,
        ident.lang,
        ident.browser,
        ident.description,
        ident.element,
        ident.candidate,
    ]
    for counter in ('shows', 'installs', 'cancels', 'clicks'):
        columns.append(ident_dict[lookup_key][counter])
    return ntabulate(*columns)


def ident_from_line(tabs):
    """Build an Ident from columns 1 through 7 of a split line.

    Column 0 is not used. Raises IndexError when *tabs* has fewer than
    eight columns.
    """
    # Ident's fields are declared in the same order as columns 1..7.
    return Ident._make(tabs[column] for column in range(1, 8))


def debug_tqdm(x, debug):
    """Wrap *x* in a tqdm progress bar when *debug* is truthy, else return x."""
    return tqdm(x) if debug else x

# Authentication headers and endpoint for the stat upload API.
# NOTE(review): SECURITY -- the robot credentials are committed in source.
# They can now be supplied through the STAT_ROBOT_USER / STAT_ROBOT_PASSWORD
# environment variables; the literals remain only as a backward-compatible
# fallback and should be rotated and removed.
HEADERS = {
    'StatRobotUser': os.environ.get('STAT_ROBOT_USER', 'robot_pecheny'),
    'StatRobotPassword': os.environ.get('STAT_ROBOT_PASSWORD', 'OoGh1Adahy'),
}
URL = 'https://upload.stat.yandex-team.ru/_api/report/data'


def get_lastts():
    """Return the stored timestamp of the last processed log file.

    The value is read from 'fastlogs_stat_last_timestamp' in the current
    directory and parsed with a '+03:00' offset appended. When that file
    does not exist, the Unix epoch converted to Europe/Moscow is returned.
    """
    if os.path.isfile('fastlogs_stat_last_timestamp'):
        with open('fastlogs_stat_last_timestamp') as f:
            # Strip surrounding whitespace so a trailing newline (e.g. after
            # editing the file by hand) does not end up in the middle of the
            # string once the offset is appended.
            ts = f.read().strip()
        return arrow.get(ts + '+03:00')
    return arrow.get(0).to('Europe/Moscow')


def set_lastts(ts):
    """Store *ts* as the last processed timestamp.

    Overwrites 'fastlogs_stat_last_timestamp' in the current directory with
    ``ts.format('YYYY-MM-DD HH:mm')``.
    """
    with open('fastlogs_stat_last_timestamp', 'w') as state_file:
        stamp = ts.format('YYYY-MM-DD HH:mm')
        state_file.write(stamp)


def tsfile(tfile):
    """Parse the timestamp embedded in a log file name.

    Takes the text between 'tsv_data_' and '.tsv.log' in *tfile*, appends
    '+03:00' and hands it to ``arrow.get``. Raises IndexError when *tfile*
    does not contain 'tsv_data_'.
    """
    stem = tfile.partition('.tsv.log')[0]
    pieces = stem.split('tsv_data_')
    stamp = pieces[1]
    return arrow.get(stamp + '+03:00')


def get_srcfiles(lb=None, ub=None, allfiles=None):
    """Return, sorted, the log files whose timestamp lies in (lb, ub].

    lb       -- exclusive lower bound; defaults to get_lastts()
    ub       -- inclusive upper bound; defaults to timestamp 9999999999
                converted to Europe/Moscow
    allfiles -- candidate paths; defaults to get_allfiles()

    Any falsy argument is replaced by its default.
    """
    candidates = allfiles or get_allfiles()
    lower = lb or get_lastts()
    upper = ub or arrow.get(9999999999).to('Europe/Moscow')
    selected = []
    for path in candidates:
        stamp = tsfile(path)
        if stamp > lower and stamp <= upper:
            selected.append(path)
    selected.sort()
    return selected


def get_allfiles():
    """Return absolute paths of the entries in ./logs named 'tsv_data*'.

    The directory is listed without changing the process working directory.
    The previous chdir('logs') / chdir('..') pair left the process inside
    'logs' when listing failed, and did not return to the starting directory
    when 'logs' was a symlink.
    """
    logs_dir = os.path.abspath('logs')
    return [os.path.join(logs_dir, name)
            for name in os.listdir(logs_dir)
            if name.startswith('tsv_data')]


def _load_toml(path):
    """Parse the TOML file at *path*; the file is closed before returning."""
    with open(path, 'r') as f:
        return toml.loads(f.read())


def main():
    """Command-line entry point: upload pending or explicitly ranged logs.

    Options:
      --debug         echo log records to the console and show progress bars
      --config        TOML file to load instead of '<script name>.toml'
      --datetimefrom  lower bound of an explicit re-upload range
      --datetimeto    upper bound of an explicit re-upload range
      --timestamp     pass the two bounds to ``arrow.get`` as given instead
                      of parsing them as 'YYYYMMDDHHmm' with a +03:00 offset

    Unless both bounds are given, every file newer than the stored
    timestamp is processed in order, the stored timestamp is advanced after
    each one, and the process exits with status 0. With both bounds, the
    files in the range are uploaded with append=False and the stored
    timestamp is not touched.
    """
    global __file__                         # to fix stupid
    __file__ = os.path.abspath(__file__)    # __file__ handling
    _file_ = os.path.basename(__file__)     # in python 2
    script_name = _file_[:-3]               # basename without '.py'
    script_dir = os.path.dirname(__file__)

    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--config', default=None)
    parser.add_argument('--datetimefrom', default=None)
    parser.add_argument('--datetimeto', default=None)
    parser.add_argument('--timestamp', action='store_true')
    args = parser.parse_args()
    start = int((dtdt.now() - dtdt(1970, 1, 1)).total_seconds())

    # Console output is silenced to CRITICAL unless --debug is given; the
    # per-run log file always receives everything.
    logger = logging.getLogger(script_name)
    formatter = logging.Formatter('%(asctime)s | %(message)s')
    ch = logging.StreamHandler()
    logger.setLevel(logging.DEBUG)
    ch.setLevel(logging.DEBUG if args.debug else logging.CRITICAL)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    fh = logging.FileHandler('{}/logs/{}-{}.log'.format(
        script_dir, script_name, start),
        encoding='utf8')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # load config
    # 'basic.toml' is read from the directory the script was started in;
    # the remaining files are resolved after changing to the script's own
    # directory. Later files override keys of earlier ones.
    config = _load_toml('basic.toml')
    os.chdir(script_dir)
    config.update(_load_toml('distribution.toml'))
    if args.config is None:
        config.update(_load_toml(script_name + '.toml'))
    else:
        config.update(_load_toml(args.config))

    if not (args.datetimefrom and args.datetimeto):
        srcfiles = get_srcfiles(lb=get_lastts())
        # Always take the oldest pending file, then re-scan. The loop only
        # terminates because a processed file moves the stored timestamp
        # past itself, which removes it from the next scan.
        while len(srcfiles) > 0:
            oldest = srcfiles[0]
            done = process_file(oldest, logger, config, debug=args.debug)
            processed_ts = tsfile(oldest)
            if done and processed_ts > get_lastts():
                set_lastts(processed_ts)
            srcfiles = get_srcfiles()
        # Re-read the stored value: the one fetched before the loop is stale
        # once any file has been processed.
        logger.info("No new data. Latest counted ts is {}"
                    .format(get_lastts()))
        sys.exit(0)
    else:
        if args.timestamp:
            lb = arrow.get(args.datetimefrom)
            ub = arrow.get(args.datetimeto)
        else:
            lb = arrow.get(args.datetimefrom + '+03:00', 'YYYYMMDDHHmmZZ')
            ub = arrow.get(args.datetimeto + '+03:00', 'YYYYMMDDHHmmZZ')
        for srcfile in get_srcfiles(lb=lb, ub=ub):
            process_file(srcfile, logger, config, append=False,
                         debug=args.debug)


# Column layout shared by the source TSV lines and the uploaded report.
_REPORT_HEADER = ['fielddate', 'product', 'service',
                  'lang', 'browser', 'description', 'element', 'candidate',
                  'shows', 'installs', 'cancels', 'clicks']
_IDENT_WIDTH = 8        # leading columns that identify a group of lines
_UPLOAD_ATTEMPTS = 5    # POST attempts before the upload is given up
_RETRY_DELAY = 60       # seconds to wait after a failed attempt


def _aggregate_rows(lines, logger):
    """Sum the counter columns over consecutive lines sharing an ident.

    Returns a list of rows, each the 8 ident columns followed by the summed
    shows, installs, cancels and clicks. Lines that do not have exactly 12
    columns, or whose counters are not integers, are logged and skipped
    without touching the running totals.
    """
    rows = []
    current = None
    totals = [0, 0, 0, 0]
    for line in lines:
        tabs = line.rstrip().replace('\u0000', '').split('\t')
        if len(tabs) != len(_REPORT_HEADER):
            logger.info('Error on line {}'.format(line.rstrip()))
            continue
        try:
            # Parse every counter before changing any state, so a bad line
            # can never be half-applied.
            counts = [int(cell) for cell in tabs[_IDENT_WIDTH:]]
        except ValueError:
            logger.info('Error on line {}'.format(line.rstrip()))
            continue
        ident = tabs[:_IDENT_WIDTH]
        if current is not None and ident != current:
            # A new group starts: flush the finished one.
            rows.append(current + totals)
            totals = counts
        else:
            totals = [old + new for old, new in zip(totals, counts)]
        current = ident
    if current is not None:
        rows.append(current + totals)
    return rows


def _post_report(data, logger):
    """POST *data* to URL, retrying on non-200 answers; return the response.

    Retrying stops as soon as the server answers 200 or the response body
    contains 'Error in data' (resending the same payload cannot help then).
    """
    response = None
    for attempt in range(_UPLOAD_ATTEMPTS):
        response = requests.post(URL, headers=HEADERS, data=data)
        logger.info(response.text)
        if (response.status_code == 200 or
                b'Error in data' in response.content):
            break
        if attempt + 1 < _UPLOAD_ATTEMPTS:
            time.sleep(_RETRY_DELAY)
    return response


def process_file(srcfile, logger, config, append=True, debug=False):
    """Aggregate one TSV log file and upload it to the stat report.

    srcfile -- path of the file; its name must be parseable by tsfile()
    logger  -- logger receiving progress and error messages
    config  -- accepted for interface compatibility; not used here
    append  -- sent as '_append_mode' (1 when true, 0 otherwise)
    debug   -- show a tqdm progress bar while reading

    Consecutive lines with the same first eight columns are summed into one
    report row. A notification e-mail is sent when the server reports
    'Error in data' or when every upload attempt fails. A file without any
    valid line is logged and not uploaded.

    Always returns True, so the caller advances past the file either way.
    """
    logger.info('Source file is {}'.format(srcfile))
    ts = tsfile(srcfile)
    logger.info('Gathering data from {}...'.format(srcfile))
    with codecs.open(srcfile, 'r', 'utf8') as tf2:
        rows = _aggregate_rows(debug_tqdm(tf2, debug), logger)
    if not rows:
        # Previously an empty file crashed on ``None + list``.
        logger.info('No valid rows in {}, nothing to upload'.format(srcfile))
        return True
    logger.info('Forming tsv_data...')
    tsv_data = '\n'.join('\t'.join(format(cell) for cell in row)
                         for row in [_REPORT_HEADER] + rows)
    logger.info('Ready to post data')
    data = {
        "name": "Distribution/Others/AtomBanners/v2",
        "scale": "i",
        "_append_mode": (1 if append else 0),
        "tsv_data": tsv_data,
    }
    req = _post_report(data, logger)
    if b'Error in data' in req.content:
        send_email(RECIPIENTS, '{} seems to have invalid data'.format(ts))
    elif req.status_code != 200:
        send_email(RECIPIENTS, 'Upload of data from {} failed'.format(ts))
    return True


# Run the uploader only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
