#!/usr/bin/python
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import sys, argparse, json, requests
import yt.wrapper as ytw
from datetime import date, timedelta, datetime
from urllib import urlopen
from traceback import format_exception
from collections import defaultdict

MAX_ATTEMPTS_CNT = 3

def get_traceback():
    """Return the traceback of the exception being handled as indented text.

    Reads the current exception from ``sys.exc_info()``, so it is meant to be
    called from inside an ``except`` block.  Every chunk produced by
    ``traceback.format_exception`` is stripped of surrounding whitespace,
    prefixed with a tab and terminated with a newline.

    Returns:
        The concatenated, tab-indented traceback text.
    """
    exc_type, exc_value, exc_traceback = sys.exc_info()
    tb = ''
    for step in format_exception(exc_type, exc_value, exc_traceback):
        try:
            tb += "\t" + step.strip() + "\n"
        except Exception:
            # Best effort: a chunk that cannot be appended is dropped so that
            # error reporting itself never fails (presumably this guards
            # against str/unicode mixing under Python 2 -- TODO confirm).
            # Unlike a bare ``except:``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            continue
    return tb

class cmplLogMapper:
    """Map callable that selects matching log records for a single day.

    An instance is called once per input record (a dict-like object) and
    yields at most one output row ``{"uid", "geo", "msgdate"}``.

    A record is selected when all of the following hold:
      * ``flags``, ``uid`` and ``type`` are present and non-empty, and none of
        them contains the substring ``tskv``;
      * ``type`` equals ``"foo"``;
      * the ``;``-separated ``flags`` contain ``YW`` or ``YS`` and do not
        contain ``F9``;
      * the first whitespace-separated token of ``msgdate`` equals ``today``.
    """

    def __init__(self, today):
        # Date string compared against the first token of record["msgdate"].
        self.today = today

    def __call__(self, record):
        """Yield one output row if ``record`` matches, otherwise nothing.

        Any unexpected error is reported on stderr together with the record
        and the record is skipped; the mapper never raises for a bad record.
        """
        try:
            raw_flags = record.get("flags", "")
            uid = record.get("uid", "")
            rec_type = record.get("type", "")
            if not (raw_flags and uid and rec_type):
                return
            if (raw_flags.find('tskv') >= 0
                    or str(uid).find('tskv') >= 0
                    or str(rec_type).find('tskv') >= 0):
                return
            if rec_type != "foo":
                return

            flags = raw_flags.split(';')
            if "F9" in flags or not ("YW" in flags or "YS" in flags):
                return

            # A missing or empty msgdate is simply "not today".  Indexing the
            # split result unconditionally would raise IndexError and flood
            # stderr with a bogus error for every such record.
            date_tokens = (record.get("msgdate") or "").split()
            if not date_tokens or date_tokens[0] != self.today:
                return

            yield {
                "uid":     uid,
                "geo":     record.get("geo", "-"),
                "msgdate": self.today
            }
        except Exception as e:
            sys.stderr.write('CmplLogMapper error: "%s" in record "%s".%s\n' % (str(e), str(record), get_traceback()))

def getBouncesInfo(date, start, end):
    """Fetch one page of bounce information and return the raw response body.

    The request URL is built from the module-level ``GET_BOUNCES_INFO_URL``
    template, filled with ``date``, ``start`` and ``end`` in that order.

    Args:
        date: Value substituted for the ``date`` query parameter.
        start: Value substituted for the ``start`` query parameter.
        end: Value substituted for the ``end`` query parameter.

    Returns:
        The response body when the HTTP status is 200; an empty string on any
        other status or on any error (both cases are reported on stderr).
    """
    try:
        f = urlopen(GET_BOUNCES_INFO_URL % (date, start, end))
        try:
            if f.getcode() == 200:
                return f.read()
            # NOTE: despite the "body" label, f.info() is the response
            # header/meta information, not the response body.
            sys.stderr.write('Get bounces info response HTTP code: {0}, body: {1}\n'.format(f.getcode(), f.info()))
        finally:
            # This function is called once per page; always release the
            # connection instead of waiting for garbage collection.
            f.close()
    except Exception as e:
        sys.stderr.write('Get bounces info HTTP request failed: %s.%s\n' % (str(e), get_traceback()))
    return ""

if __name__ == "__main__":
    # Script entry point.  For each of the DAYS days preceding the reference
    # date it:
    #   1. runs cmplLogMapper over that day's log table into a temporary table,
    #   2. reads the selected uid -> geo pairs back,
    #   3. adds uids returned page by page from getBouncesInfo(),
    #   4. counts uids per geo (plus a 'total' row), prints each count and
    #      collects it into `data`.
    # Finally `data` is written as JSON to the output file.
    #
    # NOTE: this block is intentionally kept flat (not wrapped in a function):
    # getBouncesInfo() reads GET_BOUNCES_INFO_URL as a module-level global.
    data = []
    YT_CMPLLOG_PATH = "//home/logfeller/logs/mail-so-compl-log/1d/"
    YT_STAT_TMP_PATH = "//home/so_fml/tmp/statistics_uids_tmp_"
    GET_BOUNCES_INFO_URL = "https://web.so.yandex-team.ru/tools/get_bounces_uids?date=%s&start=%s&end=%s"

    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--refdate', type=str, help="Reference date for calculating statistics")
    parser.add_argument('-p', '--period',  type=str, help="Days period for recalculating data for YaStat")
    parser.add_argument('-o', '--output',  type=str, help="Path to output json with data to upload to YaStat")
    # parse_known_args: unknown command-line options are ignored, not rejected.
    args = parser.parse_known_args()[0]

    OUTPUT_JSON_FILE = args.output if args.output else './output_data.json'
    DAYS = int(args.period) if args.period else 14
    # Only the date part of an ISO timestamp ("YYYY-MM-DDThh:mm:ss") is used.
    top_date = args.refdate.split('T')[0] if args.refdate else date.today().isoformat()
    try:
        topd = datetime.strptime(top_date, '%Y-%m-%d').date()
    except Exception as e:
        print("Error while parsing date string '%s': %s" % (top_date, str(e)))
        sys.exit(1)

    # Oldest day first; the reference date itself is not processed.
    for i in range(DAYS, 0, -1):
        d = (topd - timedelta(days=i)).isoformat()
        tmp_table = YT_STAT_TMP_PATH + d
        uids, geos = {}, defaultdict(int)

        try:
            ytw.run_map(cmplLogMapper(d), YT_CMPLLOG_PATH + d, tmp_table, job_count=1000)
            ytw.run_merge(tmp_table, tmp_table, spec={'combine_chunks': 'true', 'data_size_per_job': 1453936477})
            for r in ytw.read_table(tmp_table, format=ytw.JsonFormat(), raw=False):
                uids[r["uid"]] = r['geo']
        finally:
            # The table is not needed once read; remove it even when one of
            # the steps above fails so no temporary table is left behind.
            ytw.remove(tmp_table, force=True)

        # Page through the bounces service, `step` entries at a time.
        start, step, attempt = 0, 200, 0
        while attempt < MAX_ATTEMPTS_CNT:
            try:
                bounces = json.loads(getBouncesInfo(d, start, start + step - 1))
            except Exception as e:
                print("Error: %s.%s" % (str(e), get_traceback()))
                attempt += 1
                # Retry the SAME page: advancing `start` here would silently
                # skip the page that failed to download or parse.
                continue
            attempt = 0
            try:
                if len(bounces) < 1:
                    break  # an empty page marks the end of the data
                for b in bounces:
                    if b["count"] > 1:
                        uids[b["uid"]] = b["geo"]
            except Exception as e:
                # A page that was fetched but is malformed cannot be fixed by
                # retrying; count the failure and move on to the next page.
                print("Error: %s.%s" % (str(e), get_traceback()))
                attempt += 1
            start += step

        # One count per geo ('-' for an empty geo) plus an overall 'total'.
        for uid in uids:
            geos[uids[uid] or '-'] += 1
            geos['total'] += 1
        for geo in geos:
            data.append({'fielddate': d, "geo": geo, "uids_count": geos[geo]})
            print('{"fielddate": "%s", "geo": "%s", "uids_count": %s}' % (d, geo if geo else '-', geos[geo]))

    try:
        # `with` closes the file even if serialising or writing fails.
        with open(OUTPUT_JSON_FILE, 'wt') as f:
            f.write(json.dumps(data) + "\n")
    except Exception as e:
        sys.stderr.write('Saving result file error: %s.%s\n' % (str(e), get_traceback()))
