#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
from operator import itemgetter
from math import exp
import sys
import os
import time
import json
import ftplib
import re
from mapreducelib import *
import argparse
from datetime import date, datetime, timedelta as td


def getvalue(string, val, d="\t"):
    rv = ""
    tabs = string.split(d)
    for k in tabs:
        if k[0:(len(val) + 1)] == val + "=":
            rv = k[(len(val) + 1):]
    return rv


def wizardstat(key, recs):
    """Reduce one key's session records into per-request wizard rows.

    ``recs`` is an iterable of objects exposing a string ``value``.  Three
    kinds of records are picked up and correlated through their ``reqid``
    field:

    * ``type=BLOCKSTAT`` records that mention ``default_search`` or
      ``/stripe`` mark the request as reportable and set its bucket label.
      The label is the ``@``-separated field two positions after
      ``/test-ids`` with its first six characters dropped, or ``"none"``
      when no such field exists.
    * ``type=CLICK`` records whose ``path`` is one of the tracked paths, or
      starts with ``707.``, are collected as clicks of the request.
    * ``type=REQUEST`` records containing ``service=www.yandex`` supply the
      ``query`` text and the ``default_search_wizard`` /
      ``default_stripe_wizard`` values found in ``search-props``.

    For every request that has both a matching BLOCKSTAT and a matching
    REQUEST record, yields ``Record(reqid, key, row)`` where ``row`` is the
    tab-joined sequence: bucket label, query,
    ``"<default_search_wizard>|<default_stripe_wizard>"``, the first ten
    characters of the reqid, then every collected click path.  Requests
    without clicks are still emitted (with no trailing paths).
    """
    tracked_paths = ("12.1620.486", "12.1620.1030", "12.1620.705")
    # A set keeps the final membership test O(1); the reqids were previously
    # accumulated in a list, making the output loop quadratic.
    shown_reqids = set()
    clicks = {}
    tb = {}
    query = {}
    for rec in recs:
        value = rec.value
        if "type=BLOCKSTAT" in value and (
                "default_search" in value or "/stripe" in value):
            reqid = getvalue(value, "reqid")
            shown_reqids.add(reqid)
            clicks.setdefault(reqid, [])
            tb[reqid] = "none"
            buck_vals = value.split("@")
            for i, field in enumerate(buck_vals):
                # Guard the look-ahead: a "/test-ids" marker within the last
                # two fields used to raise IndexError and abort the reduce;
                # such a record now keeps the "none" label.
                if field == "/test-ids" and i + 2 < len(buck_vals):
                    tb[reqid] = buck_vals[i + 2][6:]
        if "type=CLICK" in value:
            path = getvalue(value, "path")
            if path in tracked_paths or path.startswith("707."):
                reqid = getvalue(value, "reqid")
                clicks.setdefault(reqid, []).append(path)
        if "type=REQUEST" in value and "service=www.yandex" in value:
            reqid = getvalue(value, "reqid")
            relev = getvalue(value, "search-props")
            wizards = (getvalue(relev, "default_search_wizard", d=",") +
                       "|" +
                       getvalue(relev, "default_stripe_wizard", d=","))
            # A later REQUEST record for the same reqid replaces the earlier.
            query[reqid] = [getvalue(value, "query"), wizards]

    for reqid, paths in clicks.items():
        if reqid in shown_reqids and reqid in query:
            row = [tb[reqid]] + query[reqid] + [reqid[0:10]] + paths
            yield Record(reqid, key, "\t".join(row))


def _read_listed_dates(listing_path):
    """Return the trailing 8-character date of each line in a listing file.

    Every non-blank line of ``listing_path`` has its last eight characters
    (after stripping trailing whitespace) converted to ``int``.  Blank lines
    are skipped instead of failing on ``int("")``.  The file is closed
    before returning.
    """
    with open(listing_path) as listing:
        return [int(line.rstrip()[-8:]) for line in listing if line.strip()]


def main():
    """Compute wizard statistics for the last seven days and export them.

    Steps, in order:

    1. List tables matching ``user_sessions/2015[0-9]{4}`` into the local
       file ``user_sessions`` and tables matching ``riddle/us2015[0-9]{4}``
       into the local file ``done_us``.
    2. For each listed session date not older than seven days: run the
       ``wizardstat`` reduce into ``riddle/us<date>`` unless that table is
       already listed, then copy the per-day table into
       ``riddle/distribution_month_tmp`` (the first copy is issued without
       ``-append``, later ones with it) and append the date to ``done_us``.
    3. Sort ``riddle/distribution_month_tmp`` and dump it to the local file
       ``distribution_stat.txt``.

    Exit statuses of the shell commands are not checked; only the status of
    each ``mr_cat-dev`` call is printed.
    """
    # Single-argument print(...) outputs the same text on Python 2 and also
    # parses on Python 3.
    print("get user_sessions tables")
    comm = 'MR_USER=tmp /Berkanavt/bin/mr_ls-dev -s cedar:8013 "user_sessions/2015[0-9]{4}" > user_sessions'
    os.system(comm)
    dates = _read_listed_dates("user_sessions")

    print("get already parsed days")
    comm = 'MR_USER=tmp /Berkanavt/bin/mr_ls-dev -s cedar:8013 "riddle/us2015[0-9]{4}" > done_us'
    os.system(comm)
    done_dates = set(_read_listed_dates("done_us"))

    # NOTE(review): the shell commands talk to cedar:8013 while the library
    # is configured with cedar00:8013 -- confirm both name the same cluster.
    MapReduce.useDefaults(server='cedar00:8013',
                          verbose=False, username='tmp', usingSubkey=True)

    # Oldest date (as a YYYYMMDD integer) that is still processed; computed
    # once rather than on every loop iteration.
    oldest = int((date.today() - td(days=7)).strftime('%Y%m%d'))
    append = " "
    for d in sorted(dates):
        if d < oldest:
            continue
        print(d)
        if d not in done_dates:
            print("calculate stat")
            MapReduce.runReduce(wizardstat, srcTable='user_sessions/' +
                                str(d), dstTable='riddle/us' + str(d), appendMode=False)
        print("to destination table")
        comm = 'MR_USER=tmp /Berkanavt/bin/mr_cat-dev -s cedar:8013 -sub' + \
            append + '-o riddle/distribution_month_tmp riddle/us' + str(d)
        result = os.system(comm)
        # Only the first copy omits -append; all later days are appended.
        append = " -append "
        print(result)
        with open("done_us", "a") as myfile:
            myfile.write(str(d) + "\n")
        print(str(d) + " Done")

    comm = 'MR_USER=tmp /Berkanavt/bin/mapreduce-dev -server cedar:8013 -sort riddle/distribution_month_tmp'
    os.system(comm)
    comm = 'MR_USER=tmp /Berkanavt/bin/mapreduce-dev -server cedar:8013 -subkey -read riddle/distribution_month_tmp > distribution_stat.txt'
    os.system(comm)

# Run the pipeline only when this file is executed as a script.
if __name__ == '__main__':
    main()
