#!/usr/bin/env python
#coding=utf-8

# Help text handed to OptionParser; optparse replaces %prog with the script name.
# The examples use only options that main() actually defines (--src, --dates, --dst).
usage = """Usage: %prog [options]

Insert your code into RecProcess, SessProcess, FetchResults, process or doCalcSingleUser function and run this script like this.

Examples:
  %prog --dates=20110514 --dst=errorf/test
  %prog --dates=20110514-20110520,20110601 --dst=errorf/test"""

from mapreducelib import MapReduce, Record, TemporaryTable, MRSession
from usercalclib import *
import sys
import re
import libabt
import libra
import libkernelgeo
import datetime
from optparse import OptionParser

def datesListFromString(dates_string):
    """Expand a comma-separated dates specification into individual dates.

    Each comma-separated item is either a single date, which is yielded
    unchanged (it is not validated), or a range ``YYYYMMDD-YYYYMMDD``, which
    is expanded into every day from the first border to the last, inclusive.
    A range whose first border is later than its last border yields nothing.

    Yields:
        Date strings; days produced from a range are formatted as YYYYMMDD.

    Raises:
        IOError: if an item contains more than one '-' separator.
        ValueError: if a range border cannot be parsed as a calendar date.

    This is a generator, so errors surface while iterating over the result,
    not at call time.
    """
    one_day = datetime.timedelta(days=1)

    for period in dates_string.split(','):
        borders = period.split('-')

        if len(borders) == 1:
            yield period

        elif len(borders) == 2:
            first_date, last_date = [
                datetime.date(int(border[0:4]), int(border[4:6]), int(border[6:8]))
                for border in borders
            ]

            current = first_date
            while current <= last_date:
                yield current.strftime('%Y%m%d')
                current += one_day

        else:
            # The exception used to be constructed but never raised, so
            # malformed items were silently ignored.
            raise IOError('Wrong dates format')

def process(key, recs):
    """Count first-page web searches per tracked clid within one session.

    The records are parsed with libra into a session. The session is used
    only if its Yandex tech events carry a "-clid1" variable whose value is
    one of the tracked clids and a non-empty "-bnrd" variable (the last
    "-bnrd" seen wins). For every TYandexWebRequest with PageNo == 0, each
    tracked clid that occurs in the full request text as "clid=<id>" (not
    followed by another digit) is counted; a request matching no tracked
    clid is counted under the "empty" pseudo-clid.

    Yields:
        At most one Record(bnrd, '', value), where value is `key` followed by
        tab-separated "<clid>=<count>" fields (including "empty=<count>").
        Nothing is yielded if the session cannot be parsed, is not eligible,
        or has no counted requests.
    """
    try:
        session = libra.Parse(recs, 'blockstat.dict')
    except Exception:
        # Best effort: sessions libra cannot parse are skipped.
        return

    # Tracked clids, each listed once (the former dict literal repeated many keys).
    tracked_clids = (
        "2252520", "2252521", "2252522",
        "2252513", "2252514", "2252515", "2252516",
        "2246649", "2246650", "2246651", "2246652",
        "2252509", "2252510", "2252511", "2252512",
        "2252517", "2252518", "2252519",
        "2246653", "2246654", "2246655", "2246656",
        "2241979", "2241980", "2241981",
        "2238715", "2238716", "2238717",
        "2167028", "2167029", "2167030",
        "2238712", "2238713", "2238714",
        "2242002", "2242349", "2242350", "2242003", "2242004",
        "2242008", "2242009", "2242010", "2242011",
        "2224316", "2224317", "2224318", "2224319",
        "2189881", "2196599", "2187646", "2220367",
        "2167025", "2167026", "2167027",
        "2163765", "2163430", "2164776",
        "2189879", "2187268", "2219044", "2219045", "2224772",
        "2224312", "2224313", "2224314", "2224315",
        "2189880", "2196598", "2187644", "2220366",
        "2228989", "2228990", "2228991", "2229633",
        "2244045", "2244046", "2244047", "2244048",
    )
    tracked_set = frozenset(tracked_clids)

    has_tracked_clid = False
    bnrd = ""
    for event in session.GetSessionYandexTechEvents():
        if not hasattr(event, "Vars"):
            continue
        for val in event.Vars.split(','):
            # The value starts one character after the variable name
            # (presumably a separator such as '=' -- confirm against the log format).
            if val.startswith("-clid1") and val[len("-clid1") + 1:] in tracked_set:
                has_tracked_clid = True
            elif val.startswith("-bnrd"):
                bnrd = val[len("-bnrd") + 1:]

    if not has_tracked_clid or not bnrd:
        return

    counts = dict.fromkeys(tracked_clids, 0)
    # Bucket for requests without any tracked clid. This key was missing
    # before, so the first such request raised KeyError.
    counts["empty"] = 0

    for req in session.GetRequests():
        if not req.IsA("TYandexWebRequest") or req.PageNo != 0:
            continue

        full_request = req.FullRequest
        matched = False
        for clid in tracked_clids:
            needle = "clid=" + clid
            pos = full_request.find(needle)
            if pos == -1:
                continue
            end = pos + len(needle)
            # Reject matches that are only a prefix of a longer numeric clid.
            if len(full_request) <= end or full_request[end] not in "0123456789":
                counts[clid] += 1
                matched = True

        if not matched:
            counts["empty"] += 1

    if sum(counts.values()) > 0:
        fields = [key]
        for clid in tracked_clids:
            fields.append(clid + "=" + str(counts[clid]))
        fields.append("empty=" + str(counts["empty"]))
        yield Record(bnrd, '', '\t'.join(fields))

def process2(key, recs):
    """Aggregate the per-clid search counts of all records sharing one key.

    Each record value is tab-separated; the first field is skipped and every
    remaining field has the form "<name>=<count>". Two records are yielded:
    to output table 0 the sum of counts whose name is not "empty", and to
    output table 1 the sum of all counts.
    """
    total = 0
    with_clid = 0

    for record in recs:
        fields = record.value.split('\t')
        for field in fields[1:]:
            parts = field.split("=")
            hits = int(parts[1])
            total += hits
            if parts[0] == "empty":
                continue
            with_clid += hits

    yield Record(key, '', str(with_clid), tableIndex = 0)
    yield Record(key, '', str(total), tableIndex = 1)

def doMap(srcTableNamesList, dstTableName):
    """Run both reduce stages over the source tables.

    Stage one runs `process` on every source table, appending into
    dstTableName, which is then sorted. Stage two runs `process2` on
    dstTableName, appending into "<dst>_clids" and "<dst>_all", which are
    then sorted as well.
    """
    shipped_files = ('blockstat.dict', 'geodata3.bin')

    for src_name in srcTableNamesList:
        MapReduce.runReduce(
            process,
            srcTable = src_name,
            dstTable = dstTableName,
            appendMode = True,
            files = list(shipped_files),
        )
    MapReduce.sortTable(dstTableName)

    MapReduce.runReduce(
        process2,
        srcTable = dstTableName,
        dstTables = [dstTableName + "_clids", dstTableName + "_all"],
        appendMode = True,
        files = list(shipped_files),
    )
    MapReduce.sortTables([dstTableName + "_clids", dstTableName + "_all"])

def main():
    """Parse the command line and run the calculation over the requested tables.

    Source tables are named "<src>/<date>" for every date produced from the
    --dates option; results are written to tables derived from --dst.

    Raises:
        IOError: if --dst is not given.
    """
    parser = OptionParser(usage)
    parser.add_option('--src', action='store', dest='prefix', default='user_sessions', help='Source table. Used like prefix with --dates option to carry on a calculation on several tables for different dates.')
    parser.add_option('--dates', action='store', dest='dates', default='', help='Used to run a calculation for a list of dates [<src>/<date1>, ...]. Prefix is taken from --src option.')
    parser.add_option('--dst', action='store', dest='dst', help='Output table.')

    (options, args) = parser.parse_args()

    # Check the destination before any cluster work starts. Previously every
    # AttributeError raised inside doMap was reported as a missing --dst,
    # which hid unrelated bugs.
    if not options.dst:
        raise IOError('dstTable name needed.')

    MapReduce.useDefaults(server = 'sakura.search.yandex.net', proxyServer = 'padus05.search.yandex.net:8081', mrExec = '/Berkanavt/bin/mapreduce27-dev', mrFindExec = '/Berkanavt/bin/mr_find-dev', verbose = True)

    tables = [options.prefix + '/' + date for date in datesListFromString(options.dates)]

    doMap(tables, options.dst)

# Run the calculation only when this file is executed as a script.
if __name__ == '__main__':
    main()
