import os
import sys
import urllib
from urlparse import urlparse

import uatraits
from mapreducelib import MapReduceClient, Record
from parselib import SessionRecord, DatesRange

def get_url(request):
    """Return the text that follows the first ``url=`` in *request*.

    The value is cut at the first delimiter *kind* that occurs in the
    remainder, trying kinds in this fixed priority order: ``&``, tab,
    space, ``;``, ``?``.  Note that this is a priority order, not
    "whichever delimiter comes first": if an ``&`` exists anywhere after
    the marker, the value runs up to it even when a space precedes it.

    Returns ``None`` when ``url=`` does not occur, and the whole remainder
    when none of the delimiters is present.
    """
    marker = 'url='
    start = request.find(marker)
    if start < 0:
        return None

    tail = request[start + len(marker):]
    # Delimiters are tried one kind at a time, highest priority first.
    for delimiter in ('&', '\t', ' ', ';', '?'):
        cut = tail.find(delimiter)
        if cut != -1:
            return tail[:cut]
    return tail

def get_vtbNum(request):
    """Return the text that follows the first ``vtbNum=`` in *request*.

    The match is case-sensitive.  The value is cut at the first delimiter
    *kind* found in the remainder, trying kinds in this fixed priority
    order: ``&``, tab, space, ``;``, ``?`` (a priority order, not the
    earliest position).

    Returns ``None`` when ``vtbNum=`` does not occur, and the whole
    remainder when none of the delimiters is present.
    """
    marker = 'vtbNum='
    start = request.find(marker)
    if start < 0:
        return None

    tail = request[start + len(marker):]
    # Delimiters are tried one kind at a time, highest priority first.
    for delimiter in ('&', '\t', ' ', ';', '?'):
        cut = tail.find(delimiter)
        if cut != -1:
            return tail[:cut]
    return tail

def get_vtbnum(request):
    """Return the text that follows the first ``vtbnum=`` in *request*.

    The match is case-sensitive (all lower-case marker).  The value is cut
    at the first delimiter *kind* found in the remainder, trying kinds in
    this fixed priority order: ``&``, tab, space, ``;``, ``?`` (a priority
    order, not the earliest position).

    Returns ``None`` when ``vtbnum=`` does not occur, and the whole
    remainder when none of the delimiters is present.
    """
    marker = 'vtbnum='
    start = request.find(marker)
    if start < 0:
        return None

    tail = request[start + len(marker):]
    # Delimiters are tried one kind at a time, highest priority first.
    for delimiter in ('&', '\t', ' ', ';', '?'):
        cut = tail.find(delimiter)
        if cut != -1:
            return tail[:cut]
    return tail

def Map(rec):
    """Map step: emit the URL host together with any vtbNum/vtbnum value.

    ``rec.value`` is searched for a ``url=`` field.  When one is found, it is
    percent-decoded and parsed, and its network location (host) is used as
    the first argument of each emitted ``Record``:

    * if ``rec.value`` contains ``vtbNum``, a record with the third argument
      ``'vtB- ' + <value>`` is yielded;
    * if ``rec.value`` contains ``vtbnum``, a record with the third argument
      ``'vtb- ' + <value>`` is yielded.

    Records without a ``url=`` field, or whose URL cannot be decoded or
    parsed, yield nothing.  (Previously a decode/parse failure left ``host``
    unassigned and the following ``yield`` raised ``UnboundLocalError``.)
    """
    value = rec.value

    # NOTE(review): the parsed session record is never used below.  The call
    # is kept only because the constructor may reject malformed input --
    # confirm against parselib and drop it if it has no such effect.
    SessionRecord(value)

    raw_url = get_url(value)
    if raw_url is None:
        return

    try:
        # decode() uses the default codec, so a URL that the default codec
        # cannot decode raises here and the record is skipped.
        url = urllib.unquote(raw_url.decode().encode('utf8'))
        host = urlparse(url).netloc
    except Exception:
        print(raw_url)
        print("ERROR:", sys.exc_info()[0])
        # Without a host there is nothing meaningful to emit for this record.
        return

    # NOTE(review): these are substring checks, so a value containing
    # 'vtbNum' without a following '=' is emitted as 'vtB- None' (same for
    # 'vtbnum').  Kept as-is to leave the produced rows unchanged.
    if 'vtbNum' in value:
        yield Record(str(host), '', 'vtB- ' + str(get_vtbNum(value)))

    if 'vtbnum' in value:
        yield Record(str(host), '', 'vtb- ' + str(get_vtbnum(value)))

def main():
    """Run ``Map`` over the daily ``spy_log`` tables into one output table.

    The destination table ``kosotis/vtbNum`` is dropped first.  Then, for
    every day produced by ``DatesRange(start_day, end_day)``, ``Map`` is run
    on ``spy_log/<day>`` with its output appended to the destination table.
    A failure on one day is reported and the loop moves on to the next day.
    No reduce step is run.
    """
    client = MapReduceClient(server='sakura.search.yandex.net', verbose=True)
    dst_table = 'kosotis/vtbNum'
    client.dropTable(dst_table)
    start_day = '20150915'
    end_day = '20150921'

    for day in DatesRange(start_day, end_day):
        src_table = os.path.join('spy_log', day)
        print('Map day: ' + day)

        try:
            client.runMap(Map, srcTable=src_table, dstTable=dst_table,
                          appendMode=True, sortMode=True)
        except Exception:
            # Only ordinary errors are caught, so Ctrl-C / SystemExit still
            # stop the whole run instead of being reported as a failed day.
            print("ERROR:", sys.exc_info()[0])
            # NOTE(review): these two messages are printed only when the map
            # fails; they look like they were meant for the success path --
            # confirm the intent before moving them.
            print('MAP ENDED!!!')
            print('sort ended')


if __name__ == '__main__':
    main()

