# -*- coding: UTF-8 -*-

from mapreducelib import MapReduce, Record
from collections import defaultdict
from datetime import datetime
import libra
import urllib, re,random

def map_redir(rec):
    """Map one redirect-log line to a per-user row for ``top.*`` paths.

    The raw line (``rec.value``) is split on ``@@``.  The last field is taken
    as the user id and the third field from the end as the event timestamp;
    fields containing ``=`` are additionally parsed as ``key=value`` pairs.

    A record is emitted only when all of the following hold:

    * the line contains the substrings ``pid=12`` and ``cid=72910``;
    * the line has at least three ``@@``-separated fields;
    * the user id is at least 10 characters long;
    * a ``path`` key is present and its value starts with ``top.``.

    Args:
        rec: input record; only its ``value`` attribute (the log line) is read.

    Yields:
        At most one ``Record`` built from ``'y' + uid``, an empty string, and
        the tab-separated value ``date, ts, path, tags, referer``.  ``date``
        is ``'-'`` when the timestamp cannot be converted; a missing ``tags``
        or ``HTTP_REFERER`` key is written as the string ``'None'``.
    """
    line = rec.value

    # NOTE(review): these are plain substring tests, so e.g. 'pid=123' also
    # passes -- confirm that is intended.
    if 'pid=12' not in line or 'cid=72910' not in line:
        return

    fields = line.split('@@')
    # Both uid (fields[-1]) and ts (fields[-3]) must exist.
    if len(fields) < 3:
        return

    uid = fields[-1]
    ts = fields[-3]
    if len(uid) < 10:
        return

    # Split on the first '=' only, so values may themselves contain '='.
    # A key that appears several times keeps its last value.
    data = dict(field.split('=', 1) for field in fields if '=' in field)

    path = data.get('path')
    if path is None or not path.startswith('top.'):
        return

    # Convert the timestamp only for rows that will actually be emitted.
    # A malformed or out-of-range timestamp degrades to '-' instead of
    # dropping the row.
    try:
        date = datetime.fromtimestamp(float(ts)).date().isoformat()
    except (ValueError, OverflowError, OSError):
        date = '-'

    # str() deliberately turns a missing key (None) into the text 'None'.
    url = str(data.get('HTTP_REFERER'))
    tags = str(data.get('tags'))

    yield Record('y' + uid, '', '\t'.join((date, ts, path, tags, url)))


def main():
    """Run ``map_redir`` over seven daily redirect-log tables.

    Every daily run appends its output to the same destination table.
    """
    MapReduce.useDefaults(
        server='sakura.search.yandex.net:8013',
        username='userstats',
        mrExec='/Berkanavt/bin/mapreduce-dev',
        verbose=True,
        # testMode=True,
    )

    result_table = 'ensuetina/OBJECT_CARD/result_new'

    # One source table per day, 2016-01-25 through 2016-01-31.
    day_suffixes = ('0125', '0126', '0127', '0128', '0129', '0130', '0131')
    source_tables = ['redir_log/2016' + suffix for suffix in day_suffixes]

    for source_table in source_tables:
        MapReduce.runMap(
            map_redir,
            srcTable=source_table,
            dstTable=result_table,
            appendMode=True,
            sortMode=True,
        )


# Run the job only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
