uthor__ = 'kosotis'

import urllib
import getpass
import sys
import os
import urllib2
from parselib import DatesRange
from mapreducelib import MapReduceClient, Record
from parselib import SessionRecord, DatesRange
import time
from datetime import datetime

class PartnerRecord:
    """Region information extracted from one tab-separated log line.

    Only the ``user-region=<id>`` field is parsed.  ``type``, ``vars`` and
    ``path`` are declared with a ``None`` default but are never assigned by
    this class.
    """

    # Declared but not populated by __init__.
    type = None
    # Integer region id taken from the ``user-region=`` field, or None when
    # the field is absent or its value is not an integer.
    region_id = None
    # Declared but not populated by __init__.
    vars = None
    # Declared but not populated by __init__.
    path = None

    def __init__(self, srcLine):
        """Parse ``srcLine`` and remember the region id it carries.

        Args:
            srcLine: string of fields separated by tab characters.

        If the field occurs several times, the last well-formed value wins.
        A value that is empty or not an integer is ignored instead of
        raising ``ValueError``, so one malformed field cannot abort the
        caller; ``region_id`` then keeps its previous value (``None`` by
        default).
        """
        prefix = 'user-region='
        for part in srcLine.split("\t"):
            if not part.startswith(prefix):
                continue
            try:
                # Slice by the prefix length rather than a hard-coded offset.
                self.region_id = int(part[len(prefix):])
            except ValueError:
                # Malformed value: treat the record as having no region.
                continue

class Map:
    """Mapper callable: emits ``(uid, '', 'DD-MM')`` for matching records.

    A record matches when all of the following hold:
      * its key is not the empty string;
      * the parsed ``SessionRecord`` has ``type``, ``path``, ``vars`` and
        ``test_buckets`` attributes;
      * ``'17067'`` is contained in ``test_buckets`` (presumably an
        experiment / test-bucket id -- confirm with the job owner);
      * ``vars`` contains both ``'dayuse=0'`` and ``'bro=chrome'``;
      * the ``user-region`` id of the line is a key of ``region_id_dict``.
    """

    # Attributes a SessionRecord must expose for the record to be considered.
    _REQUIRED_ATTRS = ('type', 'path', 'vars', 'test_buckets')

    # Container of accepted region ids; set per instance in __init__.
    region_id_dict = None

    def __init__(self, region_id_dict):
        """Remember the container of region ids to accept.

        Args:
            region_id_dict: container supporting ``in`` for integer region
                ids (``main`` passes the dict built by ``GetGeoInfo``).
        """
        self.region_id_dict = region_id_dict

    def __call__(self, rec):
        """Yield at most one output record for the input record ``rec``.

        Args:
            rec: input record; ``key``, ``subkey`` and ``value`` are read.
                ``subkey`` is converted with ``int()`` and interpreted as a
                Unix timestamp.

        Yields:
            ``Record(rec.key, '', day)`` where ``day`` is the timestamp
            formatted as ``'%d-%m'`` in the local time zone.

        The filters run cheapest-first, and the line splitting and timestamp
        formatting happen only for records that passed every earlier filter,
        so a malformed field on an irrelevant record is never parsed.
        """
        user_id = rec.key
        if user_id == '':
            return

        session = SessionRecord(rec.value)
        if not all(hasattr(session, name) for name in self._REQUIRED_ATTRS):
            return

        if '17067' not in session.test_buckets:
            return

        session_vars = session.vars
        if 'dayuse=0' not in session_vars or 'bro=chrome' not in session_vars:
            return

        region_id = PartnerRecord(rec.value).region_id
        if region_id not in self.region_id_dict:
            return

        # strftime already returns a str, so no extra str() is needed.
        day = datetime.fromtimestamp(int(rec.subkey)).strftime('%d-%m')
        yield Record(user_id, '', day)


def GetGeoInfo(root_region_id):
    """Ask the geo service for region ids and return them as dict keys.

    The request carries ``root=<root_region_id>``, ``types=_all_``,
    ``fields=id`` and ``format=text``; presumably the service answers with
    the regions of the subtree rooted at ``root_region_id`` -- confirm
    against the service documentation.

    Args:
        root_region_id: region id sent as the ``root`` parameter (converted
            with ``str()``).

    Returns:
        dict whose keys are the integer ids read from the first tab-separated
        column of every response line after the first one; all values are
        ``None`` (the dict is used for membership tests only).

    Raises:
        urllib2.URLError: if the request fails.
        ValueError: if a non-empty first column is not an integer.
    """
    service_url = 'http://g.ararat-lucid.dev.yandex.ru/'
    params = {'format': 'text',
              'root': str(root_region_id),
              'types': '_all_',
              'fields': 'id'}
    # Supplying a body makes urllib2 send the request as a POST.
    request = urllib2.Request(service_url, urllib.urlencode(params))
    response = urllib2.urlopen(request)
    region_id_dict = {}
    try:
        # The first line is skipped (presumably a header row -- confirm).
        response.readline()
        for result_line in response:
            # Strip only the line terminator: slicing off the last character
            # would eat a digit when the final line has no trailing newline.
            fields = result_line.rstrip('\r\n').split('\t')
            if not fields[0]:
                # Blank line or empty id column: nothing to record.
                continue
            region_id_dict[int(fields[0])] = None
    finally:
        # Always release the connection, even if parsing fails.
        response.close()
    return region_id_dict

def reduceq(key, records):
    """Count the items in ``records`` and emit that count for ``key``.

    Args:
        key: key passed through unchanged to the output record.
        records: iterable of input records; it is consumed once and only
            the number of items matters.

    Yields:
        A single ``Record(key, '', count)`` with the count as a string.
    """
    count = 0
    for _ in records:
        count += 1
    yield Record(key, '', str(count))


def main():
    """Run the map stage for every day in the range, then one reduce.

    Steps:
      1. Fetch the accepted region ids with ``GetGeoInfo``.
      2. Drop the intermediate and result tables.
      3. For each day from ``startDay`` to ``endDay`` run ``Map`` over
         ``user_sessions/<day>`` appending into the intermediate table, then
         sort that table.
      4. Run ``reduceq`` over the intermediate table into the result table.

    A day whose map or sort raises is reported and skipped; the remaining
    days and the final reduce still run.
    """
    country_region = 225
    startDay = '20151002'
    endDay = '20151008'
    cluster = "sakura.search.yandex.net"
    mapTable = 'kosotis/auto'
    resultTable = 'kosotis/auto_uniq'

    region_id_dict = GetGeoInfo(country_region)
    client = MapReduceClient(server=cluster, verbose=True)

    print('RUN MAP')
    print('Map table: ' + mapTable)
    client.dropTable(mapTable)
    client.dropTable(resultTable)

    mapper = Map(region_id_dict)
    for day in DatesRange(startDay, endDay):
        # Table names use '/' regardless of the local OS path separator.
        srcTable = 'user_sessions/' + day
        print('Map day: ' + day)
        try:
            client.runMap(mapper, srcTable=srcTable, dstTable=mapTable,
                          appendMode=True)
            print('MAP ENDED!!!')
            client.sortTable(mapTable)
            print('sort ended')
        except Exception as exc:
            # Best effort: report and go on with the next day.  Catching
            # Exception (not a bare except) keeps Ctrl-C and SystemExit
            # working.  A single formatted string prints the same way under
            # the print statement and the print function.
            print('ERROR: %s: %s' % (type(exc).__name__, exc))

    client.runReduce(reduceq, srcTable=mapTable, dstTable=resultTable,
                     appendMode=True)


# Run the job only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

