#!/usr/bin/pypy

import sys
import datetime
import gc
import gzip
import os
import cPickle
import re
import urllib
import getpass
import resource
import itertools
from copy import copy
from collections import defaultdict
import random

sys.path.append("/home/trencher/lib")
from mapreducelib import MapReduce, Record


def _version_sort_key(version):
    """Turn a dotted version string into a list of ints usable for ordering.

    Anything that is not purely dot-separated integers ('Unknown', 'Other',
    an empty string, '14.beta', ...) is mapped to [0].
    """
    try:
        return [int(part) for part in version.split('.')]
    except ValueError:
        return [0]


def get_versions(table):
    """Select the browser versions worth reporting from a sample of `table`.

    Every sampled record is read as ``key = "<browser>/<version>"`` and
    ``value = <count>``.  Keys whose count is below 1/10000 of the summed
    count are dropped.  For each remaining browser the three highest
    versions and the three most frequent versions are kept.
    ``"<browser>/Other"`` and ``"Unknown/Unknown"`` are never returned.

    The cutoff and the sorted final selection are written to stderr.

    Args:
        table: source table name, passed to ``MapReduce.getSample``.

    Returns:
        A list of the selected ``"<browser>/<version>"`` keys, in no
        particular order.

    Raises:
        AssertionError: if no key reaches the cutoff (e.g. empty sample).
        ValueError: if a record value is not an integer.
    """
    rows = []
    total = 0

    for rec in MapReduce.getSample(srcTable=table):
        ua_ver = rec.key
        # Split on the first '/' only: a key without a slash, or with several,
        # must not abort the whole run.  Such keys get the lowest version.
        ua, _, version = ua_ver.partition('/')
        count = int(rec.value)
        total += count
        rows.append((ua, _version_sort_key(version), count, ua_ver))

    # Explicit floor division, computed once instead of once per row.
    cutoff = total // 10000
    sys.stderr.write("browser cutoff: %s\n" % cutoff)
    rows = [row for row in rows if row[2] >= cutoff]

    assert len(rows) > 0, "no browser version reaches the cutoff"

    final_versions = set()

    # groupby only merges adjacent items, so order by browser name first.
    rows.sort(key=lambda row: row[0])

    for ua, group in itertools.groupby(rows, key=lambda row: row[0]):
        group = list(group)

        # last 3 versions
        newest = sorted(group, key=lambda row: row[1], reverse=True)[:3]
        final_versions.update(row[3] for row in newest)

        # 3 most popular versions
        most_popular = sorted(group, key=lambda row: row[2], reverse=True)[:3]
        final_versions.update(row[3] for row in most_popular)

        # The catch-all bucket is not a real version.
        final_versions.discard(ua + '/Other')

    final_versions.discard('Unknown/Unknown')

    sys.stderr.write("%s\n" % (sorted(final_versions),))

    return list(final_versions)

    #TODO: calculate real versions
    #return ['Chrome/35','Chrome/36','Chrome/37','Chrome/38','Chrome/39','Chrome/40','YaBrowser/14.12','YaBrowser/14.10','YaBrowser/14.8','YaBrowser/14.7','YaBrowser/14.5']


def intermediate_browser_versions_map(recs):
    """Map step: count how often each browser version key occurs.

    Only records whose subkey is 'redir' are inspected.  Their value is
    unpickled and iterated; each item is queried with ``.get`` for
    'user_agent_version' and, when that is missing, for 'user_agent'.
    Items without a version are counted under "<browser>/Unknown", where
    <browser> is the part of 'user_agent' before the first '/' (or
    "Unknown" when 'user_agent' is missing too).

    Yields one ``Record(version_key, '', count)`` per distinct key, with
    key and count converted to ``str``.
    """
    # NOTE(review): cPickle.loads is only safe on trusted data -- presumably
    # the source table is produced by our own jobs; confirm.
    counts = defaultdict(int)

    for rec in recs:
        if rec.subkey != 'redir':
            continue

        for entry in cPickle.loads(rec.value):
            version_key = entry.get('user_agent_version')
            if version_key is None:
                agent = entry.get('user_agent')
                if agent is None:
                    agent = "Unknown/Unknown"
                browser = agent.split("/")[0]
                version_key = browser + "/Unknown"
            counts[version_key] += 1

    for version_key, hits in counts.iteritems():
        yield Record(str(version_key), '', str(hits))

def intermediate_browser_versions_reduce(key,recs):
    """Reduce step: add up the integer values of all records for `key`.

    Yields a single ``Record(key, '', total)`` with the total as ``str``.
    """
    grand_total = sum(int(rec.value) for rec in recs)

    yield Record(key, '', str(grand_total))


if __name__ == '__main__':
    # Settings applied to both jobs below.
    MapReduce.useDefaults(
        mrExec="mapreduce-dev-sakura",
        server='sakura.search.yandex.net',
        usingSubkey=True,
        verbose=True,
        appendMode=False,
        cpuIntensive=True,
        enableTableSwitching=True,
        optAttrs={
            'user': 'trencher',
            'jobcount.multiplier': 16,
            'threadcount': 16,
        }
    )

    # Step 1: per-chunk version counts into the intermediate table.
    MapReduce.runCombine(
        intermediate_browser_versions_map,
        srcTable='tmp/trencher/v4rp-trencher-v4rp/cpickle20141224',
        dstTable='tmp/trencher/browsers_intm',
        sortMode=True
    )

    # Step 2: sum the intermediate counts per version key.
    MapReduce.runReduce(
        intermediate_browser_versions_reduce,
        srcTable="tmp/trencher/browsers_intm",
        dstTable="tmp/trencher/browsers"
    )
