#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
sys.path.append(".")

import itertools

# Default thresholds used by AggregatorSpike.  spike_params() may overwrite
# them from the "--spike" command-line option.
# The largest value must be at least this percentage of the group sum.
_min_percent = 80
# The second-largest value must be below this percentage of the largest one.
_max_second_percent = 50
# Minimum group sum required when two or more values were seen.
_min_sum = 50
# A group with a single value qualifies only if the value exceeds this.
_min_single = 20

def spike_params():
    """Override the spike thresholds from the ``--spike`` command-line option.

    Looks for ``--spike key=value[,key=value...]`` in ``sys.argv``.  The
    recognised keys are ``min_percent``, ``max_second_percent``, ``min_sum``
    and ``min_single``; each value is converted with ``int`` and stored in
    the module-level variable of the same name prefixed with an underscore.
    Unrecognised keys are ignored (their value is not even converted).
    Empty items, such as the one produced by a trailing comma, are skipped.

    Raises:
        ValueError: if ``--spike`` is the last argument, if an item is not
            of the form ``key=value``, or if a recognised value is not an
            integer.
    """
    if "--spike" not in sys.argv:
        return

    value_pos = sys.argv.index("--spike") + 1
    if value_pos >= len(sys.argv):
        raise ValueError('--spike requires an argument such as "min_percent=80,min_sum=50"')

    known_keys = ("min_percent", "max_second_percent", "min_sum", "min_single")
    module_vars = globals()
    for item in sys.argv[value_pos].split(","):
        if not item:
            # Tolerate "a=1,,b=2" and trailing commas.
            continue
        key, val = item.split("=")
        if key in known_keys:
            module_vars["_" + key] = int(val)
spike_params()

def str2num(string):
    """Convert *string* to a number, preferring ``int`` over ``float``.

    ``int`` is tried first; only if it raises ``ValueError`` is ``float``
    used.  A ``ValueError`` from ``float`` propagates to the caller.
    """
    try:
        number = int(string)
    except ValueError:
        number = float(string)
    return number

class BaseAggregator(object):
    """Common base class for the field aggregators.

    This class uses ``self.reset()`` and ``self.value`` but defines neither;
    subclasses supply them.  ``decode``/``encode`` are identity hooks that
    subclasses may override to convert incoming and outgoing values.
    """

    def __init__(self):
        self.reset()

    def decode(self, value):
        """Return *value* unchanged."""
        return value

    def encode(self, value):
        """Return *value* unchanged."""
        return value

    def popvalue(self):
        """Return the encoded current value and reset the aggregator."""
        # Capture the value first: reset() is what clears the state.
        current = self.value
        self.reset()
        return self.encode(current)

class AggregatorFirstValue(BaseAggregator):
    """Base for aggregators that treat the first value of a group specially.

    ``pushvalue`` is an instance attribute that is re-bound as values arrive:
    after ``reset()`` it points at ``_pushvalue1``, which stores the decoded
    first value and then switches ``pushvalue`` to ``_pushvalue2``.
    ``_pushvalue2`` is not defined here; subclasses provide it.
    """

    def reset(self):
        """Forget the stored value and route the next push to _pushvalue1."""
        self.pushvalue = self._pushvalue1
        self.value = None

    def _pushvalue1(self, value):
        """Store the first value of a group; always returns True."""
        first = self.decode(value)
        self.value = first
        # Every later value of the group goes through the subclass handler.
        self.pushvalue = self._pushvalue2
        return True

class AggregatorFirstValueNum(AggregatorFirstValue):
    """First-value aggregator whose values are numbers.

    Incoming values are parsed with ``str2num``; the result is rendered
    back to text with ``str``.
    """

    def decode(self, value):
        """Parse *value* into an int or a float."""
        number = str2num(value)
        return number

    def encode(self, value):
        """Render *value* as a string."""
        text = str(value)
        return text

class AggregatorSpike(BaseAggregator):
    """Emit the largest value of a group only when it dominates the group.

    While values are pushed the aggregator tracks the largest value
    (``value``), the second largest (``second_value``) and the running total
    (``values_sum``).  ``popvalue`` returns the largest value when
    ``check_spike`` holds and the string ``"0"`` otherwise.

    The constructor defaults are the module-level thresholds as they are at
    the moment this class statement is executed.
    """

    def __init__(self, min_percent = _min_percent, max_second_percent = _max_second_percent, min_sum = _min_sum, min_single = _min_single):
        BaseAggregator.__init__(self)
        self.min_percent = min_percent
        self.max_second_percent = max_second_percent
        self.min_sum = min_sum
        self.min_single = min_single

    def decode(self, value):
        """Parse an incoming value into an int or a float."""
        return str2num(value)

    def encode(self, value):
        """Render the outgoing value as a string."""
        return str(value)

    def reset(self):
        """Return to the state in which no value has been seen."""
        self.state = "empty"
        self.value = None
        self.second_value = None
        self.values_sum = 0

    def check_spike(self):
        """Tell whether the values seen so far form a spike.

        With exactly one value, it must be strictly greater than
        ``min_single``.  With two or more values, the sum must reach
        ``min_sum``, the largest value must be at least ``min_percent``
        percent of the sum, and the second largest must be below
        ``max_second_percent`` percent of the largest.  With no values the
        result is False.
        """
        if self.state == "has_first":
            return self.value > self.min_single
        if self.state == "has_second":
            return (self.values_sum >= self.min_sum
                    and self.value * 100 >= self.values_sum * self.min_percent
                    and self.second_value * 100 < self.max_second_percent * self.value)
        return False

    def popvalue(self):
        """Return the encoded largest value for a spike, else ``"0"``."""
        result = BaseAggregator.popvalue(self) if self.check_spike() else "0"
        self.reset()
        return result

    def pushvalue(self, value):
        """Account for one more value of the group.

        Returns True when the value became the new largest one (this
        includes the first value of a group) and False otherwise.
        """
        number = self.decode(value)
        state = self.state

        if state == "empty":
            self.state = "has_first"
            self.value = number
            self.values_sum = number
            return True

        if state not in ("has_first", "has_second"):
            # No other state is ever assigned; mirror the fall-through.
            return None

        self.values_sum += number
        self.state = "has_second"

        if number > self.value:
            # The previous maximum becomes the runner-up.
            self.second_value, self.value = self.value, number
            return True

        # The second value of a group is the runner-up unconditionally;
        # later values only replace a smaller runner-up.
        if state == "has_first" or number > self.second_value:
            self.second_value = number
        return False

class AggregatorKey(AggregatorFirstValue):
    """Aggregator for key fields: keeps the first value of the group."""

    def _pushvalue2(self, value):
        # Values after the first one are ignored; False reports "no change".
        return False

class AggregatorFirst(AggregatorFirstValue):
    """Keeps the first value of the group and ignores the rest."""

    def _pushvalue2(self, value):
        # Values after the first one are ignored; False reports "no change".
        return False

class AggregatorLast(AggregatorFirstValue):
    """Keeps the most recently pushed value of the group."""

    def _pushvalue2(self, value):
        # Every later value replaces the stored one; it is stored as given,
        # without going through decode().
        self.value = value
        return True

class AggregatorLastNonEmpty(AggregatorFirstValue):
    """Keeps the latest value of the group that is not the empty string.

    The first value of a group is stored by the base class even when it is
    empty; only the following values are filtered here.
    """

    def _pushvalue2(self, value):
        """Replace the stored value unless *value* is ``''``."""
        if value == '':
            return False
        self.value = value
        return True

class AggregatorSum(AggregatorFirstValueNum):
    """Adds up the numeric values of the group."""

    def _pushvalue2(self, value):
        """Add the decoded *value* to the running total; returns True."""
        self.value = self.value + self.decode(value)
        return True

class AggregatorAvg(BaseAggregator):
    """Arithmetic mean of the values of the group, computed in floats."""

    def reset(self):
        """Clear the running sum and the number of values."""
        self.__count = 0
        self.__sum = 0.0

    def pushvalue(self, value):
        """Add ``float(value)`` to the sum and count it."""
        number = float(value)
        self.__sum = self.__sum + number
        self.__count = self.__count + 1

    @property
    def value(self):
        """The mean; raises ZeroDivisionError if nothing was pushed."""
        return self.__sum / self.__count

    def encode(self, value):
        """Render the mean as a string."""
        return str(value)
    

class BaseRegionalAggregator(BaseAggregator):
    """Base for aggregators of ``region:freq,region:freq,...`` fields.

    Each pushed field is split into ``region:freq`` items and every item is
    handed to ``update_region_freq(reg, freq)``, which is not defined here
    and must be supplied by subclasses.  Results are kept in
    ``self._reg_dict``.
    """

    def reset(self):
        """Drop all accumulated regions."""
        self._reg_dict = {}

    def pushvalue(self, value):
        """Feed every non-empty ``region:freq`` item of *value* to the subclass."""
        if not value:
            return
        for region in value.split(","):
            if region:
                reg, freq = region.split(":")
                self.update_region_freq(reg, float(freq))

    @property
    def value(self):
        """The accumulated regions as a ``region:freq`` list joined by commas."""
        pairs = [reg + ":" + str(freq) for reg, freq in self._reg_dict.items()]
        return ",".join(pairs).rstrip(",")

    def encode(self, value):
        """Render the value as a string."""
        return str(value)

class AggregatorRegions(BaseRegionalAggregator):
    """Sums the frequencies of each region over the group."""

    def update_region_freq(self, reg, freq):
        """Add *freq* to the total stored for *reg*."""
        if reg not in self._reg_dict:
            self._reg_dict[reg] = freq
        else:
            self._reg_dict[reg] += freq

class AggregatorRegionsMax(AggregatorRegions):
    """Keeps the largest frequency seen for each region."""

    def update_region_freq(self, reg, freq):
        """Store the larger of *freq* and the value kept so far for *reg*.

        A region that has not been seen yet is compared against 0.0.
        """
        previous = self._reg_dict.get(reg, 0.0)
        self._reg_dict[reg] = max(freq, previous)

class AggregatorWeights(BaseAggregator):
    """Keeps the smallest weight seen for each key of a ``key:weight`` list.

    Pushed fields have the form ``key:weight,key:weight,...``.  The value
    is rendered as ``key:weight`` items joined by commas, each weight
    formatted with ``%.4g``.
    """

    def reset(self):
        """Drop all accumulated weights."""
        self.__dict = {}

    def pushvalue(self, value):
        """Merge the ``key:weight`` items of *value*, keeping per-key minima.

        Empty items (from a trailing or doubled comma) are skipped, the same
        way BaseRegionalAggregator.pushvalue treats them.

        Raises:
            ValueError: if a non-empty item is not of the form
                ``key:weight`` or the weight is not a number.
        """
        if not value:
            return
        for region in value.split(","):
            if not region:
                continue
            reg, freq = region.split(":")
            weight = float(freq)
            if reg in self.__dict:
                self.__dict[reg] = min(weight, self.__dict[reg])
            else:
                self.__dict[reg] = weight

    @property
    def value(self):
        """The stored minima as ``key:weight`` items joined by commas."""
        return ",".join("%s:%.4g" % (reg, freq)
                        for reg, freq in self.__dict.items())

    def encode(self, value):
        """Render the value as a string."""
        return str(value)

class AggregatorMax(AggregatorFirstValueNum):
    """Largest numeric value of the group; ties keep the earlier value."""

    def _pushvalue2(self, value):
        """Store *value* if it is strictly greater; return whether it was."""
        candidate = self.decode(value)
        if not self.value < candidate:
            return False
        self.value = candidate
        return True
        
class AggregatorMin(AggregatorFirstValueNum):
    """Smallest numeric value of the group; ties keep the earlier value."""

    def _pushvalue2(self, value):
        """Store *value* if it is strictly smaller; return whether it was."""
        candidate = self.decode(value)
        if not self.value > candidate:
            return False
        self.value = candidate
        return True

class AggregatorMaxLast(AggregatorFirstValueNum):
    """Largest numeric value of the group; ties count as an update."""

    def _pushvalue2(self, value):
        """Store *value* if it is greater or equal; return whether it was."""
        candidate = self.decode(value)
        if not self.value <= candidate:
            return False
        self.value = candidate
        return True

class AggregatorMinLast(AggregatorFirstValueNum):
    """Smallest numeric value of the group; ties count as an update."""

    def _pushvalue2(self, value):
        """Store *value* if it is smaller or equal; return whether it was."""
        candidate = self.decode(value)
        if not self.value >= candidate:
            return False
        self.value = candidate
        return True

class AggregatorCount(BaseAggregator):
    """Counts the values pushed for the group."""

    def reset(self):
        """Start counting from zero."""
        self.value = 0

    def pushvalue(self, value):
        """Count one more value, whatever it is."""
        self.value = self.value + 1

    def encode(self, value):
        """Render the count as a string."""
        return str(value)

class AggregatorEmptyCount(AggregatorCount):
    """Counts the values of the group that equal the empty string."""

    def pushvalue(self, value):
        """Count *value* only when it is ``''``."""
        is_empty = value == ''
        if is_empty:
            self.value = self.value + 1

class AggregatorNonEmptyCount(AggregatorCount):
    """Counts the values of the group that differ from the empty string."""

    def pushvalue(self, value):
        """Count *value* only when it is not ``''``."""
        is_filled = value != ''
        if is_filled:
            self.value = self.value + 1

class AggregatorCountDistinct(BaseAggregator):
    """Counts the distinct values pushed for the group."""

    def reset(self):
        """Forget every value seen so far."""
        self.__values = set()

    def pushvalue(self, value):
        """Remember *value*; duplicates collapse in the set."""
        seen = self.__values
        seen.add(value)

    @property
    def value(self):
        """Number of distinct values seen since the last reset."""
        distinct = len(self.__values)
        return distinct

    def encode(self, value):
        """Render the count as a string."""
        return str(value)

class AggregatorConcat(BaseAggregator):
    """Joins the values of the group, in push order, with a separator."""

    def __init__(self, sep=''):
        """Create the aggregator; *sep* is placed between joined values."""
        super(AggregatorConcat, self).__init__()
        self.__sep = sep

    def reset(self):
        """Forget every collected value."""
        self.__values = []

    def pushvalue(self, value):
        """Append *value* to the collected values."""
        collected = self.__values
        collected.append(value)

    @property
    def value(self):
        """The joined values, or None when nothing was pushed."""
        if not self.__values:
            return None
        return self.__sep.join(self.__values)

class AggregatorTabJoin(AggregatorConcat):
    """AggregatorConcat with the separator fixed to a tab character."""

    def __init__(self):
        super(AggregatorTabJoin, self).__init__("\t")

class AggregatorAtAtJoin(BaseAggregator):
    """Collects the distinct ``@@``-separated items of the group.

    Each pushed field is split on ``"@@"``; the distinct items are stored
    as dictionary keys and joined again with ``"@@"`` for output, in the
    dictionary's iteration order.
    """

    def reset(self):
        """Forget every collected item."""
        self.__dict = {}

    def pushvalue(self, value):
        """Record every ``@@``-separated item of *value*; empty fields are skipped."""
        if not value:
            return
        for url in value.split("@@"):
            self.__dict[url] = True

    @property
    def value(self):
        """The distinct items joined with ``"@@"``."""
        return "@@".join(list(self.__dict))

    def encode(self, value):
        """Render the value as a string."""
        return str(value)

class AggregatorAtAtPosJoin(BaseAggregator):
    """Interleaves ``@@``-separated lists by position.

    Each pushed field is split on ``"@@"`` into a list.  The output takes
    the first item of every list (in push order), then the second item of
    every list, and so on, skipping missing or empty items, and joins
    everything with ``"@@"``.
    """

    def reset(self):
        """Forget every collected list."""
        self.__values = []

    def pushvalue(self, value):
        """Split *value* on ``"@@"`` and keep the list; empty fields are skipped."""
        if not value:
            return
        self.__values.append(value.split("@@"))

    @property
    def value(self):
        """The collected lists interleaved by position, joined with ``"@@"``."""
        # izip_longest exists only on Python 2; Python 3 calls it zip_longest.
        try:
            zip_longest = itertools.izip_longest
        except AttributeError:
            zip_longest = itertools.zip_longest

        goods = []
        for same_position in zip_longest(*self.__values):
            # Shorter lists are padded with None, which is dropped here
            # together with empty strings.
            goods.append("@@".join([el for el in same_position if el]))
        return "@@".join(goods)

    def encode(self, value):
        """Render the value as a string."""
        return str(value)

class AggregatorConst(BaseAggregator):
    """Aggregator that always yields the constant it was created with.

    It ignores everything pushed to it.  Besides ``pushvalue`` it offers a
    record-level ``push`` so that it can stand in where a record
    aggregator is expected.
    """

    def __init__(self, value):
        # BaseAggregator.__init__ is not invoked; reset() does nothing here.
        self.value = value

    def reset(self):
        """Do nothing: the constant survives resets."""

    def pushvalue(self, value):
        """Ignore *value*."""

    def push(self, value):
        """Ignore *value*."""

class RecordAggregator(object):
    """Applies a field aggregator to one column of each record."""

    def __init__(self, fieldaggregator, fieldnum):
        """Bind *fieldaggregator* to the 1-based column *fieldnum*."""
        # Records are indexed from zero, field numbers from one.
        self._fidx = fieldnum - 1
        self._aggr = fieldaggregator

    def reset(self):
        """Reset the wrapped aggregator."""
        self._aggr.reset()

    def push(self, record):
        """Push this aggregator's column of *record*; return the aggregator's result."""
        field = record[self._fidx]
        return self._aggr.pushvalue(field)

    def popvalue(self):
        """Return whatever the wrapped aggregator's popvalue() returns."""
        return self._aggr.popvalue()

class AggregatorLinked(RecordAggregator):
    """Record aggregator driven by another ("linked") record aggregator.

    A record's field is pushed to the wrapped aggregator only when the
    linked aggregator's ``push`` returns a true value for that record.
    """

    def __init__(self, fieldaggregator, fieldnum, linked_record_aggregator):
        RecordAggregator.__init__(self, fieldaggregator, fieldnum)
        self._linked = linked_record_aggregator

    def reset(self):
        """Reset both the wrapped and the linked aggregator."""
        self._aggr.reset()
        self._linked.reset()

    def push(self, record):
        """Push *record* to the linked aggregator and, if it accepts, to ours.

        Returns False when the linked aggregator did not accept the record,
        otherwise the result of the wrapped aggregator's ``pushvalue``.
        """
        if not self._linked.push(record):
            return False
        return self._aggr.pushvalue(record[self._fidx])

    def popvalue(self):
        """Return the wrapped aggregator's value, then reset both aggregators."""
        result = self._aggr.popvalue()
        self.reset()
        return result

# Maps an option name (the part after "N=" in a field argument) to the
# aggregator class that get_record_aggregator() instantiates for it.
FIELDOPTS = {
    # 'key' and 'hiddenkey' both define the grouping key; process_sorted_lines
    # leaves 'hiddenkey' fields out of the printed output.
    'key': AggregatorKey,
    'hiddenkey': AggregatorKey,
    'sum': AggregatorSum,
    'avg': AggregatorAvg,
    'max': AggregatorMax,
    'min': AggregatorMin,
    'max-last': AggregatorMaxLast,
    'min-last': AggregatorMinLast,
    'first': AggregatorFirst,
    'last': AggregatorLast,
    'last-nonempty': AggregatorLastNonEmpty,
    'count': AggregatorCount,
    'count-distinct': AggregatorCountDistinct,
    'count-empty': AggregatorEmptyCount,
    'count-nonempty': AggregatorNonEmptyCount,
    'concat': AggregatorConcat,
    'tabjoin': AggregatorTabJoin,
    # 'arg' uses the same class as 'last'.
    'arg': AggregatorLast,

    '@@join': AggregatorAtAtJoin,
    '@@pos-join': AggregatorAtAtPosJoin,
    # No class is registered: get_record_aggregator() treats a None entry
    # like an unknown option and returns AggregatorConst('wrapjoin').
    'wrapjoin': None,
    'spike': AggregatorSpike,


     'regional' : AggregatorRegions,
     'regional-max' : AggregatorRegionsMax,
     'weights' : AggregatorWeights
}

def get_record_aggregator(fnum, fopt, nested):
    """Build the record-level aggregator for one parsed field option.

    Args:
        fnum: 1-based field number.
        fopt: option name looked up in FIELDOPTS.
        nested: None, or a ``(fnum, fopt, nested)`` tuple describing the
            linked aggregator.

    Returns:
        ``AggregatorConst(fopt)`` when FIELDOPTS has no class for *fopt*;
        otherwise a RecordAggregator, or an AggregatorLinked when *nested*
        is given.
    """
    aggr_class = FIELDOPTS.get(fopt)
    if aggr_class is None:
        # Unknown names (and entries mapped to None) become constants.
        return AggregatorConst(fopt)

    field_aggr = aggr_class()
    if nested is not None:
        linked = get_record_aggregator(*nested)
        return AggregatorLinked(field_aggr, fnum, linked)
    return RecordAggregator(field_aggr, fnum)

def parse_fieldopt(fieldopt):
    """Parse one ``N=option`` command-line argument.

    Accepted forms of the part after ``=``:
      * ``name`` - an option name;
      * ``name(M=option)`` - an option with a nested (linked) option, which
        is parsed recursively; the nested part is kept only when ``name``
        is a key of FIELDOPTS;
      * ``"text"`` or ``'text'`` - a quoted constant; the quotes are
        removed and the text is taken literally, parentheses included.

    Args:
        fieldopt: the raw argument string.

    Returns:
        None when the argument has no ``=`` or the part before it is not an
        integer; otherwise a tuple ``(fnum, fopt, nested)`` where *nested*
        is None or another such tuple.

    Raises:
        ValueError: if the field number is not positive, if a ``(`` is not
            matched by a trailing ``)``, or if the nested option cannot be
            parsed.
    """
    def is_quoted(text):
        return (text.startswith('"') and text.endswith('"')) or \
               (text.startswith("'") and text.endswith("'"))

    argparts = fieldopt.split('=', 1)
    if len(argparts) != 2:
        return None
    fnum, fopt = argparts
    try:
        fnum = int(fnum)
    except ValueError:
        return None

    if fnum <= 0:
        raise ValueError('Invalid field number "%s"' % fieldopt)

    nested_fieldopt = None
    # A fully quoted constant is literal text: do not look for "(" in it.
    if not is_quoted(fopt):
        optparts = fopt.split('(', 1)
        if len(optparts) == 2:
            fopt, nested_fieldopt = optparts
            if not nested_fieldopt.endswith(')'):
                raise ValueError('Invalid option syntax "%s"' % fieldopt)
            # An empty pair of parentheses means "no nested option".
            nested_fieldopt = nested_fieldopt[:-1].strip() or None

    if is_quoted(fopt):
        fopt = fopt[1:-1]
        nested_fieldopt = None
    elif fopt not in FIELDOPTS:
        # Unknown names are constants, which cannot have a nested option.
        nested_fieldopt = None

    if nested_fieldopt is None:
        return fnum, fopt, None

    nested = parse_fieldopt(nested_fieldopt)
    if nested is None:
        raise ValueError('Invalid nested option "%s"' % nested_fieldopt)
    return fnum, fopt, nested
    
def parse_args():
    """Extract the field separator and the field options from ``sys.argv``.

    The separator is the argument following ``-t`` when ``-t`` is present,
    a tab otherwise.  Every argument after the program name that starts
    with a digit is handed to parse_fieldopt(); arguments it returns None
    for are dropped.  Empty arguments are ignored.

    Returns:
        A tuple ``(tabchar, fields)`` where *fields* is the list of parsed
        ``(fnum, fopt, nested)`` tuples in command-line order.

    Raises:
        IndexError: if ``-t`` is the last argument.
        ValueError: propagated from parse_fieldopt() for malformed options.
    """
    argv = sys.argv
    if "-t" in argv:
        tabchar = argv[argv.index("-t") + 1]
    else:
        tabchar = '\t'

    fields = []
    for arg in argv[1:]:
        # arg[:1] rather than arg[0]: an empty argument must not raise.
        if not arg[:1].isdigit():
            continue
        parsed = parse_fieldopt(arg)
        if parsed is not None:
            fields.append(parsed)
    return tabchar, fields

def process_sorted_lines(in_stream, fields, tabchar):
    """Aggregate the records of *in_stream* group by group and print them.

    Consecutive records with equal key fields form one group (this is how
    ``itertools.groupby`` works, so the input has to arrive grouped by
    key).  For every group each field aggregator receives all records and
    one output line is written to ``sys.stdout``, made of the popped values
    of all aggregators except those declared as ``hiddenkey``.

    Args:
        in_stream: iterable of text lines; a trailing newline is stripped.
        fields: list of ``(fnum, fopt, nested)`` tuples as produced by
            parse_fieldopt().
        tabchar: separator used to split input lines and to join output.
    """
    keys = sorted(set(f[0] for f in fields if f[1] in ('key', 'hiddenkey')))

    def records_iterator(stream):
        for line in stream:
            yield line.rstrip("\n").split(tabchar)

    def key_func(record):
        return [record[k - 1] for k in keys]

    aggregators = [get_record_aggregator(*f) for f in fields]
    printed_aggregators = [aggr for aggr, f in zip(aggregators, fields)
                           if f[1] != 'hiddenkey']

    for _, group in itertools.groupby(records_iterator(in_stream), key_func):
        for record in group:
            for aggr in aggregators:
                aggr.push(record)
        # Same output as the Python 2 print statement, but also valid
        # syntax on Python 3.
        sys.stdout.write(tabchar.join(aggr.popvalue() for aggr in printed_aggregators) + "\n")

def main():
    """Script entry point: parse the command line and aggregate stdin."""
    tabchar, fields = parse_args()
    process_sorted_lines(sys.stdin, fields, tabchar)


if __name__ == '__main__':
    main()
