# coding: utf-8
import cgi, datetime, types, operator, heapq, random, re, socket, struct, urlparse, urllib
# Names exported by ``from <module> import *``; the LogRecord base class is not listed.
__all__ = ["SessionRecord", "ReqansRecord", "RedirRecord", "SpyRecord", "OnlineSampler", "SessionChecks", "DatesRange"]

class LogRecord(object):
    """Base class for log records whose attributes are named by the log itself.

    Values are stored as plain instance attributes. An attribute set once holds
    the value itself; an attribute set several times holds a list of all the
    values in the order they were set. Reading ``<name>_list`` always yields a
    list view of ``<name>`` (see ``__getattr__``).
    """
    __slots__ = ["type", "__dict__"]

    # Types treated as a single scalar string value. ``types.StringTypes`` is
    # used where the ``types`` module provides it; otherwise plain ``str``.
    _string_types = getattr(types, "StringTypes", (str,))

    def setAttribute(self, attr, val):
        """Store ``val`` under ``attr``, accumulating repeated values in a list.

        The first value is stored as is, the second turns the attribute into
        ``[first, second]`` and later values are appended to that list.
        """
        try:
            # Look the attribute up without the ``__getattr__`` fallback:
            # for a name ending in "_list" the fallback fabricates a fresh
            # list, and appending to it would silently discard ``val``.
            current = object.__getattribute__(self, attr)
        except AttributeError:
            current = None
        if current is None:
            setattr(self, attr, val)
        elif isinstance(current, self._string_types):
            setattr(self, attr, [current, val])
        else:
            current.append(val)

    def parseRelevFactor(self, g_attr, s_attr):
        """Parse a ``k1=v1;k2=v2`` string from ``g_attr`` into a dict on ``s_attr``.

        Chunks without ``=`` are skipped. If ``g_attr`` is missing or does not
        hold a string (for example it was repeated and became a list), an
        empty dict is stored.
        """
        raw = getattr(self, g_attr, "")
        if isinstance(raw, self._string_types):
            factors = dict(chunk.split("=", 1) for chunk in raw.split(";") if "=" in chunk)
        else:
            factors = {}
        setattr(self, s_attr, factors)

    def parseSearchProps(self, g_attr, s_attr):
        """Parse ``desc:k=v,k=v;desc2:k=v`` from ``g_attr`` into a flat dict on ``s_attr``.

        Keys of the resulting dict are ``"<desc>.<k>"``. Malformed parts (no
        ``:``, empty parameter list, parameter without ``=``) are skipped. As
        in ``parseRelevFactor``, a missing or non-string source value yields
        an empty dict instead of raising.
        """
        props = {}
        raw = getattr(self, g_attr, None)
        if isinstance(raw, self._string_types):
            for part in raw.split(";"):
                desc, colon, rest = part.partition(":")
                if not colon or not rest:
                    continue
                for prm in rest.split(","):
                    prmnam, equals, prmval = prm.partition("=")
                    if equals:
                        props["{0}.{1}".format(desc, prmnam)] = prmval
        setattr(self, s_attr, props)

    def __getattr__(self, attr):
        """Provide the synthetic ``<name>_list`` view for attributes not found normally.

        Returns ``[]`` when ``<name>`` is unset or ``None``, ``[value]`` when it
        holds a single string, and the stored value itself otherwise.

        Raises:
            AttributeError: for any other unknown attribute name.
        """
        if attr.endswith("_list"):
            val = getattr(self, attr[:-5], None)
            if val is None:
                return []
            if isinstance(val, self._string_types):
                return [val]
            return val
        raise AttributeError("object {0!r} has no attribute {1!r}".format(type(self).__name__, attr))

    def __repr__(self):
        """Return ``ClassName(attr=value,...)`` with attributes in sorted order."""
        attrs = set(self.__slots__).union(["type"]).union(self.__dict__)
        # Sorting keeps the output stable between runs; set order is arbitrary.
        attrDesc = ",".join("{0}={1!r}".format(k, getattr(self, k)) for k in sorted(attrs) if hasattr(self, k))
        return "{0}({1})".format(type(self).__name__, attrDesc)


class ReqansRecord(LogRecord):
    """Record built from a ``@@``-separated line of ``key=value`` fields.

    After the fields are stored, ``reqrelev`` and ``rearr`` are replaced by
    dicts parsed from their ``k=v;k=v`` text and ``search_props`` by the flat
    dict produced by ``parseSearchProps``.
    """
    __slots__ = ["reqrelev", "rearr", "search_props"]

    def __init__(self, desc):
        """Parse ``desc``; chunks without ``=`` are ignored."""
        for chunk in desc.split("@@"):
            name, equals, value = chunk.partition("=")
            if equals:
                self.setAttribute(name, value)
        # Both relevance-factor fields are parsed in place (source == target).
        for factor_attr in ("reqrelev", "rearr"):
            self.parseRelevFactor(factor_attr, factor_attr)
        self.parseSearchProps("search_props", "search_props")


class RedirRecord(LogRecord):
    """Record built from a ``@@``-separated line mixing ``key=value`` and bare fields.

    ``key=value`` chunks become attributes; the last three bare chunks are
    taken, in order, as ``timestamp``, ``user_ip`` and ``user_uid``.
    """
    __slots__ = ["timestamp", "user_ip", "user_uid", "service_host"]

    def __init__(self, desc):
        """Parse ``desc``.

        Raises:
            ValueError: from the tuple unpacking when fewer than three bare
                (non ``key=value``) chunks are present.
        """
        positional = []
        for chunk in desc.split("@@"):
            name, equals, value = chunk.partition("=")
            if equals:
                self.setAttribute(name, value)
            else:
                positional.append(chunk)
        self.timestamp, self.user_ip, self.user_uid = positional[-3:]

    def parseService(self):
        """Set ``service_host`` to the network location of ``HTTP_REFERER``.

        Does nothing when the record has no (or an empty) ``HTTP_REFERER``.
        This method is not invoked by ``__init__``.
        """
        referer = getattr(self, "HTTP_REFERER", "")
        if not referer:
            return
        self.service_host = urlparse.urlparse(referer).netloc


class SessionRecord(LogRecord):
    """Record built from a tab-separated line of ``key=value`` fields.

    Dashes in field names are replaced with underscores before the value is
    stored. ``reqrelev``, ``rearr`` and ``search_props`` are then parsed in
    place into dicts.
    """
    __slots__ = ["reqrelev", "rearr", "search_props"]

    def __init__(self, desc):
        """Parse ``desc``; chunks without ``=`` are ignored."""
        for chunk in desc.split("\t"):
            name, equals, value = chunk.partition("=")
            if not equals:
                continue
            self.setAttribute(name.replace("-", "_"), value)
        # Both relevance-factor fields are parsed in place (source == target).
        for factor_attr in ("reqrelev", "rearr"):
            self.parseRelevFactor(factor_attr, factor_attr)
        self.parseSearchProps("search_props", "search_props")


class SpyRecord(LogRecord):
    """Record parsed from a space-separated line of at least five fields.

    Field 0 is stored as ``timestamp``, field 1 as ``ip`` (either an unsigned
    32-bit integer or a textual IPv4/IPv6 address), field 2 as ``uid``,
    field 3 is a URL query string whose parameters become attributes, and
    field 4 is ``-`` or a value whose part before the first ``&`` becomes
    ``fuid``.
    """
    __slots__ = ["timestamp", "ip", "uid", "fuid", "source", "dest", "title", "is_good_jump"]
    # Named entities understood by ``amp_replace``. "quot" is the actual HTML
    # entity name; the historical "quote" key is kept for compatibility.
    __amp_replace = {"amp": u"&", "quot": u"\"", "quote": u"\"", "lt": u"<", "gt": u">", "copy": u"©",
        "apos": u"'", "nbsp": u" ", "laquo": u"«", "raquo": u"»", "reg": u"®", "trade": u"™",
        "ndash": u"–", "mdash": u"—"}
    # Flags belong to compile(); the third positional argument of sub() is ``count``.
    __amp_re = re.compile(u"&(.{,6}?);", re.U)

    def __init__(self, desc):
        """Parse ``desc`` into the record's attributes.

        Raises:
            AttributeError: if there are fewer than five fields, or the ip
                field is neither a 32-bit unsigned integer nor a valid
                IPv4/IPv6 address.
        """
        items = desc.split(' ')
        if len(items) < 5:
            raise AttributeError("incorrect SpyRecord format")
        self.type = "SPY"
        self.timestamp = items[0]
        self.ip = items[1]
        try:
            # Try to recognize the old (ui32) format for ip.
            packed = struct.pack('>I', int(items[1]))
        except (ValueError, struct.error):
            # Not an integer, or an integer outside the 32-bit range: the
            # field must already be a correctly formatted address.
            if not self.CheckIPFormat(self.ip):
                raise AttributeError("incorrect value for ip: {0}".format(self.ip))
        else:
            self.ip = ".".join(map(str, struct.unpack('4B', packed)))
        self.uid = items[2]
        attrs = cgi.parse_qs(items[3])
        # NOTE(review): parameter names come straight from the log line, so a
        # parameter named like an already-set field (e.g. "ip") turns that
        # field into a list - confirm whether that is intended.
        for k, vList in attrs.items():
            for v in vList:
                self.setAttribute(k, v)
        # parseLink removes the keys it consumes from ``attrs``.
        self.source, self.dest, self.title = self.parseLink(attrs)
        self.is_good_jump = self.checkAttrs(attrs)
        if items[4] == '-':
            self.fuid = None
        else:
            self.fuid = items[4].split('&', 1)[0]

    @classmethod
    def CheckIPFormat(cls, ipaddr):
        """Return 4 for a valid IPv4 address, 6 for IPv6, ``None`` otherwise.

        Any ordinary exception from ``socket.inet_pton`` counts as "not this
        family"; ``KeyboardInterrupt`` and ``SystemExit`` are no longer
        swallowed.
        """
        try:
            socket.inet_pton(socket.AF_INET, ipaddr)
            return 4
        except Exception:
            pass
        try:
            socket.inet_pton(socket.AF_INET6, ipaddr)
            return 6
        except Exception:
            pass
        return None

    @classmethod
    def parseLink(cls, attr):
        """Extract ``(source, destination, title)`` from parsed query parameters.

        ``attr`` maps names to lists of values and is modified: the keys that
        are used are popped. The destination is the first ``url`` value; the
        source is the first value of ``referer``, else ``referrer``, else
        ``oldurl``; the title is the first ``title`` value passed through
        ``unescapeTitle``. Missing parts are ``None``.
        """
        src, dst, title = None, None, None
        if "url" in attr:
            dst = attr.pop("url")[0]
        if "referer" in attr:
            src = attr.pop("referer")[0]
        elif "referrer" in attr:
            src = attr.pop("referrer")[0]
        elif "oldurl" in attr:
            src = attr.pop("oldurl")[0]
        if "title" in attr:
            title = cls.unescapeTitle(attr.pop("title")[0])
        return src, dst, title

    @classmethod
    def checkAttrs(cls, attr):
        """Return True when ``httpstatus`` is exactly ``['200']`` and ``post`` is ``['0']``."""
        return attr.get("httpstatus") == ['200'] and attr.get("post") == ['0']

    @classmethod
    def amp_replace(cls, sRes):
        """Return the replacement text for one ``&...;`` match.

        Known named entities and numeric references (``&#NNN;``, ``&#xHH;``)
        are decoded; codes 128-159 are interpreted as cp1252 bytes. Anything
        unrecognised or undecodable is replaced by an empty string.
        """
        ptrn = sRes.group(1).lower()
        repl = cls.__amp_replace.get(ptrn)
        if repl is not None:
            return repl
        try:
            if ptrn.startswith(u"#x"):
                code = int(ptrn[2:], 16)
            elif ptrn.startswith(u"#"):
                code = int(ptrn[1:])
            else:
                return ""
            if 128 <= code < 160:
                return chr(code).decode("cp1252")
            return unichr(code)
        except (ValueError, OverflowError):
            # Malformed number, code point out of range, or a byte that
            # cp1252 leaves undefined (UnicodeDecodeError is a ValueError).
            return ""

    @classmethod
    def unescapeTitle(cls, value):
        """URL-unquote ``value``, decode it, and resolve HTML entities.

        Decoding is attempted as utf-8 and then cp1251. Returns the result
        encoded as utf-8, or ``None`` when neither decoding succeeds or the
        decoded text is empty.
        """
        value = urllib.unquote_plus(value)
        text = None
        for cp in ("utf-8", "cp1251",):
            try:
                text = unicode(value, cp)
                break
            except UnicodeDecodeError:
                pass
        if not text:
            return None
        # No ``count`` argument: every entity in the title is replaced.
        return cls.__amp_re.sub(cls.amp_replace, text).encode("utf-8")


class SessionChecks:
    """Predicates over session items exposing ``type``, ``service`` and ``rearr``."""

    @classmethod
    def IsLatinicRequest(cls, item):
        """Tell whether ``item`` is a "www.yandex" REQUEST with a usable ``qlang``.

        The result is truthy only when ``rearr["qlang"]`` is non-empty and
        neither ``qlangruspure`` nor ``qlangskipforeign`` equals ``"1"``.
        When ``qlang`` is empty, that empty value itself is returned.
        """
        if item.type != "REQUEST":
            return False
        if item.service != "www.yandex":
            return False
        rearr = item.rearr
        qlang = rearr.get("qlang", "")
        if not qlang:
            # Mirror the short-circuit result of the boolean chain.
            return qlang
        if rearr.get("qlangruspure", "") == "1":
            return False
        return rearr.get("qlangskipforeign", "") != "1"

    @classmethod
    def IsRegularQuery(cls, item):
        """Tell whether ``item`` is a REQUEST made to the "www.yandex" service."""
        if item.type != "REQUEST":
            return False
        return item.service == "www.yandex"


class OnlineSampler(object):
    """Weighted online sampler keeping a fixed number of sample slots.

    ``samples`` is a heap of ``(accumulated_weight, item)`` pairs; every slot
    starts as ``(0, None)``.
    """

    def __init__(self, count):
        """Create ``count`` empty slots."""
        self.samples = [(0, None)] * count

    def add(self, item, weight):
        """Offer ``item`` with ``weight`` to the slot with the smallest heap entry.

        The slot's accumulated weight grows by ``weight`` and its stored item
        is replaced by ``item`` with probability ``weight / accumulated``.
        """
        total, kept = heapq.heappop(self.samples)
        total += weight
        if random.random() * total < weight:
            kept = item
        heapq.heappush(self.samples, (total, kept))

    def save(self, filename):
        """Write every truthy stored item to ``filename``, one per line."""
        with open(filename, "w") as stream_printer:
            for slot in self.samples:
                sample = slot[1]
                if not sample:
                    continue
                print >>stream_printer, sample

    def list_samples(self):
        """Return the stored items (``None`` for unused slots) in heap order."""
        return [slot[1] for slot in self.samples]


class DatesRange:
    """Iterable over formatted dates from a start to an end, both inclusive, one day apart."""

    def __init__(self, dtFromStr, dtToStr, dateFormat="%Y%m%d"):
        """Parse both bounds with ``dateFormat``, which is also used for output."""
        parse = datetime.datetime.strptime
        self.dateFormat = dateFormat
        self.dateFrom = parse(dtFromStr, dateFormat)
        self.dateTo = parse(dtToStr, dateFormat)

    def __iter__(self):
        """Yield each date as a string; yields nothing when the start is after the end."""
        one_day = datetime.timedelta(1)
        current = self.dateFrom
        while True:
            if not current <= self.dateTo:
                return
            yield current.strftime(self.dateFormat)
            current = current + one_day
