# -*- coding: utf-8 -*-

import urllib

from sandbox.projects.common.base_search_quality.tree.node import Node as BaseNode


class Node(BaseNode):
    """
        Multimap-like tree node (compare std::multimap): one key may hold
        several child nodes and several property values.

        Children are kept in self._nodes (key -> list of nodes) and properties
        in self._props (key -> list of values); both containers are expected
        to be created by BaseNode.__init__.
    """

    def __init__(self, sort_nodes=True, sort_props=True):
        super(Node, self).__init__()
        # When a flag is set the matching iterlist*() walks keys in sorted
        # order, otherwise in the order given by the underlying container.
        self._sort_nodes = sort_nodes
        self._sort_props = sort_props

    def add_node(self, k, n):
        """
            append child node <n> to the list kept under key <k>
        """
        self._nodes.setdefault(k, []).append(n)

    def add_prop(self, k, v=None):
        """
            append value <v> to the list kept under key <k>
            (a missing value is stored as an empty string)
        """
        value = '' if v is None else v
        self._props.setdefault(k, []).append(value)

    def iterlistnodes(self):
        """
            yield (key, list of child nodes sharing this key)
        """
        groups = self._nodes.iteritems()
        if self._sort_nodes:
            groups = sorted(groups, key=lambda pair: pair[0])
        for key, children in groups:
            yield key, children

    def iternodes(self):
        """
            yield (key, child node), one pair per child
        """
        for key, children in self.iterlistnodes():
            for child in children:
                yield key, child

    def iterlistprops(self):
        """
            yield (key, list of property values sharing this key)
        """
        groups = self._props.iteritems()
        if self._sort_props:
            groups = sorted(groups, key=lambda pair: pair[0])
        for key, values in groups:
            yield key, values

    def iterprops(self):
        """
            yield (key, property value), one pair per value
        """
        for key, values in self.iterlistprops():
            for value in values:
                yield key, value


class Request:
    """
        Holder for one parsed request: its reqid and the node built for it.
    """

    def __init__(self, reqid, node):
        self.reqid, self.node = reqid, node


class Requests:
    """
        Requests parsed from a text eventlog.

        reqs -- list of Request, in order of first appearance in the log
        idx  -- dict reqid -> Request (same objects as in reqs)
    """

    def __init__(self, fn):
        self.reqs = []  # list<Request>
        self.idx = {}  # hash<reqid, Request>
        self._parse_eventlog_txt(fn)
        self._patch()

    def _store_request(self, reqid, subreq):
        """
            register the sub-requests of a finished frame under <reqid>;
            frames without a reqid and already seen reqids are ignored
        """
        if not reqid or reqid in self.idx:
            return
        req = Request(reqid, subreq)
        self.idx[reqid] = req
        self.reqs.append(req)

    def _parse_eventlog_txt(self, fn):
        """
            parse subreqs from eventlog file <fn> to Node
            (one source to one subnode, etc) and fill self.reqs / self.idx

            Every line is split by tabs; the fields used are:
                tk[1] -- frame id (int), consecutive lines with the same id
                         form one frame
                tk[2] -- event name
                tk[3] -- source number (SubSourceInit / SubSourceRequest)
                tk[6] -- prefix of the source id (SubSourceInit)
                tk[7] -- request URI (SubSourceRequest)
            Only the first request to each source inside a frame is kept.
            Lines with too few fields raise IndexError, non-numeric ids
            raise ValueError.
        """
        with open(fn) as f:
            cur_frame = None
            # source number -> source id; kept between frames on purpose of
            # the original code (entries are overwritten by SubSourceInit)
            srcs = {}
            subreq = Node()  # subsource-id -> cgi
            reqid = None
            for line in f:
                tk = line.rstrip('\n').split('\t')
                frame = int(tk[1])
                event = tk[2]
                if frame != cur_frame:
                    # compare with None explicitly: frame id 0 is a real frame
                    # and must be flushed like any other
                    if cur_frame is not None:
                        self._store_request(reqid, subreq)
                        subreq = Node()
                        reqid = None
                    cur_frame = frame
                if event == 'SubSourceInit':
                    srcs[int(tk[3])] = '{}.{}'.format(tk[6], tk[3])
                elif event == 'SubSourceRequest':
                    src_id = srcs.get(int(tk[3]))
                    if src_id is None:
                        continue  # request to a source we saw no init for
                    if src_id.startswith('SERP_OBJECT.'):
                        continue  # requests to SERP_OBJECT too unstable
                    node_cgi = parse_uri(tk[7])
                    if src_id not in subreq._nodes:
                        subreq._nodes[src_id] = [node_cgi, ]
                    list_reqid = node_cgi._props.get('reqid', None)
                    if list_reqid:
                        reqid = list_reqid[0]
            # the last frame has no following frame to trigger the flush
            if cur_frame is not None:
                self._store_request(reqid, subreq)

    def _patch(self):
        """
            remove/replace unstable params
            (currently: drop the 'ruid' property of every source request)
        """
        for req in self.reqs:
            # iterate sources
            for _src_id, node_src in req.node.iternodes():
                node_src._props.pop('ruid', None)


def parse_uri(uri):
    """
        Parse the query string of <uri> (everything after the first '?')
        and return Node (_nodes, _props contain cgi params tree).

        - 'relev' and 'rearr' values are unquoted and parsed by parse_props
          into child nodes;
        - 'req_path=...' is skipped;
        - any other 'k=v' becomes a property with the unquoted value;
        - a param without '=' becomes a property with an empty value;
        - values of 'gta' are sorted, so their order in the uri is ignored.
        An uri without '?' gives an empty Node.
    """
    # partition() instead of split('?', 1)[1]: no IndexError when the uri
    # carries no query string at all
    query = uri.partition('?')[2]
    cgi_params = Node()
    for param in query.split('&'):
        if not param:
            continue
        k, sep, v = param.partition('=')
        if not sep:
            cgi_params.add_prop(k)
            continue
        if k == 'req_path':
            continue
        if k in ('relev', 'rearr'):
            cgi_params.add_node(k, parse_props(urllib.unquote(v)))
        else:
            cgi_params.add_prop(k, urllib.unquote(v))

    gta = cgi_params._props.get('gta')
    if gta:
        gta.sort()
    return cgi_params


def parse_props(s, node=None, prop_sep=';', kv_sep='='):
    """
        Parse <s> of the form 'k=v;k2=v2;...' into properties.

        Items are separated by <prop_sep>, key and value by the first
        <kv_sep>; empty items are skipped and an item without <kv_sep> is
        added as a key without value. Properties are added to <node> via
        add_prop (a new Node is created when <node> is None).
        return the node
    """
    target = Node() if node is None else node
    for chunk in s.split(prop_sep):
        if not chunk:
            continue
        key_value = chunk.split(kv_sep, 1)
        if len(key_value) < 2:
            target.add_prop(key_value[0])
        else:
            key, value = key_value
            target.add_prop(key, value)
    return target
