#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import traceback
import re
import cjson
import urllib
import urlparse

from collections import namedtuple

# Pattern for one WebDAV access-log line; it is applied with ``.match`` (so it
# is anchored at the start of the line only) through ``parser['re']['webdav']``.
# Named groups, in order of appearance:
#   datetime - digits/hyphens, one whitespace, then HH:MM:SS.mmm; the three
#              digits that follow the milliseconds are consumed, not captured
#   params   - everything up to and including a ':' + whitespace (greedy)
#   server   - word characters and dots
#   ip       - digits and dots
#   login    - a run of non-whitespace characters
#   query    - double-quoted string, quotes included; it must be followed by
#              two empty quoted fields (`"" ""`)
#   client   - double-quoted string, quotes included; greedy, so it stretches
#              to the last '"' that still allows a match
#   code     - digits directly after the client field (may be empty)
# NOTE(review): no whitespace is allowed between the client and code groups,
# so `code` is empty unless digits immediately follow the closing quote --
# confirm against real log lines.
webdav_re = re.compile(
    r'(?P<datetime>[\d\-]*\s\d\d\:\d\d\:\d\d\.\d\d\d)\d\d\d'
    r'(?P<params>[\w:]*.*:\s)'
    r'(?P<server>[\w\.]*)\s'
    r'(?P<ip>[\d\.]*)\s'
    r'(?P<login>[\S]*)\s'
    r'(?P<query>\".*?\")\s\"\"\s\"\"\s'
    r'(?P<client>\".*\")'
    r'(?P<code>\d*)'
)

# Pattern for one MPFS access-log line; applied with ``.match`` through
# ``parser['re']['mpfs']``.
# Named groups:
#   datetime - digits/hyphens, one whitespace, then digits mixed with '.', ':'
#              and ','
#   params   - '[digits]', 'digits_digits' and one word, each followed by
#              whitespace
#   query    - after a word, whitespace and a '/word/' prefix (all consumed
#              but not captured): the shortest run of characters up to AND
#              INCLUDING the next whitespace, so the captured value ends with
#              that whitespace character
mpfs_re = re.compile(
    r'(?P<datetime>[\d\-]*\s[\d\.:,]*)\s'
    r'(?P<params>\[\d*\]\s\d*_\d*\s[\w_]*\s)[\w]*\s\/\w*\/'
    r'(?P<query>.*?\s).*'
)
# Each pattern below captures the value of one query parameter up to the next
# '&'; a trailing '&' is required for a match.
# NOTE(review): none of the three is referenced in the visible part of this
# file -- check for other users before removing.
src_re = re.compile(r'.*src=(?P<src>.*?)&')
dst_re = re.compile(r'.*dst=(?P<dst>.*?)&')
path_re = re.compile(r'.*path=(?P<path>.*?)&')

# Parsing tools for each supported log type ('webdav' / 'mpfs'):
#   're'  - compiled pattern matched against every raw log line
#   'cnt' - record class whose fields mirror that pattern's named groups
# Both record classes carry the type name 'Access'.
parser = {
    're': {'webdav': webdav_re, 'mpfs': mpfs_re},
    'cnt': {
        'webdav': namedtuple(
            'Access',
            'datetime params server ip login query client code',
        ),
        'mpfs': namedtuple('Access', 'datetime params query'),
    },
}

# WebDAV request method -> action name shown in the report.
# process_webdav_query looks methods up with .get(), so a method that is not
# listed here resolves to None.
webdav_actions_map = dict(
    MKCOL='create_dir',
    PUT='upload',
    DELETE='delete',
    PROPPATCH='proppatch',
    MOVE='move',
)

# MPFS handler name -> action name shown in the report.
# process_mpfs_query strips '/' and the 'async_' prefix from the handler name
# before the lookup and uses .get(), so unlisted handlers resolve to None.
mpfs_actions_map = dict(
    store='upload',
    trash_append='delete',
    move='move',
    copy='copy',
    copy_default='copy_default',
    trash_drop='trash_drop',
    trash_restore='trash_restore',
    dstore='update',
    mkdir='create_dir',
)


def process_client(rawclient):
    """Condense a quoted client (User-Agent) field into a short label.

    Strings that do not mention ``Yandex.Disk`` are returned unchanged, and
    so are Yandex.Disk strings that carry no ``{...}`` payload.  Otherwise
    the JSON object starting at the first ``{`` is decoded and reported as
    ``YD_OS:<os>_V:<vsn>``, with spaces in the OS name replaced by
    underscores.  A key missing from the payload is rendered as ``None``.

    :param rawclient: client field as captured from the log line, including
        its surrounding double quotes.
    :return: the compact label, or ``rawclient`` itself when there is
        nothing to decode.
    :raises: whatever ``cjson.decode`` raises for a malformed payload.
    """
    if 'Yandex.Disk' not in rawclient:
        return rawclient

    json_start = rawclient.find('{')
    if json_start == -1:
        # No JSON payload: str.index() used to raise ValueError here, which
        # cost the caller the whole log line.  Pass the agent through like
        # any other client instead.
        return rawclient

    # [:-1] drops the closing quote of the field.  'string-escape' (a
    # Python 2 codec) resolves backslash escapes -- presumably the log
    # escapes the quotes embedded in the JSON; confirm against real lines.
    payload = rawclient[json_start:-1].decode('string-escape')
    client_params = cjson.decode(payload)

    os_name = client_params.get('os')
    if os_name is not None:
        # Guarded so a payload without 'os' yields 'None' (as a missing
        # 'vsn' already does) rather than an AttributeError.
        os_name = os_name.replace(' ', '_')
    return "YD_OS:%s_V:%s" % (os_name, client_params.get('vsn'))


def process_webdav_query(rawquery):
    """Split a quoted WebDAV request line into (action, resource).

    The first and last characters of ``rawquery`` (the surrounding quotes
    of the log field) are dropped.  The text before the first space is
    treated as the request method and translated through
    ``webdav_actions_map``; everything after that space is percent-decoded
    and returned as the resource.

    :param rawquery: the ``query`` group of a WebDAV log line, quotes
        included.
    :return: ``(action, resource)``.  ``action`` is ``None`` for a method
        that is not in ``webdav_actions_map``; ``resource`` is ``''`` when
        the request line contains no space.
    """
    request_line = rawquery[1:-1]
    # partition() instead of split(' ', 1) + tuple unpacking: a request line
    # without a space (e.g. "-" or an empty field) used to raise ValueError.
    # Now it simply produces an unknown method, i.e. action None.
    method, _, resource = request_line.partition(' ')
    return (webdav_actions_map.get(method), urllib.unquote(resource))


def process_mpfs_query(rawquery):
    """Turn an MPFS request (``handler?query-string``) into (action, resource).

    The handler name (text before the first ``?``) has ``/`` and the
    ``async_`` prefix removed and is translated through
    ``mpfs_actions_map``.  The resource is always a string, chosen from the
    decoded query parameters:

    * the first ``path`` value, if a ``path`` parameter is present;
    * ``"<src> <dst>"`` if both ``src`` and ``dst`` are present;
    * otherwise the ``str()`` of the whole parameter dict.

    :param rawquery: the ``query`` group of an MPFS log line, still
        percent-encoded.
    :return: ``(action, resource)``; ``action`` is ``None`` for a handler
        that is not in ``mpfs_actions_map``.
    """
    # Split and parse BEFORE decoding.  parse_qs percent-decodes names and
    # values itself, so decoding the whole string first (as was done
    # previously) decoded values twice and let an encoded '&', '=', ';' or
    # '+' inside a path break the parameter apart or alter it.
    # partition() also tolerates a request without any '?'.
    handler, _, query_string = rawquery.partition('?')
    params = urlparse.parse_qs(query_string)

    if 'path' in params:
        # parse_qs maps every name to a LIST of values; take the first so
        # the resource is a string here as in the src/dst branch (the list
        # itself used to be returned).
        resource = params['path'][0]
    elif 'src' in params and 'dst' in params:
        resource = "%s %s" % (params['src'][0], params['dst'][0])
    else:
        resource = str(params)

    action = urllib.unquote_plus(handler)
    action = action.replace('/', '').replace('async_', '')
    return (mpfs_actions_map.get(action), resource)


def access_iter(source_iter, log_type):
    """Lazily parse log lines into records.

    Each line from ``source_iter`` is matched against the pattern registered
    for ``log_type`` in ``parser['re']``.  Lines that do not match are
    skipped; for the others a ``parser['cnt'][log_type]`` record built from
    the pattern's named groups is yielded.

    :param source_iter: iterable of raw log lines.
    :param log_type: key into ``parser`` ('webdav' or 'mpfs').
    """
    for raw_line in source_iter:
        found = parser['re'][log_type].match(raw_line)
        if found is None:
            continue
        record_cls = parser['cnt'][log_type]
        yield record_cls(**found.groupdict())


def _entry_matches(action, resource, path, req_action):
    """Tell whether a parsed log entry passes the path and action filters."""
    if not action:
        return False
    if path not in resource:
        return False
    return not req_action or action == req_action


def _report_webdav(webdav_file, path, req_action):
    """Print every WebDAV log entry of ``webdav_file`` that passes the filters."""
    for item in access_iter(webdav_file, 'webdav'):
        try:
            action, resource = process_webdav_query(item.query)
            if not _entry_matches(action, resource, path, req_action):
                continue
            print("%s %s %s %s %s" % (
                item.datetime,
                '%s %s' % (action, resource),
                item.code,
                process_client(item.client),
                item.ip,
            ))
        except Exception:
            # Best effort: one bad line must not stop the scan.
            print(traceback.format_exc())


def _report_mpfs(mpfs_file, path, req_action):
    """Print every MPFS log entry of ``mpfs_file`` that passes the filters."""
    for item in access_iter(mpfs_file, 'mpfs'):
        try:
            # Cheap pre-filter: only requests that name a path or a source
            # are parsed at all.
            if 'path=' not in item.query and 'src=' not in item.query:
                continue
            action, resource = process_mpfs_query(item.query)
            if not _entry_matches(action, resource, path, req_action):
                continue
            print("%s %s YD_WEB" % (
                item.datetime,
                '%s %s' % (action, resource),
            ))
        except Exception:
            # Best effort: one bad line must not stop the scan.
            print(traceback.format_exc())


def work(logfiles, path, req_action):
    """Print the log entries that touch ``path``, WebDAV first, then MPFS.

    An entry is printed when its action is known (non-empty after mapping),
    ``path in resource`` holds and, if ``req_action`` is truthy, the action
    equals ``req_action``.  WebDAV entries are printed as
    ``<datetime> <action> <resource> <code> <client> <ip>``; MPFS entries as
    ``<datetime> <action> <resource> YD_WEB``.  An exception raised while
    handling a single entry is caught and its traceback is printed to
    standard output in place of the entry.

    :param logfiles: mapping with optional 'webdav' and 'mpfs' entries, each
        an iterable of log lines.  A log that is missing or ``None`` is
        skipped (it used to raise TypeError, which also prevented the other
        log from being processed).
    :param path: value that must be contained in an entry's resource.
    :param req_action: action name to restrict the output to, or a falsy
        value to accept every known action.
    """
    webdav_file = logfiles.get('webdav')
    if webdav_file is not None:
        _report_webdav(webdav_file, path, req_action)

    mpfs_file = logfiles.get('mpfs')
    if mpfs_file is not None:
        _report_mpfs(mpfs_file, path, req_action)

