#!/usr/bin/env python

from sets import Set
from decimal import Decimal
import urllib
import fileinput
import re
import sys
import getopt

# XXX should use docviewer's MimeTypes or something like that

# MIME types that simple_type() reports as "document".
document_mime_types = frozenset([
    "application/vnd.oasis.opendocument.text",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "text/html",
    "text/plain",
    "application/pdf",
    "application/vnd.ms-excel",
    "application/msword",
])

# MIME types that simple_type() reports as "archive".
archive_mime_types = frozenset([
    "application/zip",
    "application/x-rar",
    "application/x-gzip",
    "application/x-zip-compressed",
])

# Top-level MIME categories that simple_type() returns unchanged.
# Built once here instead of on every call.
_media_categories = frozenset(["video", "audio", "image"])


def simple_type(mime_type):
    """Collapse a MIME type into a coarse category name.

    mime_type -- a "major/minor" MIME type string; None or an empty string
                 is accepted and classified as "unknown".

    Returns the major part for video/audio/image types, "document" or
    "archive" for the types listed in document_mime_types and
    archive_mime_types, and "unknown" for everything else.
    """
    if not mime_type:
        # Requests for which no mime-type was logged carry None here.
        return "unknown"
    category = mime_type.split("/")[0]
    if category in _media_categories:
        return category
    if mime_type in document_mime_types:
        return "document"
    if mime_type in archive_mime_types:
        return "archive"
    return "unknown"

"""
2013-09-09 00:01:56,723 INFO  qtp717879615-10537810#rid=tXErl3wW,20130908T200156.722.rp.6v90rvp5fqt3ylsb61jvfxmhl-k8h.k8h-823015/r.y.co.uploader.registry.UploadRegistry: New record: RequestMeta[id=20130908T200156.722.rp.6v90rvp5fqt3ylsb61jvfxmhl-k8h.k8h-823015, created=2013-09-08T20:01:56.722Z] MpfsRequest.RegeneratePreview[originalFile=7828.yadisk:111970282.209408075756228473221935674127, mimeType=application/vnd.ms-excel, size=21504, apiVersion=ApiVersion[string=0.2], chemodanFile=ChemodanFile[uidOrSpecial=UidOrSpecial.Uid[passportUid=1], uniqueFileId=unused, path=unused], callbackUri=None, maxFileSize=None]

2013-09-09 00:01:51,390 INFO  qtp717879615-10537793#rid=J287c6HT/r.y.co.uploader.registry.UploadRegistry: New record: RequestMeta[id=20130908T200151.389.utd.54c89yle5bqf60d4h2msi1z16-k8h.k8h-823005, created=2013-09-08T20:01:51.389Z] MpfsRequest.UploadToDefault[apiVersion=ApiVersion[string=0.2], chemodanFile=ChemodanFile[uidOrSpecial=UidOrSpecial.Uid[passportUid=29087675], uniqueFileId=8a2c807a94095484acf906553686000fe5d8123b4850c26c8dde7d4e7cba43ce, path=29087675:/disk/-----2013/DSCN2897.JPG], callbackUri=Some(http://mpfs.disk.yandex.net:80/service/kladun_callback?uid=29087675&oid=a4d0a5663009b25ba92f6a87c7d21ad316e16a0273b583206d389d74be8f8271), maxFileSize=Some(34554473609)]

2013-09-09 00:01:17,179 INFO  qtp1326946791-10655751#rid=raqSMPv5: "PUT /upload-target/20130908T200110.345.utd.9ga2lhfbzx3mgvq1gcrvuo80f-k15h.k15h-822107" HTTP/1.1 "Yandex.Disk {"os":"windows","vsn":"1.0.1.3812","id":"32997D7432A74B749F7C837150CEFAE4"}" 201 79.172.46.178 2215870 0 6.606

2013-09-09 00:01:52,874 INFO  qtp25745710-10192182#rid=jEtnr0by/r.y.mi.web.servletContainer.jetty.access: "PUT /upload-target/20130908T200147.890.utd.9k8tpkutynukuh0zs3cn1651q-k8h.k8h-822999" HTTP/1.1 "Yandex.Disk {"os":"windows","vsn":"1.0.1.3812","id":"0C6366F322004CFE82A3431B255E20F9"}" 201 109.69.77.175 2605615 0 4.921

2013-09-09 00:01:56,923 INFO  qtp25745710-10692468#rid=K3Hdmw9R,20130908T200156.336.utd.egj46a304hi9mffru4bzqz8xp-k8h.k8h-823014/r.y.ch.up.log.Events: 20130908T200156.336.utd.egj46a304hi9mffru4bzqz8xp-k8h.k8h-823014 type=UploadToDefault, stage=userFile, duration=0.202, file_size=1458, success=true
"""

#new_pattern = re.compile('.*New record: RequestMeta\[id=([^,]*).*Uid\[passportUid=([0-9]*).*')
#put_pattern = re.compile('.*PUT /upload-target/([^"]*)" HTTP/1.1 "([^ "]*).*')
#evt_pattern = re.compile('.*r.y.ch.up.log.Events: ([^ ]*) type=([^ ,]*), stage=([^ ,]*), duration=([^ ,]*)(, file_size=([0-9]*))?.*success=([a-z]*)')

# Patterns for the uploader log lines sampled above.  They are written as raw
# strings so that regex escapes such as \[ are not read as (invalid) string
# escapes; the pattern text itself is unchanged.

# "New record" line of an UploadToDefault request.
#   group 1: request id, group 2: passportUid digits
new_pattern = re.compile(r'[^ ]* [^ ]* INFO  [^ ]* New record: RequestMeta\[id=([^,]*), created=[^\]]*\] MpfsRequest.UploadToDefault[^U]*UidOrSpecial.Uid\[passportUid=([0-9]*)')

# Access-log line of a "PUT /upload-target/<id>" request.
#   group 1: request id, group 2: whole user agent ("-" or "Yandex.Disk {...}"),
#   group 3: value of "os" in the Yandex.Disk user agent (None when the agent is "-")
put_pattern = re.compile(r'[^ ]* [^ ]* INFO  [^ ]* "PUT /upload-target/([^"]*)" HTTP/1.1 "(-|Yandex.Disk {"os":"([^"]*)".*)" ')

# r.y.ch.up.log.Events line: a request id followed by ", "-separated key=value
# fields.  Fields may come in any order; unrecognised keys are skipped.
#   group 1: request id, 5: type, 6: stage, 7: duration, 8: file_size,
#   group 9: success, 10: mime-type (groups 2-4 are structural)
evt_pattern = re.compile(r'[^ ]* [^ ]* INFO  [^/]*/r.y.ch.up.log.Events: ([^ ]*) ((, )?(type=([^,]*)|stage=([^,]*)|duration=([^,]*)|file_size=([0-9]*)|success=([a-z-]*)|mime-type=([^,]*)|[a-z_-]*=[^,]*))*')

# Leading part of a request id: <digits>T<digits><any char><digits>...
#   groups 1-3: the three digit runs (e.g. "20130908", "200156", "336")
id_pattern = re.compile(r"^([0-9]*)T([0-9]*).([0-9]*).*$")

"""
2013-09-09 00:25:13.022946 yadrop_web:360:notice [{<0.3478.439>,11}]->[access]: webdav.yandex.ru 95.31.4.107 ZloyStacevich;uid=99244161 "PUT /disk/%D0%9C%D1%83%D0%B7%D1%8B%D0%BA%D0%B0/06-Snow%20Storm%20-%206.%20Military%20march.mp3" "" "" "Yandex.Disk {\"os\":\"android 4.1.2\",\"src\":\"disk.mobile\",\"vsn\":\"1.23-1145\",\"id\":\"9518465e78359e4ba0e6e28c5649098a\"}" 201 3.830964 216 3578528
"""
# WebDAV access-log line of a PUT request.
#   group 1: uid digits, group 2: request path, group 3: lowercase letters
#   that start the "os" value of the Yandex.Disk user agent
# NOTE(review): this pattern does not appear to match the sample line above:
# `[^;]*uid=` cannot step over the ";" in front of "uid=", and `[^ ]* 201`
# cannot step over the space inside the "android 4.1.2" os value.  The regex
# \" also matches a bare double quote only, so lines whose quotes are
# backslash-escaped would not match either.  Confirm against a real log.
webdav_pattern = re.compile('[^ ]* [^ ]* [^ ]* [^ ]* webdav.yandex.ru [^ ]* [^;]*uid=([0-9]*) "PUT ([^"]*)" "[^"]*" "[^"]*" "Yandex.Disk {\\"os\\":\\"([a-z]*)[^ ]* 201')

def parse_webdav_log(filename):
    """Collect the client os of every line of a log file matching webdav_pattern.

    filename -- path of the log file to read.

    Returns a dict mapping "<uid>:<path>" (groups 1 and 2 of webdav_pattern)
    to the os name captured by group 3.  Lines that do not match are ignored.
    """
    filepath_to_client = {}
    # A private FileInput instance keeps this read independent of the
    # module-level fileinput.input() state.
    log = fileinput.FileInput(filename)
    try:
        for line in log:
            m = webdav_pattern.match(line)
            if m:
                uid, path, client_os = m.groups()
                filepath_to_client[uid + ":" + path] = client_os
    finally:
        log.close()
    return filepath_to_client

# Report format selected with --view; "flat" unless overridden.
view = "flat"
# Result of parse_webdav_log() for --webdavlog; stays empty when the option
# is not given.
filepath_to_client = {}

known_views = frozenset(["flat", "compact", "compact-norm", "size"])

try:
    opts, args = getopt.getopt(sys.argv[1:], "", ["view=", "webdavlog="])
except getopt.GetoptError as err:
    # Report a bad command line as a message rather than a traceback.
    sys.stderr.write("%s\n" % err)
    sys.exit(1)

for o, a in opts:
    if o == "--view":
        if a not in known_views:
            sys.stderr.write("unknown view option\n")
            sys.exit(1)
        view = a
    elif o == "--webdavlog":
        filepath_to_client = parse_webdav_log(a)
    else:
        sys.stderr.write("unknown option %s %s\n" % (o, a))
        sys.exit(1)

def to_short_id(id):
    """Return a shortened form of a request id for display.

    id -- request id string, e.g. "20130908T200156.336.utd...".

    The three digit runs captured by id_pattern are concatenated and the
    first six characters dropped, so the example above becomes
    "08200156336".  An id that does not match id_pattern is returned
    unchanged instead of raising.
    """
    m = id_pattern.match(id)
    if m is None:
        return id
    return "".join(m.group(1, 2, 3))[6:]

class Stats:
    """Measurements collected for one upload request."""

    def __init__(self):
        # MIME type logged for the request; None until one has been seen.
        self.mime_type = None
        # Size taken from the file_size field and the size / duration ratio
        # of the "userFile" stage; both stay 0 until that stage is recorded.
        self.file_size = self.upload_speed = 0
        # Stage name -> duration, one entry per recorded stage.
        self.stage_durations = {}

# Data gathered from the log on stdin, all keyed by upload request id.
id_to_uid = {}     # request id -> passportUid (int), from "New record" lines
id_to_client = {}  # request id -> client label, from "PUT /upload-target/" lines
id_stats = {}      # request id -> Stats, from r.y.ch.up.log.Events lines

for raw_line in fileinput.input('-'):
    # Drop the line terminator so that the last key=value field of an event
    # line does not capture it as part of its value.
    line = raw_line.rstrip("\r\n")

    m = evt_pattern.match(line)
    if m:
        request_id = m.group(1)
        event_type = m.group(5)
        stage = m.group(6)
        duration_text = m.group(7)
        size_text = m.group(8)
        success = m.group(9)
        mime_type = m.group(10)

        # Every request seen in an event line gets a Stats entry, even when
        # nothing below ends up being recorded for it.
        stats = id_stats.get(request_id)
        if stats is None:
            stats = id_stats[request_id] = Stats()

        # Only successful UploadToDefault stages are timed.  Events that
        # carry no stage or no duration field are skipped rather than
        # aborting the whole run.
        if (event_type == "UploadToDefault" and success == "true"
                and stage is not None and duration_text):
            duration = float(duration_text)
            stats.stage_durations[stage] = duration
            if stage == "userFile" and size_text and duration > 0:
                size = float(size_text)
                stats.upload_speed = size / duration
                stats.file_size = size
        if mime_type is not None:
            stats.mime_type = mime_type
        continue

    m = new_pattern.match(line)
    if m:
        id_to_uid[m.group(1)] = int(m.group(2))
        continue

    m = put_pattern.match(line)
    if m:
        # Group 3 is the "os" of a Yandex.Disk user agent; it is None when
        # the user agent is "-", which is labelled "webdav".
        client_os = m.group(3)
        if client_os is not None:
            id_to_client[m.group(1)] = "desktop/" + client_os
        else:
            id_to_client[m.group(1)] = "webdav"

# Stage names in the order in which the report views list them.
stages_list = [
    "userFile",
    "payloadInfo",
    "pp.commitFileInfo",
    "pp.fileMulcaUploadInfo",
    "pp.digestMulcaUploadInfo",
    "pp.commitFileUpload",
    "pp.previewDocumentStatus",
    "pp.previewImageStatus",
    "pp.previewVideoStatus",
    "pp.exifInfo",
    "pp.mediaInfo",
    "pp.videoInfo",
    "pp.antivirusResult2",
    "commitFinal",
]
stages_set = frozenset(stages_list)
# Stages whose durations the "compact-norm" view scales by file size; they
# are marked with "*" in that view's header.
size_dependent_stages = frozenset([
    "userFile",
    "pp.fileMulcaUploadInfo",
    "pp.digestMulcaUploadInfo",
    "pp.antivirusResult2",
])

# Header line for each supported view, as a list of space-joined fields.
# The "compact-norm" header stars the size-dependent stages.
header_fields = {
    "flat": ["id stage duration"],
    "compact": ["id", "client"] + stages_list,
    "compact-norm": ["id", "client"] + list(
        (name + "*") if name in size_dependent_stages else name
        for name in stages_list),
    "size": ["id size duration speed client uid mime_type simple_type"],
}

if view not in header_fields:
    sys.stderr.write("unknown view\n")
    sys.exit(1)
sys.stdout.write(" ".join(header_fields[view]) + "\n")

#print "id_stats.keys =\n", "\n".join(sorted(id_stats.keys()))
#print "id_to_client.keys =\n", "\n".join(sorted(id_to_client.keys()))
#print "id_to_uid.keys =\n", "\n".join(sorted(id_to_uid.keys()))

# Print one report row (or one row per stage in the "flat" view) for every
# request whose client and uid are both known.
id_n = 0
for request_id in sorted(id_stats):
    if request_id not in id_to_client or request_id not in id_to_uid:
        continue

    id_n += 1

    stats = id_stats[request_id]
    display_id = to_short_id(request_id)
    client = id_to_client[request_id]

    if view == "flat":
        for s in stages_list:
            row = (display_id, s, stats.stage_durations.get(s, 0))
            sys.stdout.write(" ".join(str(f) for f in row) + "\n")
    elif view == "compact":
        durations = [str(stats.stage_durations.get(s, 0)) for s in stages_list]
        sys.stdout.write(" ".join([display_id, client] + durations) + "\n")
    elif view == "compact-norm":
        # Requests without a recorded file size cannot be normalised.
        if stats.file_size > 0:
            norm_durations = []
            for s in stages_list:
                duration = stats.stage_durations.get(s, 0.0)
                if s in size_dependent_stages:
                    # Scale to duration per 1048576 bytes of file size.
                    duration = duration * 1048576 / stats.file_size
                norm_durations.append(str(duration))
            sys.stdout.write(" ".join([display_id, client] + norm_durations) + "\n")
    elif view == "size":
        if stats.file_size > 0:
            duration = stats.stage_durations.get("userFile", 0)
            # A request may have no mime-type logged at all; print a
            # placeholder instead of failing in urllib.quote().
            mime_type = (stats.mime_type or "").strip() or "unknown"
            # NOTE(review): the header labels the fourth column "speed", but
            # the value is duration / size, not size / duration (which is
            # what Stats.upload_speed holds) -- confirm which is intended.
            row = (
                display_id,
                stats.file_size,
                duration,
                duration / stats.file_size,
                client,
                id_to_uid[request_id],
                urllib.quote(mime_type),
                simple_type(mime_type),
            )
            sys.stdout.write(" ".join(str(f) for f in row) + "\n")
    else:
        sys.stderr.write("unknown view\n")
        sys.exit(1)

