import nile
from nile.api.v1 import (
	clusters,
	filters as nf,
	extractors as ne,
	aggregators as na,
	statface as ns,
	Record
)
from qb2.api.v1 import filters as sf
import datetime
import os
import argparse


def error_type(error):
    """Classify an error message as "Non-critical" or "Critical".

    A message is "Non-critical" only when it contains the text
    "Action is from yandex servers network"; everything else is "Critical".
    """
    is_internal_action = "Action is from yandex servers network" in error
    return "Non-critical" if is_internal_action else "Critical"

def push_to_stat_new(report_table, scale, report, username, stat_token):
    """Publish ``report_table`` to the Statface report at path ``report``.

    report_table -- table path passed to remote_publish as ``table_path``
    scale        -- report scale passed to StatfaceReport.scale()
    report       -- report path passed to StatfaceReport.path()
    username     -- Statface user name used to build the client
    stat_token   -- Statface token used to build the client

    The publish call is made with proxy 'hahn', async_mode=False and
    upload_config=False.
    """
    stat_client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        username=username,
        token=stat_token
    )

    stat_report = ns.StatfaceReport().path(report).scale(scale).client(stat_client)
    stat_report.remote_publish(
        proxy='hahn',
        table_path=report_table,
        async_mode=False,
        upload_config=False
    )

# Known error-message fragments used by get_rows() to normalise subkeys.
#
# ORDER MATTERS: get_rows() stops at the first fragment that is a substring of
# the subkey, so a fragment that contains another fragment must be listed
# BEFORE the shorter one (e.g. "...field timestamp" and "...field timefs"
# before "...field time"); otherwise the longer one can never match.
errors = [
    "ParsedBadFormat.",
    "Wrong uid",
    "Invalid uid",
    "Invalid ip",
    "bad position",
    "Bad format for field bebr-client-ts",
    "Bad format for field report-item-topic",
    "Bad format for field duration",
    "Bad format for field error_code",
    "Bad format for field watchedTime",
    "Bad format for field hdtime",
    "Bad format for field playing-duration",
    "Bad format for field serial",
    "Invalid page number",
    "bad web",
    "Bad format for field timestamp",
    "Bad format for field timefs",
    "Bad format for field time",
    "Bad format for field user_cards_count",
    "Invalid mouse move entropy",
    "Bad format for field client-timestamp",
    "Bad format for field code",
    "Bad format for field exp_config_version",
    "Bad format for field ticks",
    "Bad html_url",
    "Host with invalid Yandex domain suffix",
    "URL is too short",
    "ParsedRootless",
    "Host w/o TLD.",
    "Bad format for field since_first_change",
    "Invalid times field",
    "ParsedBadHost",
    "Invalid ratio field",
    "Url can not be unescaped",
    "URL scheme invalid",
    "Bad src url",
    "Bad url",
    "util/draft/enum.h:64",
    "Bad format for field cts",
    "Bad format for field with",
    "Bad format for field since_last_change",
    "Bad format for field total_input_time",
    "Bad format for field rnd",
    "Bad format for field resnum",
    "Bad format for field test-buckets",
    "Bad format for field user-region",
    "Unexpected symbol",
]

def _normalize_subkey(raw):
    """Reduce a raw error subkey to a shorter message used for grouping.

    The first entry of the module-level ``errors`` list found in ``raw``
    decides how the message is truncated. Afterwards the text between the
    first ')' and the second ':' that follows it is removed (presumably a
    source-location prefix -- confirm against real log samples).
    """
    subkey = raw
    for err in errors:
        if err not in raw:
            continue

        if err == "Bad format for field with":
            # Cut just before "dtype" (dropping the separator in front of it).
            # str.find returns -1 when the marker is absent; slicing with that
            # would silently chop the tail of the message, so skip the cut.
            dtype_pos = raw.find("dtype")
            if dtype_pos > 0:
                subkey = raw[:dtype_pos - 1]
            if '":' in subkey:
                subkey = subkey[:subkey.find('":') + 1]
        elif err == "Unexpected symbol":
            subkey = raw[:raw.find(err) + len(err)]
            # Append the "at pos ... in string" detail only when both markers
            # exist; with a -1 index the slice would be unrelated text.
            left = raw.find("at pos")
            right = raw.rfind("in string")
            if left != -1 and right != -1:
                subkey += " " + raw[left:right + len("in string")]
        else:
            subkey = raw[:raw.find(err) + len(err)]
            if err == "util/draft/enum.h:64":
                subkey += ': not found in enum.'
        break

    # Drop everything between the first ')' and the second ':' after it.
    # A failed find() yields -1, and "-1 + 1" leaves the offset unchanged,
    # so a missing ')' or ':' simply removes less (or nothing).
    left = subkey.find(')') + 1
    right = left + subkey[left:].find(':') + 1
    right += subkey[right:].find(':') + 1
    return subkey[:left] + subkey[right:]


def get_rows(tab, serv):
    """Expand aggregated error records into rows with "_total_" roll-ups.

    tab  -- object whose read() yields records with "key", "subkey" and
            "count_errors" fields
    serv -- service label stored in the ``service`` field of each row

    For every input record the subkey is normalised and 16 Records are
    produced: each of key, subkey, service and error_type is emitted both
    with its real value and with the "_total_" placeholder. Every produced
    row carries the input record's ``count_errors``.

    Returns the list of produced Record objects.
    """
    rows = []

    for rec in tab.read():
        subkey = _normalize_subkey(rec["subkey"])
        severity = error_type(subkey)
        count = rec["count_errors"]

        for key_var in (rec["key"], "_total_"):
            for subkey_var in (subkey, "_total_"):
                for service_var in (serv, "_total_"):
                    for error_type_var in (severity, "_total_"):
                        rows.append(Record(key=key_var, service=service_var,
                                           error_type=error_type_var,
                                           count_errors=count, subkey=subkey_var))
    return rows

def build_video_table(date):
    """Build the daily video error table for ``date`` on the Hahn cluster.

    Three stages are executed in order:
    1. Filter the daily errors log down to video-related records and count
       them per (key, subkey) into ``video_table_daily_<date>``.
    2. Read that table, expand it with get_rows(..., "video") and write the
       rows to ``video_info_daily_<date>``.
    3. Sum ``count_errors`` per (key, subkey, service, error_type), project
       the result (key as ``log``, subkey as ``error``, constant
       ``fielddate``) and store it in ``video_errors_daily_<date>``.
    """
    source_path = "//user_sessions/pub/search/daily/" + date + "/errors"
    counts_path = "//tmp/white2302/video_table_daily_" + date
    info_path = "//tmp/white2302/video_info_daily_" + date
    result_path = "//tmp/white2302/video_errors_daily_" + date

    cluster = clusters.yt.Hahn()

    # Stage 1: count matching error records per (key, subkey).
    job = cluster.job()
    source = job.table(source_path)

    from_video_logs = sf.or_(
        sf.equals('key', 'videoreqans_log'),
        sf.equals('key', 'access_log'),
        sf.equals('key', 'blockstat_log'),
        sf.equals('key', 'redir_log')
    )
    is_video_request = sf.or_(
        sf.equals('key', 'videoreqans_log'),
        sf.contains('value', 'request=/video'),
        sf.contains('value', 'yandex.ru/video'),
        sf.contains('value', 'yandex.kz/video'),
        sf.contains('value', 'yandex.ua/video'),
        sf.contains('value', 'yandex.by/video')
    )
    is_player_redir = sf.and_(
        sf.equals('key', 'redir_log'),
        sf.or_(
            sf.contains('value', 'path=player-events'),
            sf.contains('value', 'path=heartbeat'),
            sf.contains('value', 'duration')
        )
    )

    selected = source.filter(
        sf.or_(sf.and_(from_video_logs, is_video_request), is_player_redir)
    )
    counted = selected.groupby('key', 'subkey').aggregate(count_errors=na.count())
    counted.put(counts_path)
    job.run()

    # Stage 2: expand the counted records locally and write them back.
    counted_table = cluster.job().table(counts_path)
    rows = get_rows(counted_table, "video")
    cluster.job().table(info_path).write(rows)

    # Stage 3: re-aggregate the expanded rows into the final layout.
    job = cluster.job()
    expanded = job.table(info_path)
    summed = expanded.groupby('key', 'subkey', 'service', 'error_type') \
                     .aggregate(count_errors=na.sum("count_errors"))
    final = summed.project(log='key', error='subkey', fielddate=ne.const(date),
                           count_errors='count_errors',
                           service='service', error_type='error_type')
    final.put(result_path)
    job.run()

def build_web_table(date):
    """Build the daily web-search error table for ``date`` on the Hahn cluster.

    Three stages are executed in order:
    1. Filter the daily errors log down to web-search records and count
       them per (key, subkey) into ``web_table_daily_<date>``.
    2. Read that table, expand it with get_rows(..., "web") and write the
       rows to ``web_info_daily_<date>``.
    3. Sum ``count_errors`` per (key, subkey, service, error_type), project
       the result (key as ``log``, subkey as ``error``, constant
       ``fielddate``) and store it in ``web_errors_daily_<date>``.
    """
    source_path = "//user_sessions/pub/search/daily/" + date + "/errors"
    counts_path = "//tmp/white2302/web_table_daily_" + date
    info_path = "//tmp/white2302/web_info_daily_" + date
    result_path = "//tmp/white2302/web_errors_daily_" + date

    cluster = clusters.yt.Hahn()

    # Stage 1: count matching error records per (key, subkey).
    job = cluster.job()
    source = job.table(source_path)

    from_web_logs = sf.or_(
        sf.equals('key', 'reqans_log'),
        sf.equals('key', 'blockstat_log'),
        sf.equals('key', 'redir_log')
    )
    is_search_request = sf.or_(
        sf.contains('value', 'request=/search'),
        sf.contains('value', 'yandex.kz/search'),
        sf.contains('value', 'yandex.ru/search'),
        sf.contains('value', 'yandex.ua/search'),
        sf.contains('value', 'yandex.by/search')
    )

    selected = source.filter(sf.and_(from_web_logs, is_search_request))
    counted = selected.groupby('key', 'subkey').aggregate(count_errors=na.count())
    counted.put(counts_path)
    job.run()

    # Stage 2: expand the counted records locally and write them back.
    counted_table = cluster.job().table(counts_path)
    rows = get_rows(counted_table, "web")
    cluster.job().table(info_path).write(rows)

    # Stage 3: re-aggregate the expanded rows into the final layout.
    job = cluster.job()
    expanded = job.table(info_path)
    summed = expanded.groupby('key', 'subkey', 'service', 'error_type') \
                     .aggregate(count_errors=na.sum("count_errors"))
    final = summed.project(log='key', error='subkey', fielddate=ne.const(date),
                           count_errors='count_errors',
                           service='service', error_type='error_type')
    final.put(result_path)
    job.run()

def main():
    """Build the daily web and video error tables and publish them.

    Command-line arguments (all required):
    --date           date string used in the input and output table paths
    --stat_username  user name for the Statface client
    --stat_token     token for the Statface client

    The function polls until the daily errors table for ``--date`` exists,
    builds the web and video tables, concatenates and re-aggregates them
    into ``errors_daily_<date>`` and publishes that table to the
    "Video.All/Video%20errors%20stat" report with the "daily" scale.
    """
    # The module never imports `time`, so the polling loop below used to fail
    # with NameError on its first sleep; import it locally to keep this fix
    # self-contained.
    import time

    parser = argparse.ArgumentParser()
    parser.add_argument('--date', type=str, required=True)
    parser.add_argument('--stat_username', type=str, required=True)
    parser.add_argument('--stat_token', type=str, required=True)
    args = parser.parse_args()

    source_path = "//user_sessions/pub/search/daily/" + args.date + "/errors"
    result_path = "//tmp/white2302/errors_daily_" + args.date

    # Wait for the input table to appear, checking every 100 seconds.
    cluster = clusters.yt.Hahn()
    while not cluster.driver.exists(source_path):
        print("No tables")
        time.sleep(100)

    build_web_table(args.date)
    build_video_table(args.date)

    # Merge both per-service tables and sum counts over identical rows.
    cluster = clusters.yt.Hahn()
    job = cluster.job()
    tab_web = job.table("//tmp/white2302/web_errors_daily_" + args.date)
    tab_video = job.table("//tmp/white2302/video_errors_daily_" + args.date)
    tab = job.concat(tab_web, tab_video)
    tab = tab.groupby('log', 'error', 'service', 'error_type', 'fielddate') \
             .aggregate(count_errors=na.sum("count_errors"))
    tab.put(result_path)
    job.run()

    push_to_stat_new(result_path, "daily", "Video.All/Video%20errors%20stat",
                     args.stat_username, args.stat_token)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
