import urllib
import os.path
import re
import argparse
import pandas as pd
from bs4 import BeautifulSoup
from slimit.parser import Parser
from slimit.visitors import nodevisitor
from slimit import ast
from pandas import DataFrame

import statface


# Prefixes of the JavaScript source text that the scraper looks for:
# extract_data() treats array items starting with DATE_PREFIX as dates, and
# find_dimension() picks up statements starting with ADDCOLUMN_PREFIX.
DATE_PREFIX = 'new Date('
ADDCOLUMN_PREFIX = 'data.addColumn('


# Keys of the per-page parameter dictionaries built in main().
SERVICE = "service"
# NOTE(review): this key has the same value as TYPE_SIMPLE below; it looks like
# it was meant to be "type". It is only ever used as a dictionary key in this
# file, so the value itself does not affect the output -- confirm before changing.
TYPE = "simple"
DATA_TYPE = "data_type"
SLICE = "slice"
# Values stored under the SLICE and TYPE keys.
SLICE_ALL = "<all>"
SLICE_P50 = "p50"
TYPE_ALL = "<all>"
TYPE_SIMPLE = "simple"


# Characters removed from generated names by df_name(): '<', ',' and '>'.
INVALID_FILE_NAME_CHARACTERS = re.compile('([<,>])')
# Characters replaced with '_' in column names by statface_processor().
# NOTE(review): inside the character class ' -.' is a *range* from space (0x20)
# to '.' (0x2E), so this matches space ! " # $ % & ' ( ) * + , - . and also '/'.
# Confirm whether only space, '-', '.' and '/' were intended.
INVALID_FIELD_NAME_CHARACTERS = re.compile('([ -./])')


def read_html_file(name):
    """Return the entire contents of the file at ``name``.

    The file is opened with the default (text) mode and is closed before
    returning, even if reading fails.
    """
    source = open(name)
    try:
        return source.read()
    finally:
        source.close()


def read_html_url(url):
    """Download ``url`` and return the response body.

    Uses the Python 2 ``urllib.urlopen`` API, like the rest of this script.
    The response handle is closed explicitly once the body has been read (or
    if reading fails) instead of being left for the garbage collector.
    """
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def extract_data(point):
    """Convert one JavaScript array literal into a list of Python values.

    Each element of ``point.items`` is rendered back to JavaScript source with
    ``to_ecma()`` and converted as follows:

    * text starting with ``new Date(`` becomes a ``"Y-MM-DD"`` string built
      from the first three comma-separated arguments; the second argument
      (the month) is incremented by one;
    * the literal ``null`` becomes ``None``;
    * anything else is passed to ``float()``.
    """
    def convert(literal):
        if not literal.startswith(DATE_PREFIX):
            return None if literal == 'null' else float(literal)
        # Drop the prefix and the closing parenthesis, then split the arguments.
        arguments = [piece.strip() for piece in literal[len(DATE_PREFIX):-1].split(',')]
        year = int(arguments[0])
        month = int(arguments[1]) + 1
        day = int(arguments[2])
        return "{}-{:02}-{:02}".format(year, month, day)

    return [convert(item.to_ecma()) for item in point.items]


def find_data(funcdecl):
    """Collect the data rows declared inside a JavaScript function.

    Walks every node under ``funcdecl``; for each variable declaration whose
    initializer is an array literal, every element of that array is converted
    with ``extract_data`` and appended to the result.

    Returns a list of rows (each row is a list of values).
    """
    rows = []
    for node in nodevisitor.visit(funcdecl):
        if not isinstance(node, ast.VarDecl):
            continue
        initializer = node.initializer
        if not isinstance(initializer, ast.Array):
            continue
        rows.extend(extract_data(point) for point in initializer.items)
    return rows


def find_dimension(funcdecl):
    """Collect the column labels declared inside a JavaScript function.

    Every expression statement under ``funcdecl`` whose source text starts
    with ``data.addColumn(`` contributes one label: the second
    comma-separated argument of the call, with single quotes removed and
    surrounding whitespace stripped.

    Returns the labels in the order the statements are visited.
    """
    labels = []
    for node in nodevisitor.visit(funcdecl):
        if not isinstance(node, ast.ExprStatement):
            continue
        statement = node.to_ecma()
        if not statement.startswith(ADDCOLUMN_PREFIX):
            continue
        # Cut off the prefix and the last two characters of the statement
        # text, leaving only the argument list.
        arguments = statement[len(ADDCOLUMN_PREFIX):-2].split(',')
        labels.append(arguments[1].replace("'", '').strip())
    return labels


def extract_frames(data, parser=None):
    """Yield a DataFrame for every chart data function found in an HTML page.

    ``data`` is the HTML text. Each ``<script>`` element inside the document
    body is parsed as JavaScript, and every function declaration whose name
    starts with ``gvisDataLineChart`` produces one frame: rows come from
    ``find_data`` and column names from ``find_dimension``.

    ``parser`` is an optional JavaScript parser to reuse; a new ``Parser`` is
    created when it is omitted.
    """
    parser = parser or Parser()
    document = BeautifulSoup(data, 'html.parser')
    for script in document.body.find_all('script'):
        if not script:
            continue
        syntax_tree = parser.parse(script.get_text())
        chart_functions = (
            node for node in nodevisitor.visit(syntax_tree)
            if isinstance(node, ast.FuncDecl)
            and node.identifier.value.startswith('gvisDataLineChart')
        )
        for function in chart_functions:
            yield DataFrame(data=find_data(function), columns=find_dimension(function))


def print_processor(name, df, params, n):
    """Print the title of the ``n``-th chart followed by its frame.

    ``name`` is accepted so the signature matches the other processors; it is
    not used here.
    """
    chart_title = params[DATA_TYPE][n]
    print(chart_title)
    print(str(df))


def df_name(params, n):
    """Build the lower-case dotted name of the ``n``-th chart of a page.

    The name is ``<service>.<type>.<chart title>.<slice>`` taken from
    ``params``, with spaces turned into underscores and every character
    matched by ``INVALID_FILE_NAME_CHARACTERS`` removed.
    """
    components = (params[SERVICE], params[TYPE], params[DATA_TYPE][n], params[SLICE])
    dotted = '.'.join(components)
    underscored = dotted.replace(' ', '_')
    cleaned = INVALID_FILE_NAME_CHARACTERS.sub('', underscored)
    return cleaned.lower()


def save_processor(name, df, params, n, save_dir):
    file_name = df_name(params, n) + '.json'
    print 'Processing ', file_name
    with open(os.path.join(save_dir, file_name), 'w') as result:
        df.to_json(result, orient='records')


def statface_processor(name, df, params, n, server, save_dir, login, password):
    scale = 'd'
    report = statface.StatReport(os.path.join(save_dir, name, df_name(params, n)), server, login, password)
    print 'Publishing ', report.report_path
    # if report.exists():
    #     report.delete_scale(scale=scale)
    df.rename(columns={'date': statface.FIELD_DATE}, inplace=True)
    df.rename(columns=lambda name: INVALID_FIELD_NAME_CHARACTERS.sub('_', name).lower(), inplace=True)
    df = df.where((pd.notnull(df)), None)
    # report.create(scale, 'KPI {}: {}'.format(params[TYPE], params[DATA_TYPE][n]), statface.config_from_df(df, [statface.FIELD_DATE]))
    result = report.write_data(scale, df)
    print 'Upload',result,'lines'


def main(args):
    """Download the KPI chart pages for one date and publish every chart.

    ``args`` must provide:

    * ``date`` -- date string appended to the base URL of the KPI pages;
    * ``statface_url`` -- Statface host name (``https://`` is prepended);
    * ``report_path`` -- root path of the reports to write;
    * ``id_path`` -- path of a file containing ``login:password``.

    For every known page the HTML is downloaded, each chart frame found by
    ``extract_frames`` is matched by position with the chart titles listed
    for that page, and the frame is handed to the processor.
    """
    prefix = "http://trencher.search.yandex.net/~trencher/kpi-yt/" + args.date + "/"
    print(prefix)
    reader = read_html_url

    # Read the credentials once, closing the file immediately, and parse them
    # up front so a malformed file fails before any page is downloaded.
    with open(args.id_path) as id_file:
        trencher_id = id_file.read()
    # NOTE(review): the text is used verbatim -- a trailing newline in the file
    # ends up in the password, and a password containing ':' is cut at the
    # second colon. Confirm the file format before tightening this.
    credentials = trencher_id.split(':')
    login, password = credentials[0], credentials[1]
    server = "https://" + args.statface_url

    def processor(name, df, params, n):
        return statface_processor(name, df, params, n, server, args.report_path, login, password)

    # Alternative processors for local debugging:
    # processor = lambda name, df, params, n: save_processor(name, df, params, n, os.path.join(os.path.dirname(__file__), 'data'))
    # processor = print_processor

    # Chart titles, in the order the charts appear on a page.
    simple_charts = ["Server Latency", "Time to First Byte", "Time to Last Byte", "SERP size"]
    all_charts = simple_charts + ["Time to First Paint", "Browsers, RU, ttfp", "Browsers, RU, ttfb",
                                  "Browsers, RU, ttlb"]

    def page(service, kpi_type, charts, data_slice):
        # Parameter dictionary describing one HTML page.
        return {SERVICE: service, TYPE: kpi_type, DATA_TYPE: charts, SLICE: data_slice}

    kpi_html = {
        "kpi.simple.www.gVis.html": page("www", TYPE_SIMPLE, simple_charts, SLICE_ALL),
        "kpi.simple.www.p50.gVis.html": page("www", TYPE_SIMPLE, simple_charts, SLICE_P50),
        "kpi.simple.www.tablet.gVis.html": page("tablet", TYPE_SIMPLE, simple_charts, SLICE_ALL),
        "kpi.simple.www.tablet.p50.gVis.html": page("tablet", TYPE_SIMPLE, simple_charts, SLICE_P50),
        "kpi.simple.www.touch.gVis.html": page("touch", TYPE_SIMPLE, simple_charts, SLICE_ALL),
        "kpi.simple.www.touch.p50.gVis.html": page("touch", TYPE_SIMPLE, simple_charts, SLICE_P50),
        "kpi.images.gVis.html": page("images", TYPE_ALL, all_charts, SLICE_ALL),
        "kpi.video.gVis.html": page("video", TYPE_ALL, all_charts, SLICE_ALL),
        "kpi.www.gVis.html": page("www", TYPE_ALL, all_charts, SLICE_ALL),
        "kpi.www.tablet.gVis.html": page("tablet", TYPE_ALL, all_charts, SLICE_ALL),
        "kpi.www.touch.gVis.html": page("touch", TYPE_ALL, all_charts, SLICE_ALL),
    }

    # One JavaScript parser is shared by all pages.
    parser = Parser()
    for html_file, params in kpi_html.items():
        report_name = html_file.replace('.gVis.html', '')
        html = reader(prefix + html_file)
        for n, frame in enumerate(extract_frames(html, parser)):
            processor(report_name, frame, params, n)


if __name__ == "__main__":
    # Command-line entry point: all four options are mandatory strings.
    parser = argparse.ArgumentParser()
    for short_flag, long_flag, description in (
        ("-d", "--date", "Date in format yyyymmdd"),
        ("-s", "--statface_url",
         "Statface url(stat-beta.yandex-team.ru or stat.yandex-team.ru) for reports uploading"),
        ("-r", "--report_path", "Statface report path"),
        ("-i", "--id_path", "Path to trencher id"),
    ):
        parser.add_argument(short_flag, long_flag, help=description, type=str, required=True)
    main(parser.parse_args())
