# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals

import yt.wrapper as yt
import logging
from yql.api.v1.client import YqlClient
import datetime

# Configure the root logger so INFO-level records (and above) are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Default YT proxy used by the yt.* calls in this module; table_reader() overwrites it on every call.
yt.config['proxy']['url'] = 'hahn.yt.yandex.net'


def get_tables(path):
    """Return the result of ``yt.list`` for the YT node at *path*.

    ``main`` treats the returned items as table names located directly
    under *path*. Any error raised by ``yt.list`` propagates to the caller.
    """
    return yt.list(path)


def get_table(path):
    """Fetch the YT node at *path* with its ``key`` and ``resource_usage`` attributes.

    Returns the value produced by ``yt.get``, or ``None`` when the request
    fails for any reason. Callers in this module use ``None`` as "the node is
    not there", so the failure is deliberately not raised; it is logged at
    DEBUG level so that other causes of failure remain diagnosable.
    """
    try:
        return yt.get(path, attributes=['key', 'resource_usage'])
    except Exception as err:
        # Best-effort lookup: keep the None contract, but leave a trace.
        logger.debug('yt.get failed for %s: %s', path, err)
        return None


def create_table(path, ttl=datetime.timedelta(days=3)):
    """Create a YT table at *path* that expires after *ttl*.

    The table is created with two attributes: ``expiration_time`` (current
    UTC time plus *ttl*, ISO formatted) and ``task_status`` set to
    ``'created'``.

    :param path: YT path of the table to create.
    :param ttl: ``datetime.timedelta`` added to the current UTC time to get
        the expiration moment; defaults to three days.
    :return: ``True`` when ``yt.create`` succeeded, ``False`` when it raised
        (the error is logged, not propagated).
    """
    expires_at = datetime.datetime.utcnow() + ttl
    attributes = {
        'expiration_time': expires_at.isoformat(),
        'task_status': 'created',
    }
    try:
        yt.create('table', path, attributes=attributes)
    except Exception as err:
        # Report the failure together with the path so the log line is
        # actionable on its own; callers only receive a boolean.
        logger.warning('Could not create table %s: %s', path, err)
        return False
    return True


def add_attribute(path, key, value):
    """Set attribute *key* to *value* on the YT node at *path* via ``yt.set_attribute``.

    Any error raised by the YT client propagates to the caller.
    """
    yt.set_attribute(path, key, value)


def get_attribute(path, key):
    """Read attribute *key* of the YT node at *path*.

    Returns the value produced by ``yt.get_attribute``, or ``None`` when the
    request fails for any reason. The failure is not raised, matching the
    best-effort contract of ``get_table``, but it is logged at DEBUG level so
    that the cause is not lost.
    """
    try:
        return yt.get_attribute(path, key)
    except Exception as err:
        # Best-effort lookup: keep the None contract, but leave a trace.
        logger.debug('yt.get_attribute(%s, %s) failed: %s', path, key, err)
        return None


def remove_table(path):
    """Remove the YT node at *path* via ``yt.remove``.

    Any error raised by the YT client propagates to the caller.
    """
    yt.remove(path)


def prepare_secrets():
    """Read the service tokens from the process environment.

    :return: tuple ``(YT_TOKEN, HEC_TOKEN, YQL_TOKEN)`` with the values of the
        environment variables of the same names.
    :raises KeyError: if any of the three variables is not set.
    """
    from os import environ

    variable_names = ("YT_TOKEN", "HEC_TOKEN", "YQL_TOKEN")
    return tuple(environ[name] for name in variable_names)


# Read every table listed in paths and return a dict that maps each table's base name to all of its rows.
def table_reader(token, paths, rows_count=None, cluster="hahn", table_format=yt.YsonFormat()):
    """Read whole YT tables into memory.

    Side effect: overwrites the global ``yt.config`` token and proxy URL
    (``<cluster>.yt.yandex.net``) before reading.

    :param token: YT token stored into ``yt.config["token"]``.
    :param paths: iterable of YT table paths.
    :param rows_count: accepted for interface compatibility; it currently has
        no effect, every row of every table is returned.
    :param cluster: cluster name used to build the proxy host name.
    :param table_format: format object handed to ``yt.read_table``.
    :return: dict keyed by the last ``/``-separated component of each path,
        whose values are lists with the rows of that table. Paths sharing the
        same last component overwrite each other.
    """
    yt.config["token"] = token
    yt.config["proxy"]["url"] = cluster + ".yt.yandex.net"

    rows_by_name = {}
    for table_path in paths:
        base_name = table_path.rsplit("/", 1)[-1]
        logger.info(table_path)
        # Materialise the row stream so callers can index and mutate it.
        rows_by_name[base_name] = list(yt.read_table(table_path, table_format))
    return rows_by_name


def send_to_splunk(hec_token, result_tables):
    """Send each entry of *result_tables* to Splunk through ``SplunkHECSender``.

    :param hec_token: token passed to every ``SplunkHECSender`` instance.
    :param result_tables: dict whose keys are used as the Splunk ``source``
        and whose values are handed to ``send_data`` unchanged.
    """
    from hec_sender import SplunkHECSender

    for source, rows in result_tables.items():
        logger.info("Calling splunk sender with source %s ...", source)
        # A fresh sender per entry, because the source is fixed at construction.
        SplunkHECSender(token=hec_token, source=source).send_data(rows)


def send_to_splunk_wrapper(yt_token, hec_token, to_table):
    """Read *to_table*, stamp every row with its event time and ship it to Splunk.

    Each row gets a ``__meta`` entry of the form ``{'time': <epoch seconds>}``
    derived from the row's ``timestamp`` field (``%Y-%m-%dT%H:%M:%S``). After
    the rows are sent, the table's ``task_status`` attribute is set to
    ``'done'``.

    :param yt_token: token forwarded to ``table_reader``.
    :param hec_token: token forwarded to ``send_to_splunk``.
    :param to_table: YT path of the table to read and to mark as done.
    :raises KeyError: if a row has no ``timestamp`` field.
    :raises ValueError: if a ``timestamp`` does not match the expected format.
    """
    result_tables = table_reader(yt_token, [to_table])
    # table_reader received exactly one path, so the dict holds one row list.
    # next(iter(...)) works with Python 2 lists and Python 3 dict views alike,
    # whereas indexing keys() raises TypeError on Python 3.
    rows = next(iter(result_tables.values()))
    logger.info('Result table length = %d', len(rows))

    for row in rows:
        event_time = datetime.datetime.strptime(row['timestamp'], '%Y-%m-%dT%H:%M:%S')
        # NOTE(review): '%s' is a C-library extension (not a documented Python
        # directive) and presumably interprets the naive datetime in the
        # machine's local time zone - confirm this matches the log timestamps.
        row['__meta'] = {'time': event_time.strftime('%s')}

    # The rows were mutated in place, so result_tables already carries __meta.
    send_to_splunk(hec_token, result_tables)
    add_attribute(to_table, 'task_status', 'done')


def get_previous_date(days=14):
    """Return the local time *days* days ago, truncated to the start of the hour.

    :param days: how many days to go back from ``datetime.datetime.today()``.
    :return: string formatted as ``YYYY-MM-DDTHH:00:00``.
    """
    shift = datetime.timedelta(days=days)
    moment = datetime.datetime.today() - shift
    hour_prefix = moment.strftime('%Y-%m-%dT%H:')
    # Minutes and seconds are always zeroed, whatever the current time is.
    return '{}00:00'.format(hour_prefix)


# Directory holding the hourly source access-log tables.
_SOURCE_DIR = '//logs/taxi-access-log/1h'
# Working directory that receives one processed table per source table.
_WORK_DIR = '//home/taxi-fraud/analytics/a-konyshev/internal-logs'

# YQL query template. It takes two %-style arguments: the destination table
# and the source table, in that order. A literal percent sign is written as %%.
# The embedded UDF spells the single-quote character as "'": a backslash-escaped
# quote would be unescaped by this Python literal and reach YQL as three
# consecutive quotes, which opens a triple-quoted string inside the UDF source.
_YQL_TEMPLATE = '''
USE hahn;

use hahn;
PRAGMA yt.InferSchema;

$script = @@

def get_yandex_login(cookies):
    cookies = str(cookies)
    if cookies is None or cookies == '':
        return '-'
    else:
        items = cookies.split(';')
        for i in items:
            tmp = i.strip()
            if tmp.startswith('yandex_login='):
                test = tmp.split('=')
                if len(test) >= 2:
                    result = tmp.split('=')[1]
                    return result.replace("'", '').replace('"', '')
                else:
                    return '-'
        return '-'

def get_yandex_uid(cookies):
    import re
    cookies = str(cookies)
    if cookies is None or cookies == '':
        return '-'
    else:
        items = cookies.split(';')
        for i in items:
            tmp = i.strip()
            if tmp.lower().startswith('session_id_safe='):
                value = ''.join(tmp.split('=')[1:])
                matches = [x for x in re.findall('\\|(\\d{16})', value) if x != '']
                if len(matches) == 1:
                    return matches[0]
        return '-'

def get_yandex_uid_cookie(cookies):
    cookies = str(cookies)
    if cookies is None or cookies == '':
        return '-'
    else:
        items = cookies.split(';')
        for i in items:
            tmp = i.strip()
            if tmp.startswith('yandexuid='):
                test = tmp.split('=')
                if len(test) >= 2:
                    return tmp.split('=')[1]
                else:
                    return '-'
        return '-'
@@;

$udf_yandex_login = Python::get_yandex_login(Callable<(String?)->String>, $script);
$udf_yandex_uid = Python::get_yandex_uid(Callable<(String?)->String>, $script);
$udf_yandex_uid_cookie = Python::get_yandex_uid_cookie(Callable<(String?)->String>, $script);

insert into `%s`
with truncate


SELECT
`timestamp`,
ip as `src_ip`,
WeakField(remote_addr, "String"),
x_real_ip,
WeakField(upstream_status, "String") as `status`,
WeakField(http_host, "String"),
WeakField(method, "String"),
request as `request`,
request_body,
WeakField(cookies, "String"),
vhost,
$udf_yandex_login(WeakField(cookies, "String")) as `yandex_login_from_cookie`,
$udf_yandex_uid(WeakField(cookies, "String")) as `yandex_uid_from_session_id`,
WeakField(referer, "String") as `http_referrer`,
user_agent as `http_user_agent`,
WeakField(bytes_sent, "String") as `bytes`
FROM `%s`
WHERE vhost LIKE '%%.yandex-team.ru'
ORDER BY `timestamp` ASC;

    '''


def _discard_table_quietly(path):
    """Best-effort removal of *path*: a failure is logged and never raised."""
    try:
        remove_table(path)
    except Exception:
        logger.exception('Could not remove %s during error cleanup', path)


def main():
    """Process every hourly access-log table and deliver the result to Splunk.

    Steps:

    1. Read the tokens from the environment and list the source tables.
    2. Cleaning: for every source table name that sorts before the timestamp
       of 14 days ago, remove the matching working table if it exists.
    3. Main job, per source table, driven by the working table's
       ``task_status`` attribute:

       * ``ready_to_splunk`` - send the working table to Splunk and move on;
       * ``created`` - remove the working table and rebuild it;
       * any other status of an existing working table - skip;
       * no working table - build it.

       Building means: create the working table, run the YQL query that fills
       it from the source table, mark it ``ready_to_splunk`` and send it.

    A table whose creation fails is skipped. Any error during the YQL or
    Splunk step is logged, the working table is removed on a best-effort
    basis, and the original error is re-raised, which stops the run.
    """
    logger.info("Preparing secrets ...")
    yt_token, hec_token, yql_token = prepare_secrets()
    tables = get_tables(_SOURCE_DIR)

    # Clean your space
    date_for_clean = get_previous_date(14)
    logger.info('Start cleaning')
    for t in tables:
        # Table names are compared as strings against the cut-off timestamp.
        if date_for_clean > t:
            stale_path = _WORK_DIR + '/' + t
            # The working table may never have been created or may be gone
            # already; removing a missing node would abort the whole run.
            if get_table(stale_path) is None:
                continue
            logger.info('Remove %s', stale_path)
            remove_table(stale_path)
    logger.info('Finish cleaning')

    logger.info('Start main job')
    for t in tables:
        path = _WORK_DIR + '/' + t
        logger.info('Path: %s', path)
        if get_table(path) is not None:
            status = get_attribute(path, 'task_status')
            if status == 'ready_to_splunk':
                # The query finished on an earlier run; only delivery is left.
                logger.info('ready_to_splunk %s', path)
                send_to_splunk_wrapper(yt_token, hec_token, path)
                logger.info('send_to_splunk_wrapper done: %s', path)
                continue
            if status != 'created':
                logger.info('task_status: %s, continue', status)
                continue
            # Status 'created' means the table was never filled: start over.
            logger.info('remove_table %s', path)
            remove_table(path)

        created = create_table(path)
        logger.info('create_table:%s \tresult: %s', path, created)
        if not created:
            # Without a destination table every following step would fail.
            logger.warning('Skipping %s: working table could not be created', path)
            continue

        # create and analyze logs
        from_table = _SOURCE_DIR + '/' + t
        if get_table(from_table) is None:
            logger.info('old table %s', from_table)
            continue

        try:
            with YqlClient(db='hahn', token=yql_token) as client:
                add_attribute(path, 'task_status', 'yql_running')
                request = client.query(_YQL_TEMPLATE % (path, from_table), syntax_version=1)
                request.run()  # run query
                # NOTE(review): the outcome of the query is not inspected here -
                # confirm that get_results() raises when the query fails,
                # otherwise a failed query is still marked ready_to_splunk.
                request.get_results()  # listen to query
                add_attribute(path, 'task_status', 'ready_to_splunk')
                logger.info('finish yql: %s', path)

            # Prepare enrich table
            logger.info("Starting enrichment of logs ...")

            send_to_splunk_wrapper(yt_token, hec_token, path)
            logger.info('send_to_splunk_wrapper done: %s', path)
        except Exception:
            # Log first so the root cause is recorded even if cleanup fails.
            logger.exception('Processing failed for %s', path)
            # Cleanup runs in its own function so that an error inside it
            # cannot replace the exception re-raised below.
            _discard_table_quietly(path)
            raise


# Run the pipeline only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
