import pandas
from yql.api.v1.client import YqlClient
import json
import sys
import argparse
import logging
import uuid
from datetime import date
import re
import yt.wrapper as yt
from pathlib import Path

YT_TABLE_TTL = 604800000  # one week, in milliseconds (7 * 24 * 60 * 60 * 1000)
# YT directory used for the result tables, stored without the leading `//`
# (the code that builds YT/YQL paths prepends it).
BALANCE_TMP_PATH = 'home/balance/prod/tmp'


class YqlClientWrapper:
    """
    Helpers around the YQL and YT clients used by this script.

    Every operation is a static method that receives the tokens and the cluster
    explicitly; the attributes stored by ``__init__`` are not read by any
    method of this class.
    """

    def __init__(self, yql_token: str, cluster: str):
        self.yql_token = yql_token
        self.cluster = cluster

    @staticmethod
    def _expiration_time_iso(ttl_ms: int, now=None) -> str:
        """
        Build an absolute UTC timestamp lying ``ttl_ms`` milliseconds after ``now``
        :param ttl_ms: time to live in milliseconds
        :param now: timezone-aware datetime to count from; defaults to the current UTC time
        :return: timestamp formatted as ``YYYY-MM-DDTHH:MM:SS.ffffffZ``
        """
        # Local import: the module itself only imports `date` from datetime.
        from datetime import datetime, timedelta, timezone

        if now is None:
            now = datetime.now(timezone.utc)
        expires_at = now.astimezone(timezone.utc) + timedelta(milliseconds=ttl_ms)
        return expires_at.strftime('%Y-%m-%dT%H:%M:%S.%fZ')

    @staticmethod
    def get_data_from_yt(query: dict, yql_token: str, cluster: str) -> pandas.DataFrame:
        """
        Run the YQL text stored under ``query['query']`` and return its result
        :param query: dict holding the YQL text under the 'query' key
        :param yql_token:
        :param cluster:
        :return: results of the request as a dataframe
        """
        yql_response = YqlClient(db=cluster, token=yql_token).query(query['query']).run()
        logging.info(f"YQL shared link: {yql_response.share_url}")
        return yql_response.get_results().dataframe

    @staticmethod
    def write_table_to_yt(absolute_table_path: str, list_wrapped_emails_uuid: list, yql_token: str, yt_token: str,
                          cluster: str) -> None:
        """
        Write rows to a YT table and schedule the table for automatic removal
        :param absolute_table_path: table path without the leading `//`
        :param list_wrapped_emails_uuid: rows, each one is [message_to_send, message_id]
        :param yql_token:
        :param yt_token:
        :param cluster:
        :return:
        """
        YqlClient(db=cluster, token=yql_token).write_table(f'{absolute_table_path}',
                                                           list_wrapped_emails_uuid,
                                                           ['message_to_send', 'message_id'],
                                                           ['Json', 'String?'],
                                                           external_tx='new'
                                                           )
        yt_client = yt.YtClient(proxy=cluster, token=yt_token)
        # The attribute name used to be misspelled ('@expiraion_time'), so the table
        # never expired. `expiration_time` is an absolute instant, not a duration,
        # hence the TTL is converted to "now + TTL" before being stored.
        expiration_attr_path = yt.ypath_join(f'//{absolute_table_path}', '@expiration_time')
        yt_client.set(expiration_attr_path, YqlClientWrapper._expiration_time_iso(YT_TABLE_TTL))

    @staticmethod
    def check_if_table_exist(absolute_table_path: str, yql_token: str, cluster: str) -> bool:
        """
        Check through a YQL `folder` listing of BALANCE_TMP_PATH whether the table exists
        :param absolute_table_path: table path without the leading `//`
        :param yql_token:
        :param cluster:
        :return: True when the listing contains a table with exactly this path
        """
        query = f'SELECT Path from folder(`//{BALANCE_TMP_PATH}`) where Type = "table" and Path ="{absolute_table_path}"'
        yql_response = YqlClient(db=cluster, token=yql_token).query(query).run()
        logging.info(f"YQL shared link: {yql_response.share_url}")

        df_result_table = yql_response.get_results().dataframe

        return not (df_result_table is None or df_result_table.empty)


def save_data_to_yt(dataframe_input_data: pandas.DataFrame, yql_token: str, yt_token: str, cluster: str,
                    absolute_table_path: str) -> None:
    """
    Store every dataframe row in a YT table next to a freshly generated message id
    :param dataframe_input_data: rows to store, one record per row
    :param yql_token:
    :param yt_token:
    :param cluster:
    :param absolute_table_path: destination table path
    :return:
    """
    records = dataframe_input_data.to_dict('records')

    # Each stored row is a pair: [record as dict, random UUID4 rendered as a string].
    rows_with_message_id: list = [[record, str(uuid.uuid4())] for record in records]

    logging.info(f'Saving result to YT table {absolute_table_path}')
    YqlClientWrapper.write_table_to_yt(
        absolute_table_path,
        rows_with_message_id,
        yql_token,
        yt_token,
        cluster,
    )


def make_daily_result_table_name(url: str, postfix: str = '') -> str:
    """
    Making daily result table name by notify id from sender plus current date
    :param url: notify url; the notify id is everything that follows '/transactional/'
    :param postfix: optional suffix, appended after the date and separated by a dot
    :return: '<notify id>.<YYYYMMDD>[.<postfix>]'; the notify id is replaced with
             'empty_notify_id' when the url is empty or no id can be extracted from it
    """
    prefix = 'empty_notify_id'

    if url:
        notify_id = re.search(r'/transactional/(.*)', url)
        # re.search returns None for a url without '/transactional/'; calling
        # .group() on it used to raise AttributeError. An empty id is treated the
        # same way so the table name never starts with a bare dot.
        if notify_id and notify_id.group(1):
            prefix = notify_id.group(1)
        else:
            logging.warning(f'Notify id was not found in url {url}, using {prefix}')

    suffix = f'.{postfix}' if postfix else ''

    return f"{prefix}.{date.today().strftime('%Y%m%d')}{suffix}"


def parser(argv=None) -> argparse.Namespace:
    """
    Parse command line options of the script
    :param argv: list of argument strings to parse; None (the default) makes
                 argparse read them from sys.argv
    :return: namespace with file_with_query, out_file, yql_token_file, yt_token_file,
             cluster, log_level and file_postfix
    """
    parser_ = argparse.ArgumentParser(description="This program must be run from the root of Arcadia.")
    parser_.add_argument('-fq', '--file_with_query', help='Input file with query', required=True)
    parser_.add_argument('-of', '--out_file', help='Output file name', default='output_yql_json')
    parser_.add_argument('-yqltf', '--yql_token_file', help='YQL token file', required=True)
    parser_.add_argument('-yttf', '--yt_token_file', help='YT token file', required=True)
    parser_.add_argument('-cluster', '--cluster', help='YT cluster', default='hahn', choices=['hahn', 'arnold'])
    parser_.add_argument('-ll', '--log_level', help='Log level YqlOverYt', default='info',
                         choices=['debug', 'info', 'warning', 'error', 'critical'])
    parser_.add_argument('-psfx', '--file_postfix', default='')

    return parser_.parse_args(argv)


def logging_settings(args) -> None:
    """
    Configure the root logger to write to stdout at the requested level
    :param args: object with a `log_level` string attribute; it is upper-cased
                 before being handed to logging.basicConfig
    :return:
    """
    level_name = args.log_level.upper()
    logging.basicConfig(stream=sys.stdout, level=level_name)


def query_processing(file_with_query: str, yql_token: str, yt_token: str,
                     out_file: str, cluster: str, file_postfix: str = '') -> None:
    """
    The YQL query won't be executed again if the table with results already exists.
    :param file_with_query:
    :param yql_token:
    :param yt_token:
    :param out_file:
    :param cluster:
    :param file_postfix:
    :return:
    """
    logging.info('Reading file with query')
    with open(file_with_query) as f:
        query_list = json.load(f)

    if not query_list or len(query_list) == 0:
        logging.info('There are no any query to execute')
        return

    for query in query_list:
        if query['query_type'] == 'YQL':
            table_name = make_daily_result_table_name(query['notify_url'], file_postfix)
            absolute_table_path = f'{BALANCE_TMP_PATH}/{table_name}'
            logging.info(f'Checking if table {absolute_table_path} exists')
            table_exist = YqlClientWrapper.check_if_table_exist(absolute_table_path, yql_token, cluster)

            if table_exist:
                logging.info(f'Table {absolute_table_path} already exists, continue')
                with open(out_file, 'w') as f:
                    f.write(f'{absolute_table_path}')
                continue

            logging.info('Getting data from YT by query')
            dataframe_result_from_yt = YqlClientWrapper.get_data_from_yt(query, yql_token, cluster)

            if dataframe_result_from_yt is None or dataframe_result_from_yt.empty:
                logging.info(f'The query has returned nothing.\n {query}')
                continue

            dataframe_result_from_yt.dropna(inplace=True)
            dataframe_result_from_yt['notify_url'] = query['notify_url']
            dataframe_result_from_yt['to'] = query['to']
            dataframe_result_from_yt['cc'] = query['cc']
            dataframe_result_from_yt['bcc'] = query['bcc']

            save_data_to_yt(dataframe_result_from_yt, yql_token, yt_token, cluster, absolute_table_path)

            logging.info(f'Writing result table path {absolute_table_path} to output file')
            with open(out_file, 'w') as f:
                f.write(f'{absolute_table_path}')


def main() -> None:
    """
    Script entry point: parse options, set up logging, read both tokens and
    process the queries. An empty output file is created afterwards when no
    regular file exists at the output path.
    :return:
    """
    cli_args = parser()
    logging_settings(cli_args)

    yql_token = Path(cli_args.yql_token_file).read_text()
    yt_token = Path(cli_args.yt_token_file).read_text()

    # Trailing whitespace (for example a final newline) is stripped from both tokens.
    query_processing(
        cli_args.file_with_query,
        yql_token.rstrip(),
        yt_token.rstrip(),
        cli_args.out_file,
        cli_args.cluster,
        cli_args.file_postfix,
    )

    out_path = Path(cli_args.out_file)
    if out_path.is_file():
        return
    out_path.write_text('')


# Run the command line tool only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
