import datetime
import json
import subprocess

import pandas as pd
import requests
import logging

import typing as tp

from config import CH_ALIAS, SHOWS_CLICKS_CLUSTER, LOGIN_PUID_TABLENAME, PUSHES_CLUSTER, METRICS_PATH, \
    ID_LOGIN_TABLENAME
from exceptions import DataError, ChytException
from libs import Json, create_board_html, gather_results_from_api, get_author_boards_meta, get_board_block_html, \
    aggregate_and_fill_values, get_shows_clicks_by_json, datetime_from_date, PLOTLY_SCRIPT, \
    create_graphs_html, get_pushes_by_json, get_push_events_from_dataframe, Board, BoardCreateSeries, parse_time, \
    create_time_stopper, get_author_cards_meta, Card, CardCreateSeries

logger = logging.getLogger(__name__)


def get_token() -> str:
    """Return the contents of the token file with surrounding whitespace removed.

    The token is read from a fixed path on every call; any error raised by
    ``open`` (for example when the file does not exist) propagates to the caller.
    """
    token_path = '/home/nsbondartsev/.yt/token'
    with open(token_path, 'r') as token_file:
        return token_file.read().strip()


def execute_chyt_query(query: str, cluster: str, token: str, alias: str = '*ch_public', timeout: int = 600) -> str:
    """POST a query to the cluster's ``/query`` endpoint and return the response text.

    Args:
        query: query text; it is sent as the request body.
        cluster: cluster name used to build the ``http://{cluster}.yt.yandex.net`` host.
        token: value sent as the ``password`` URL parameter.
        alias: value sent as the ``database`` URL parameter.
        timeout: timeout in seconds passed to ``requests``.

    Returns:
        The response body, stripped and decoded to ``str``.

    Raises:
        ChytException: if the response lacks the ``X-Yt-Trace-Id`` or
            ``X-ClickHouse-Query-Id`` header. The exception carries the query.
        requests.HTTPError: if the response status is an error status.
    """
    logger.info('Executing query: %s', query)
    proxy = f'http://{cluster}.yt.yandex.net'
    # NOTE(review): the token travels in the URL, so it may show up in any
    # URL-level logging of the HTTP stack.
    url = f'{proxy}/query?database={alias}&password={token}'
    # Send the body as explicit UTF-8 bytes. This puts the same bytes on the wire
    # as the former "encode utf-8, decode latin-1" round trip was meant to, without
    # relying on how the HTTP stack encodes ``str`` bodies.
    payload = query.encode('utf-8')
    # The context manager closes the session (and its connection pool) once the
    # response has been fully read; the body is not streamed, so ``resp.content``
    # stays available afterwards.
    with requests.Session() as session:
        resp = session.post(url, data=payload, timeout=timeout)
    if resp.status_code != 200:
        logger.error('Response status: %s', resp.status_code)
        logger.error('Response headers: %s', resp.headers)
        logger.error('Response content: %s', resp.content)
    try:
        logger.info('Trace id: %s', resp.headers['X-Yt-Trace-Id'])
        logger.info('Query id: %s', resp.headers['X-ClickHouse-Query-Id'])
    except KeyError as err:
        # Missing tracing headers are treated as a failed query.
        raise ChytException(query) from err
    resp.raise_for_status()
    content = resp.content.strip().decode()
    logger.info('Time spent: %s seconds, length of content: %s', resp.elapsed.total_seconds(), len(content))
    return content


def get_metric_tables() -> tp.Sequence[str]:
    """List the node names under ``METRICS_PATH`` by running the ``yt list`` CLI.

    Returns:
        One entry per line of the command's standard output, or an empty list
        when the command prints nothing.

    Raises:
        DataError: if the command exits with a non-zero return code.
    """
    # stderr is captured separately: when it is merged into stdout the error
    # stream returned by the process is always None, so failures cannot be
    # detected and error text would be returned as table names.
    proc = subprocess.run(
        ['yt', 'list', f'--proxy={SHOWS_CLICKS_CLUSTER}', METRICS_PATH],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if proc.returncode != 0:
        logger.error(
            'yt list exited with code %s: %s',
            proc.returncode,
            proc.stderr.decode(errors='replace').strip(),
        )
        raise DataError('There was an error while getting YT tables with metrics')
    listing = proc.stdout.decode().strip()
    if not listing:
        return []
    return listing.split('\n')


def get_chyt_shows_clicks(
    board_ids: tp.Sequence[str],
    end_date: datetime.date,
    horizon: int = 30,
    split_ui: bool = False,
    possible_locations: tp.Tuple[str, ...] = ()
) -> tp.List[Json]:
    """Fetch aggregated shows and clicks for the given boards.

    Rows are grouped by ``timestamp`` rounded down to a multiple of 60 and by
    location (or by ``"location, ui"`` when ``split_ui`` is set). ``clicks`` is
    the sum of the ``clicks`` column and ``shows`` is the row count.

    Args:
        board_ids: board identifiers to select.
        end_date: last date of the table range.
        horizon: number of days before ``end_date`` at which the range starts.
        split_ui: group by location and ui instead of location only.
        possible_locations: when non-empty, restrict rows to these locations.

    Returns:
        The ``data`` list of the JSON response; an empty list when
        ``board_ids`` is empty.
    """
    if not board_ids:
        # No boards means nothing can match; this also avoids emitting an
        # empty ``IN ()`` list in the query.
        return []

    start_date = end_date - datetime.timedelta(days=horizon)
    board_ids_str = ', '.join(f"'{board_id}'" for board_id in board_ids)
    groupper = 'concat(location, \', \', ui)' if split_ui else 'location'
    # Build the list explicitly: interpolating the tuple itself yields Python's
    # repr, e.g. ``('feed',)`` with a trailing comma for a single location.
    # NOTE(review): values are interpolated without escaping, as for board ids.
    if possible_locations:
        locations_str = ', '.join(f"'{location}'" for location in possible_locations)
        location_condition = f'AND location IN ({locations_str})'
    else:
        location_condition = ''

    query = f'''
    select
        intDiv(timestamp, 60) * 60 as timestamp,
        {groupper} as groupper,
        sum(clicks) as clicks,
        count(*) as shows
    FROM concatYtTablesRange('//home/collections/analytics/boards_viewer/shows_clicks','{start_date}', '{end_date}')
    WHERE board_id IN ({board_ids_str})
    {location_condition}
    GROUP BY timestamp, {groupper}
    format JSON
    '''
    # TODO: populate token from outside of the function to make testing easier afterwards
    token = get_token()

    result = execute_chyt_query(query=query, cluster=SHOWS_CLICKS_CLUSTER, alias=CH_ALIAS, token=token)
    parsed = json.loads(result)

    return parsed['data']


def get_additional_stats(board_id: str) -> tp.Mapping[str, tp.Any]:
    """Merge the first matching row of every metrics table into one mapping.

    Each table returned by ``get_metric_tables`` is queried for rows with the
    given ``board_id``. Tables with an empty response or no matching rows are
    skipped; on key clashes later tables overwrite earlier ones.
    """
    table_names = get_metric_tables()
    stats = {}
    token = get_token()
    for table_name in table_names:
        query = f'''
        SELECT * FROM `{METRICS_PATH}/{table_name}`
        WHERE board_id = '{board_id}'
        FORMAT JSON
        '''
        raw = execute_chyt_query(query, cluster=SHOWS_CLICKS_CLUSTER, token=token, alias=CH_ALIAS)
        if raw:
            try:
                first_row = json.loads(raw)['data'][0]
            except IndexError:
                # The table has no row for this board.
                continue
            stats.update(first_row)
    return stats


def build_full_board_html(
    board_id: str,
    end_date: datetime.date,
    horizon: int = 30,
    freq: str = 'h',
    split_ui: bool = False,
    possible_locations: tp.Tuple[str, ...] = ()
) -> str:
    """Build the HTML page for a single board.

    Loads the board's shows/clicks rows and its additional metrics, then
    delegates rendering to ``create_board_html`` with ``end_date`` at 23:59:59
    as the filter datetime.
    """
    rows = get_chyt_shows_clicks([board_id], end_date, horizon, split_ui, possible_locations)
    stats = get_additional_stats(board_id)
    day_end = datetime_from_date(end_date, 23, 59, 59)

    html = create_board_html(
        board_id=board_id,
        filter_datetime=day_end,
        shows_clicks_json=rows,
        additional_stats=stats,
        horizon=horizon,
        freq=freq
    )
    return html


def get_author_puid_by_login(login: str) -> str:
    """Look up the ``puid`` stored for ``login`` in the login-to-puid table.

    Raises:
        DataError: if the query returns no rows for the login.
    """
    query = f'''
    SELECT puid
    FROM `//home/collections/analytics/boards_viewer/{LOGIN_PUID_TABLENAME}`
    WHERE login='{login}'
    FORMAT JSON
    '''
    response = execute_chyt_query(query, SHOWS_CLICKS_CLUSTER, get_token(), CH_ALIAS)
    rows = json.loads(response)['data']
    if rows:
        # Only the first matching row is used.
        return rows[0]['puid']
    raise DataError(f'Was not able to find puid for login {login}')


def get_author_login_by_id(author_id: str) -> str:
    """Look up the ``login`` stored for ``author_id`` in the id-to-login table.

    Raises:
        DataError: if the query returns no rows for the author id.
    """
    query = f'''
    SELECT login
    FROM `//home/collections/analytics/boards_viewer/{ID_LOGIN_TABLENAME}`
    WHERE id='{author_id}'
    FORMAT JSON
    '''
    response = execute_chyt_query(query, SHOWS_CLICKS_CLUSTER, get_token(), CH_ALIAS)
    rows = json.loads(response)['data']
    if rows:
        # Only the first matching row is used.
        return rows[0]['login']
    raise DataError(f'Was not able to find login for author_id {author_id}')


def get_author_id_by_boards(login: str, boards: tp.Sequence[Board]) -> str:
    """Return the owner id found on the first board in ``boards``.

    Args:
        login: author login; used only in error messages.
        boards: boards of the author; only the first element is inspected.

    Raises:
        DataError: if ``boards`` is empty, or if the first board lacks the
            ``owner`` or ``id`` key.
    """
    try:
        return boards[0]['owner']['id']
    except IndexError as e:
        raise DataError(f'No boards have been found for author {login}. Unable to get ID.') from e
    except KeyError as e:
        # Name the missing key so the error is actionable instead of an empty DataError.
        raise DataError(f'Unable to get ID for author {login}: board has no key {e}.') from e


def get_user_pushes(
    puids: tp.Sequence[str],
    start_date: datetime.date,
    end_date: datetime.date,
    push_types: tp.Tuple[str, ...] = (),
    columns: tp.Tuple[str, ...] = ()
) -> tp.Sequence[Json]:
    """Fetch push rows for the given users from the pushes tables.

    Args:
        puids: user identifiers to select.
        start_date: first date of the table range.
        end_date: last date of the table range.
        push_types: when non-empty, restrict rows to these ``push_type`` values.
        columns: select-list expressions; every column is selected when empty.

    Returns:
        The ``data`` list of the JSON response; an empty list when ``puids``
        is empty.
    """
    if not puids:
        # No users means nothing can match; this also avoids emitting an
        # empty ``IN ()`` list in the query.
        return []

    select_list = ', '.join(columns) if columns else '*'
    # Build the list explicitly: interpolating the tuple itself yields Python's
    # repr, e.g. ``('promo',)`` with a trailing comma for a single push type.
    # NOTE(review): values are interpolated without escaping, as for puids.
    if push_types:
        push_types_str = ', '.join(f"'{push_type}'" for push_type in push_types)
        types_condition = f'AND push_type IN ({push_types_str})'
    else:
        types_condition = ''
    puids_str = ', '.join(f"'{puid}'" for puid in puids)

    query = f'''
    SELECT {select_list}
    FROM concatYtTablesRange('//home/images/dev/nsbondartsev/TASKS/COLA-240_pushes','{start_date}', '{end_date}')
    WHERE puid IN ({puids_str})
    {types_condition}
    FORMAT JSON
    '''

    res = execute_chyt_query(query, PUSHES_CLUSTER, get_token(), CH_ALIAS)
    parsed = json.loads(res)
    return parsed['data']


def get_board_create_events_from_boards(boards: tp.Sequence[Board]) -> BoardCreateSeries:
    """Wrap the parsed ``service.created_at`` value of every board in a ``BoardCreateSeries``."""
    created_at = (parse_time(board['service']['created_at']) for board in boards)
    series = pd.Series(data=created_at)
    return BoardCreateSeries(series)


def get_card_create_events(cards: tp.Sequence[Card]) -> CardCreateSeries:
    """Wrap the parsed ``service.created_at`` value of every card in a ``CardCreateSeries``."""
    created_at = (parse_time(card['service']['created_at']) for card in cards)
    series = pd.Series(data=created_at)
    return CardCreateSeries(series)


def build_full_author_html(
    login: str,
    end_date: datetime.date,
    horizon: int = 30,
    freq='h',
    split_ui: bool = False,
    possible_locations: tp.Tuple[str, ...] = ()
):
    """Build the HTML page for an author.

    The page shows the author's login, puid and author id, graphs of aggregated
    shows/clicks with push, board-creation and card-creation events, and one
    HTML block per board. The reporting window ends at ``end_date`` 23:59:59
    and starts ``horizon`` days earlier.

    Raises:
        DataError: propagated from the puid lookup or from the author-id
            lookup (for example when the author has no boards).
    """
    # Reporting window.
    window_end = datetime_from_date(end_date, 23, 59, 59)
    window_start = window_end - datetime.timedelta(days=horizon)

    # Predicate passed to the API gatherer so it can stop once results are
    # older than the window start.
    time_to_stop = create_time_stopper(window_start)

    # Author's boards and cards from the APIs.
    boards = gather_results_from_api(get_author_boards_meta, {'login': login}, time_to_stop=time_to_stop)
    cards = gather_results_from_api(get_author_cards_meta, {'login': login}, time_to_stop=time_to_stop)
    board_ids = [board['id'] for board in boards]

    # Author identifiers.
    puid = get_author_puid_by_login(login)
    author_id = get_author_id_by_boards(login, boards)

    # One HTML block per board, concatenated in order.
    boards_html = ''.join(get_board_block_html(board) for board in boards)

    # Pushes sent to the author inside the window.
    pushes = get_pushes_by_json(
        get_user_pushes(
            (puid,), window_start.date(), window_end.date(), (), ('push_type', 'ts as timestamp')
        )
    )

    # Push events first, then board and card creation events.
    events = list(get_push_events_from_dataframe(pushes)) if len(pushes) else []
    events += [
        get_board_create_events_from_boards(boards),
        get_card_create_events(cards),
    ]

    # Shows and clicks, aggregated over the window at the requested frequency.
    shows_clicks = get_shows_clicks_by_json(
        get_chyt_shows_clicks(board_ids, end_date, horizon, split_ui, possible_locations)
    )
    aggregated_dfs = aggregate_and_fill_values(shows_clicks, window_start, window_end, freq)

    graphs_html = create_graphs_html(aggregated_dfs, events, window_start)

    return f'''
    <html>
        <head>
            <link rel="stylesheet" href="css/styles.css">
            {PLOTLY_SCRIPT}
        </head>
        <body>
            <h1>{login}</h1>
            <a href='https://yandex.ru/collections/user/{login}'>{login}</a>
            <div>puid:</div>
            <div>{puid}</div>
            <div>author_id:</div>
            <div>{author_id}</div>
            <div>
                {graphs_html}
            </div>
            <div>
                {boards_html}
            </div>
        </body>
    </html>
    '''
