# coding: utf8
from __future__ import absolute_import, division, print_function, unicode_literals

import logging
from datetime import timedelta

import sandbox.sandboxsdk.environments as sdk_environments
from sandbox import sdk2
from sandbox.projects.common import solomon

from sandbox.projects.avia.base import AviaBaseTask
from sandbox.projects.avia.lib import logs
from sandbox.projects.avia.lib.datetime_helpers import (
    get_utc_now, _dt_to_unixtime_utc, _round_unixtime, _unixtime_to_msk_dt,
    _dt_to_string, _dt_to_date_string, _timestamp_to_utc_string
)
from sandbox.projects.avia.lib.yt_helpers import _parse_timestamp

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# URL of the ClickHouse HTTP endpoint that the task sends its query to.
CLICK_HOST = 'http://clickhouse.metrika.yandex.net:8123/'


def get_clickhouse_data(query, host, user, password, connection_timeout):
    """
    Run ``query`` against a ClickHouse HTTP endpoint and return the response body.

    :param query: query text, sent as the ``query`` URL parameter of a POST request.
    :param host: URL of the ClickHouse HTTP endpoint.
    :param user: user name for HTTP basic authentication.
    :param password: password for HTTP basic authentication.
    :param connection_timeout: timeout (seconds) passed to ``requests.post``.
    :return: response body decoded to text.
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    import requests

    response = requests.post(
        host,
        auth=(user, password),
        params={'query': query},
        timeout=connection_timeout,
    )
    # Fail loudly on an error status: otherwise the error text in the body
    # would be handed to the caller as if it were query output.
    response.raise_for_status()
    return response.text


def get_clickhouse_df(query, host, user, password, connection_timeout=1500):
    """
    Run ``query`` against ClickHouse and return its rows as a list of pairs.

    The response is parsed as tab-separated text with a header row and is
    expected to contain exactly two columns.

    :param query: query text forwarded to ``get_clickhouse_data``.
    :param host: URL of the ClickHouse HTTP endpoint.
    :param user: user name for HTTP basic authentication.
    :param password: password for HTTP basic authentication.
    :param connection_timeout: timeout forwarded to ``get_clickhouse_data``.
    :return: list of ``(timestamp, visits)`` tuples, one per row; an empty
        list when the response body is empty.
    """
    # io.StringIO exists on both Python 2 and Python 3; it requires text
    # (unicode) input, which is what get_clickhouse_data returns.
    import io

    import pandas as pd

    data = get_clickhouse_data(query, host, user, password, connection_timeout)
    if not data.strip():
        # pandas raises EmptyDataError on input without even a header row.
        return []
    df = pd.read_csv(io.StringIO(data), sep='\t')
    # itertuples() yields the row index first; it is not part of the result.
    return [(timestamp, visits) for _index, timestamp, visits in df.itertuples()]


def get_avia_visits_query(start_dt, end_dt, time_window):
    """
    Build the ClickHouse query that counts visits per time bucket.

    The query reads ``visits_all`` for counter 13979182, groups rows into
    buckets of ``60 * time_window`` seconds by ``StartTime`` and returns
    ``sum(Sign)`` for every bucket, ordered by bucket timestamp, in
    ``TabSeparatedWithNames`` format.

    :param start_dt: lower bound of the requested period.
    :param end_dt: upper bound of the requested period.
    :param time_window: bucket size; multiplied by 60 in the query.
    :return: query text.
    """
    first_bucket = _round_unixtime(_dt_to_unixtime_utc(start_dt), time_window)
    last_bucket = _round_unixtime(_dt_to_unixtime_utc(end_dt), time_window)
    first_day = _dt_to_date_string(start_dt)
    last_day = _dt_to_date_string(end_dt)

    # Rows are filtered both by bucket timestamp and by StartDate.
    template = '''
    SELECT
        round(toUnixTimestamp(StartTime) / (60*{time_window})) * 60*{time_window} as timestamp,
        sum(Sign) as visits
    FROM visits_all

    WHERE {start_timestamp} <= timestamp and timestamp <= {end_timestamp}
        AND CounterID = 13979182
        AND '{start_date}' <= StartDate AND StartDate <= '{end_date}'

    GROUP BY timestamp
    ORDER BY timestamp

    FORMAT TabSeparatedWithNames'''

    return template.format(
        time_window=time_window,
        start_timestamp=first_bucket,
        end_timestamp=last_bucket,
        start_date=first_day,
        end_date=last_day,
    )


class SendAviaVisitsToSolomon(AviaBaseTask):
    """
    Send Avia visits from Metrica ClickHouse to solomon.

    The task queries ClickHouse for visit counts per time bucket, uploads
    them to Solomon as the ``visits`` sensor and stores the newest uploaded
    time point as an attribute of a YT node. When no start time is given, the
    stored time point (shifted back by the configured overlap) is used as the
    start of the next query.
    """
    # YT client, created on first use by _get_yt_client().
    _yt_client = None

    class Parameters(sdk2.Task.Parameters):

        with sdk2.parameters.Group('ClickHouse settings') as ch_block:
            ch_user = sdk2.parameters.String('ClickHouse user', required=True, default='avia')
            ch_vault_user = sdk2.parameters.String('ClickHouse vault user', required=True, default='AVIA')
            ch_vault_name = sdk2.parameters.String('Token vault name', required=True, default='CLICKHOUSE_PASSWORD')

        with sdk2.parameters.Group('YT settings') as yt_block:
            yt_cluster = sdk2.parameters.String('YT cluster', default='hahn', required=True)
            yt_user = sdk2.parameters.String('YT user', required=True)
            yt_token_vault = sdk2.parameters.String('Token vault name', required=True, default='YT_TOKEN')
            solomon_timestamp_path = sdk2.parameters.String('Solomon table for timestamp storage', required=True,
                                                            default='//home/avia/dev/kateov/solomon_timestamps')
            avia_visits_last_load_name = sdk2.parameters.String('Name in timestamp storage', required=True,
                                                                default='avia_visits_last_load')

        # Bound to its own name so that it is not shadowed by the
        # 'Debug settings' group declared below.
        with sdk2.parameters.Group('Solomon settings') as solomon_block:
            solomon_project = sdk2.parameters.String('Solomon project', required=True, default='avia')
            solomon_cluster = sdk2.parameters.String('Solomon cluster', required=True, default='clickhouse')
            solomon_service = sdk2.parameters.String('Solomon service', required=True, default='metrica')

        with sdk2.parameters.Group('Settings') as date_block:
            start_time = sdk2.parameters.String('Start date (or time)', required=False,
                                                default='2019-02-25T00:00:00')
            end_time = sdk2.parameters.String('End date (default now)', required=False)
            time_window = sdk2.parameters.Integer('Time window', required=True, default=20)
            last_saved_overlap = sdk2.parameters.Integer(
                'Latest saved time point overlap (minutes)',
                required=True,
                default=60,
                description='Start time will be shifted back by this amount from last saved '
                            'time point if start date (or time) is NOT explicitly defined',
            )

        with sdk2.parameters.Group('Debug settings') as debug_settings:
            debug_run = sdk2.parameters.Bool('Debug run', default=False, required=True)

    class Requirements(sdk2.Requirements):
        # https://wiki.yandex-team.ru/sandbox/clients/#client-tags-multislot
        cores = 1  # exactly 1 core
        ram = 8192  # 8GiB or less

        class Caches(sdk2.Requirements.Caches):
            pass  # means that task do not use any shared caches

        # NOTE(review): get_clickhouse_df imports pandas, which is not listed
        # here - presumably it comes with the base environment; confirm.
        environments = [sdk_environments.PipEnvironment('yandex-yt', version='0.10.8'),
                        sdk_environments.PipEnvironment('raven'),
                        sdk_environments.PipEnvironment('requests'),
                        ]

    def _get_yt_client(self):
        """
        Return the YT client for the configured cluster, creating it on first call.

        The token is read from the Sandbox vault using the ``yt_user`` and
        ``yt_token_vault`` parameters.
        """
        if self._yt_client is None:
            import yt.wrapper
            self._yt_client = yt.wrapper.YtClient(
                proxy=self.Parameters.yt_cluster,
                token=sdk2.Vault.data(self.Parameters.yt_user, self.Parameters.yt_token_vault or 'YT_TOKEN')
            )
            self._yt_json_format = yt.wrapper.JsonFormat()
        return self._yt_client

    def send_data_to_solomon(self, itertuples_df):
        """
        Upload visit counts to Solomon as the ``visits`` sensor.

        Sensors are uploaded in batches: a batch is sent as soon as it
        reaches 1000 entries, and the remainder is sent at the end.

        :param itertuples_df: iterable of ``(timestamp, visits)`` pairs.
        """
        common_labels = {
            'project': self.Parameters.solomon_project,
            'cluster': self.Parameters.solomon_cluster,
            'service': self.Parameters.solomon_service,
        }
        logger.info(common_labels)
        sensors = []
        for timestamp, visits in itertuples_df:
            # The first argument of logger.info() is the format string, so
            # the values have to be passed as arguments of an explicit template.
            logger.info('timestamp=%s, visits=%s', timestamp, visits)
            sensors.append(
                {
                    'ts': _timestamp_to_utc_string(timestamp),
                    'labels': {'sensor': 'visits'},
                    'value': visits,
                }
            )
            if len(sensors) >= 1000:
                solomon.upload_to_solomon(common_labels, sensors)
                sensors = []
        logger.info(sensors)
        # Nothing is left when the number of rows is a multiple of the batch
        # size (or the input was empty); skip the pointless upload.
        if sensors:
            solomon.upload_to_solomon(common_labels, sensors)

    @staticmethod
    def get_max_timestamp(itertuples_df):
        """
        Return the largest timestamp among ``(timestamp, visits)`` pairs.

        :raises ValueError: if ``itertuples_df`` is empty.
        """
        return max(timestamp for timestamp, _visits in itertuples_df)

    def _get_saved_timestamp(self):
        """
        Read the last saved time point from the YT node attribute.

        :return: the parsed attribute value, or ``None`` if reading or
            parsing fails for any reason.
        """
        try:
            return _parse_timestamp(
                self._get_yt_client().get_attribute(self.Parameters.solomon_timestamp_path,
                                                    self.Parameters.avia_visits_last_load_name)
            )
        except Exception as e:
            # Best effort: a failure here only means there is no saved
            # time point to resume from. Log the exception itself, because
            # ``e.message`` is deprecated and missing on Python 3.
            logger.warning('Failed to read saved timestamp: %s', e)
            return None

    def on_execute(self):
        """
        Query ClickHouse for visits, upload them to Solomon and save progress.

        The start of the period is the ``start_time`` parameter when it
        parses to a value, otherwise the last saved time point shifted back
        by ``last_saved_overlap`` minutes. The end is ``end_time`` or the
        current UTC time. After a successful upload the newest uploaded time
        point is stored on the YT node, but only if it is later than the one
        already stored.
        """
        logs.configure_logging(logs.get_sentry_dsn(self))
        logger.info('Start')

        yt_client = self._get_yt_client()

        if not yt_client.exists(self.Parameters.solomon_timestamp_path):
            yt_client.create('map_node', self.Parameters.solomon_timestamp_path, recursive=True)

        # Keep the stored value separate from the overlap-shifted one: the
        # shifted value is only a query start, while the stored value is what
        # a new time point has to exceed in order to be saved.
        last_saved_dt = self._get_saved_timestamp()
        saved_dt = None
        if last_saved_dt:
            saved_dt = last_saved_dt - timedelta(minutes=self.Parameters.last_saved_overlap)
        start_dt = _parse_timestamp(self.Parameters.start_time) or saved_dt
        end_dt = _parse_timestamp(self.Parameters.end_time) or get_utc_now()
        logger.info('saved_dt=%s, start_dt=%s, end_dt: %s', saved_dt, start_dt, end_dt)

        ch_query = get_avia_visits_query(start_dt, end_dt, self.Parameters.time_window)
        logger.info('ClickHouse query: %s', ch_query)

        stat = get_clickhouse_df(ch_query,
                                 host=CLICK_HOST,
                                 user=self.Parameters.ch_user,
                                 password=sdk2.Vault.data(self.Parameters.ch_vault_user,
                                                          self.Parameters.ch_vault_name))

        if stat:
            logger.info('Sending to Solomon')
            logger.info('Stat=%s', stat)
            self.send_data_to_solomon(stat)

            new_dt = _unixtime_to_msk_dt(self.get_max_timestamp(stat))
            # Compare with the stored value, not the shifted one, so the
            # stored time point never moves backwards.
            if last_saved_dt is None or last_saved_dt < new_dt:
                yt_client.set_attribute(self.Parameters.solomon_timestamp_path,
                                        self.Parameters.avia_visits_last_load_name,
                                        _dt_to_string(new_dt))

                logger.info('New timestamp: %s', _dt_to_string(new_dt))
        else:
            logger.info('No data processed, no attributes saved')

        logger.info('Done')
