import logging

from sandbox.sandboxsdk import environments
from sandbox import sdk2
from sandbox import common
import sandbox.common.types.task as ctt
from sandbox.sandboxsdk import errors

class ComputeAnalyzeInstanceStartTimings(sdk2.Task):
    """Analyze how long the creation of a compute instance took.

    The task resolves the creation time of the instance given in
    ``Parameters.instance_id`` (running a lookup query when the time is not
    yet stored in the creation-time table), then starts two ``RUN_YQL_2``
    child tasks: one computing the time differences between consecutive log
    records of the instance creation, one selecting the full creation log.
    Links to both YQL operations are published as output parameters and as
    task info messages.
    """

    # Directory holding the tables maintained by this task.
    YT_PREFIX = "//home/cloud/compute_analytics"
    # Name of the table (under YT_PREFIX) with instance_id -> event_time rows.
    INSTANCE_CREATION_TIME_TABLE_NAME = "instance_creation_time"
    # ClickHouse-syntax query: creates the creation-time table if needed and
    # inserts the earliest CreateInstance event time found for %INSTANCE_ID%.
    # The %...% placeholders are passed to the child task via
    # ``custom_placeholders``.
    DATE_LOOKUP_QUERY = """
use chyt.hahn;
CREATE TABLE IF NOT EXISTS "%INSTANCE_CREATION_TIME_TABLE%" (
    event_time String,
    instance_id String
) ENGINE = YtTable();
INSERT INTO `%INSTANCE_CREATION_TIME_TABLE%` (instance_id, event_time)
SELECT '%INSTANCE_ID%' as instance_id, iso_eventtime as event_time
FROM concatYtTablesRange('//logs/yc-events-prod/1d')
WHERE event_type = 'yandex.cloud.events.compute.CreateInstance' AND YPathString(_rest, '/details/instance_id') = '%INSTANCE_ID%'
ORDER BY event_time ASC
LIMIT 1
"""
    # YQL query: picks the daily log table matching %START_TIMESTAMP%, copies
    # the rows related to %INSTANCE_ID% (directly or through its operations)
    # into a temporary table and, for rows preceding the "Skip pausing"
    # record, reports the time elapsed since the previous record, largest
    # gaps first.
    TIME_DIFF_QUERY = """
use hahn;
$instance_id = "%INSTANCE_ID%";
$start_ts = "%START_TIMESTAMP%";

$GetDayTable = ($timestamp) -> {
    $parse_time_from_events = DateTime::Parse("%Y-%m-%d %H:%M:%S");
    $fmt = DateTime::Format("logs/yandexcloud-prod-log/1d/%Y-%m-%d");
    $ts = DateTime::MakeTimestamp($parse_time_from_events($timestamp));
    return $fmt($ts);
};

$table = $GetDayTable($start_ts);

$instance_creation_operations = (SELECT DISTINCT OPERATION_ID FROM $table WHERE (INSTANCE_ID = $instance_id or TARGET_ID = $instance_id) AND OPERATION_ID IS NOT NULL);

INSERT INTO @instance_creation_log
SELECT * FROM $table WHERE INSTANCE_ID = $instance_id or TARGET_ID = $instance_id or OPERATION_ID IN $instance_creation_operations;

COMMIT;

$ParseTimestamp = ($timestamp) -> {
    return DateTime::MakeTimestamp(DateTime::ParseIso8601($timestamp));
};

$vm_start_end_time = (select $ParseTimestamp(`TIMESTAMP`) AS Time from @instance_creation_log where MESSAGE like "% Node task already completed. Skip pausing." and PATHNAME = "/usr/lib/yc/compute/lib/python3.5/site-packages/yc_compute/instances/common.py");

select 
TS.Time,
TS.uunit,
TS.message,
TS.pathname,
TS.Time - NTS.Time_New as timediff
from (
SELECT 
    $ParseTimestamp(`TIMESTAMP`) AS Time,
    ROW_NUMBER() OVER w AS rownum,
    `UNIT` as uunit,
    MESSAGE as message,
    PATHNAME as pathname
FROM
    @instance_creation_log
WINDOW w AS (ORDER BY $ParseTimestamp(`TIMESTAMP`) ASC)) as TS LEFT JOIN
(select $ParseTimestamp(`TIMESTAMP`) AS Time_New, CAST(ROW_NUMBER() OVER w AS Uint16) + 1  AS rownum FROM @instance_creation_log WINDOW w AS (ORDER BY $ParseTimestamp(`TIMESTAMP`) ASC)) as NTS ON TS.rownum = NTS.rownum
where TS.Time < $vm_start_end_time
ORDER BY
    timediff DESC
"""
    # YQL query: same row selection as TIME_DIFF_QUERY, but returns the log
    # records themselves (time, unit, message, pathname) in chronological
    # order up to the "Skip pausing" record.
    FULL_INSTANCE_CREATION_LOG_QUERY = """
USE hahn;

$instance_id = "%INSTANCE_ID%";
$start_ts = "%START_TIMESTAMP%";

$GetDayTable = ($timestamp) -> {
    $parse_time_from_events = DateTime::Parse("%Y-%m-%d %H:%M:%S");
    $fmt = DateTime::Format("logs/yandexcloud-prod-log/1d/%Y-%m-%d");
    $ts = DateTime::MakeTimestamp($parse_time_from_events($timestamp));
    return $fmt($ts);
};

$table = $GetDayTable($start_ts);

$instance_creation_operations = (SELECT DISTINCT OPERATION_ID FROM $table WHERE (INSTANCE_ID = $instance_id or TARGET_ID = $instance_id) AND OPERATION_ID IS NOT NULL);

COMMIT;

INSERT INTO @instance_creation_log
SELECT * FROM $table WHERE INSTANCE_ID = $instance_id or TARGET_ID = $instance_id or OPERATION_ID IN $instance_creation_operations;

COMMIT;

$ParseTimestamp = ($timestamp) -> {
    return DateTime::MakeTimestamp(DateTime::ParseIso8601($timestamp));
};

$vm_start_end_time = (select $ParseTimestamp(`TIMESTAMP`) AS Time from @instance_creation_log where MESSAGE like "% Node task already completed. Skip pausing." and PATHNAME = "/usr/lib/yc/compute/lib/python3.5/site-packages/yc_compute/instances/common.py");

SELECT
    $ParseTimestamp(`TIMESTAMP`) AS Time,
    `UNIT`,
    MESSAGE,
    PATHNAME
FROM
    @instance_creation_log
WHERE $ParseTimestamp(`TIMESTAMP`) < $vm_start_end_time
ORDER BY
    Time ASC;
"""

    # Task inputs and outputs.
    class Parameters(sdk2.Task.Parameters):
        instance_id = sdk2.parameters.String(
            "Identifier of instance to be analyzed",
            required=True
        )
        description = "Instance start analytics"
        # Outputs are filled in by on_execute.
        with sdk2.parameters.Output:
            instance_creation_time = sdk2.parameters.String("Instance creation time")
            timediff_request_url = sdk2.parameters.Url(
                "URL to instance creation timediff reuest in YQL"
            )
            instance_creation_log_request_url = sdk2.parameters.Url(
                "URL to full instance creation log reuest in YQL"
            )

    # Ids of the two analytics child tasks; on_execute stores them before
    # waiting and looks the tasks up again by these ids afterwards.
    class Context(sdk2.Context):
        timediff_task_id = None
        instance_creation_log_task_id = None

    class Requirements(sdk2.Task.Requirements):
        # on_execute imports yt.wrapper; presumably these packages provide it.
        environments = (
            environments.PipEnvironment("yandex-yt"),
            environments.PipEnvironment("yandex-yt-yson-bindings"),
            environments.PipEnvironment("yandex-yt-yson-bindings-skynet"),
        )
        cores = 1
        ram = 1024

    def get_instance_creation_time(self, yt, instance_id):
        """Return the stored creation time of ``instance_id``, if any.

        Reads the ``instance_id`` and ``event_time`` columns of the table at
        ``self.instance_creation_time_table`` (assigned in ``on_execute``).

        :param yt: object providing ``read_table`` and ``TablePath``
            (``on_execute`` passes the ``yt.wrapper`` module).
        :param instance_id: instance identifier to look for.
        :return: ``event_time`` of the first row whose ``instance_id``
            matches, or ``None`` when no row matches.
        """
        # NOTE(review): nothing here guards against the table not existing
        # yet; confirm how ``read_table`` behaves in that case.
        rows = yt.read_table(yt.TablePath(
            self.instance_creation_time_table, columns=["instance_id", "event_time"]))
        # Only the first match is needed, so stop scanning as soon as it is found.
        return next(
            (row["event_time"] for row in rows if row["instance_id"] == instance_id),
            None
        )

    def create_yql_subtask(
        self,
        query=None,
        trace_query=True,
        owner="YC_COMPUTE",
        publish_query=True,
        use_v1_syntax=True,
        custom_placeholders=None,
        use_clickhouse_syntax=False
    ):
        """Create and enqueue a ``RUN_YQL_2`` child task.

        All arguments are forwarded to the child task under the same names;
        ``custom_placeholders`` maps placeholder strings in ``query`` to their
        values.

        :return: the enqueued child task.
        """
        task_class = sdk2.Task["RUN_YQL_2"]
        sub_task = task_class(
            self,
            query=query,
            trace_query=trace_query,
            owner=owner,
            publish_query=publish_query,
            use_v1_syntax=use_v1_syntax,
            custom_placeholders=custom_placeholders,
            description='Child of task {}'.format(self.id),
            create_sub_task=False,
            publish_download_link=True,
            use_clickhouse_syntax=use_clickhouse_syntax
        )
        # Minimal resources are requested for the child task.
        sub_task.Requirements.ram = 128
        sub_task.Requirements.cores = 1
        sub_task.Requirements.disk_space = 1
        sub_task.save().enqueue()
        return sub_task

    def await_subtasks(self, subtasks):
        """Wait until every task in ``subtasks`` reaches a FINISH status.

        This method never returns: it always raises ``sdk2.WaitTask``, so no
        statement following a call to it is executed. Child statuses must be
        verified separately once the tasks have finished (see
        ``_ensure_subtasks_succeeded``).

        :param subtasks: child tasks to wait for.
        :raises sdk2.WaitTask: always.
        """
        raise sdk2.WaitTask(subtasks, ctt.Status.Group.FINISH, wait_all=True)

    def _ensure_subtasks_succeeded(self, subtasks):
        """Log the status of each subtask and fail if any did not succeed.

        :param subtasks: finished child tasks to verify.
        :raises errors.SandboxTaskFailureError: if a task's status is not in
            ``ctt.Status.Group.SUCCEED``.
        """
        for task in subtasks:
            logging.info("task: %s", task)
            logging.info("task.status: %s", task.status)
            if task.status not in ctt.Status.Group.SUCCEED:
                raise errors.SandboxTaskFailureError('Child task is failed.')

    def find_task(self, id):
        """Return the single task found by ``self.find(id=id)``.

        :param id: identifier of the task to look up.
        :raises errors.SandboxTaskFailureError: if no task or more than one
            task is found.
        """
        tasks = list(self.find(id=id))
        if not tasks:
            raise errors.SandboxTaskFailureError('Task with id {} not found.'.format(id))
        if len(tasks) > 1:
            raise errors.SandboxTaskFailureError('Found more than one task with id {}.'.format(id))
        return tasks[0]

    def _yql_operation_url(self, task):
        """Build the YQL web UI URL of the operation run by ``task``.

        :param task: YQL child task; its ``_get_share_id``,
            ``Parameters.result_operation_id`` and ``YQL_WEBUI_BASE_URL``
            are used.
        :return: URL string pointing at the shared operation.
        """
        share_id = task._get_share_id(task.Parameters.result_operation_id)
        return "{yql_webui_base_url}/Operations/{operation_id}".format(
            yql_webui_base_url=task.YQL_WEBUI_BASE_URL,
            operation_id=share_id
        )

    def publish_yql_request_url_to_params(self, task, param):
        """Store the YQL operation URL of ``task`` in a parameter of this task.

        :param task: YQL child task whose operation URL is published.
        :param param: name of the attribute of ``self.Parameters`` to set.
        """
        setattr(self.Parameters, param, self._yql_operation_url(task))

    def add_link(self, title, task):
        """Add an info message with an HTML link to the YQL operation of ``task``.

        :param title: text shown before the link.
        :param task: YQL child task whose operation URL is linked.
        """
        message = "{title}: <a href=\"{operation_url}\">{operation_url}</a>".format(
            title=title,
            operation_url=self._yql_operation_url(task)
        )
        # The message contains markup, so escaping is disabled.
        self.set_info(message, do_escape=False)

    def on_execute(self):
        """Resolve the instance creation time and run the analytics queries.

        ``await_subtasks`` always raises ``sdk2.WaitTask``, so each stage that
        waits ends the current run of this method; presumably the task is
        executed again from the top once the children finish, with
        ``memoize_stage`` blocks that already ran being skipped.

        :raises common.errors.TaskError: if the creation time of the instance
            cannot be determined.
        :raises errors.SandboxTaskFailureError: if an analytics child task
            cannot be found or did not succeed.
        """
        # Imported locally; presumably available only through the
        # environments declared in Requirements.
        import yt.wrapper as yt
        yt.config['token'] = sdk2.Vault.data("YC_COMPUTE", "YT_TOKEN")
        yt.config['proxy']['url'] = 'hahn'

        instance_id = self.Parameters.instance_id
        self.instance_creation_time_table = "{}/{}".format(self.YT_PREFIX, self.INSTANCE_CREATION_TIME_TABLE_NAME)

        instance_creation_time = self.get_instance_creation_time(yt, instance_id)
        with self.memoize_stage.lookup_date:
            if instance_creation_time is None:
                task = self.create_yql_subtask(
                    query=self.DATE_LOOKUP_QUERY,
                    custom_placeholders={
                        "%INSTANCE_CREATION_TIME_TABLE%": self.instance_creation_time_table,
                        "%INSTANCE_ID%": instance_id
                    },
                    use_clickhouse_syntax=True
                )
                # Raises WaitTask; the creation time is read again by the
                # lookup above when this method runs next.
                self.await_subtasks([task])
        if instance_creation_time is None:
            raise common.errors.TaskError(
                "Instance with id {} is not found in logs/yc-events-prod table. "
                "Are instance logs already there? Or maybe more than a year has passed?".format(instance_id))
        logging.info("Instance creation_time = %s", instance_creation_time)
        self.Parameters.instance_creation_time = instance_creation_time

        with self.memoize_stage.get_instance_analytics:
            analytics_placeholders = {
                "%INSTANCE_ID%": instance_id,
                "%START_TIMESTAMP%": instance_creation_time
            }
            timediff_task = self.create_yql_subtask(
                query=self.TIME_DIFF_QUERY,
                custom_placeholders=dict(analytics_placeholders)
            )
            # Ids are saved in the context because the task objects are not
            # available after the wait below.
            self.Context.timediff_task_id = timediff_task.id
            full_instance_creation_log_task = self.create_yql_subtask(
                query=self.FULL_INSTANCE_CREATION_LOG_QUERY,
                custom_placeholders=dict(analytics_placeholders)
            )
            self.Context.instance_creation_log_task_id = full_instance_creation_log_task.id

            self.await_subtasks([timediff_task, full_instance_creation_log_task])

        timediff_task = self.find_task(self.Context.timediff_task_id)
        full_instance_creation_log_task = self.find_task(self.Context.instance_creation_log_task_id)
        # Fail early if a child failed instead of publishing links to
        # operations that may not exist.
        self._ensure_subtasks_succeeded([timediff_task, full_instance_creation_log_task])

        self.publish_yql_request_url_to_params(timediff_task, "timediff_request_url")
        self.publish_yql_request_url_to_params(full_instance_creation_log_task, "instance_creation_log_request_url")
        self.add_link("Timediff analytics URL", timediff_task)
        self.add_link("Full instance log creation URL", full_instance_creation_log_task)

