from sandbox import sdk2

import logging

from os.path import join as pj
import os
import re
import solomon_util

from sandbox.sdk2.vcs.svn import Arcadia
from sandbox.sandboxsdk import environments
from sandbox.projects.common.arcadia import sdk as arcadia_sdk

import random
import string

import datetime
from datetime import timedelta
import time
import sys
import json


class UsDataSizeGraphs(sdk2.Task):
    class Requirements(sdk2.Requirements):
        # Pip environments requested for this task. 'yandex-yt' and 'networkx'
        # are pinned to explicit versions; 'requests' and 'future' are not.
        environments = [
            environments.PipEnvironment('yandex-yt', version='0.10.8'),
            environments.PipEnvironment('requests'),
            environments.PipEnvironment('networkx', version='2.2', use_wheel=True),
            environments.PipEnvironment('future'),
        ]

    class Parameters(sdk2.Parameters):
        # Input parameters of the task, grouped by purpose.
        kill_timeout = 3600  # presumably seconds (one hour) -- TODO confirm against sdk2 docs

        # Vault secret (owner + name) holding the Solomon token; read by GetSolomonToken.
        with sdk2.parameters.Group("Solomon token") as solomon_token_block:
            solomon_token_secret_owner = sdk2.parameters.String("Owner of sb-vault-secret with solomon token", required=True, default="USERSESSIONSTOOLS")
            solomon_token_secret_name = sdk2.parameters.String("Name of sb-vault-secret with solomon token", required=True, default="robot_make_sessions_solomon_token")
        # Date selection and name filters. HandleAndInitializeDatetime uses
        # datetime_str only when both range bounds are empty; the filter_*
        # strings are split on ',' by the get_tasks_list* methods.
        with sdk2.parameters.Group("Common setting") as setting_block:
            datetime_str = sdk2.parameters.String("datetime_str. Datetime in format <YYYY-MM-DD>. Or 'yesterday' or number of delta days", default="yesterday")
            first_datetime_str = sdk2.parameters.String("first_datime_str. First in diapason. Datetime in format <YYYY-MM-DD>. If it's set, then datetime_str is ignored and last_datetime_str is required. And diapason will be handled backwards", default="")
            last_datetime_str = sdk2.parameters.String("last_datime_str. Last in diapason. Datetime in format <YYYY-MM-DD>. If it's set, then datetime_str is ignored and first_datetime_str is required. And diapason will be handled backwards", default="")
            filter_logs = sdk2.parameters.String("Filter log names, comma-separated")
            filter_sessions = sdk2.parameters.String("Filter sessions names, comma-separated")
            filter_tables = sdk2.parameters.String("Filter sessions table names, comma-separated")

        # Switches selecting which groups of tables are measured and whether
        # publish events and per-column sizes are checked.
        with sdk2.parameters.Group("Features to calc") as features_block:
            calc_logs = sdk2.parameters.Bool("Process source //logs", default=True)
            calc_build = sdk2.parameters.Bool("Process intermediate //user_sessions/build", default=True)
            calc_pub = sdk2.parameters.Bool("Process published //user_sessions/pub", default=True)
            calc_pub_nano = sdk2.parameters.Bool("Process published //user_sessions/pub/nano_sessions", default=True)
            check_published = sdk2.parameters.Bool("Check publish events", default=True)
            calc_column_size = sdk2.parameters.Bool("Calculate table columns sizes", default=True)

        # Vault secret (owner + name) holding the Reactor token; read by GetReactorToken.
        with sdk2.parameters.Group("Reactor token") as reactor_token_block:
            reactor_token_secret_owner = sdk2.parameters.String("Owner of sb-vault-secret with reactor token", required=True)  # on practice: USERSESSIONSTOOLS. But do not specify default here for safety
            reactor_token_secret_name = sdk2.parameters.String("Name of sb-vault-secret with reactor token", required=True)  # on practice: robot-make-sessions-reactor-token (TODO: create secret). But do not specify default here for safety

        # Vault secret (owner + name) holding the YT token; read in on_execute.
        with sdk2.parameters.Group("Yt token") as yt_token_block:
            yt_token_secret_owner = sdk2.parameters.String("Owner of sb-vault-secret with yt token", required=True)  # on practice: USERSESSIONSTOOLS. But do not specify default here for safety
            yt_token_secret_name = sdk2.parameters.String("Name of sb-vault-secret with yt token", required=True)  # on practice: robot-make-sessions-yt-token (TODO: create secret). But do not specify default here for safety

        # is_dry is forwarded as the `dry` argument of solomon_util.send_to_solomon.
        with sdk2.parameters.Group("Debug") as debug:
            is_dry = sdk2.parameters.Bool("Dry run?", default=False)

    class Context(sdk2.Context):
        # True until on_execute has built the date list and the per-date draw
        # task lists; on_execute then sets it to False.
        first_time = True
        # Incremented by raise_for_errors_if_needed each time accumulated
        # errors are tolerated instead of raised.
        task_autoretries_count = 0

    def GetSolomonToken(self):
        """Return the Solomon token stored in the Vault secret named by the task parameters."""
        params = self.Parameters
        return sdk2.Vault.data(params.solomon_token_secret_owner, params.solomon_token_secret_name)

    def GetReactorTokenPath(self):
        secret_content = self.GetReactorToken()
        reactor_token_path = pj(str(self.path()), 'reactor_token_file')
        os.system('echo {} > {}'.format(secret_content, reactor_token_path))

        return reactor_token_path

    def CheckoutArcadiaSubfolder(self, arcadia_subfolder, arcadia_src_dir, svn_url, use_cache=True):
        """Fetch one Arcadia subfolder into ``arcadia_src_dir`` and return its local path.

        When ``svn_url`` contains an ``@...`` suffix, the subfolder is inserted
        before the last ``@`` so the suffix stays at the end of the URL.
        With ``use_cache`` the subfolder is mounted through
        ``arcadia_sdk.mount_arc_path`` and copied; otherwise it is checked out
        with ``sdk2.svn.Arcadia.checkout``.
        """
        url_base, at_sign, url_suffix = svn_url.rpartition('@')
        if at_sign:
            dir_url = pj(url_base, arcadia_subfolder) + at_sign + url_suffix
        else:
            dir_url = pj(svn_url, arcadia_subfolder)

        local_dir = pj(arcadia_src_dir, arcadia_subfolder)

        if not use_cache:
            sdk2.svn.Arcadia.checkout(dir_url, local_dir)
            return local_dir

        with arcadia_sdk.mount_arc_path(dir_url, use_arc_instead_of_aapi=True) as mounted:
            sdk2.paths.copy_path(str(mounted), local_dir)
        return local_dir

    def PrepareArcadia(self):
        """Fetch the reactor and REM client sources from trunk and add them to ``sys.path``.

        The sources go into a directory under the task path whose name ends
        with 20 random alphanumeric characters.
        """
        svn_url = Arcadia.ARCADIA_TRUNK_URL

        alphabet = string.ascii_lowercase + string.ascii_uppercase + string.digits
        rng = random.SystemRandom(time.time())
        random_suffix = ''.join([rng.choice(alphabet) for _ in range(20)])
        arcadia_src_dir = pj(str(self.path()), 'local_arcadia_' + random_suffix)

        for arcadia_subfolder in ('quality/user_sessions/reactor', 'rem/client'):
            local_dir = self.CheckoutArcadiaSubfolder(arcadia_subfolder, arcadia_src_dir, svn_url=svn_url)
            sys.path.append(local_dir)

    class ArtifactInstanceInfoAccumulator(object):
        """Cache of Reactor artifact-instance lookups keyed by user timestamp and namespace path."""

        def __init__(self, reactor_client):
            self.reactor_client = reactor_client
            # {user_timestamp: {namespace_path: list of instances, or None when none were found}}
            self.user_timestamp_namespace_path_to_instances = {}

        def has_instance_with_target_usertime(self, namespace_path, user_timestamp):
            """Return True if the artifact has a CREATED/ACTIVE instance for ``user_timestamp``.

            Reactor is queried at most once per (timestamp, namespace) pair;
            later calls are answered from the cache.
            """
            per_namespace = self.user_timestamp_namespace_path_to_instances.setdefault(user_timestamp, {})

            if namespace_path not in per_namespace:
                logging.info("checking artifact for {}".format(namespace_path))
                response = self.reactor_client.get_artifact_range(
                    namespace=namespace_path,
                    limit=100,
                    statuses=["CREATED", "ACTIVE"],
                    from_user_ts=user_timestamp,
                    to_user_ts=user_timestamp + 1,
                )
                art_insts = response["range"]
                per_namespace[namespace_path] = None if len(art_insts) == 0 else art_insts

            return bool(per_namespace[namespace_path])

    def GetReactorToken(self):
        """Return the Reactor token stored in the Vault secret named by the task parameters."""
        params = self.Parameters
        return sdk2.Vault.data(params.reactor_token_secret_owner, params.reactor_token_secret_name)

    def IsArtifactInstantiated(self, artifact_path, datetime_dt, artifact_instance_info_accumulator):
        """Return True if the Reactor artifact has an instance for ``datetime_dt``.

        Dates that ``IsTooOldDatetime`` reports as too old are treated as
        instantiated without consulting the accumulator.
        """
        from us_processes.time_util import _convert_to_unixtime

        if self.IsTooOldDatetime(datetime_dt):
            return True

        usertime_ts = _convert_to_unixtime(datetime_dt)
        has_instance = artifact_instance_info_accumulator.has_instance_with_target_usertime(artifact_path, usertime_ts)
        return bool(has_instance)

    def HandleAndInitializeDatetime(self):
        from us_processes.time_util import datetime_round

        self.Context.datetime_strs = []
        if self.Parameters.first_datetime_str != "" or self.Parameters.last_datetime_str != "":
            if self.Parameters.first_datetime_str == "" or self.Parameters.last_datetime_str == "":
                raise Exception("Only one bound of datetime diapason is set! Zero or two is required!")
            first_datetime_dt = datetime.datetime.strptime(self.Parameters.first_datetime_str, "%Y-%m-%d")
            last_datetime_dt = datetime.datetime.strptime(self.Parameters.last_datetime_str, "%Y-%m-%d")
            day_delta = datetime.timedelta(days=1)
            while first_datetime_dt <= last_datetime_dt:
                self.Context.datetime_strs.append(datetime.datetime.strftime(last_datetime_dt, "%Y-%m-%d"))
                last_datetime_dt = last_datetime_dt - day_delta
        else:
            if self.Parameters.datetime_str == "yesterday":
                datetime_dt = datetime_round(datetime.datetime.now(), datetime.timedelta(days=1)) - datetime.timedelta(days=1)
            elif re.match(r'\d+', self.Parameters.datetime_str):
                datetime_dt = datetime_round(datetime.datetime.now(), datetime.timedelta(days=1)) - datetime.timedelta(days=int(self.Parameters.datetime_str))
            else:
                datetime_dt = datetime.datetime.strptime(self.Parameters.datetime_str, "%Y-%m-%d")
            self.Context.datetime_strs.append(datetime.datetime.strftime(datetime_dt, "%Y-%m-%d"))

    # Keys of the "draw task" dicts built by get_draw_task_json and read by
    # get_yt_table_full_path, pub_events_happened and
    # ExecuteDrawTasksAndGetNotExecuted.
    YT_TABLE_FULL_PATH_DT_TEMPLATE = "yt_table_full_path_dt_template"
    DATETIME_MASK = "datetime_mask"
    SENSORS = "sensors"
    DATETIME_DAYS_DELAY = "datetime_days_delay"
    REACTOR_ARTIFACT_OR_REM_TAG = "reactor_artifact_or_rem_tag"
    IS_REACTOR = "is_reactor"
    COLUMN_SIZE = "column_size"

    @staticmethod
    def get_draw_task_json(yt_path_dt_template, mask, sensors, datetime_days_delay, reactor_artifact_or_rem_tag, is_reactor, column_size):
        return {
            UsDataSizeGraphs.YT_TABLE_FULL_PATH_DT_TEMPLATE: yt_path_dt_template,
            UsDataSizeGraphs.DATETIME_MASK: mask,
            UsDataSizeGraphs.SENSORS: sensors,
            UsDataSizeGraphs.DATETIME_DAYS_DELAY: datetime_days_delay,
            UsDataSizeGraphs.REACTOR_ARTIFACT_OR_REM_TAG: reactor_artifact_or_rem_tag,
            UsDataSizeGraphs.IS_REACTOR: is_reactor,
            UsDataSizeGraphs.COLUMN_SIZE: column_size,
        }

    @staticmethod
    def get_yt_table_full_path(drawing_task, datetime_dt):
        return drawing_task[UsDataSizeGraphs.YT_TABLE_FULL_PATH_DT_TEMPLATE].format(dt=datetime.datetime.strftime(datetime_dt, drawing_task[UsDataSizeGraphs.DATETIME_MASK]))

    class EventInfo(object):
        """Publish-event descriptor: a Reactor artifact path or REM tag template, plus which of the two it is."""

        def __init__(self, reac_art_or_rem_tag_dt_template, is_reactor):
            # is_reactor: True for a Reactor artifact path, False for a REM tag template.
            self.is_reactor = is_reactor
            self.reac_art_or_rem_tag_dt_template = reac_art_or_rem_tag_dt_template

    def get_lf_artifact(self, log_name):
        return "/logfeller/hahn/logs/{log_name}/1d".format(log_name=log_name)

    # Session types whose publish event is a REM tag; get_sess_event_info
    # treats every other session type as a Reactor artifact.
    sess_from_rem = [
        "translate",
        "market",
        "market_clicks",
        "market_clicks_orders_billed",
        "market_recommend",
        "market_blue",
        "market_recommend_click_activity",
        "similargroup",
    ]

    def get_sess_event_info(self, sess_type):
        if sess_type in UsDataSizeGraphs.sess_from_rem:
            if sess_type == "market_blue":
                tag_body = "create_market_blue_tables"
            elif sess_type == "market_recommend_click_activity":
                tag_body = "create_recommend_click_activity"
            else:
                tag_body = "publish_{name}_sessions".format(name=sess_type)

            return self.EventInfo("cluster=hahn_{body}_{date}".format(body=tag_body, date="{dt}"), is_reactor=False)
        else:
            return self.EventInfo("/user_sessions/hahn/pub/{name}/1d".format(name=sess_type), is_reactor=True)

    # Logfeller logs under //logs/<name>/1d measured by get_tasks_list_for_lf.
    # Each name appears once: a repeated name would produce two identical
    # draw tasks for every date.
    logfeller_logs = [
        "bs-watch-log",
        "bs-chevent-log",
        "bar-navig-log",
        "common-failure-event-log",
        "common-redir-log",
        "search-proto-reqans-log",
        "search-redir-log",
        "search-redir-tech-log",
        "search-web-blockstat-log",
        "images-blockstat-log",
        "images-redir-log",
        "images-redir-tech-log",
        "news-blockstat-log",
        "search-proto-reqans-misc-log",
        "report-web-search-reqans-event-log",
        "turbo-blockstat-log",
        "video-blockstat-log",
        "video-redir-log",
        "video-redir-tech-log",
        "yandex-access-log",
    ]

    # Published session type -> table names measured for it. get_tasks_list
    # turns every (session type, table) pair into one draw task under
    # //user_sessions/pub.
    sessions_type_to_tables = {
        "search": ["clean", "tech", "frauds", "robots", "outstaff", "yandex_staff", "yandex_robots", "errors"],
        "images": ["clean", "tech", "frauds", "robots"],
        "video": ["clean", "tech", "frauds", "robots"],
        "watch_log_tskv": ["clean", "frauds", "outstaff", "yandex_staff", "errors"],
        "spy_log": ["clean", "frauds", "robots", "outstaff", "yandex_staff", "errors"],
        "direct_urls": ["clean", "frauds", "robots", "errors"],
        "bs-dsp-log": ["clean", "yandex_staff", "yandex_robots", "errors"],
        "market": ["clean", "frauds", "robots", "yandex_staff", "yandex_robots", "errors"],
        "sbapi_lookup_access_log": ["clean", "frauds", "robots", "errors"],
        "similargroup": ["clean", "frauds", "robots", "yandex_staff", "errors"],
        "similargroup_mobile": ["clean", "frauds", "robots", "errors"],
        "chats_with_businesses-scarab-log": ["clean", "yandex_staff", "yandex_robots", "errors"],
        "collections-redir-log": ["clean", "yandex_staff", "errors"],
        "common-failure-event-log": ["clean", "yandex_staff", "yandex_robots", "errors"],
        "market_blue": ["clean"],
        "market_clicks": ["clean", "yandex_staff", "yandex_robots", "errors"],
        "market_recommend": ["clean", "yandex_staff", "yandex_robots", "errors"],
        "market_recommend_click_activity": ["clean"],
        "metrika_mobile_log": ["clean", "yandex_staff", "errors"],
        "recommender-reqans-log": ["clean", "yandex_staff", "yandex_robots", "errors"],
        "renderer-profile-event-log": ["clean", "yandex_staff", "yandex_robots", "errors"],
        "shinypages": ["clean", "errors"],
        "translate": ["clean", "yandex_staff", "yandex_robots", "errors"],
        "ugc-db-update-scarab-log": ["clean", "yandex_staff", "yandex_robots", "errors"],
    }

    # Session type -> number of days subtracted from the processed date before
    # the table path is built (see DATETIME_DAYS_DELAY in
    # ExecuteDrawTasksAndGetNotExecuted). Types not listed here use 0.
    sessions_type_to_days_delay = {
        "market_clicks_orders_billed": 7,
        "translate": 1,
        "market": 1,
        "market_blue": 1,
        "direct_urls": 1,
    }

    # Log name -> table names measured under
    # //user_sessions/build/logs/<log>/1d/<date>/ by get_tasks_list_for_build.
    search_raw_sessions_to_tables = {
        "bs-chevent-log": ["sessions"],
        "common-redir-log": ["sessions", "tech", "errors"],
        "morda-access-log": ["sessions"],
        "morda-blockstat-log": ["sessions"],
        "report-web-search-reqans-event-log": ["sessions", "errors", "yandex_robots"],
        "yandex-access-log": ["sessions", "errors"],
    }

    # Services whose //user_sessions/pub/nano_sessions/daily/<date>/<service>/clean
    # table is measured by get_tasks_list.
    nano_sessions_services = [
        "web",
        "images",
        "cbir",
        "video",
    ]

    def get_tasks_list(self):
        res = []

        if not self.Parameters.calc_pub:
            return res

        filter_sessions = map(unicode.strip, self.Parameters.filter_sessions.split(','))
        filter_tables = map(unicode.strip, self.Parameters.filter_tables.split(','))

        for sess_type, table_names in UsDataSizeGraphs.sessions_type_to_tables.iteritems():
            if filter_sessions and sess_type not in filter_sessions:
                continue

            event_info = self.get_sess_event_info(sess_type)
            for table_name in table_names:
                if filter_tables and table_name not in filter_tables:
                    continue

                if table_name == "yandex_staff":
                    yt_path_dt_template = "//user_sessions/pub/yandex_staff/{sess_type}/daily/{dt}".format(sess_type=sess_type, dt = "{dt}")
                else:
                    yt_path_dt_template = "//user_sessions/pub/{sess_type}/daily/{dt}/{table_name}".format(sess_type=sess_type, dt = "{dt}", table_name=table_name)

                res.append(UsDataSizeGraphs.get_draw_task_json(yt_path_dt_template,
                                                               "%Y-%m-%d",
                                                               {"kind": "pub_sesions", "name": sess_type, "subname": table_name},
                                                               UsDataSizeGraphs.sessions_type_to_days_delay.get(sess_type, 0),
                                                               event_info.reac_art_or_rem_tag_dt_template,
                                                               event_info.is_reactor,
                                                               column_size=self.Parameters.calc_column_size))

        if not self.Parameters.calc_pub_nano:
            return res

        for nano_service in UsDataSizeGraphs.nano_sessions_services:
            if filter_sessions and nano_service not in filter_sessions:
                continue

            event_info = self.get_sess_event_info("nano")
            yt_path_dt_template = "//user_sessions/pub/nano_sessions/daily/{dt}/{nano_service}/clean".format(dt = "{dt}", nano_service=nano_service)
            res.append(UsDataSizeGraphs.get_draw_task_json(yt_path_dt_template,
                                                           "%Y-%m-%d",
                                                           {"kind": "pub_sesions", "name": "nano", "subname": nano_service + "_clean"},
                                                           0,
                                                           event_info.reac_art_or_rem_tag_dt_template,
                                                           event_info.is_reactor,
                                                           column_size=self.Parameters.calc_column_size))

        return res

    def get_tasks_list_for_build(self):
        res = []

        if not self.Parameters.calc_build:
            return res

        filter_logs = map(unicode.strip, self.Parameters.filter_logs.split(','))
        filter_tables = map(unicode.strip, self.Parameters.filter_tables.split(','))

        for search_raw_sessions, table_names in UsDataSizeGraphs.search_raw_sessions_to_tables.iteritems():
            if filter_logs and search_raw_sessions not in filter_logs:
                continue

            event_info = self.get_sess_event_info("search")
            for table_name in table_names:
                if filter_tables and table_name not in filter_tables:
                    continue

                yt_path_dt_template = "//user_sessions/build/logs/{raw_sess}/1d/{dt}/{table_name}".format(raw_sess=search_raw_sessions,
                                                                                                          dt="{dt}",
                                                                                                          table_name=table_name)
                res.append(UsDataSizeGraphs.get_draw_task_json(yt_path_dt_template,
                                                               "%Y-%m-%d",
                                                               {"kind": "build_logs_sesions", "name": search_raw_sessions, "subname": table_name},
                                                               0,
                                                               event_info.reac_art_or_rem_tag_dt_template,
                                                               event_info.is_reactor,
                                                               column_size=False))
        return res

    def get_tasks_list_for_lf(self):
        res = []

        if not self.Parameters.calc_logs:
            return res

        filter_logs = map(unicode.strip, self.Parameters.filter_logs.split(','))

        for lf_log in UsDataSizeGraphs.logfeller_logs:
            if filter_logs and lf_log not in filter_logs:
                continue

            yt_path_dt_template = "//logs/{log_name}/1d/{dt}".format(log_name=lf_log, dt = "{dt}")
            res.append(UsDataSizeGraphs.get_draw_task_json(yt_path_dt_template,
                                                           "%Y-%m-%d",
                                                           {"kind": "logs", "name": lf_log, "subname": "log"},
                                                           0,
                                                           self.get_lf_artifact(lf_log),
                                                           is_reactor=True,
                                                           column_size=False))

        return res

    def InitializeContextTasksDict(self):
        from collections import defaultdict

        self.Context.datetime_to_draw_tasks = defaultdict(list)
        tasks_list = self.get_tasks_list()
        tasks_list_for_build = self.get_tasks_list_for_build()
        tasks_list_for_lf = self.get_tasks_list_for_lf()
        for datetime_str in self.Context.datetime_strs:
            # no hard limits for //user_sessions/pub
            self.Context.datetime_to_draw_tasks[datetime_str] = tasks_list[:]

            # set hard limits to //user_sessions/build
            if datetime.datetime.strptime(datetime_str, "%Y-%m-%d") > (datetime.datetime.now() - datetime.timedelta(days=5)):
                self.Context.datetime_to_draw_tasks[datetime_str] += tasks_list_for_build[:]
            # set hard limits to //logs
            if datetime.datetime.strptime(datetime_str, "%Y-%m-%d") > (datetime.datetime.now() - datetime.timedelta(days=100)):
                self.Context.datetime_to_draw_tasks[datetime_str] += tasks_list_for_lf[:]

    def raise_for_errors_if_needed(self, errors, no_art_inst_errors):
        if len(no_art_inst_errors) > 0:
            return

        fail_msg = ""
        for error in errors:
            fail_msg += error + "\n"
        if fail_msg:
            if self.Context.task_autoretries_count > 3:
                raise Exception("\n" + fail_msg)
            else:
                self.Context.task_autoretries_count += 1

    def get_rem_connector(self):
        """Return a new ``remclient.Connector`` for the veles02 REM server, with 3 connection retries."""
        import remclient

        rem_url = "http://veles02.search.yandex.net:8103"
        return remclient.Connector(rem_url, conn_retries=3)

    def IsTooOldDatetime(self, datetime_dt):
        return datetime_dt < datetime.datetime.now() - datetime.timedelta(days=10)

    def IsRemTagSet(self, rem_tag, datetime_dt):
        if self.IsTooOldDatetime(datetime_dt):
            return True
        else:
            rem_tag_obj = self.get_rem_connector().Tag(rem_tag)
            for try_ind in xrange(2):
                try:
                    return rem_tag_obj.Check()
                except Exception as e:
                    logging.info("Checking rem tag failed on rem_tag {} and datetime_dt {}. Exception: {}".format(rem_tag, datetime_dt, e))
            raise Exception("Failed to check rem tag")

    def pub_events_happened(self, drawing_task, datetime_dt, artifact_instance_info_accumulator):
        if drawing_task[UsDataSizeGraphs.IS_REACTOR]:
            return self.IsArtifactInstantiated(drawing_task[UsDataSizeGraphs.REACTOR_ARTIFACT_OR_REM_TAG], datetime_dt, artifact_instance_info_accumulator)
        else:
            return self.IsRemTagSet(drawing_task[UsDataSizeGraphs.REACTOR_ARTIFACT_OR_REM_TAG].format(dt=datetime.datetime.strftime(datetime_dt, "%Y%m%d")), datetime_dt=datetime_dt)

    def ExecuteDrawTasksAndGetNotExecuted(self, datetime_dt, draw_tasks, err_accumulator, no_art_inst_err_accumulator, artifact_instance_info_accumulator):
        """Run the draw tasks of one date and return those that still have to be retried.

        For each task whose publish event has happened (or for every task when
        ``check_published`` is off), the YT tables found under the task path
        are measured and the resulting sensors are sent to Solomon.

        A task is removed from the returned list only when at least one table
        was found and every table was sent successfully. It is removed exactly
        once per task, however many tables the path yields.

        Args:
            datetime_dt: the processed date; each task's days delay is subtracted from it.
            draw_tasks: list of draw-task dicts (see ``get_draw_task_json``).
            err_accumulator: list receiving error messages (Reactor/REM, Solomon, absent table).
            no_art_inst_err_accumulator: list receiving "no publish event" messages.
            artifact_instance_info_accumulator: cache used for Reactor lookups.

        Returns:
            ``(left_tasks, exc)``: the tasks not completed, and the unexpected
            exception that stopped processing, or ``None``. When an exception
            stops processing, the current and all remaining tasks are left.
        """
        import yt.wrapper as yt
        from us_processes.time_util import _convert_to_unixtime

        def _get_metric_sensors(metric_name, y_unit, corrected_datetime_dt, value, extra_labels=None):
            # One Solomon data point: labels + timestamp + value.
            ts = _convert_to_unixtime(corrected_datetime_dt)
            labels = {"y-unit": y_unit, "metric": metric_name}
            if extra_labels is not None:
                labels.update(extra_labels)
            return {"labels": labels, "ts": ts, "value": value}

        def _build_table_sensors(table, drawing_task, corrected_datetime_dt):
            # Size metrics of one table, plus per-column weights when requested.
            unc_size = float(table.attributes["uncompressed_data_size"])
            row_count = float(table.attributes["row_count"])
            avg_row_size_kb = unc_size / row_count / 1024 if row_count != 0 else 0
            unc_size_tb = unc_size / 1024**4
            compression_ratio = float(table.attributes["compression_ratio"])
            # NOTE: the "data_weight" sensor is fed from resource_usage/disk_space,
            # not from the table's data_weight attribute.
            data_weight = float(table.attributes['resource_usage']['disk_space'])
            data_weight_tb = data_weight / 1024**4

            data_sensors_list = [
                _get_metric_sensors("row_count", "rows", corrected_datetime_dt, row_count),
                _get_metric_sensors("uncompr_size", "tb", corrected_datetime_dt, unc_size_tb),
                _get_metric_sensors("data_weight", "tb", corrected_datetime_dt, data_weight_tb),
                _get_metric_sensors("avg_row_uncompr_size", "kb", corrected_datetime_dt, avg_row_size_kb),
                _get_metric_sensors("compression_ratio", "", corrected_datetime_dt, compression_ratio),
            ]

            if drawing_task[UsDataSizeGraphs.COLUMN_SIZE] and (str(table).endswith('/clean') or drawing_task[UsDataSizeGraphs.SENSORS].get('subname', '') == 'log'):
                columns = [attr['name'] for attr in table.attributes["schema"]] + ['key', 'subkey', 'value']
                for column in set(columns):
                    paths = [str(table) + '{' + column + '}']
                    weight = None
                    try:
                        raw_stat = yt.transaction_commands._make_transactional_request("get_table_columnar_statistics", {"paths": paths})
                        stat = json.loads(raw_stat)
                        weight = float(stat[0]["column_data_weights"][column]) / 1024**2
                    except Exception as e:
                        # Best effort: a column without statistics is skipped.
                        logging.info("Exception happened in get column {} statistics: {}".format(column, e))
                    else:
                        logging.debug(paths[0] + " data weight mb = " + str(weight))
                        data_sensors_list.append(_get_metric_sensors("column_data_weight",
                                                                     "mb",
                                                                     corrected_datetime_dt,
                                                                     weight,
                                                                     extra_labels={"column": column}))
            return data_sensors_list

        left_tasks = []
        for drawing_task_ind, drawing_task in enumerate(draw_tasks):
            left_tasks.append(drawing_task)  # popped again below once the task fully succeeds

            try:
                corrected_datetime_dt = datetime_dt - datetime.timedelta(days=drawing_task[UsDataSizeGraphs.DATETIME_DAYS_DELAY])

                if self.Parameters.check_published:
                    try:
                        published = self.pub_events_happened(drawing_task, corrected_datetime_dt, artifact_instance_info_accumulator)
                    except Exception as e:
                        # Unknown state: neither measure the table nor report a missing event.
                        published = None
                        err_accumulator.append("Reactor (or REM) failed on {}; error: {}".format(
                            drawing_task[UsDataSizeGraphs.REACTOR_ARTIFACT_OR_REM_TAG], str(e)))
                else:
                    published = True

                yt_full_path = UsDataSizeGraphs.get_yt_table_full_path(drawing_task, corrected_datetime_dt)

                # Equality (not truthiness) is kept on purpose: any value other than
                # True/False, including None, leaves the task untouched.
                if published == True:
                    logging.info("Checking if exists: {}".format(yt_full_path))
                    if yt.exists(yt_full_path):
                        tables_found = 0
                        all_tables_sent = True
                        for table in yt.search(yt_full_path,
                                               node_type="table",
                                               attributes=["uncompressed_data_size",
                                                           "row_count",
                                                           "schema",
                                                           "compression_ratio",
                                                           "data_weight",
                                                           "resource_usage"]):
                            tables_found += 1
                            data_sensors_list = _build_table_sensors(table, drawing_task, corrected_datetime_dt)

                            is_success = solomon_util.send_to_solomon(cluster="hahn",
                                                                      common_sensors_dict=drawing_task[UsDataSizeGraphs.SENSORS],
                                                                      data_sensors_list=data_sensors_list,
                                                                      solomon_token=self.GetSolomonToken(),
                                                                      dry=self.Parameters.is_dry)
                            if not is_success:
                                all_tables_sent = False
                                err_accumulator.append("Solomon failed on {}".format(yt_full_path))

                        if tables_found == 0:
                            # Nothing was measured, so the task stays in left_tasks.
                            logging.info("No tables found under {}".format(yt_full_path))
                        elif all_tables_sent:
                            left_tasks.pop()
                    else:
                        err_accumulator.append("{} is absent!".format(yt_full_path))
                elif published == False:
                    no_art_inst_err_accumulator.append("No publish event for datetime {} for artifact/rem-tag {}".format(
                                                        corrected_datetime_dt, drawing_task[UsDataSizeGraphs.REACTOR_ARTIFACT_OR_REM_TAG]))
                    logging.info("NOT Checking if exists: {} (because no publish event)".format(yt_full_path))
            except Exception as e:
                logging.info("Exception happened: {}".format(e))
                left_tasks += draw_tasks[drawing_task_ind + 1:]  # +1 because the current drawing_task was already appended at the start of the iteration
                return (left_tasks, e)

        return left_tasks, None

    def some_tasks_are_left(self):
        for dt, draw_tasks_arr in self.Context.datetime_to_draw_tasks.iteritems():
            if len(draw_tasks_arr) > 0:
                return True
        return False

    def on_execute(self):
        """Task entry point: measure the tables of every date and wait while tasks remain.

        On the first run the date list and the per-date draw tasks are built
        and stored in the context. Every run then processes what is still
        left. If anything remains afterwards, the task waits 15 minutes via
        ``sdk2.WaitTime``.
        """
        logging.info('UsDataSizeGraphsTask: Start')

        self.PrepareArcadia()

        if self.Context.first_time:
            self.HandleAndInitializeDatetime()
            self.InitializeContextTasksDict()
            self.Context.first_time = False

        # Reactor client and the lookup cache shared by all dates of this run.
        from us_reactor.lib.client import ReactorAPIClient
        reactor_client = ReactorAPIClient("reactor.yandex-team.ru", self.GetReactorToken())
        artifact_instance_info_accumulator = UsDataSizeGraphs.ArtifactInstanceInfoAccumulator(reactor_client)

        # YT client configuration: hahn cluster, token from the Vault.
        import yt.wrapper as yt
        yt.config["proxy"]["url"] = "hahn"
        yt.config["token"] = sdk2.Vault.data(self.Parameters.yt_token_secret_owner,
                                             self.Parameters.yt_token_secret_name)

        err_accumulator = []
        no_art_inst_err_accumulator = []

        for datetime_str in self.Context.datetime_strs:
            datetime_dt = datetime.datetime.strptime(datetime_str, "%Y-%m-%d")
            draw_tasks = self.Context.datetime_to_draw_tasks[datetime_str]

            left_tasks, exc = self.ExecuteDrawTasksAndGetNotExecuted(
                datetime_dt,
                draw_tasks,
                err_accumulator=err_accumulator,
                no_art_inst_err_accumulator=no_art_inst_err_accumulator,
                artifact_instance_info_accumulator=artifact_instance_info_accumulator,
            )
            # Store the remaining tasks before re-raising, so progress is kept.
            self.Context.datetime_to_draw_tasks[datetime_str] = left_tasks
            if exc is not None:
                raise exc

        self.raise_for_errors_if_needed(errors=err_accumulator, no_art_inst_errors=no_art_inst_err_accumulator)

        if self.some_tasks_are_left():
            raise sdk2.WaitTime(15 * 60)
