import logging
import textwrap

import sandbox.sandboxsdk.environments as environments
from sandbox import sdk2
from sandbox import common

# Two-element tuple unpacked into ``sdk2.Vault.data(*YT_SECRET)``; the same
# secret is used as both the YT and the YQL token.
# NOTE(review): presumably (vault owner, secret name) -- confirm against the Vault API.
YT_SECRET = ('WEATHER-ADMIN', 'yt_yql_token')


def gen_ts_aurora_query(into, path, ranges):
    """Build the YQL query that appends raw per-timestamp tables to one table.

    The query starts with a fixed preamble (cluster, pool and the
    ``$extract_ts`` helper that turns the trailing digits of a table path
    into a Datetime) followed by one ``insert into ... from RANGE(...)``
    statement per entry of ``ranges``.

    Args:
        into: destination table path, substituted into ``insert into [...]``.
        path: directory holding the per-timestamp tables, used as the first
            argument of ``RANGE``.
        ranges: iterable of ``(start, end)`` pairs of table names; each pair
            becomes one insert statement. Any iterable is accepted,
            including a one-shot generator.

    Returns:
        A single-element list ``[(query_name, query_text)]``. The name is
        ``"<path>: <start>-<end>, <start>-<end>, ..."``. With empty
        ``ranges`` the text consists of the preamble only.
    """
    # ``ranges`` is walked twice (statements and name); materialise it so a
    # generator argument is not exhausted after the first pass.
    ranges = list(ranges)

    preamble = textwrap.dedent("""\
        use arnold;
        pragma yt.Pool = 'meteo-experiment';
        $pathre = Re2::Capture(@@(\\d+)$@@);
        $extract_ts = ($str) ->{RETURN CAST(CAST($pathre($str)._1 As Int64) As Datetime)};
    """)
    # Dedent the statement template once instead of on every iteration.
    insert_template = textwrap.dedent("""\
        insert into [{into}]
        select $extract_ts(TablePath()) as ts, TableRecord() as id, key, value
        from RANGE([{path}], [{start}], [{end}]);
    """)

    statements = [
        insert_template.format(into=into, path=path, start=start, end=end)
        for start, end in ranges
    ]
    q_text = preamble + "".join(statements)

    # Human-readable name used in logs; ranges are separated by ", ".
    q_name = "{}: ".format(path) + ", ".join(
        "{}-{}".format(start, end) for start, end in ranges
    )
    return [(q_name, q_text)]


def gen_parse_query(into, sources):
    """Build the YQL query that parses remapped rows into flat records.

    The generated query reads the concatenation of ``sources``, parses the
    JSON in ``value``, flattens the ``data`` list into one row per element
    (lat, lon, colour), drops rows whose colour is ``#000000`` and inserts
    the result into ``into`` ordered by gentime, time, lat, lon.

    Args:
        into: destination table path, substituted into ``insert into [...]``.
        sources: iterable of source table paths; each is wrapped in square
            brackets and passed to ``CONCAT``.

    Returns:
        The query text as a single string.
    """
    bracketed_sources = ("[{}]".format(name) for name in sources)

    # Literal braces of the YQL lambdas are doubled because the template
    # goes through str.format before it is dedented.
    template = """\
        use arnold;
        pragma yt.DataSizePerJob = '100M';
        pragma yt.Pool = 'meteo-experiment';

        $v = Yson::ParseJson(value);
        $dt = ($yson) -> {{RETURN Yson::ConvertToString($yson.dt)}};
        $tm = ($yson) -> {{RETURN Yson::ConvertToString($yson.tm)}};

        $data_rows = (
            select
                ts as table_ts, id,
                CAST(DateTime::TimestampFromStringFormat($dt($v.gentime) || " " || $tm($v.gentime), "%Y-%m-%d %H:%M:%S") AS Datetime) as gentime,
                CAST(DateTime::TimestampFromStringFormat($dt($v.time) || " " || $tm($v.time), "%Y-%m-%d %H:%M:%S") AS Datetime) as time,
                Yson::ConvertToList($v.data) as data,
                Yson::ConvertToBool($v.time.fact) as time_fact,
                Yson::ConvertToBool($v.time.now) as time_now,
                Yson::ConvertToBool($v.gentime.fact) as gentime_fact,
                Yson::ConvertToBool($v.gentime.now) as gentime_now
            from
                (
                     select *
                     from CONCAT({sources})
                )
        );

        insert into [{into}]
        select * from (
            select
                table_ts, id, gentime, time, time_fact, time_now, gentime_fact, gentime_now,
                Yson::ConvertToDouble(data.0) as lat,
                Yson::ConvertToDouble(data.1) as lon,
                Yson::ConvertToString(data.2) as colour
            from $data_rows
            flatten by data
        )
        where colour != '#000000'
        order by gentime, time, lat, lon
    """
    filled = template.format(into=into, sources=", ".join(bracketed_sources))
    return textwrap.dedent(filled)


def get_query_result(request, query_name):
    """Fetch the results of a started query and log the outcome.

    Args:
        request: object exposing ``get_results()``; the returned value must
            provide ``is_success`` and ``errors``.
            NOTE(review): presumably a YQL request whose ``get_results()``
            blocks until the query finishes -- confirm against the client.
        query_name: label used in the log messages.

    Returns:
        True when the results report success, False otherwise. On failure
        every entry of ``errors`` (if any) is written to the log.
    """
    results = request.get_results()

    if not results.is_success:
        logging.info("Query `{}' produced errors:".format(query_name))
        # ``errors`` may be empty or None; treat both as "nothing to list".
        for error in results.errors or ():
            logging.info(' - ' + str(error))
        return False

    logging.info("Query `{}' was successful".format(query_name))
    return True


class WeatherRemapGismeteoNowcast(sdk2.Task):
    """Remap raw Gismeteo nowcast tables, parse them and merge into the parsed table.

    Steps performed by ``on_execute``:
      1. for each of the three aurora parts, append all single-timestamp
         tables to one "remapped" table with YQL;
      2. remove the raw tables if every remap query succeeded;
      3. parse the remapped rows into a temporary sorted table;
      4. sorted-merge the temporary table into the full parsed table;
      5. remove the remapped and temporary tables.
    """

    class Requirements(sdk2.Task.Requirements):
        # pip packages requested for the task; both are imported inside on_execute
        environments = [
            environments.PipEnvironment('yandex-yt', use_wheel=True),
            environments.PipEnvironment('yql', use_wheel=True),
        ]

    class Parameters(sdk2.Task.Parameters):
        # common parameters
        kill_timeout = 24 * 60 * 60     # 24 hours in seconds: parsing is very slow

        # custom parameters
        # name = sdk2.parameters.String("Your name", default="Anonymous", required=True)

    def _start_part_remap(self, yt, client, source_dir, remapped):
        """Prepare one aurora part and start its remap queries.

        Sets the read schema on every table found in ``source_dir``, splits
        the table names into ranges of up to 1000 and starts (without
        waiting for) the queries that append those ranges to ``remapped``.

        Args:
            yt: the ``yt.wrapper`` module, already configured.
            client: YQL client used to create the queries.
            source_dir: directory with the single-timestamp tables.
            remapped: destination table of the remap queries.

        Returns:
            ``(table_paths, started)``: full paths of the raw tables, and a
            list of ``(query_name, request)`` pairs for the running queries.
        """
        # Debug hint: to run parts of the script independently the listing can be
        # restricted, e.g. `... if x <= "1527772077"` instead of the None check.
        names = [name for name in sorted(yt.list(source_dir, absolute=False)) if name is not None]

        # set schemas for all single-timestamp tables
        logging.info("Setting schemas for tables in {}".format(source_dir))
        table_paths = []
        for name in names:
            table_path = source_dir + '/' + name
            yt.set(
                table_path + '/@_read_schema',
                '<strict=%false>[{name="key";type="string";};{name="value";type="string";}]',
                format="yson"
            )
            table_paths.append(table_path)

        # calculate ranges for parallel processing: (first, last) name of
        # every consecutive chunk; the final chunk may be shorter
        chunk = 1000
        last = len(names) - 1
        ts_ranges = [
            (names[lo], names[min(lo + chunk - 1, last)])
            for lo in range(0, len(names), chunk)
        ]

        # start remap queries; results are collected later so that queries
        # of all parts run in parallel
        started = []
        for q_name, q_text in gen_ts_aurora_query(remapped, source_dir, ts_ranges):
            logging.info("Planned query `{}'".format(q_name))
            req = client.query(q_text)
            req.run()
            started.append((q_name, req))
        return table_paths, started

    def on_execute(self):
        import yt.wrapper as yt
        from yql.api.v1.client import YqlClient

        # a single vault secret is used as both the YT and the YQL token
        token = sdk2.Vault.data(*YT_SECRET)
        yt.config['proxy']['url'] = 'arnold'
        yt.config['token'] = token
        yt.config['pool'] = 'meteo-experiment'
        client = YqlClient(token=token)

        # switch for all table removals below
        delete_after = True

        # remap single-timestamp tables to one big table (for each region p1 / p2 / p3)
        logging.info("Started remapping")
        raw_tables = []
        remapped_tables = []
        pending = []
        for part in ('1', '2', '3'):
            source_dir = '//home/meteo/aurora/gismeteo-nowcast_p' + part
            remapped = '//home/meteo/aurora/gismeteo-nowcast-remapped-p' + part
            remapped_tables.append(remapped)
            part_tables, part_queries = self._start_part_remap(yt, client, source_dir, remapped)
            raw_tables.extend(part_tables)
            pending.extend(part_queries)

        # collect results of queries; every request is waited for, even
        # after a failure (get_query_result blocks while the request is
        # PENDING/RUNNING)
        outcomes = [get_query_result(req, q_name) for q_name, req in pending]
        # don't delete any tables if there are any YQL errors
        all_successful = all(outcomes)

        # delete single-timestamp tables, if needed
        logging.info("Finished remapping, all_successful = {}, delete_after = {}".format(all_successful, delete_after))
        if all_successful and delete_after:
            logging.info("Deleting raw timestamp-tables")
            for path in raw_tables:
                yt.remove(path)

        # NOTE(review): parsing and merging go ahead even when a remap query
        # failed and the raw tables were kept -- confirm that the next run
        # cannot ingest the same rows twice.

        # parse each data-row
        logging.info("Started parsing")
        parsed_part = '//home/meteo/aurora/gismeteo-parsed-temp'
        parse_text = gen_parse_query(parsed_part, remapped_tables)
        parse_request = client.query(parse_text)
        parse_request.run()
        if not get_query_result(parse_request, 'parse query'):
            # remove the temporary table (if it exists): it is sorted and
            # the parse query cannot append to it on a retry
            if delete_after:
                yt.remove(parsed_part, force=True)
            raise common.errors.TaskError("Can't finish parsing")

        # merge parsed data with existing table
        logging.info("Started merging with previously parsed")
        parsed_full = '//home/meteo/aurora/gismeteo-parsed'
        merge_op = yt.run_merge(
            source_table=[parsed_full, parsed_part],
            destination_table=parsed_full,
            mode='sorted',
            job_count=3000
        )
        merged_ok = merge_op.get_state() == 'completed'
        if not merged_ok:
            raise common.errors.TaskError("Can't finish merging")

        # delete remapped tables and current part of parsed data, if needed
        if delete_after:
            logging.info("Deleting intermediate tables")
            for path in remapped_tables + [parsed_part]:
                yt.remove(path)