import json
import logging
import multiprocessing
import os
import time
from sandbox import sdk2
from sandbox.projects.yabs.base_bin_task import BaseBinTask


# Dynamic table that expired rows are deleted from (see move_rows()).
EVENT_TABLE = '//home/yabs/stat/AdfoxRichLogEventTable'
# Table that the re-packed rows are inserted into (see move_rows()).
UPLOAD_QUEUE = '//home/yabs/stat/AdfoxRichLogUploadQueue/queue/queue'
# YQL query template; it is filled in with str.format() and expects two
# placeholders:
#   {codec}        - codec name passed to library.python.codecs.loads() inside
#                    the Python3 UDF to decompress the `values` column;
#   {time_to_live} - age threshold in seconds; rows are selected when
#                    uts < now - (time_to_live + 3600).
# The decompressed `values` payload is parsed as YSON and re-serialized as
# JSON bytes, all remaining selected columns are returned unchanged.
# NOTE(review): the table path is hard-coded in the FROM clause instead of
# reusing EVENT_TABLE, so the two must be kept in sync manually.
SELECT_OLD_ROWS_QUERY = '''
$script = @@#py
import library.python.codecs as codecs

def decode(s):
    return codecs.loads("{codec}", s)
@@;

$Decompress = Python3::decode(
    Callable<(String)->String>,
    $script
);

$now = DateTime::ToSeconds(CurrentUtcTimestamp());
SELECT
    request_session,
    banner_id,
    rtb_auction_schedule_step_index,
    request_id,
    nsec,
    uts,
    engine_id,
    ToBytes(
        Yson::SerializeJson(
            Yson::Parse(
                $Decompress(
                    Unwrap(values)
                )
            )
        )
    ) AS values
FROM `//home/yabs/stat/AdfoxRichLogEventTable`
WHERE uts < $now - ({time_to_live} + 3600);
'''


class YabsAdfoxRichLogEventTableCleanup(BaseBinTask):
    """Move old rows from EVENT_TABLE into UPLOAD_QUEUE.

    The task runs SELECT_OLD_ROWS_QUERY through YQL, collects the returned
    rows into chunks of `num_rows_in_one_move_rows` and, for every chunk,
    deletes the rows from EVENT_TABLE and inserts their compressed JSON
    representation into UPLOAD_QUEUE inside a single tablet transaction.
    """

    class Requirements(sdk2.Requirements):
        # `cores` is also used by move_rows() as the number of worker processes.
        cores = 10
        ram = 4096
        disk_space = 4096

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(BaseBinTask.Parameters):
        description = 'old rows mover task'

        with BaseBinTask.Parameters.version_and_task_resource() as version_and_task_resource:
            resource_attrs = sdk2.parameters.Dict('Filter resource by', default={'name': 'YabsAdfoxRichLogEventTableCleanup'})

        # Secret holding the token used for the YT client (and exported as YT_TOKEN).
        yt_token_secret = sdk2.parameters.YavSecret(
            'YT  token secret',
            default='sec-01d4mdr98tm9n1k6a3rmyn685t#YT_TOKEN',
            required=True
        )

        # Secret holding the token used for the YQL client.
        yql_token_secret = sdk2.parameters.YavSecret(
            'YQL token secret',
            default='sec-01d4mdr98tm9n1k6a3rmyn685t#YQL_TOKEN',
            required=True
        )

        # YT proxy used for delete_rows/insert_rows, and YQL db used for the select.
        yt_cluster = sdk2.parameters.String('cluster with table for move rows', required=True)
        yql_cluster = sdk2.parameters.String('cluster with table for yql select', required=True)

        # Number of selected rows accumulated before one move_rows() call.
        num_rows_in_one_move_rows = sdk2.parameters.Integer(
            'num rows in one move rows',
            default=100000,
        )

        # Pause between consecutive move_rows() calls.
        sleep_time = sdk2.parameters.Integer(
            'sleep time in seconds between move rows',
            default=1,
        )

        # Upper bound on full-sized move_rows() calls per task run; 0 disables the bound.
        max_move_rows_runs_per_task_run = sdk2.parameters.Integer(
            'max move rows per one run (0 = inf)',
            default=20,
        )

        # Number of JSON rows joined into one UPLOAD_QUEUE record; must be positive.
        max_move_rows_in_batch = sdk2.parameters.Integer(
            'max rows in one batch to QYT',
            default=4000
        )

        # NOTE(review): this parameter is not referenced anywhere in this class.
        shards = sdk2.parameters.Integer(
            'amount of shards in queue',
            default=200
        )

        # Substituted into the query as {time_to_live} (seconds).
        time_to_live = sdk2.parameters.Integer(
            'time after which event could be moved',
            default=86400
        )

        # Codec name used both to decompress `values` in YQL and to compress queue records.
        codec = sdk2.parameters.String('codec used in EventTable and UploadQueue', default='zstd_6')

    def move_rows(self, yt_client, rows):
        """Delete `rows` from EVENT_TABLE and insert them, re-packed, into UPLOAD_QUEUE.

        Every row's JSON `values` payload is merged with the row's other
        columns and serialized back to JSON. The serialized rows are joined
        with newlines in batches of `max_move_rows_in_batch`, compressed with
        the configured codec and written as `{'value': ..., 'codec': ...}`
        records. Deletion and insertion happen in one tablet transaction.

        Serialization is spread over `Requirements.cores` worker processes
        which append to a shared list, so the order of serialized rows is not
        guaranteed to match the order of `rows`. If the workers do not produce
        exactly one item per row in time, serialization is redone
        sequentially in the current process.

        :param yt_client: client exposing Transaction/delete_rows/insert_rows.
        :param rows: list of dicts, each with a JSON string under 'values';
            all other items of a row are used as the delete key.
        :raises ValueError: if `max_move_rows_in_batch` is not positive
            (the batching loop could never make progress with it).
        """
        import library.python.codecs as codecs

        if not rows:
            # Nothing to delete or insert; avoid spawning workers and an empty transaction.
            return

        batch_size = self.Parameters.max_move_rows_in_batch
        if not batch_size or batch_size <= 0:
            raise ValueError(
                'max_move_rows_in_batch must be a positive integer, got {!r}'.format(batch_size)
            )

        keys = [{key: row[key] for key in row if key != 'values'} for row in rows]

        # NOTE(review): a nested function used as a Process target relies on the
        # fork start method (it captures `rows` and `keys` from this scope).
        def process_rows(process_from, process_to, json_format):
            logging.info('process_rows started with arguments %s %s', process_from, process_to)
            for i in range(process_from, process_to):
                values = json.loads(rows[i]['values'])
                # Key columns override same-named fields of the payload.
                values.update(keys[i])
                json_format.append(json.dumps(values))
            logging.info('process_rows ended with arguments %s %s', process_from, process_to)

        total = len(rows)
        worker_count = self.Requirements.cores
        chunk_size = total // worker_count

        manager = multiprocessing.Manager()
        try:
            shared_result = manager.list()
            workers = []
            for index in range(worker_count):
                start = index * chunk_size
                # The last worker also takes the remainder of the division.
                stop = total if index + 1 == worker_count else (index + 1) * chunk_size
                worker = multiprocessing.Process(target=process_rows, args=(start, stop, shared_result))
                worker.start()
                workers.append(worker)

            for worker in workers:
                worker.join(5)
                if worker.is_alive():
                    # Worker did not finish in time: stop it and reap it so it
                    # does not linger; the length check below triggers the fallback.
                    worker.terminate()
                    worker.join(5)
            logging.info("Joined with processes")

            # Take one local snapshot (a single proxy call) while the manager is alive.
            json_format = shared_result[:]
        finally:
            # Stop the manager's server process instead of leaving it to garbage collection.
            manager.shutdown()

        if len(json_format) != total:
            logging.error('Processes did not process all rows (probably some process hung up)')
            logging.error('Program will process rows without multiprocessing')
            json_format = []
            process_rows(0, total, json_format)

        codec_name = self.Parameters.codec
        to_queue = []
        for offset in range(0, len(json_format), batch_size):
            rows_batch = '\n'.join(json_format[offset:offset + batch_size])
            data = codecs.dumps(codec_name, rows_batch)
            logging.debug('Total length: %s', len(data))
            to_queue.append({'value': data, 'codec': codec_name})

        with yt_client.Transaction(type='tablet'):
            yt_client.delete_rows(EVENT_TABLE, keys, require_sync_replica=False)
            yt_client.insert_rows(UPLOAD_QUEUE, to_queue)
        # Logged only after the transaction context has exited without an error.
        logging.info("Data was written")

    def on_execute(self):
        """Select old rows with YQL and move them chunk by chunk.

        Rows are accumulated until `num_rows_in_one_move_rows` is reached,
        then passed to move_rows(). After `max_move_rows_runs_per_task_run`
        such moves (when that parameter is positive) processing stops for all
        result tables. Rows left over after the results are exhausted are
        moved in one final call.
        """
        from yql.api.v1.client import YqlClient
        from yt.wrapper import YtClient

        self.yt_token = self.Parameters.yt_token_secret.data()[self.Parameters.yt_token_secret.default_key]
        self.yql_token = self.Parameters.yql_token_secret.data()[self.Parameters.yql_token_secret.default_key]
        os.environ['YT_TOKEN'] = self.yt_token
        os.environ['YT_PROXY'] = self.Parameters.yt_cluster

        yql_client = YqlClient(db=self.Parameters.yql_cluster, token=self.yql_token)
        query_text = SELECT_OLD_ROWS_QUERY.format(
            time_to_live=self.Parameters.time_to_live,
            codec=self.Parameters.codec,
        )
        request = yql_client.query(query_text, syntax_version=1)
        yt_client = YtClient(token=self.yt_token, proxy=self.Parameters.yt_cluster, config={"backend": "rpc"})

        logging.info("Start running YQL")
        # NOTE(review): the query status is not checked here; confirm how a
        # failed query surfaces (exception vs. empty results) in the YQL client.
        results = request.run().get_results()
        logging.info("YQL finished")

        chunk_limit = self.Parameters.num_rows_in_one_move_rows
        max_runs = self.Parameters.max_move_rows_runs_per_task_run

        rows = []
        move_rows_count = 0
        limit_reached = False
        for table in results:
            logging.info("Start process table")
            for row in table.get_iterator():
                rows.append(dict(zip(table.column_names, row)))
                if len(rows) < chunk_limit:
                    continue

                logging.info("Collected %s rows iter %s", len(rows), move_rows_count)
                move_rows_count += 1
                self.move_rows(yt_client, rows)
                logging.info("Collected rows were moved")
                rows = []
                if max_runs > 0 and move_rows_count >= max_runs:
                    logging.info("Max move count rows were made")
                    limit_reached = True
                    break
                time.sleep(self.Parameters.sleep_time)
                logging.info("Process woke up after sleep")

            if limit_reached:
                # Stop the outer loop too, otherwise a further result table
                # would keep moving rows past the configured limit.
                break

        if rows:
            logging.info("move rest %s rows", len(rows))
            self.move_rows(yt_client, rows)


# Script entry-point guard; intentionally a no-op, so nothing runs when the
# module is executed directly.
if __name__ == '__main__':
    pass
