import logging

from sandbox.sandboxsdk import environments
from sandbox import sdk2


class CloudBillingToYT(sdk2.Task):
    """Import data from the billing S3 bucket into YT for cloud analytics.

    Every object listed in ``S3_BUCKET_NAME`` is mapped to a YT table path
    under ``YT_PREFIX`` (see ``_yt_path``). Objects whose table already
    exists are skipped, so a re-run only imports what is still missing.
    """
    # TODO(syndicut): Move all constants to job params?
    YT_PREFIX = '//home/cloud_analytics/import/billing'
    # Expected first line of every object body; it is validated and is not
    # written to YT (see ``_parse_data``).
    TRUNCATED_ZERO = 'Result #0 (Truncated: 0): '
    S3_REGION_NAME = 'us-east-1'
    S3_ENDPOINT_URL = 'https://storage.yandexcloud.net'
    S3_BUCKET_NAME = 'billing'
    # File extension removed from the object name when building the table name.
    S3_OBJECT_SUFFIX = '.json'

    class Requirements(sdk2.Task.Requirements):
        # TODO(syndicut): Use prebuilt wheels here
        environments = (
            environments.PipEnvironment('yandex-yt'),
            environments.PipEnvironment('yandex-yt-yson-bindings-skynet'),
            environments.PipEnvironment('boto3'),
        )

    def _yt_path(self, object_key):
        """Return the YT table path for the S3 object key ``object_key``.

        The last two '/'-separated components of the key are used: the parent
        component becomes the table directory and the final component, with a
        trailing ``S3_OBJECT_SUFFIX`` removed, becomes the table name.

        Raises:
            IndexError: if the key contains no '/' separator.
        """
        parts = object_key.split('/')
        table_name = parts[-2]
        file_name = parts[-1]
        # NOTE: str.rstrip('.json') would strip any trailing run of the
        # characters '.', 'j', 's', 'o', 'n' (e.g. 'seasons.json' -> 'sea'),
        # so the suffix is removed explicitly instead.
        if file_name.endswith(self.S3_OBJECT_SUFFIX):
            file_name = file_name[:-len(self.S3_OBJECT_SUFFIX)]
        return '/'.join([self.YT_PREFIX, table_name, file_name])

    def _parse_data(self, data):
        """Yield the payload lines of ``data``, skipping the header line.

        ``data`` is an iterator over the lines of an object body. Its first
        line must be equal to ``TRUNCATED_ZERO``; every following line is
        yielded unchanged. An empty iterator yields nothing.

        Raises:
            ValueError: if the first line is not ``TRUNCATED_ZERO``.
        """
        # next() with a default works on both Python 2 and 3 and avoids a
        # StopIteration escaping from inside the generator on an empty body.
        first_line = next(data, None)
        if first_line is None:
            return
        if first_line != self.TRUNCATED_ZERO:
            raise ValueError('Expected "{}", got "{}"'.format(self.TRUNCATED_ZERO, first_line))
        for line in data:
            yield line

    def on_execute(self):
        """Copy every not-yet-imported object from the S3 bucket into YT."""
        # NOTE(review): presumably imported here because these packages only
        # become available through the pip environments in Requirements.
        import yt.wrapper as yt
        import boto3

        yt.config['token'] = sdk2.Vault.data(self.owner, "billing-to-yt-yt-token")
        yt.config['proxy']['url'] = 'hahn'

        session = boto3.session.Session(
            region_name=self.S3_REGION_NAME,
            aws_access_key_id=sdk2.Vault.data(self.owner, "billing-to-yt-aws_access_key_id"),
            aws_secret_access_key=sdk2.Vault.data(self.owner, "billing-to-yt-aws_secret_access_key"),
        )

        s3 = session.client(
            service_name='s3',
            endpoint_url=self.S3_ENDPOINT_URL
        )

        # A single list_objects call returns only one page of results and
        # omits the 'Contents' key for an empty listing, so paginate and
        # tolerate pages without 'Contents'.
        paginator = s3.get_paginator('list_objects')
        for page in paginator.paginate(Bucket=self.S3_BUCKET_NAME):
            for s3_object in page.get('Contents', []):
                # TODO(syndicut): Multiprocessing can be useful here
                object_key = s3_object['Key']
                table = self._yt_path(object_key)
                if yt.exists(table):
                    continue
                logging.info('Importing table %s', table)
                body = s3.get_object(Bucket=self.S3_BUCKET_NAME, Key=object_key)['Body'].iter_lines()
                # Create and fill the table inside one transaction.
                with yt.Transaction():
                    yt.create('table', table, recursive=True)
                    # TODO(syndicut): Using ujson for parsing can speed up everything, but can be dangerous
                    yt.write_table(table, self._parse_data(body), format='json', raw=True)
