# -*- coding: utf-8 -*-
import gzip
import hashlib
import logging
import os
from datetime import datetime, timedelta
from StringIO import StringIO

from sandbox import sdk2
from sandbox.projects.common import binary_task

# Defaults for the S3 upload target (used by the 'AWS S3 parameters' group of the task).
DEFAULT_AWS_ENDPOINT_URL = "http://s3.mds.yandex.net"
DEFAULT_AWS_BUCKET_NAME = "mediascope-bucket"

# Defaults for the YT cluster and directory where metric tables are stored
# (used by the 'YT & YQL parameters' group of the task).
DEFAULT_YT_PROXY = "hahn"
DEFAULT_YT_METRICA_DIR = "//home/yabs-rt/ads-verify/mediascope-metrica"
# Default TTL; the task compares it against a table's age in whole days during cleanup.
DEFAULT_YT_TABLE_TTL = 15

# Yav secret ids. NOTE(review): neither constant is referenced in the visible part of this
# file; presumably they are meant to be used in `env_vars` values ('sec-xxx:key') - confirm.
DEFAULT_AWS_SECRET = "sec-01fcws88d1vbmwzxsj69avs985"  # alias: avp-mediascope
DEFAULT_YAV_SECRET = "sec-01dhp5zhknswkzjk3grmr0rebq"  # alias: robot-yabs-avp


class YabsAvpMetricaExporter(binary_task.LastBinaryTaskRelease, sdk2.Task):
    """
    YABS_AVP_METRICA_EXPORTER

    This is binary task. Check out documentation https://docs.yandex-team.ru/sandbox/dev/binary-task
    What it does?
    1) Runs yql query to collect data into YT table
    2) Downloads table content and compresses with gzip
    3) Exports compressed data to S3 bucket and SFTP server
    4) Removes tables of the same part which are older than the configured TTL

    Credentials are read from the process environment: YQL_TOKEN, YT_TOKEN, AWS_ACCESS_KEY_ID,
    AWS_SECRET_ACCESS_KEY and SFTP_PKEY. Entries of the `env_vars` parameter are copied into
    the environment before any part is processed.
    """

    class Parameters(sdk2.Parameters):

        with sdk2.parameters.Group('Export parameters') as export_params:
            parts = sdk2.parameters.List("Metric parts", description="Metrica parts to export", required=True)
            pre_export = sdk2.parameters.Bool("Pre export", default=False)
            export_date = sdk2.parameters.String("Export date (format: 'YYYY-mm-dd')", required=True)

        with sdk2.parameters.Group('YT & YQL parameters') as yt_yql_params:
            yt_proxy = sdk2.parameters.String("YT cluster to store metric tables", default=DEFAULT_YT_PROXY, required=True)
            yt_dir = sdk2.parameters.String("YT directory to store metric tables", default=DEFAULT_YT_METRICA_DIR, required=True)
            yt_ttl = sdk2.parameters.Integer("TTL for metric tables in YT", default=DEFAULT_YT_TABLE_TTL, required=True)

        with sdk2.parameters.Group('AWS S3 parameters') as aws_s3_params:
            aws_endpoint_url = sdk2.parameters.String("AWS endpoint url", default=DEFAULT_AWS_ENDPOINT_URL, required=True)
            aws_bucket_name = sdk2.parameters.String("AWS bucket name", default=DEFAULT_AWS_BUCKET_NAME, required=True)

        with sdk2.parameters.Group('SFTP parameters') as sftp_params:
            sftp_host = sdk2.parameters.String("SFTP host")
            sftp_user = sdk2.parameters.String("SFTP user")
            sftp_port = sdk2.parameters.String("SFTP port", default=22)
            sftp_path = sdk2.parameters.String("SFTP path", default="./")

        with sdk2.parameters.Group('Run') as run:
            env_vars = sdk2.parameters.Dict(
                'env_vars',
                description="Environment variables (e.g. key value).\nMay be used with Yav Secret: sec-xxx:key"
            )

        ext_params = binary_task.binary_release_parameters(stable=True)

    def prepare_env_vars(self):
        """
        Copy every entry of the `env_vars` parameter into os.environ.

        A value starting with 'sec-' is treated as '<secret id>:<key>' and is replaced
        with the value stored under <key> in that Yav secret.

        :raises ValueError: if a 'sec-' value does not contain the ':<key>' part
        """
        for name, value in self.Parameters.env_vars.items():
            if value.startswith('sec-'):
                secret_id, sep, secret_key = value.partition(':')
                if not sep:
                    # Fail with an explicit message instead of an opaque tuple-unpacking error.
                    raise ValueError(
                        "Env var '{}' references a Yav secret without a key, expected 'sec-xxx:key'".format(name)
                    )
                value = sdk2.yav.Secret(secret_id).data()[secret_key]
            os.environ[name] = value

    def on_execute(self):
        """
        Task entry point: set up the environment and export every requested part.

        :raises ValueError: if `export_date` is not in 'YYYY-mm-dd' format
        """
        self.prepare_env_vars()

        export_date = str(self.Parameters.export_date)
        date = datetime.strptime(export_date, "%Y-%m-%d")
        pre_export = bool(self.Parameters.pre_export)

        # Salt is the year and month of the export date; a pre export uses the
        # year and month of the date ten days later instead.
        salt_date = date + timedelta(days=10) if pre_export else date
        salt = salt_date.strftime("%Y%m")

        # Pre export tables and files are distinguished by a "2" suffix after the part name.
        prefix_suffix = "2" if pre_export else ""
        for part in self.Parameters.parts:
            self.process(filename_prefix=part + prefix_suffix, part=part, date=export_date, salt=salt)

    def process(self, filename_prefix, part, date, salt):
        """
        Build, download, compress and export the metric table of a single part.

        :param filename_prefix: prefix of the YT table name, the table is '<prefix>_<date>'
        :param part: metric part name, selects the '<part>_template.yql' resource
        :param date: export date as a 'YYYY-mm-dd' string
        :param salt: value substituted for SALT in the query template
        :raises RuntimeError: if the YQL query finished unsuccessfully

        Failures of the YQL query, the YT read and the S3 upload are propagated.
        The SFTP upload and the removal of old tables are best-effort: errors are only logged.
        """
        # Third-party dependencies are imported here rather than at module level.
        import boto3
        import paramiko
        import library.python.resource as resources
        from yql.api.v1.client import YqlClient
        import yt.wrapper as yw

        yt_dir = self.Parameters.yt_dir.rstrip("/")
        filename = "{}_{}".format(filename_prefix, date)
        # The S3 key is the SHA-512 hex digest of the table name, not the name itself.
        aws_filename = hashlib.sha512(filename.encode()).hexdigest()

        logging.info("{} start {} with salt {}, aws_filename is {}".format(date, filename, salt, aws_filename))

        # Create metrics table with YQL query
        template = "{}_template.yql".format(part)
        query_text = resources.find(template).format(YT_DIR=yt_dir, DATE=date, SALT=salt, FILENAME=filename)
        try:
            yql_client = YqlClient(token=os.environ.get("YQL_TOKEN"))
            query = yql_client.query(query_text, syntax_version=1)
            query.run()
        except Exception:
            logging.exception("Can not execute query:\n{}".format(query_text))
            raise

        result = query.get_results()
        if not result.is_success:
            raise RuntimeError("Query status failed: {}".format('\n'.join(str(err) for err in result.errors)))

        # Download table content
        yt_path = "{}/{}".format(yt_dir, filename)
        try:
            yt_client = yw.YtClient(proxy=self.Parameters.yt_proxy, token=os.environ.get("YT_TOKEN"))
            yt_client.config['read_parallel']['enable'] = True
            yt_client.config['read_parallel']['max_thread_count'] = 32
            yt_client.config['read_parallel']['data_size_per_thread'] = 8 * 1024 * 1024

            records = yt_client.read_table(yt_path, format=yw.JsonFormat(enable_ujson=True, raw=True))
        except Exception:
            logging.exception("Couldn't get data from '{}'".format(yt_path))
            raise

        # Gzip downloaded table content
        output_stream = self._gzip_records(records)
        data = output_stream.getvalue()

        # Push to S3 bucket
        session = boto3.session.Session(
            aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
            aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"]
        )
        s3 = session.client(
            service_name='s3',
            endpoint_url=str(self.Parameters.aws_endpoint_url)
        )
        s3.put_object(
            Bucket=self.Parameters.aws_bucket_name,
            Key=aws_filename,
            Body=data
        )
        logging.info("{} uploaded to https://{}.s3.yandex.net/{}".format(filename,
                                                                         self.Parameters.aws_bucket_name,
                                                                         aws_filename))

        # Push to SFTP server, silent mode: a failure is logged and does not fail the task
        try:
            pkey = paramiko.RSAKey.from_private_key(StringIO(os.environ["SFTP_PKEY"]))
            with paramiko.SSHClient() as client:
                # NOTE(review): AutoAddPolicy accepts unknown host keys - confirm this is intended.
                client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                client.connect(
                    hostname=self.Parameters.sftp_host,
                    # Honour the configured port; fall back to the SSH default when it is empty.
                    port=int(self.Parameters.sftp_port or 22),
                    username=self.Parameters.sftp_user,
                    pkey=pkey,
                    look_for_keys=False,
                    allow_agent=False
                )
                sftp = client.open_sftp()
                try:
                    sftp.chdir(self.Parameters.sftp_path)
                    # Files are grouped into one directory per export date.
                    if date not in sftp.listdir():
                        sftp.mkdir(date)

                    tag = "add" if self.Parameters.pre_export else "main"
                    filepath = "{}/{}_{}_{}.ndjson.gz".format(date, part, tag, date.replace('-', ''))
                    # Rewind: the buffer position is at the end after compression.
                    output_stream.seek(0)
                    sftp.putfo(output_stream, filepath)
                finally:
                    sftp.close()
        except Exception:
            logging.exception("Upload to SFTP server failed!")

        # Cleanup old tables
        self._cleanup_old_tables(yt_client, yt_dir, filename_prefix)

        logging.info("{} finish {} with salt {}, aws_filename is {}".format(date, filename, salt, aws_filename))

    @staticmethod
    def _gzip_records(records):
        """
        Gzip the given records into an in-memory buffer and return the buffer.

        Every record is decoded as UTF-8 and re-encoded as ISO-8859-1 before it is written,
        so a record with a character outside ISO-8859-1 raises UnicodeEncodeError.

        :param records: iterable of UTF-8 encoded byte strings
        :return: StringIO holding the complete gzip archive
        """
        buf = StringIO()
        with gzip.GzipFile(fileobj=buf, mode="w") as archive:
            for record in records:
                archive.write(record.decode('utf-8').encode("ISO-8859-1"))
        return buf

    def _cleanup_old_tables(self, yt_client, yt_dir, filename_prefix):
        """
        Remove '<filename_prefix>_<YYYY-mm-dd>' tables in yt_dir older than `yt_ttl` days.

        Best-effort: names that do not match the pattern are skipped, and a failed
        removal is logged without interrupting the cleanup of the remaining tables.
        """
        ttl_days = int(self.Parameters.yt_ttl)
        now = datetime.now()

        for table_name in map(str, yt_client.list(yt_dir)):
            prefix, sep, date_suffix = table_name.rpartition("_")
            if not sep or prefix != filename_prefix:
                continue
            try:
                table_date = datetime.strptime(date_suffix, "%Y-%m-%d")
            except ValueError:
                # The name has the right prefix but does not end with a date.
                continue
            if (now - table_date).days <= ttl_days:
                continue

            table_path = "{}/{}".format(yt_dir, table_name)
            logging.info("Removing table {}".format(table_path))
            try:
                yt_client.remove(table_path)
            except Exception:
                logging.exception("Failed to remove table {}".format(table_path))
