# coding=utf8

import csv
import logging
import mimetypes
import os
from time import mktime
from typing import Any, Iterable, Tuple
from wsgiref.handlers import format_date_time as http_format_date_time

import requests

from sandbox import sdk2
from sandbox.projects.Strm.StrmBuildClickhouseDictionary.utils import SourceNotModified, stream_decode_response_unicode


class StrmClickhouseDictionary(sdk2.Resource):
    """
    File to be imported into ClickHouse as a dictionary
    """

    # NOTE(review): presumably keeps the resource out of release workflows — confirm against sdk2 docs.
    releasable = False
    # Resource attribute holding the dictionary name; the build task in this file
    # fills it from its own ``dictionary_name`` parameter when creating the resource.
    dictionary_name = sdk2.parameters.String("Dictionary name")


class StrmBuildClickhouseDictionaryBase(sdk2.Task):
    """
    Task to convert data to ClickHouse dictionary format and upload it to S3
    """

    class Requirements(sdk2.Requirements):
        # Execution requirements declared for the task.
        # NOTE(review): units are not visible in this file — presumably seconds for
        # kill_timeout and MiB for ram / disk_space; confirm against sdk2 docs.
        kill_timeout = 600

        cores = 1
        ram = 1024
        disk_space = 1024

        # Empty override of the base Caches declaration.
        # NOTE(review): presumably states that the task needs no shared caches — confirm.
        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Task.Parameters):

        # Name of the dictionary; used in the resource description and stored
        # in the ``dictionary_name`` attribute of the created resource.
        dictionary_name = sdk2.parameters.String("Dictionary name", required=True)

        # Name of the produced file; also the last component of the S3 key and
        # the input for MIME type guessing on upload.
        output_name = sdk2.parameters.String(
            "Output file name (with extension)",
            required=True,
        )

        # URL the source data is streamed from.
        source_url = sdk2.parameters.String("Source file URL", required=True)

        # When enabled (and S3 upload is enabled too), the Last-Modified of the
        # existing S3 object is sent as If-Modified-Since to the source.
        check_last_modified = sdk2.parameters.Bool(
            "Check if output is newer than source (via Last-Modified)",
            default=True,
        )

        with sdk2.parameters.Group("S3 Output parameters"):
            upload_to_s3 = sdk2.parameters.Bool("Upload dictionary to S3", default=True)

            # NOTE(review): presumably the nested parameters are only shown/required
            # when ``upload_to_s3`` is True — confirm against sdk2 docs.
            with upload_to_s3.value[True]:
                s3_endpoint = sdk2.parameters.String(
                    "S3 Endpoint URL",
                    default="https://s3.mds.yandex.net",
                    required=True,
                )

                s3_bucket = sdk2.parameters.String(
                    "S3 Bucket",
                    default="strm",
                    required=True,
                )

                # Joined with ``output_name`` to form the S3 object key.
                s3_prefix = sdk2.parameters.String(
                    "S3 prefix for files",
                    default="clickhouse/dicts/",
                    required=True,
                )

                # Secret whose data must contain the AWS_ACCESS_KEY_ID and
                # AWS_SECRET_ACCESS_KEY keys read when the S3 client is created.
                s3_secret = sdk2.parameters.YavSecret(
                    "S3 Access Key Secret (with AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY)",
                    required=True,
                )

    @staticmethod
    def output_writer(output):
        """
        Build a callable that writes rows to ``output`` as CSV.

        :param output: file-like object the rows are written to
        :return: bound ``writerows`` method of a ``csv.writer`` configured with
            the "excel" dialect and a backslash escape character
        """
        writer = csv.writer(output, dialect="excel", escapechar="\\")
        return writer.writerows

    @staticmethod
    def parse_lines(data_file):
        # type: (Iterable[str]) -> Iterable[Tuple[str, int]]
        """
        Convert decoded source lines into rows of the output dictionary.

        The base class provides no implementation and always raises.
        NOTE(review): presumably meant to be overridden by concrete tasks — confirm.

        :param data_file: iterable of text lines of the source
        :raises NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError()

    @staticmethod
    def decode_chunks_to_lines(chunks):
        """
        Turn a stream of chunks into a stream of lines.

        Chunks are passed through ``stream_decode_response_unicode`` and split
        with ``splitlines()``, so the yielded lines carry no line terminators.
        A line that spans several chunks is buffered until its terminator (or
        the end of the stream) is seen.

        :param chunks: iterable of chunks accepted by ``stream_decode_response_unicode``
        :return: generator of lines without their terminators
        """
        pending = None

        for chunk in stream_decode_response_unicode(chunks):

            if pending is not None:
                chunk = pending + chunk
                pending = None

            lines = chunk.splitlines()

            # ``lines`` being non-empty implies ``chunk`` is non-empty as well.
            if lines:
                if chunk[-1] == "\r":
                    # The chunk boundary may fall inside a "\r\n" pair. Hold the last
                    # line back together with its "\r" so that a "\n" opening the next
                    # chunk completes the pair instead of yielding a spurious empty line.
                    pending = lines.pop() + "\r"
                elif lines[-1] and lines[-1][-1] == chunk[-1]:
                    # The chunk does not end with a line terminator: its last line
                    # is incomplete and has to be joined with the next chunk.
                    pending = lines.pop()

            for line in lines:
                yield line

        if pending is not None:
            # Flush the tail of the stream; splitlines() drops a "\r" held back above.
            for line in pending.splitlines():
                yield line

    def create_s3_client(self):
        """
        Create a botocore S3 client from the task parameters.

        The endpoint comes from ``s3_endpoint``; the access key pair is read
        from the ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` keys of
        the ``s3_secret`` data.

        :return: botocore client for the "s3" service
        """
        import botocore.session

        credentials = self.Parameters.s3_secret.data()

        return botocore.session.get_session().create_client(
            "s3",
            endpoint_url=self.Parameters.s3_endpoint,
            aws_access_key_id=credentials["AWS_ACCESS_KEY_ID"],
            aws_secret_access_key=credentials["AWS_SECRET_ACCESS_KEY"],
        )

    def get_last_modified(self, s3_client):
        """
        Return the modification time of the already uploaded output object.

        The object is looked up with a HEAD request under the key built from
        ``s3_prefix`` and ``output_name``. If S3 answers with error code 404,
        the Unix epoch (timestamp 0) is used instead.

        :param s3_client: S3 client providing ``head_object``
        :return: the timestamp formatted as an HTTP date string (GMT)
        :raises botocore.exceptions.ClientError: for any S3 error other than 404
        """
        import calendar

        from botocore.exceptions import ClientError

        logging.info("Getting Last-Modified of output object")

        try:
            response = s3_client.head_object(
                Bucket=self.Parameters.s3_bucket,
                Key=os.path.join(self.Parameters.s3_prefix, self.Parameters.output_name),
            )
        except ClientError as e:
            if e.response["Error"]["Code"] != "404":
                raise
            last_modified = 0
        else:
            # Convert through UTC explicitly: time.mktime() would interpret the
            # broken-down time as *local* time and shift the result by the host's
            # UTC offset, while the HTTP date formatter below expects a real
            # epoch timestamp.
            last_modified = calendar.timegm(response["LastModified"].utctimetuple())

        last_modified = http_format_date_time(last_modified)
        logging.info("Got Last-Modified: %s", last_modified)

        return last_modified

    def download_source(self, url, if_modified_since=None):
        """
        Stream the content behind ``url`` chunk by chunk.

        This is a generator, so the request is only sent once iteration starts.

        :param url: URL to download
        :param if_modified_since: optional value for the ``If-Modified-Since``
            request header; the header is omitted when None
        :return: generator of response body chunks (requested in pieces of 4096 bytes)
        :raises SourceNotModified: if the server answers with status 304
        :raises requests.HTTPError: if ``raise_for_status()`` rejects the response
        """
        # Log the URL that is actually requested, not the task parameter.
        logging.info("Streaming source from %s", url)

        headers = {}

        if if_modified_since is not None:
            headers["If-Modified-Since"] = if_modified_since

        with requests.get(url, headers=headers, stream=True) as req:
            req.raise_for_status()

            # 304 is not an error for raise_for_status(), so it is checked separately.
            if req.status_code == 304:
                raise SourceNotModified

            for chunk in req.iter_content(chunk_size=4096):
                yield chunk

    def save_output(self, path, lines):
        # type: (sdk2.Path, Iterable[Tuple[str, int]]) -> None
        """
        Write the converted rows to the file at ``path``.

        The file is opened in ``"w+b"`` mode and the rows are written through
        the callable returned by ``output_writer``.

        :param path: destination file
        :param lines: iterable of rows to write
        """
        logging.info("Saving converted lines to resource file")

        with path.open("w+b") as output:
            write_rows = self.output_writer(output)
            write_rows(lines)

    def upload_resource_to_s3(self, s3_client, path):
        # type: (Any, sdk2.Path) -> None
        """
        Upload the file at ``path`` to S3.

        The object key is built from ``s3_prefix`` and ``output_name``; the
        content type is guessed from ``output_name``.

        :param s3_client: S3 client providing ``put_object``
        :param path: local file to upload
        """
        content_type, _ = mimetypes.guess_type(self.Parameters.output_name)
        if content_type is None:
            # guess_type() returns None for unknown or missing extensions, and a
            # None ContentType is rejected by the client's parameter validation.
            content_type = "application/octet-stream"

        logging.info("Uploading resource to S3")

        with path.open("rb") as f:
            s3_client.put_object(
                Bucket=self.Parameters.s3_bucket,
                Key=os.path.join(self.Parameters.s3_prefix, self.Parameters.output_name),
                Body=f,
                ContentType=content_type,
            )

        logging.info("Uploaded `%s' to S3", self.Parameters.output_name)

    def get_lines_from_source(self, source_url, if_modified_since):
        # type: (str, str) -> Iterable[Tuple[str, int]]
        """
        Chain download, line decoding and parsing of the source.

        :param source_url: URL passed to ``download_source``
        :param if_modified_since: value forwarded to ``download_source``
        :return: whatever ``parse_lines`` returns for the decoded lines
        """
        return self.parse_lines(
            self.decode_chunks_to_lines(
                self.download_source(source_url, if_modified_since=if_modified_since)
            )
        )

    def on_execute(self):
        """
        Build the dictionary file and publish it.

        Steps:
          1. create the S3 client when upload is enabled;
          2. when upload and the Last-Modified check are both enabled, take the
             modification time of the existing S3 object;
          3. stream, parse and save the source; stop if the source reports
             that it was not modified;
          4. register the saved file as a ``StrmClickhouseDictionary`` resource
             and, when upload is enabled, put it to S3.
        """
        logging.info("Started execution")

        s3_client = self.create_s3_client() if self.Parameters.upload_to_s3 else None

        source_url = str(self.Parameters.source_url)
        output_file = sdk2.Path(self.Parameters.output_name)

        if self.Parameters.upload_to_s3 and self.Parameters.check_last_modified:
            output_last_modified = self.get_last_modified(s3_client)
        else:
            logging.info("Skipping Last-Modified check")
            output_last_modified = None

        try:
            self.save_output(
                output_file,
                self.get_lines_from_source(source_url, output_last_modified),
            )
        except SourceNotModified:
            logging.info("Source is not modified since last build, skipping")
            return

        logging.info("Creating resource")
        dictionary_name = self.Parameters.dictionary_name
        resource = StrmClickhouseDictionary(
            self,
            "{} clickhouse dictionary".format(dictionary_name),
            str(output_file),
            ttl=7,
            dictionary_name=dictionary_name,
        )
        sdk2.ResourceData(resource).ready()

        if self.Parameters.upload_to_s3:
            self.upload_resource_to_s3(s3_client, output_file)
