"""
A very simple ClickHouse client
"""

import json
import re
import requests
from datetime import date, datetime
from urllib.parse import urlencode
from io import BytesIO as StringIO
from base64 import b64encode

# Names exported by a star-import of this module. "ClickHouseAsyncClient" is
# appended further down, but only when tornado can be imported.
__all__ = [
    "ClickHouseQuery",
    "ClickHouseClient",
]

# Extra bytes added on top of the real query length when computing the
# max_query_size request parameter; guards against a ClickHouse parsing bug.
QUERY_SIZE_GUARD = 16 * 1024

# Connection defaults used when the caller passes no host / port.
DEFAULT_HOST = "localhost"
DEFAULT_PORT = 8123


# Matches every single byte that must be backslash-escaped when writing a
# value: backspace, form feed, CR, LF, tab, NUL, single quote and backslash.
re_escape = re.compile(b"[\\b\\f\\r\\n\\t\\0\\'\\\\]", re.S)
# Raw byte -> escaped two-byte sequence; has an entry for every byte that
# re_escape can match.
re_escapes = {
    b"\b": b"\\b",
    b"\f": b"\\f",
    b"\r": b"\\r",
    b"\n": b"\\n",
    b"\t": b"\\t",
    b"\0": b"\\0",
    b"'": b"\\'",
    b"\\": b"\\\\",
}

# Matches a backslash followed by any single byte (re.S lets "." match a
# newline as well).
re_unescape = re.compile(b"\\\\.", re.S)
# Escaped sequence -> raw byte for the control characters. Sequences that are
# not listed here (e.g. \' and \\) are handled by unescape_clickhouse_char,
# which simply drops the leading backslash.
re_unescapes = {
    b"\\b": b"\b",
    b"\\f": b"\f",
    b"\\r": b"\r",
    b"\\n": b"\n",
    b"\\t": b"\t",
    b"\\0": b"\0",
}


def escape_clickhouse_char(m):
    """Substitution callback for re_escape: map the matched byte to its escape sequence."""
    raw_byte = m.group(0)
    return re_escapes[raw_byte]


def unescape_clickhouse_char(m):
    """Substitution callback for re_unescape: decode one backslash sequence.

    Sequences listed in re_unescapes are translated through that table; for
    any other sequence the leading backslash is dropped and the remaining
    byte is returned unchanged.
    """
    sequence = m.group(0)
    decoded = re_unescapes.get(sequence)
    return sequence[1:] if decoded is None else decoded


def quote_value(value):
    """Return ``value`` as bytes, formatted according to ClickHouse quoting rules.

    Supported types:

    * ``datetime`` -> ``YYYY-MM-DD HH:MM:SS`` (sub-second part is dropped)
    * ``date``     -> ``YYYY-MM-DD``
    * ``int``      -> decimal digits (``bool`` is an ``int`` and becomes 0/1)
    * ``float``    -> shortest representation that round-trips the value
    * ``str``      -> UTF-8 encoded, then escaped like ``bytes``
    * ``bytes``    -> special characters backslash-escaped via ``re_escape``

    Raises:
        TypeError: if ``value`` is of any other type (including ``None``).
    """
    # datetime must be tested before date: datetime is a subclass of date.
    if isinstance(value, datetime):
        return b"%04d-%02d-%02d %02d:%02d:%02d" % (
            value.year,
            value.month,
            value.day,
            value.hour,
            value.minute,
            value.second,
        )
    if isinstance(value, date):
        return b"%04d-%02d-%02d" % (value.year, value.month, value.day)
    if isinstance(value, int):
        return b"%d" % (value,)
    if isinstance(value, float):
        # "%f" keeps only six decimal places and silently turns small values
        # such as 1e-10 into 0.000000.  repr() of a plain float yields the
        # shortest string that parses back to the same value; float() strips
        # any subclass so a customised __repr__ cannot leak into the output.
        return repr(float(value)).encode("ascii")
    if isinstance(value, str):
        value = value.encode("utf8")
    if isinstance(value, bytes):
        return re_escape.sub(escape_clickhouse_char, value)
    raise TypeError("Cannot quote values of type %s" % (type(value).__name__,))


def unquote_value(value):
    """Return the unescaped bytes for a quoted ClickHouse value.

    ``str`` input is UTF-8 encoded first; the result is always ``bytes``.
    Every backslash sequence is decoded by ``unescape_clickhouse_char``.

    Raises:
        TypeError: if ``value`` is neither ``str`` nor ``bytes``.
    """
    if isinstance(value, str):
        value = value.encode("utf8")
    if isinstance(value, bytes):
        return re_unescape.sub(unescape_clickhouse_char, value)
    # Was `type(value).__name` (missing trailing underscores), which raised
    # AttributeError instead of the intended TypeError.
    raise TypeError("Cannot unquote values of type %s" % (type(value).__name__,))


def quote_row(*values):
    """Quote each value and join them into one tab-separated, newline-terminated row."""
    cells = [quote_value(cell) for cell in values]
    return b"\t".join(cells) + b"\n"


def unquote_row(line):
    """Split a ClickHouse tab-separated row and return its unquoted columns as a list.

    A single trailing newline, if present, is removed before splitting.
    """
    body = line[:-1] if line.endswith(b"\n") else line
    return [unquote_value(cell) for cell in body.split(b"\t")]


class Error(Exception):
    """Base class for the errors defined by this module."""


class ConnectionError(Error):
    """The HTTP request failed, or the server answered with a non-500 error status.

    Note: inside this module the name shadows the built-in ConnectionError.
    """


class TimeoutError(Error):
    """The request to the server timed out.

    Note: inside this module the name shadows the built-in TimeoutError.
    """


class QueryError(Error):
    """The server rejected the query (HTTP 500) or returned a malformed result."""


class Query:
    r"""Buffer for building a ClickHouse query as a byte string.

    Primarily for generating data in a tab-separated format.  Text passed in
    as ``str`` is encoded as UTF-8; the accumulated value is always ``bytes``.

    >>> Query()
    Query(b'')
    >>> q = Query('INSERT INTO table FORMAT TabSeparated')
    >>> q.value
    b'INSERT INTO table FORMAT TabSeparated\n'
    >>> q.appendValue('hello\tworld')
    >>> q.value
    b'INSERT INTO table FORMAT TabSeparated\nhello\\tworld'
    >>> q.reset()
    >>> q.appendRow('hello\tworld', 123)
    >>> q.value
    b'INSERT INTO table FORMAT TabSeparated\nhello\\tworld\t123\n'
    """

    def __init__(self, query=None, external_file=None, structure=None, external_name=None):
        """Create a buffer, optionally seeded with an initial query line.

        Args:
            query: initial statement (``str`` or ``bytes``); written as the
                first line of the buffer on every ``reset()``.
            external_file: stored as-is on the instance for the client to
                pick up; not interpreted by this class.
            structure: stored as-is on the instance; not interpreted here.
            external_name: stored as-is on the instance; not interpreted here.
        """
        if isinstance(query, str):
            query = query.encode("utf-8")
        self.query = query
        self.external_file = external_file
        self.structure = structure
        self.external_name = external_name

        self.reset()

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.value)

    def reset(self):
        """Discard everything appended so far and restore the initial query line."""
        self.buffer = StringIO()
        if self.query is not None:
            self.appendLine(self.query)

    @property
    def value(self):
        """The accumulated query as ``bytes``."""
        return self.buffer.getvalue()

    def append(self, data):
        """Append raw (unquoted) data; ``str`` is encoded as UTF-8."""
        # The buffer only accepts bytes; encode text the same way __init__ does
        # so that str input does not fail with TypeError.
        if isinstance(data, str):
            data = data.encode("utf-8")
        self.buffer.write(data)

    def appendLine(self, line):
        """Append raw (unquoted) data followed by a newline; ``str`` is encoded as UTF-8."""
        if isinstance(line, str):
            line = line.encode("utf-8")
        self.buffer.write(line)
        self.buffer.write(b"\n")

    def appendValue(self, value):
        """Append a single value quoted with ``quote_value`` (no separator added)."""
        self.buffer.write(quote_value(value))

    def appendRow(self, *values):
        """Append the values as one quoted, tab-separated, newline-terminated row."""
        for i, value in enumerate(values):
            if i != 0:
                self.buffer.write(b"\t")
            self.buffer.write(quote_value(value))
        self.buffer.write(b"\n")


def decode_json(stream):
    """Parse the whole stream as one JSON document and return it.

    The stream is closed afterwards, whether or not parsing succeeded.
    """
    try:
        document = json.load(stream)
    finally:
        stream.close()
    return document


def decode_tsv(stream):
    """Lazily decode a TabSeparated stream, yielding each row as a list of columns.

    Reading stops at the first line that is not newline-terminated (end of
    stream or an incomplete trailing line); such a line is not yielded.  The
    stream is closed when the generator finishes or is closed.
    """
    try:
        row = stream.readline()
        while row.endswith(b"\n"):
            yield unquote_row(row)
            row = stream.readline()
    finally:
        stream.close()


def decode_tsv_named(stream):
    """Lazily decode a TabSeparatedWithNames stream, yielding each row as a dict.

    The first line supplies the column names used as dict keys.  Reading
    stops at the first data line that is not newline-terminated; such a line
    is not yielded.  The stream is closed when the generator finishes or is
    closed.

    Raises:
        QueryError: on first iteration, if the header line is missing or not
            newline-terminated.
    """
    try:
        header = stream.readline()
        if not header.endswith(b"\n"):
            raise QueryError("TabSeparated header line is missing")
        columns = unquote_row(header)
        row = stream.readline()
        while row.endswith(b"\n"):
            yield dict(zip(columns, unquote_row(row)))
            row = stream.readline()
    finally:
        stream.close()


class Client:
    """
    A very simplistic ClickHouse client that talks to the server over HTTP(S)
    using ``requests``.
    """

    def __init__(
        self,
        host=None,
        port=None,
        keep_alive=False,
        timeout=None,
        username=None,
        password=None,
        secure=True,
    ):
        """Store connection settings; no network traffic happens here.

        Args:
            host: server host name; falls back to DEFAULT_HOST when falsy.
            port: server port; falls back to DEFAULT_PORT when falsy.
            keep_alive: when true, requests go through one shared
                ``requests.Session`` instead of a fresh connection per call.
            timeout: passed unchanged as the ``timeout`` of every request.
            username: when set, HTTP Basic credentials are sent.
            password: password for ``username``; ``None`` is sent as empty.
            secure: use ``https`` when true, ``http`` otherwise.
        """
        # NOTE(review): secure defaults to True while DEFAULT_PORT is 8123,
        # which looks like the plain-HTTP port - confirm this pairing is intended.
        self.secure = secure
        self.host = host or DEFAULT_HOST
        self.port = port or DEFAULT_PORT
        self.keep_alive = keep_alive
        self.timeout = timeout
        self.username = username
        self.password = password
        # Always define the attribute so that toggling keep_alive after
        # construction cannot fail with AttributeError in query().
        self._session = requests.Session() if keep_alive else None

    def __repr__(self):
        return "%s(%r, %r)" % (self.__class__.__name__, self.host, self.port)

    def _make_uri(self, **kwargs):
        """Build the request URL, encoding ``kwargs`` as the query string."""
        proto = "https" if self.secure else "http"
        return "%s://%s:%d/?%s" % (proto, self.host, self.port, urlencode(kwargs))

    def _auth_headers(self):
        """Return the HTTP Basic ``Authorization`` header dict, or ``{}`` without a username."""
        if not self.username:
            return {}
        credentials = "%s:%s" % (self.username, self.password or "")
        token = b64encode(credentials.encode()).decode()
        return {"Authorization": "Basic %s" % token}

    def query(self, query, stream=False):
        """POST a query to the server and return its response.

        Args:
            query: ``Query`` instance, ``str`` or ``bytes``.  A ``Query`` with
                an ``external_file`` is sent with the statement in the URL
                (``query`` parameter) and, when ``external_name`` is also
                set, the file as a multipart upload under that name; when
                ``structure`` is set too it is passed as the
                ``<external_name>_structure`` URL parameter.  Anything else
                is sent as the request body.
            stream: when true, return the raw response stream
                (``response.raw``) instead of the fully read body.

        Returns:
            ``response.raw`` if ``stream`` is true, else ``response.content``.

        Raises:
            TimeoutError: the request timed out.
            ConnectionError: any other ``requests`` failure, or an HTTP
                4xx/5xx status other than 500.
            QueryError: the server answered with HTTP 500; carries the
                response text.
        """
        external_file = None
        external_name = None
        query_kwargs = {}
        query_size = 0

        if isinstance(query, Query):
            external_file = query.external_file
            external_name = query.external_name
            statement = query.value
            if external_file:
                if external_name and query.structure:
                    query_kwargs["{0}_structure".format(external_name)] = query.structure
                # The statement travels in the URL, so its length must still be
                # counted: otherwise max_query_size would be just the guard and
                # any statement longer than that would be rejected.
                query_kwargs["query"] = statement
                query_size = len(statement)
                query = None
            else:
                query = statement

        if isinstance(query, str):
            query = query.encode("utf8")
        if query:
            query_size = len(query)

        query_kwargs.update(max_query_size=str(query_size + QUERY_SIZE_GUARD))
        url = self._make_uri(**query_kwargs)
        headers = self._auth_headers()

        if self.keep_alive:
            if self._session is None:
                self._session = requests.Session()
            request_func = self._session.post
        else:
            request_func = requests.post

        try:
            response = request_func(
                url,
                data=query,
                headers=headers,
                stream=stream,
                timeout=self.timeout,
                files={external_name: external_file} if external_name and external_file else None,
            )
        except requests.exceptions.Timeout as e:
            raise TimeoutError() from e
        except requests.exceptions.RequestException as e:
            raise ConnectionError("%s" % (e.args,)) from e
        # 500 carries the server's error text; check it before the generic range.
        if response.status_code == 500:
            raise QueryError(response.text)
        if 400 <= response.status_code < 600:
            raise ConnectionError("%s %s" % (response.status_code, response.reason))
        if stream:
            return response.raw
        return response.content

    def query_json(self, query):
        """Run ``query`` and return the response decoded as JSON."""
        return decode_json(self.query(query, stream=True))

    def query_tsv(self, query):
        """Run ``query`` and return a generator of rows (lists of columns)."""
        return decode_tsv(self.query(query, stream=True))

    def query_tsv_named(self, query):
        """Run ``query`` and return a generator of rows as dicts keyed by column name."""
        return decode_tsv_named(self.query(query, stream=True))


# Prefixed aliases of Query and Client; these are the names listed in __all__,
# for use with 'from yandex.clickhouse import *'
ClickHouseQuery = Query
ClickHouseClient = Client

try:
    import tornado
except ImportError:
    # tornado is not available
    pass
else:
    from tornado import gen
    from tornado.httpclient import AsyncHTTPClient, HTTPError as AsyncHTTPError

    class AsyncClient:
        """Asynchronous ClickHouse client built on tornado's ``AsyncHTTPClient``.

        The ``query*`` methods are tornado coroutines and must be yielded /
        awaited.  Only the text of a ``Query`` is sent; its ``external_file``
        is not used by this class.
        """

        def __init__(
            self,
            host=None,
            port=None,
            keep_alive=False,
            timeout=None,
            username=None,
            password=None,
            secure=True,
        ):
            """Store connection settings and create the tornado HTTP client.

            Args:
                host: server host name; falls back to DEFAULT_HOST when falsy.
                port: server port; falls back to DEFAULT_PORT when falsy.
                keep_alive: stored on the instance but not read by this class.
                timeout: used as both connect and request timeout when not None.
                username: when set, HTTP Basic credentials are sent.
                password: password for ``username``; ``None`` is sent as empty.
                secure: use ``https`` when true, ``http`` otherwise.
            """
            self.secure = secure
            self.host = host or DEFAULT_HOST
            self.port = port or DEFAULT_PORT
            self.keep_alive = keep_alive
            self.timeout = timeout
            self.username = username
            self.password = password
            self._client = AsyncHTTPClient()

        def _make_uri(self, **kwargs):
            """Build the request URL, encoding ``kwargs`` as the query string."""
            proto = "https" if self.secure else "http"
            return "%s://%s:%d/?%s" % (proto, self.host, self.port, urlencode(kwargs))

        @gen.coroutine
        def query(self, query, stream=False):
            """POST a query and resolve to its response.

            Args:
                query: ``Query`` instance, ``str`` or ``bytes``.
                stream: when true, resolve to ``response.buffer`` instead of
                    ``response.body``.

            Raises:
                TimeoutError: the HTTP client reported code 599.
                QueryError: the server answered with HTTP 500; carries the
                    response body.
                ConnectionError: any other HTTP error.
            """
            if isinstance(query, Query):
                query = query.value
            if isinstance(query, str):
                query = query.encode("utf8")
            url = self._make_uri(max_query_size=str(len(query) + QUERY_SIZE_GUARD))
            kwargs = {}
            if self.timeout is not None:
                kwargs["connect_timeout"] = self.timeout
                kwargs["request_timeout"] = self.timeout
            headers = {}
            if self.username:
                credentials = "%s:%s" % (self.username, self.password or "")
                # b64encode() takes and returns bytes: encode the credentials
                # first and decode the token, otherwise the call raises
                # TypeError (and the header would contain "b'...'").
                token = b64encode(credentials.encode()).decode()
                headers["Authorization"] = "Basic %s" % token
            try:
                response = yield self._client.fetch(
                    url, method="POST", headers=headers, body=query, allow_ipv6=True, **kwargs
                )
            except AsyncHTTPError as e:
                if e.code == 599:
                    raise TimeoutError() from e
                if e.code == 500 and e.response is not None:
                    raise QueryError(e.response.body) from e
                if e.response is not None:
                    raise ConnectionError("%s %s" % (e.response.code, e.response.reason)) from e
                raise ConnectionError(e.message) from e
            if stream:
                raise gen.Return(response.buffer)
            raise gen.Return(response.body)

        @gen.coroutine
        def query_json(self, query):
            """Run ``query`` and resolve to the response decoded as JSON."""
            stream = yield self.query(query, stream=True)
            raise gen.Return(decode_json(stream))

        @gen.coroutine
        def query_tsv(self, query):
            """Run ``query`` and resolve to a generator of rows (lists of columns)."""
            stream = yield self.query(query, stream=True)
            raise gen.Return(decode_tsv(stream))

        @gen.coroutine
        def query_tsv_named(self, query):
            """Run ``query`` and resolve to a generator of rows as dicts keyed by column name."""
            stream = yield self.query(query, stream=True)
            raise gen.Return(decode_tsv_named(stream))

    __all__.append("ClickHouseAsyncClient")
    ClickHouseAsyncClient = AsyncClient

if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module's docstrings.
    from doctest import testmod

    testmod()
