import csv
from copy import deepcopy
from io import StringIO
from itertools import islice
from operator import itemgetter

from django.conf import settings
from fan.utils.fieldgetters import ADDRESS_FIELD_NAMES, FIELD_ALIASES
from fan.utils.emails import clean


class ReaderError(Exception):
    """Base error for CSV input that cannot be turned into a mailing list.

    Raised directly in this module when the input is empty or when no row
    with an e-mail value remains after parsing.
    """


class HeaderError(ReaderError):
    """Error in determining the column headers of the CSV input.

    Raised directly in this module when a header-less file turns out to have
    no columns at all.
    """


class UserTemplateVariablesCountExceeded(Exception):
    """Raised when the CSV has more non-blank columns than the settings allow."""


class UserTemplateVariableLengthExceeded(Exception):
    """Raised when a column header is longer than the settings allow."""


class UserTemplateVariableValueLengthExceeded(Exception):
    """Raised when a non-email cell value is longer than the settings allow."""


class RecipientsCountExceeded(Exception):
    """Raised when the mailing list has more rows than the settings allow."""


class InvalidEmails(Exception):
    """Raised when one or more rows carry an e-mail value that fails validation.

    The rejected values are available as ``invalid_emails`` and are listed in
    the string form of the exception.
    """

    def __init__(self, invalid_emails):
        # Rejected e-mail values, in the order they were collected.
        self.invalid_emails = invalid_emails
        # Forward the argument so ``args`` is populated: copy and pickle
        # re-create exceptions via ``cls(*args)``, which would fail with a
        # TypeError if ``args`` were left empty.
        super().__init__(invalid_emails)

    def __str__(self):
        # ``str()`` keeps the join safe should a non-string value slip in.
        return " ".join(["invalid emails:", *map(str, self.invalid_emails)])


class EmailColumnNotFound(HeaderError):
    """Raised when no column of the CSV can be identified as the e-mail column.

    ``expected_fields`` holds the header names that would have been accepted;
    when it is non-empty it is included in the string form of the exception.
    """

    def __init__(self, expected_fields):
        # Header names that would have been recognised as the address column.
        self.expected_fields = expected_fields
        # Forward the argument so ``args`` is populated: copy and pickle
        # re-create exceptions via ``cls(*args)``, which would fail with a
        # TypeError if ``args`` were left empty.
        super().__init__(expected_fields)

    def __str__(self):
        message = "'email' column not found"
        if self.expected_fields:
            message += " expected one of {}".format(self.expected_fields)
        return message


def parse_csv_data(csv_data):
    """Parse CSV text into a list of row dicts that have an e-mail value.

    The dialect and the headers are worked out from a sample of the input,
    the headers are cleaned up and validated, and then every row of the full
    input is read. Rows whose "email" value is missing or empty are dropped.

    Returns the list of remaining rows as plain dicts. Exceptions raised by
    the header and mailing-list validation helpers propagate to the caller.
    """
    sample = _get_sample(csv_data)
    dialect = _guess_dialect(sample)
    headers, native_headers = _get_headers(sample, dialect)

    # Each step works on the same ``headers`` list; the first three rewrite
    # it in place, the last one only validates it.
    header_steps = (
        _filter_underscored_headers,
        _transform_headers_to_lower,
        _filter_duplicated_headers,
        _validate_headers,
    )
    for step in header_steps:
        step(headers)

    rows = _iter_data(csv_data, dialect, headers, native_headers)
    csv_maillist = [dict(row) for row in rows if row.get("email")]
    _validate_csv_maillist(csv_maillist)
    return csv_maillist


def get_subscribers_number(csv_maillist):
    """Return how many entries ``csv_maillist`` contains."""
    subscribers_total = len(csv_maillist)
    return subscribers_total


def get_preview(csv_maillist, size=5):
    """Return the first ``size`` entries of ``csv_maillist`` (five by default)."""
    preview_window = slice(None, size)
    return csv_maillist[preview_window]


def _guess_dialect(data):
    if not data:
        return csv.excel

    dialect = _guess_first_line_dialect(data)
    if not dialect:
        try:
            dialect = csv.Sniffer().sniff(data, delimiters=";,\t")
        except csv.Error:
            return csv.excel

    return _fix_dialect_quoting(dialect)


def _guess_first_line_dialect(data):
    """Определение диалекта по заголовку"""
    first_line = data.splitlines()[0]
    if "@" in first_line:  # Предполагаем, что файл без заголовка
        return None
    try:
        return csv.Sniffer().sniff(first_line, delimiters=";,\t")
    except csv.Error:
        return None


def _fix_dialect_quoting(dialect):
    """Снифер может не проставлять параметры экранирования - фиксим"""
    if not dialect.doublequote and not dialect.escapechar:
        dialect.doublequote = True
    return dialect


def _get_reader(data, dialect, cls=None, **kwargs):
    cls = cls or csv.reader
    kwargs.setdefault("dialect", dialect)
    if isinstance(data, str):
        data = StringIO(data)
    return cls(data, **kwargs)


def _get_headers(data, dialect):
    """Work out the column names for ``data``.

    Returns a ``(headers, native_headers)`` pair. ``headers`` are the names
    to use as row keys; ``native_headers`` is the raw first row when the file
    has a header line and an empty list when it does not.

    Raises:
        ReaderError: ``data`` is empty.
        HeaderError: the file has no header line and no columns.
        EmailColumnNotFound: no address column could be identified, neither
            by header name nor by looking for "@" in the data.
    """
    if not data:
        raise ReaderError()

    if "@" in _get_first_line(data):
        # An "@" in the first line means the file most likely has no header.
        column_count = _columns_count(data, dialect)
        email_pos = _search_email_in_data(data, dialect)

        if not column_count:
            raise HeaderError()
        if email_pos is None:
            raise EmailColumnNotFound(expected_fields=ADDRESS_FIELD_NAMES)

        headers = ["col%d" % (i + 1) for i in range(column_count)]
        headers[email_pos] = "email"
        return headers, []

    native_headers = _get_first_row(data, dialect)
    # The cells are strings, so a shallow copy is enough to leave
    # ``native_headers`` untouched.
    headers = list(native_headers)
    email_pos = None
    for i, field in enumerate(headers):
        field = field.lower()
        field = FIELD_ALIASES.get(field, field)
        if field in ADDRESS_FIELD_NAMES:
            email_pos = i
            # Normalise the address field name: lower case and alias resolved.
            headers[i] = field

    if email_pos is None:
        email_pos = _search_email_in_data(data, dialect)
        if email_pos is None:
            raise EmailColumnNotFound(expected_fields=ADDRESS_FIELD_NAMES)
        # Data rows can be wider than the header row, so the detected column
        # may lie beyond the last header; pad instead of raising IndexError.
        # The padding gets its "colN" names in the loop below.
        missing = email_pos + 1 - len(headers)
        if missing > 0:
            headers.extend([""] * missing)
        headers[email_pos] = "email"

    # Header rows sometimes contain empty cells; give those positional names.
    for i, header in enumerate(headers):
        if not header:
            headers[i] = "col%d" % (i + 1)

    return headers, native_headers


def _columns_count(data, dialect):
    """Return the largest number of cells found in any row of ``data``.

    Returns 0 when ``data`` yields no rows at all, so callers can treat
    "no columns" as a falsy result instead of handling a ValueError from
    ``max`` on an empty sequence.
    """
    return max((len(row) for row in _get_reader(data, dialect)), default=0)


def _search_email_in_data(data, dialect):
    """Return the index of the column that looks like the e-mail column.

    For every column the number of non-empty rows whose cell contains "@" is
    counted. The column with the highest count wins (on a tie, the one that
    was first seen latest), but only if its count is at least half the number
    of non-empty rows and at least 1. Otherwise None is returned.
    """
    hits = {}
    row_total = 0
    for record in _get_reader(data, dialect):
        if record:
            row_total += 1
            for index, cell in enumerate(record):
                if isinstance(cell, str) and "@" in cell:
                    hits[index] = hits.get(index, 0) + 1

    if not hits:
        return None

    # ">=" lets a later column replace an earlier one with the same count.
    best_column, best_hits = None, 0
    for index, count in hits.items():
        if count >= best_hits:
            best_column, best_hits = index, count

    threshold = max(row_total / 2, 1)
    return best_column if best_hits >= threshold else None


def _get_first_row(data, dialect):
    """Return the first row parsed from ``data``, or None if there is none."""
    return next(iter(_get_reader(data, dialect)), None)


def _iter_data(data, dialect, headers, native_headers):
    """Yield every data row of ``data`` as a dict keyed by ``headers``.

    When ``native_headers`` is non-empty the first line is the header and is
    skipped. Overflow cells (key None) and columns with a blank header
    (key "") are removed, and missing cells are turned into empty strings.
    """
    reader = _get_reader(data, dialect, cls=csv.DictReader, fieldnames=headers)
    if native_headers:
        # Skip the header line. The default matters: a bare next() on an
        # exhausted reader would raise StopIteration inside this generator,
        # which Python turns into RuntimeError (PEP 479).
        next(reader, None)
    for row in reader:
        row.pop(None, None)  # rows longer than the headers produce {None: [...]}
        row.pop("", None)
        for key, value in row.items():
            if value is None:
                row[key] = ""
        yield row


def _filter_underscored_headers(headers):
    for i in range(len(headers)):
        if headers[i].startswith("_"):
            headers[i] = ""


def _transform_headers_to_lower(headers):
    for i in range(len(headers)):
        headers[i] = headers[i].lower()


def _filter_duplicated_headers(headers):
    headers_seen = set()
    for i in range(len(headers)):
        if headers[i] in headers_seen:
            headers[i] = ""
        headers_seen.add(headers[i])


def _get_sample(data):
    if not data:
        return ""
    data = StringIO(data)

    def data_generator(d):
        for line in d:
            if line == "\n" or line == "\r\n":
                continue
            yield line

    sample = "\n".join(islice(data_generator(data), 0, 200))
    return sample


def _check_email(email, allow_empty=True):
    """
    Проверяем корректность поля email.
    Фактически поле не обязательно (:param:allow_empty), если есть данные для обогащения
    """
    if allow_empty and (not email or not email.strip()):
        return True
    if not clean(email, raise_on_error=False):
        return False
    return True


def _get_first_line(data):
    """Return the first non-blank line of ``data``, stripped of whitespace.

    Returns "" when every line is blank.
    """
    stripped = _remove_empty_lines([chunk.strip() for chunk in data.split("\n")])
    return stripped[0] if stripped else ""


def _remove_empty_lines(lines):
    res = []
    for line in lines:
        if not line:
            continue
        res.append(line)
    return res


def _validate_headers(headers):
    """Run the header checks: first the column count, then the name lengths."""
    checks = (
        _validate_user_template_variables_max_count,
        _validate_user_template_variable_max_length,
    )
    for check in checks:
        check(headers)


def _validate_csv_maillist(csv_maillist):
    """Validate the parsed mailing list.

    Raises:
        ReaderError: the list is empty.
        RecipientsCountExceeded: raised by the recipients-count check.
        UserTemplateVariableValueLengthExceeded: raised by the per-row value
            length check, as soon as an offending row is reached.
        InvalidEmails: one or more rows failed the e-mail check; carries all
            rejected values.
    """
    if not csv_maillist:
        raise ReaderError()
    _validate_recipients_max_count(csv_maillist)

    rejected = []
    for entry in csv_maillist:
        _validate_user_template_variable_values_max_length(entry)
        address = entry.get("email")
        if _check_email(address):
            continue
        rejected.append(address)

    if rejected:
        raise InvalidEmails(rejected)


def _validate_recipients_max_count(csv_maillist):
    """Raise RecipientsCountExceeded if the list is longer than
    ``settings.MAX_MAILLIST_RECIPIENTS``.
    """
    recipients_total = get_subscribers_number(csv_maillist)
    limit = settings.MAX_MAILLIST_RECIPIENTS
    if recipients_total > limit:
        raise RecipientsCountExceeded()


def _validate_user_template_variables_max_count(headers):
    """Raise UserTemplateVariablesCountExceeded if there are too many columns.

    Only headers that are not "" are counted. The limit is
    ``settings.USER_TEMPLATE_VARIABLES_MAX_COUNT`` plus one for the email
    column.
    """
    visible = [name for name in headers if name != ""]
    # email + user_template_variables
    allowed = settings.USER_TEMPLATE_VARIABLES_MAX_COUNT + 1
    if len(visible) > allowed:
        raise UserTemplateVariablesCountExceeded()


def _validate_user_template_variable_max_length(headers):
    for header in headers:
        if len(header) > settings.USER_TEMPLATE_VARIABLE_MAX_LENGTH:
            raise UserTemplateVariableLengthExceeded()


def _validate_user_template_variable_values_max_length(row):
    for key, value in list(row.items()):
        if key == "email":
            continue
        if len(value) > settings.USER_TEMPLATE_VARIABLE_VALUE_MAX_LENGTH:
            raise UserTemplateVariableValueLengthExceeded()
