# -*- coding: utf-8 -*-
import csv
import json
import logging

from passport.backend.core.historydb.entry import EVENT_LOG_SENSITIVE_FIELDS_BY_VERSION
from passport.backend.library.historydbloader.historydb.exceptions import BadLogLineError
from passport.backend.library.historydbloader.historydb.processors import (
    decrypt_processor,
    default_processor,
    float_processor,
    int_processor,
    ip_processor,
    json_processor,
    rfctime_processor,
    unhexify_processor,
)
import six
from six.moves import zip


# Module-level logger; the parsers below emit a warning through it for every
# malformed line they skip when called with ignore_errors=True.
log = logging.getLogger('historydbloader.historydb.parser')


class ParserType(object):
    """Identifiers of the supported log formats.

    Each constant is a plain string naming one log format.  ``by_filename``
    maps an exact log file name to the format used to parse it.
    """

    # Format identifiers, in alphabetical order of the constant name.
    AUTH = 'auth'
    AUTH_CHALLENGE = 'auth-challenge'
    AVATARS = 'avatars'
    EVENT = 'event'
    LOYALTY = 'loyalty'
    MAIL_USER_JOURNAL = 'mail_user_journal'
    OAUTH = 'oauth'
    PASSPORT_MESSAGES = 'passport-messages'
    PASSPORT_STATBOX = 'passport-statbox'
    PASSPORT_TOLOKA = 'passport-toloka'
    RESTORE = 'restore'
    SOCIAL_BINDINGS = 'social-bindings'
    STATBOX_SYNTHETIC = 'statbox-synthetic-log'
    YASMS_PRIVATE = 'yasms-private'

    # Exact file name -> format identifier.  Several file names may share a
    # format (e.g. every *auth.log variant is parsed as AUTH).
    by_filename = {
        'account_modification.log': PASSPORT_STATBOX,
        'auth.log': AUTH,
        'auth_challenge.log': AUTH_CHALLENGE,
        'authnew.log': AUTH,
        'blackbox-auth.log': AUTH,
        'event.log': EVENT,
        'mda-resign-auth.log': AUTH,
        'mda-root-auth.log': AUTH,
        'oauth.event.log': OAUTH,
        'restore.log': RESTORE,
        'social-bindings.log': SOCIAL_BINDINGS,
        'social-bindings.statbox.log': SOCIAL_BINDINGS,
        'statbox-synthetic-log': STATBOX_SYNTHETIC,
        'passport-messages.log': PASSPORT_MESSAGES,
        'statbox.log': PASSPORT_STATBOX,
        'avatars.log': AVATARS,
        'social-broker.avatars.log': AVATARS,
        'user_journal.tskv': MAIL_USER_JOURNAL,
        'yasms.dlr.tskv': YASMS_PRIVATE,
        'yasms.private.tskv': YASMS_PRIVATE,
    }


class EntryParser(object):
    """Interface implemented by every log entry parser in this module."""

    def process_line(self, line_values):
        """Turn one raw log record into an entry; subclasses must override."""
        raise NotImplementedError

    def parse(self, fileobj, ignore_errors=True):
        """Produce entries read from ``fileobj``; subclasses must override."""
        raise NotImplementedError


class JSONEntryParser(EntryParser):
    """Parser for logs that hold one JSON document per line."""

    def process_line(self, line):
        """Decode a single JSON-encoded line and return the resulting object."""
        return json.loads(line)

    def parse(self, fileobj, ignore_errors=True):
        """Yield the decoded object for every non-blank line of ``fileobj``.

        Lines are stripped of surrounding whitespace first; blank lines are
        skipped.  When a line fails to decode (``ValueError``), it is either
        logged as a warning and skipped (``ignore_errors`` true) or reported
        by raising ``BadLogLineError``.
        """
        for raw_line in fileobj:
            stripped = raw_line.strip()
            if stripped:
                try:
                    yield self.process_line(stripped)
                except ValueError:
                    message = 'Invalid json line: "%s"' % stripped
                    if not ignore_errors:
                        raise BadLogLineError(message)
                    log.warning(message)


class CSVEntryParser(EntryParser):
    """Base parser for delimiter-separated logs with positional fields.

    Subclasses describe their record layout through the class attributes
    below; the parsing machinery itself lives here.
    """

    # Field names, in the order the values appear in a line.
    FIELDS = []
    # Field name -> callable converting the raw value; fields without an
    # entry go through default_processor.
    FIELDS_PROCESSORS = {}
    # Field name -> key under which the value is stored in the entry.
    FIELDS_NAMES_MAPPER = {}
    FIELD_SEPARATOR = ' '
    QUOTECHAR = '`'
    # Entry keys (i.e. names after mapping) that must be present.
    REQUIRED_FIELDS = []
    # Fields that are being appended to the record format, used to extend an
    # existing format.  Once the switch to the new format is complete, these
    # values should be moved into the main FIELDS list.
    FIELDS_IN_TRANSITION = []

    def process_line(self, line_values):
        """Build an entry dict from ``line_values``, the list of field values.

        A line must carry either exactly the FIELDS values, or the FIELDS
        values followed by all FIELDS_IN_TRANSITION values.  Values whose
        processor returns ``None`` are left out of the entry.

        Raises BadLogLineError on a wrong number of values or when one of
        REQUIRED_FIELDS is absent from the resulting entry.
        """
        extra_count = len(line_values) - len(self.FIELDS)
        if extra_count != 0 and extra_count != len(self.FIELDS_IN_TRANSITION):
            raise BadLogLineError('Line contains bad number of values')

        entry = {}
        expected_fields = self.FIELDS + self.FIELDS_IN_TRANSITION
        for raw_name, raw_value in zip(expected_fields, line_values):
            convert = self.FIELDS_PROCESSORS.get(raw_name, default_processor)
            converted = convert(raw_value)
            if converted is not None:
                entry[self.FIELDS_NAMES_MAPPER.get(raw_name, raw_name)] = converted

        for required_name in self.REQUIRED_FIELDS:
            if required_name not in entry:
                raise BadLogLineError('Missing required fields')
        return entry

    def post_process_entry(self, entry):
        """Hook applied to every parsed entry; the base version is a no-op."""
        return entry

    def parse(self, fileobj, ignore_errors=True):
        """Yield post-processed entries for the lines of ``fileobj``.

        With ``ignore_errors`` true, lines rejected by the csv reader or by
        process_line/post_process_entry are logged as warnings and skipped;
        otherwise BadLogLineError is raised for the first such line.
        """
        reader = csv.reader(
            fileobj,
            delimiter=self.FIELD_SEPARATOR,
            quotechar=self.QUOTECHAR,
        )
        # The reader is advanced by hand so that iteration can resume after a
        # csv.Error instead of ending the loop.
        while True:
            try:
                line_values = six.next(reader)
            except StopIteration:
                break
            except csv.Error:
                message = 'Invalid csv line: %s' % reader.line_num
                if not ignore_errors:
                    raise BadLogLineError(message)
                log.warning(message)
                continue

            # The line is valid as far as the csv parser is concerned.
            try:
                entry = self.process_line(line_values)
                yield self.post_process_entry(entry)
            except BadLogLineError as err:
                if not ignore_errors:
                    raise
                log.warning('Bad log line content: %s (%s) line: %s', err, repr(line_values), reader.line_num)


class AuthEntryParser(CSVEntryParser):
    """CSV layout of auth log records (registered under ParserType.AUTH)."""

    # Positional order of the values in a line.
    FIELDS = [
        'version', 'rfctime', 'host_id', 'client_name',
        'uid', 'login', 'sid', 'type',
        'status', 'comment', 'user_ip', 'proxy_ip',
        'yandexuid', 'referer', 'retpath', 'useragent',
    ]

    # 'rfctime' is stored in the entry under the 'timestamp' key.
    FIELDS_NAMES_MAPPER = {'rfctime': 'timestamp'}

    # Checked against entry keys, i.e. after the renaming above.
    REQUIRED_FIELDS = ['timestamp', 'host_id', 'status']

    # Converters for the fields that are not handled by default_processor.
    FIELDS_PROCESSORS = {
        'version': int_processor,
        'rfctime': rfctime_processor,
        'host_id': unhexify_processor,
        'uid': int_processor,
        'sid': int_processor,
        'user_ip': ip_processor,
        'proxy_ip': ip_processor,
    }


class EventEntryParser(CSVEntryParser):
    """CSV layout of event log records (registered under ParserType.EVENT)."""

    # Positional order of the values in a line.
    FIELDS = [
        'version',
        'rfctime',
        'host_id',
        'client_name',
        'uid',
        'name',  # event name
        'value',  # event value
        'user_ip',
        'proxy_ip',
        'yandexuid',
        'admin',  # administrator login
        'comment',
    ]

    # 'rfctime' is stored in the entry under the 'timestamp' key.
    FIELDS_NAMES_MAPPER = {'rfctime': 'timestamp'}

    # Checked against entry keys, i.e. after the renaming above.
    REQUIRED_FIELDS = ['timestamp', 'uid', 'name']

    # Converters for the fields that are not handled by default_processor.
    FIELDS_PROCESSORS = {
        'version': int_processor,
        'rfctime': rfctime_processor,
        'host_id': unhexify_processor,
        'uid': int_processor,
        'user_ip': ip_processor,
        'proxy_ip': ip_processor,
    }

    def post_process_entry(self, entry):
        """Pass the value of a sensitive event through decrypt_processor.

        An event counts as sensitive when its name is listed in
        EVENT_LOG_SENSITIVE_FIELDS_BY_VERSION for the entry's version.
        Entries without a 'value' key are returned untouched.
        """
        sensitive_names = EVENT_LOG_SENSITIVE_FIELDS_BY_VERSION.get(entry.get('version'), [])
        if entry['name'] in sensitive_names and 'value' in entry:
            entry['value'] = decrypt_processor(entry['value'])
        return entry


class LoyaltyEntryParser(CSVEntryParser):
    """CSV layout of loyalty log records."""

    # Positional order of the values in a line.
    FIELDS = [
        'version', 'timestamp', 'user_id_type', 'user_id',
        'user_ip', 'domain', 'name', 'meta',
    ]

    # Converters for the fields that are not handled by default_processor.
    FIELDS_PROCESSORS = {
        'version': int_processor,
        'timestamp': float_processor,
        'meta': json_processor,
    }

    # Every field except 'version' and 'meta' must be present in the entry.
    REQUIRED_FIELDS = [
        'timestamp', 'user_id_type', 'user_id',
        'user_ip', 'domain', 'name',
    ]


class RestoreEntryParser(CSVEntryParser):
    """CSV layout of restore log records (registered under ParserType.RESTORE)."""

    # Positional order of the values in a line.
    FIELDS = ['version', 'action', 'rfctime', 'uid', 'restore_id', 'data_json']

    # 'rfctime' is stored in the entry under the 'timestamp' key.
    FIELDS_NAMES_MAPPER = {'rfctime': 'timestamp'}

    # Converters applied while the line is being read.  'data_json' goes
    # through default_processor at this stage; it is JSON-decoded later, in
    # post_process_entry.
    FIELDS_PROCESSORS = {
        'version': int_processor,
        'rfctime': rfctime_processor,
        'uid': int_processor,
        'data_json': default_processor,
    }

    # Every field is mandatory (names as they appear in the entry).
    REQUIRED_FIELDS = ['version', 'action', 'timestamp', 'uid', 'restore_id', 'data_json']

    def post_process_entry(self, entry):
        """Replace the raw 'data_json' value with the json_processor result.

        For version 2 records the raw value is first passed through
        decrypt_processor.
        """
        version, payload = entry['version'], entry['data_json']
        if version == 2:
            payload = decrypt_processor(payload)
        entry['data_json'] = json_processor(payload)
        return entry


class AuthChallengeEntryParser(CSVEntryParser):
    """CSV layout of auth challenge log records (ParserType.AUTH_CHALLENGE)."""

    # Positional order of the values in a line.
    FIELDS = [
        'version', 'rfctime', 'action', 'uid',
        'env_profile_id', 'env_profile_pb2_base64', 'env_json',
    ]

    # Optional trailing field: a line may carry it in addition to FIELDS.
    FIELDS_IN_TRANSITION = ['comment']

    # 'rfctime' is stored in the entry under the 'timestamp' key.
    FIELDS_NAMES_MAPPER = {'rfctime': 'timestamp'}

    # Converters for the fields that are not handled by default_processor.
    FIELDS_PROCESSORS = {
        'version': int_processor,
        'rfctime': rfctime_processor,
        'uid': int_processor,
        'env_json': json_processor,
    }

    # 'env_profile_pb2_base64' is the only main field that may be absent.
    REQUIRED_FIELDS = [
        'version', 'timestamp', 'action', 'uid',
        'env_profile_id', 'env_json',
    ]


class TSKVEntryParser(EntryParser):
    """Base parser for TSKV logs: tab-separated ``key=value`` pairs per line.

    Subclasses describe their records through the class attributes below.
    """

    # When non-empty, only these keys are kept; other keys are dropped.
    FIELDS = []
    # Key -> callable converting the raw value; keys without an entry go
    # through default_processor.
    FIELDS_PROCESSORS = {}
    # Key -> name under which the value is stored in the entry.
    FIELDS_NAMES_MAPPER = {}
    # Entry keys (i.e. names after mapping) that must be present.
    REQUIRED_FIELDS = []

    def __init__(self, tskv_prefix=False):
        """Remember whether the first tab-separated token must be discarded.

        :param tskv_prefix: when true, the first token of every line is
            skipped before the key/value pairs are read.
        """
        self.tskv_prefix = tskv_prefix

    def process_line(self, line):
        """Build an entry dict from ``line``, a string in TSKV format.

        Empty tokens (e.g. produced by consecutive tabs) are skipped, and so
        are values whose processor returns ``None``.

        Raises BadLogLineError when nothing is left after dropping the
        prefix, when a token has no ``=``, or when one of REQUIRED_FIELDS is
        absent from the resulting entry.
        """
        entry = {}

        line_parts = line.split('\t')
        if self.tskv_prefix:
            line_parts = line_parts[1:]

        # str.split never returns an empty list, so this can only trigger
        # when the prefix was the sole token of the line.
        if not line_parts:
            raise BadLogLineError('Empty line')

        for line_part in line_parts:
            if not line_part:
                continue

            # Split on the first '=' only: the value itself may contain '='.
            key, separator, value = line_part.partition('=')
            if not separator:
                raise BadLogLineError('Empty value %s' % key)

            if self.FIELDS and key not in self.FIELDS:
                continue

            processor = self.FIELDS_PROCESSORS.get(key, default_processor)
            value = processor(value)
            if value is None:
                continue
            entry[self.FIELDS_NAMES_MAPPER.get(key, key)] = value

        if not all(field in entry for field in self.REQUIRED_FIELDS):
            raise BadLogLineError('Missing required fields')
        return entry

    def parse(self, fileobj, ignore_errors=True):
        """Yield an entry for every non-empty line of ``fileobj``.

        Lines are read lazily, one at a time, so the whole log is never held
        in memory.  With ``ignore_errors`` true, lines rejected by
        process_line are logged as warnings and skipped; otherwise the
        BadLogLineError propagates.
        """
        # Iterate the file object directly instead of calling readlines():
        # same lines, but without materialising the entire file first.
        for line_num, line in enumerate(fileobj, 1):
            line = line.rstrip('\n')
            if not line:
                continue

            try:
                yield self.process_line(line)
            except BadLogLineError as e:
                if not ignore_errors:
                    raise
                log.warning('Bad log line content: %s (%s) line: %s', e, repr(line), line_num)


class OAuthEntryParser(TSKVEntryParser):
    """TSKV layout of OAuth event log records (ParserType.OAUTH)."""

    # see all fields for each event https://beta.wiki.yandex-team.ru/oauth/historydb/
    # Left empty, so every key found in a line is kept.
    FIELDS = []

    # Keys renamed when stored in the entry.
    FIELDS_NAMES_MAPPER = {
        'v': 'version',
        'user_agent': 'useragent',
    }

    # Converters, keyed by the name used in the log line (before renaming).
    FIELDS_PROCESSORS = {
        'v': int_processor,
        'timestamp': float_processor,
        'uid': int_processor,
    }

    # Checked against entry keys, i.e. after the renaming above.
    REQUIRED_FIELDS = ['version', 'timestamp', 'action', 'target']

    def __init__(self, tskv_prefix=False):
        """By default lines are read without skipping a leading token."""
        super(OAuthEntryParser, self).__init__(tskv_prefix=tskv_prefix)


class MailUserJournalEntryParser(TSKVEntryParser):
    """TSKV layout of mail user journal records (ParserType.MAIL_USER_JOURNAL)."""

    # Left empty, so every key found in a line is kept.
    FIELDS = []

    # 'v' is stored in the entry under the 'version' key.
    FIELDS_NAMES_MAPPER = {'v': 'version'}

    # Converters, keyed by the name used in the log line (before renaming).
    FIELDS_PROCESSORS = {
        'v': int_processor,
        'date': int_processor,
        'uid': int_processor,
    }

    REQUIRED_FIELDS = ['date', 'uid']

    def __init__(self, tskv_prefix=True):
        """By default the first token of every line is skipped."""
        super(MailUserJournalEntryParser, self).__init__(tskv_prefix=tskv_prefix)


class StatboxSyntheticParser(TSKVEntryParser):
    """TSKV layout of synthetic statbox records (ParserType.STATBOX_SYNTHETIC)."""

    # Left empty, so every key found in a line is kept under its own name.
    FIELDS = []
    FIELDS_NAMES_MAPPER = {}

    # Converters for the keys that are not handled by default_processor.
    FIELDS_PROCESSORS = {
        'count': int_processor,
        'usec': int_processor,
    }

    REQUIRED_FIELDS = ['usec', 'timestamp', 'count']

    def __init__(self, tskv_prefix=True):
        """By default the first token of every line is skipped."""
        super(StatboxSyntheticParser, self).__init__(tskv_prefix=tskv_prefix)


class PassportMessagesParser(TSKVEntryParser):
    """TSKV layout of passport messages records (ParserType.PASSPORT_MESSAGES)."""

    # Left empty, so every key found in a line is kept under its own name.
    FIELDS = []
    FIELDS_NAMES_MAPPER = {}

    # Converters for the keys that are not handled by default_processor.
    FIELDS_PROCESSORS = {
        'uid': int_processor,
        'expires_at': int_processor,
    }

    REQUIRED_FIELDS = ['mode']

    def __init__(self, tskv_prefix=True):
        """By default the first token of every line is skipped."""
        super(PassportMessagesParser, self).__init__(tskv_prefix=tskv_prefix)


class PassportStatboxParser(TSKVEntryParser):
    """TSKV layout of passport statbox records (ParserType.PASSPORT_STATBOX)."""

    # Left empty, so every key found in a line is kept under its own name.
    FIELDS = []
    FIELDS_NAMES_MAPPER = {}

    # Converters for the keys that are not handled by default_processor.
    FIELDS_PROCESSORS = {
        'uid': int_processor,
        'unixtime': int_processor,
    }

    # No key is mandatory for this format.
    REQUIRED_FIELDS = []

    def __init__(self, tskv_prefix=True):
        """By default the first token of every line is skipped."""
        super(PassportStatboxParser, self).__init__(tskv_prefix=tskv_prefix)


class AvatarsParser(TSKVEntryParser):
    """TSKV layout of avatars log records (ParserType.AVATARS)."""

    # Left empty, so every key found in a line is kept under its own name.
    FIELDS = []
    FIELDS_NAMES_MAPPER = {}

    # Converters for the keys that are not handled by default_processor.
    FIELDS_PROCESSORS = {'uid': int_processor}

    REQUIRED_FIELDS = ['mode']

    def __init__(self, tskv_prefix=True):
        """By default the first token of every line is skipped."""
        super(AvatarsParser, self).__init__(tskv_prefix=tskv_prefix)


class SocialBindingsParser(TSKVEntryParser):
    """TSKV layout of social bindings records (ParserType.SOCIAL_BINDINGS)."""

    # Left empty, so every key found in a line is kept under its own name.
    FIELDS = []
    FIELDS_NAMES_MAPPER = {}

    # Converters for the keys that are not handled by default_processor.
    FIELDS_PROCESSORS = {'master_uid': int_processor}

    REQUIRED_FIELDS = ['action']

    def __init__(self, tskv_prefix=True):
        """By default the first token of every line is skipped."""
        super(SocialBindingsParser, self).__init__(tskv_prefix=tskv_prefix)


class YasmsPrivateEntryParser(TSKVEntryParser):
    """TSKV layout of yasms private log records (ParserType.YASMS_PRIVATE)."""

    # Left empty, so every key found in a line is kept.
    FIELDS = []

    # Keys renamed when stored in the entry.
    FIELDS_NAMES_MAPPER = {
        'unixtimef': 'timestamp',
        'encryptedtext': 'text',
    }

    # Converters, keyed by the name used in the log line (before renaming).
    FIELDS_PROCESSORS = {
        'unixtime': int_processor,
        'unixtimef': float_processor,
        'encryptedtext': decrypt_processor,
    }

    # Checked against entry keys, i.e. after the renaming above: 'timestamp'
    # is satisfied by a 'unixtimef' key in the line.
    REQUIRED_FIELDS = ['unixtime', 'timestamp', 'action']

    def __init__(self, tskv_prefix=True):
        """By default the first token of every line is skipped."""
        super(YasmsPrivateEntryParser, self).__init__(tskv_prefix=tskv_prefix)


# Registry of ready-to-use parser instances, keyed by ParserType identifier.
# Each parser class is instantiated once, with its default arguments.
# NOTE(review): ParserType.LOYALTY has no entry here although
# LoyaltyEntryParser exists -- confirm whether that is intentional.
parsers = {
    parser_type: parser_class()
    for parser_type, parser_class in (
        (ParserType.AUTH, AuthEntryParser),
        (ParserType.AUTH_CHALLENGE, AuthChallengeEntryParser),
        (ParserType.AVATARS, AvatarsParser),
        (ParserType.EVENT, EventEntryParser),
        (ParserType.MAIL_USER_JOURNAL, MailUserJournalEntryParser),
        (ParserType.OAUTH, OAuthEntryParser),
        (ParserType.RESTORE, RestoreEntryParser),
        (ParserType.SOCIAL_BINDINGS, SocialBindingsParser),
        (ParserType.STATBOX_SYNTHETIC, StatboxSyntheticParser),
        (ParserType.PASSPORT_MESSAGES, PassportMessagesParser),
        (ParserType.PASSPORT_STATBOX, PassportStatboxParser),
        (ParserType.PASSPORT_TOLOKA, JSONEntryParser),
        (ParserType.YASMS_PRIVATE, YasmsPrivateEntryParser),
    )
}


def get_parser_type(logfilename):
    """Return the ParserType identifier for ``logfilename``, or None.

    An exact match in ParserType.by_filename wins.  Otherwise any name that
    starts with 'blackbox-auth-' is treated as an AUTH log; every other name
    is unknown and yields None.
    """
    known_type = ParserType.by_filename.get(logfilename)
    if known_type is not None:
        return known_type
    if logfilename.startswith('blackbox-auth-'):
        return ParserType.AUTH
    return None


def get_parser_by_filename(logfilename):
    """Return the registered parser for a log file name, or None if unknown."""
    return get_parser(get_parser_type(logfilename))


def get_parser(name):
    """Return the parser registered under ``name``, or None when there is none."""
    return parsers[name] if name in parsers else None
