#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging
import sys
import socket
import re
import time
import urlparse

from logutils.queue import QueueHandler, QueueListener

from google.protobuf.message import DecodeError

# for version >= 2.3
from google.protobuf.internal.encoder import _VarintBytes as Encoder
from google.protobuf.internal.decoder import _DecodeVarint32 as Decoder

from errors import IndexerException, IncorrectDocumentException, \
    ClosedConnection
import suggest_message as smessage
import rtyserver_pb2 as rtyserver


# Matches one line of the HTTP access log. Named groups:
#   datetime             - bracketed timestamp, without its "+HHMM"/"-HHMM" offset
#   request              - request target after the leading slash(es) and any
#                          "?" characters, up to " HTTP/x.y"
#   status               - numeric response status
#   len                  - response length (digits or "-")
#   search_results_count - optionally quoted digits / "-" / "|" token
#   session_id           - optionally quoted trailing token
LOG_RE = re.compile("""^.*?\[(?P<datetime>.*?)\s(?:[\+\-]\d+)\].*?/+\?*(?P<request>.*) HTTP/\d\.\d" (?P<status>\d+) (?P<len>(\d+|\-)).* (?P<search_results_count>[\"\']?[\d|\-]+[\"\']?) (?P<session_id>[\"\']?.+[\"\']?).*$""")
# Old log format example (numeric month in the timestamp):
#95.108.253.233 - - [08/08/2013:14:34:25 +0400] "GET /?user=42924016&db=mdb200&wizard=on&spcctx=doc&how=tm&np=1&utf8=1&format=json&getfields=mid&remote_ip=109.188.127.75&&text=%D0%B8%D0%BD%D1%84%D0%BE%D0%BC%D0%B0%D0%B9%D0%BD&length=10000 HTTP/1.1" 200 13506 143460 "227" "-"
# New log format example (abbreviated month name in the timestamp):
#2a02:6b8:0:1402:0:0:0:401 - - [30/Oct/2014:16:31:18 +0300] "GET /api/mail/search?connection_id=5846e44b718fccf71cf080a71c64e8&fid=2500000450029314141&mdb=mdb150&page=1&per_page=15&remote_ip=84.201.165.51&request=%D0%92%D0%B0%D1%81%D1%8F&suid=797144819 HTTP/1.1" 200 7383 157 HSD312 4

# Case-insensitive pattern that matches either "<local-part>@<domain>" (the
# local part being a dot-atom or a quoted string) or a bracketed dotted-quad
# literal such as "[127.0.0.1]".
# NOTE(review): the escaped-character class in the quoted-string branch reads
# "\001-011"; "\001-\011" may have been intended - confirm before changing,
# since the pattern text is runtime behaviour.
EMAIL_RE = re.compile(
    r"([-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*"  # dot-atom
    r'|"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-011\013\014\016-\177])*"'    # quoted-string
    r')@((?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.?)+[A-Z]{2,6}\.?)'    # domain
    r'|\[(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\]', re.IGNORECASE)  # literal

# Maps the send-type names used in this module ('modify'/'delete' are chosen
# by parse_log, 'reopen' by send_reopen_indexers) to rtyserver.TMessage
# message-type constants.
SEND_TYPES = {
    'modify': rtyserver.TMessage.MODIFY_DOCUMENT,
    'add': rtyserver.TMessage.ADD_DOCUMENT,
    'delete': rtyserver.TMessage.DELETE_DOCUMENT,
    'reopen': rtyserver.TMessage.REOPEN_INDEXERS,
}
# Value that check_response compares response.MessageId against.
MESSAGE_ID = 1

# Module-level logger used by every helper in this file.
LOG = logging.getLogger('suggest.utils')


def encode_varint(output_data):
    """Return ``output_data`` encoded by the protobuf varint ``Encoder``."""
    encoded = Encoder(output_data)
    return encoded


def decode_varint(input_data):
    """Decode the varint located at offset 0 of ``input_data``.

    The decoder's result is unpacked as ``(length, position)`` and handed
    back swapped, i.e. as ``(position, length)``.  When the decoder raises
    ``DecodeError`` the function returns ``(None, None)`` instead.
    """
    try:
        decoded = Decoder(input_data, 0)
    except DecodeError:
        return None, None

    length, position = decoded
    return position, length


class WaitQueueHandler(QueueHandler):
    """QueueHandler variant whose ``enqueue`` calls ``put`` on the queue."""

    def enqueue(self, record):
        """Hand ``record`` to ``self.queue`` through ``put`` with default arguments."""
        target_queue = self.queue
        target_queue.put(record)


class WaitQueueListener(QueueListener):
    """QueueListener variant with its own ``stop`` sequence."""

    def stop(self):
        """Signal the stop flag, enqueue the sentinel via ``put``, then join the thread.

        After the thread has been joined the ``_thread`` attribute is reset
        to ``None``.
        """
        stop_flag = self._stop
        stop_flag.set()

        sentinel = self._sentinel
        self.queue.put(sentinel)

        worker = self._thread
        worker.join()
        self._thread = None


def setup_log_handler(queue, level):
    """Route all root-logger output into ``queue``.

    Every handler currently attached to the root logger is removed, a
    ``WaitQueueHandler`` bound to ``queue`` is installed as the only handler,
    and the root logger level is set to ``level``.
    """
    queue_handler = WaitQueueHandler(queue)
    root = logging.root
    # Iterate over a snapshot: removeHandler() mutates root.handlers, and
    # removing from the live list while iterating it skips every other
    # handler, leaving some of the old handlers attached.
    for handler in list(root.handlers):
        root.removeHandler(handler)
    root.addHandler(queue_handler)
    root.setLevel(level)


def setup_log_listener_writer(queue, conf):
    """
        Build the queue listener that writes log records out.

        Records go to the file named by conf['log'] when that entry is
        truthy, otherwise to sys.stdout.

        invoke in main process
        for start using @return.start()
        before exit @return.stop()
    """
    if not conf.get('log'):
        writer = logging.StreamHandler(sys.stdout)
    else:
        writer = logging.FileHandler(conf['log'])

    listener = WaitQueueListener(queue, writer)

    record_layout = '[%(process)s %(thread)d] %(asctime)s %(levelname)s %(name)s: %(message)s'
    writer.setFormatter(logging.Formatter(record_layout))
    return listener


def check_response(response):
    """Validate an rtyserver reply; return ``None`` only for status OK.

    Raises ``IncorrectDocumentException`` for status INCORRECT_DOCUMENT and
    ``IndexerException`` for a falsy response, for every other status, and
    for a reply whose ``MessageId`` differs from ``MESSAGE_ID``.
    """
    if not response:
        raise IndexerException('failed indexing message, because empty response')

    status = response.Status
    reply = rtyserver.TReply

    if status == reply.OK:
        return
    if status == reply.NOTNOW:
        raise IndexerException(
            'failed indexing message with response.status=NOTNOW,'
            ' rtyserver is busy')
    if status == reply.INCORRECT_DOCUMENT:
        raise IncorrectDocumentException(
            'failed indexing message,'
            ' because message is bad: not document, bad service or not keyprefix')
    if status == reply.SEND_FAILED:
        raise IndexerException(
            'failed indexing message'
            ' with response.status=SEND_FAILED, because failed to send message to rtyserver ')
    if status == reply.READ_FAILED:
        raise IndexerException(
            'failed indexing message with'
            ' response.status=READ_FAILED, because unable to read response from rtyserver')
    if status == reply.STORE_FAILED:
        raise IndexerException(
            'failed indexing message with'
            ' response.status=STORE_FAILED, because, failed to send message to rtyserver')

    # Unknown status: report the message id mismatch first, if there is one.
    if response.MessageId != MESSAGE_ID:
        details = (status, response.MessageId, response.StatusMessage)
        raise IndexerException(
            'failed indexing message with response.status=%s,'
            ' response.message_id=%s and response %s' % details)

    raise IndexerException('failed indexing message with response.status=%s' % (status,))


def send_message(sock, suggest_message, with_response=True, host=None):
    """Send ``suggest_message`` over ``sock`` and optionally validate the reply.

    :param sock: connected socket to write to (and read the reply from).
    :param suggest_message: object providing ``create_proto_message``,
        ``keyprefix`` and ``url``.
    :param with_response: when true, read a varint-length-prefixed
        ``rtyserver.TReply`` from the socket and pass it to ``check_response``.
    :param host: optional host name, used only in the debug log line.
    :raises ClosedConnection: the peer closed the connection before a
        complete reply was received.
    :raises IndexerException: the reply length prefix could not be decoded,
        or ``check_response`` rejected the reply.
    :raises IncorrectDocumentException: propagated from ``check_response``.
    """
    if host is not None:
        LOG.debug('send message to host %s: %s', host, suggest_message)
    else:
        LOG.debug('send message: %s', suggest_message)

    sock.sendall(suggest_message.create_proto_message(with_response=with_response))
    if not with_response:
        return

    byte_stream = sock.recv(1024)
    if not byte_stream:
        raise ClosedConnection("Empty response for message (keyprefix=%s, url=%s)" %
                               (suggest_message.keyprefix, suggest_message.url))

    start, length = decode_varint(byte_stream)
    if start is None:
        # decode_varint signals an undecodable prefix with (None, None);
        # slicing with None + length would otherwise raise a bare TypeError.
        raise IndexerException('failed indexing message, because response'
                               ' length prefix can not be decoded')

    # TCP is a byte stream: one recv() may deliver only part of the reply, and
    # a reply may be longer than the first 1024-byte read.  Keep reading until
    # the whole length-prefixed frame is available.
    end = start + length
    while len(byte_stream) < end:
        chunk = sock.recv(end - len(byte_stream))
        if not chunk:
            raise ClosedConnection("Connection closed before the full response was read"
                                   " for message (keyprefix=%s, url=%s)" %
                                   (suggest_message.keyprefix, suggest_message.url))
        byte_stream += chunk

    response = rtyserver.TReply()
    response.ParseFromString(byte_stream[start:end])

    check_response(response)


def _get_sock(host, port, timeout):
    """Open a TCP connection to ``host:port`` with the given ``timeout``.

    Returns the connected socket, or ``None`` when creating or connecting the
    socket fails; the failure is logged with its traceback and any socket
    that was created is closed.
    """
    # Bound before the try block so the handler can tell whether a socket was
    # actually created; otherwise a failure inside socket.socket() would make
    # the cleanup itself raise UnboundLocalError.
    sock = None
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(timeout)
        sock.connect((host, port))
        return sock
    except Exception as e:
        if sock is not None:
            sock.close()
        LOG.warning('Error in socket: %s', e, exc_info=sys.exc_info())
        return None


def get_sock(host, port, timeout):
    """Return a connected socket, retrying every 2 seconds until one is obtained.

    Each attempt is delegated to ``_get_sock``; there is no retry limit.
    """
    sock = _get_sock(host, port, timeout)
    while sock is None:
        LOG.info('failed to connect %s:%s, sleep 2 seconds', host, port)
        time.sleep(2)
        sock = _get_sock(host, port, timeout)
    return sock


def send_reopen_indexers(sock):
    """Send a 'reopen' ``rtyserver.TMessage`` over ``sock``.

    The serialized message is preceded by its byte size encoded as a varint.
    No reply is read.
    """
    reopen = rtyserver.TMessage()
    reopen.MessageType = SEND_TYPES.get('reopen')

    size_prefix = encode_varint(reopen.ByteSize())
    payload = reopen.SerializeToString()
    sock.sendall(size_prefix + payload)


def parse_log(obj):
    '''
    Generator: iterate over the access-log lines in ``obj`` and yield one
    ``smessage.SuggestMessage`` for every line that passes the filters below.

    Lines that do not match ``LOG_RE`` are logged as errors and skipped; any
    exception raised while processing a matched line is logged and that line
    is skipped.

    An empty response in json format has a size of 31 bytes;
    in any other format it has a size of 9 bytes.
    '''

    # Number of lines read, reported in the final debug message.
    in_files = 0
    for line in obj:
        in_files += 1
        m = LOG_RE.match(line)

        if m:
            try:
                result = m.groupdict()
                #summary for filter: +200 +request +user +text>=3 -otrs -imap -scope
                # requests with a response status other than 200 are not interesting
                status = int(result.get('status'))
                if status == 200:
                    request = result.get('request').strip()
                    # Drop the path part when a '?' appears before the first '&'.
                    # When the request has no '&' at all, find('&') is -1, the
                    # comparison is false and the request is left unchanged.
                    if '?' in request and request.find('?') < request.find('&'):
                        request = request.split('?', 1)[1]
#message_type
                    if request:
                        # 'len' may be "-" per LOG_RE; int() then raises and the
                        # line ends up in the generic error handler below.
                        response_length = int(result.get('len'))
                        query = dict(urlparse.parse_qsl(request))

                        date_raw = result.get('datetime')
                        # Fall back to the current time when the date is missing
                        # or can not be parsed.
                        timestamp = int(time.time())
                        if date_raw:
                            try:
                                # numeric month first ...
                                date_t = time.strptime(date_raw, '%d/%m/%Y:%H:%M:%S')
                                timestamp = int(time.mktime(date_t))
                            except ValueError:
                                try:
                                    # ... then abbreviated month name
                                    date_t = time.strptime(date_raw, '%d/%b/%Y:%H:%M:%S')
                                    timestamp = int(time.mktime(date_t))
                                except ValueError, ex:
                                    LOG.error("Error while parsing date: %s", ex)

                        # User id comes from 'user', else from 'suid'; pop() also
                        # removes the key it reads from the query that is passed on.
                        uid = query.pop('user', 0) or query.pop('suid', 0)
                        if uid:
                            try:
                                uid = int(uid)
                            except ValueError:
                                continue

                            # Search text comes from 'text', else from 'request'.
                            text = query.pop('text', '') or query.pop('request', '')
                            # Length is checked on the raw value, before unquoting/decoding.
                            if len(text) < 3:
                                continue
                            try:
                                text = unicode(urlparse.unquote(text), 'utf-8')
                                text = text.strip()
                                if not text:
                                    continue
                            except UnicodeDecodeError:
                                LOG.warn('bad text str %s for message %s', text, uid)
                                continue

                            # Normalise the database name under 'db' ('mdb' is the fallback key).
                            query['db'] = query.get('db') or query.pop('mdb', '')

                            # requests with email search, and requests from OTRS, imap and scope, are not interesting
                            if query.get('db', '') != 'OTRS' and query.get('imap') is None and query.get('scope') is None:
                                offset = int(query.get('offset', 0))
                                length = query.get('length')
                                # Equivalent to: offset == 0, or 'length' is given and
                                # offset is not a multiple of it.
                                # NOTE(review): confirm this is the intended paging filter.
                                if offset == 0 or not (offset != 0 and ((length is not None and (offset % int(length) == 0)) or length is None)):
                                    # Decide whether the search found anything:
                                    # 'delete' for an empty result, 'modify' otherwise.
                                    if query.get('format', '').strip() == 'json':
                                        try:
                                            # Prefer the logged result count (quotes stripped) ...
                                            search_results_count = int(result.get('search_results_count').strip("""\'\""""))
                                            if search_results_count <= 0:
                                                send_type = 'delete'
                                            else:
                                                send_type = 'modify'
                                        except ValueError:
                                            # ... and fall back to the json response size.
                                            if response_length < 31:
                                                send_type = 'delete'
                                            else:
                                                send_type = 'modify'
                                    else:
                                        if response_length <= 9:
                                            send_type = 'delete'
                                        else:
                                            send_type = 'modify'

                                    # NOTE: in Python 2 the comprehension variable rebinds
                                    # the outer ``m``; it is not used again in this iteration.
                                    emails = [m.group() for m in EMAIL_RE.finditer(text)]
                                    #try to define if text with emails only or text+emails;
                                    #emails only --> +emails, -orig_text
                                    #emails+text --> -emails, +orig_text
                                    text_nomails = text
                                    for em in emails:
                                        text_nomails = text_nomails.replace(em, '')
                                    # If more than len(emails)+2 non-space characters remain
                                    # after removing the addresses, discard the emails list.
                                    if len(text_nomails.replace(' ', '')) > len(emails)+2:
                                        emails = []

                                    yield smessage.SuggestMessage(
                                        keyprefix=uid,
                                        text=text,
                                        query=query,
                                        send_type=send_type,
                                        timestamp=timestamp,
                                        emails = emails)
            except Exception, e:
                # Best effort: log the failing line with its traceback and go on.
                LOG.error('some error where parsing %s: %s', line, e, exc_info=sys.exc_info())

        else:
            LOG.error('Bad log-string %s', line)
    LOG.debug('count line in file %s', in_files)
