package ru.yandex.util.storage;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Locale;

import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.entity.ContentType;
import org.apache.james.mime4j.MimeException;
import org.apache.james.mime4j.stream.EntityState;
import org.apache.james.mime4j.stream.Field;
import org.apache.james.mime4j.stream.MimeTokenStream;
import org.apache.james.mime4j.stream.ParserCursor;
import org.apache.james.mime4j.stream.RawBody;
import org.apache.james.mime4j.stream.RawFieldParser;
import org.apache.james.mime4j.stream.RecursionMode;
import org.apache.james.mime4j.util.ByteArrayBuffer;

import ru.yandex.charset.Decoder;
import ru.yandex.charset.Encoder;
import ru.yandex.function.ByteArrayProcessable;
import ru.yandex.function.Processable;
import ru.yandex.http.util.UnsupportedMediaTypeException;
import ru.yandex.io.BufferedPushbackInputStream;
import ru.yandex.io.DecodableByteArrayOutputStream;
import ru.yandex.io.IOStreamUtils;
import ru.yandex.io.LimitedIOException;
import ru.yandex.io.LimitedOutputStream;
import ru.yandex.mail.mime.BodyDecoder;
import ru.yandex.mail.mime.DefaultMimeConfig;
import ru.yandex.mail.mime.OverwritingBodyDescriptorBuilder;
import ru.yandex.mail.mime.Utf8FieldBuilder;
import ru.yandex.util.string.StringUtils;

/**
 * {@link DataExtractor} which pulls the text body out of a MIME message.
 *
 * <p>The message is scanned part by part; a part is a candidate when its
 * content type is {@code text/plain} or {@code text/html}, it has no
 * {@code name} content type parameter and its content disposition is either
 * absent or {@code inline}. By default the last successfully extracted
 * candidate is returned; subclasses may alter this through
 * {@link #findFirstHtml()} and {@link #findHeadersOnly()}, and may observe
 * the remaining header fields through {@link #processField(String, Field)}.
 */
public class MailTextExtractor implements DataExtractor {
    public static final MailTextExtractor INSTANCE = new MailTextExtractor();

    // End of the optional `<message>...</message>' prefix preceding the eml
    private static final byte[] SKIP_MARK =
        "\n</message>\n".getBytes(StandardCharsets.UTF_8);

    protected MailTextExtractor() {
    }

    /**
     * Parses a structured header field body (e.g. a Content-Disposition
     * value) with the default mime4j raw field parser.
     *
     * @param body header field body, encoded as UTF-8 before parsing
     * @return parsed body, see {@link RawBody#getValue()} for the main value
     */
    public static RawBody parse(final String body) {
        byte[] buf = body.getBytes(StandardCharsets.UTF_8);
        return RawFieldParser.DEFAULT.parseRawBody(
            new ByteArrayBuffer(buf, true),
            new ParserCursor(0, buf.length));
    }

    /**
     * Tells whether a part with the given Content-Disposition header may be
     * treated as message text.
     *
     * @param contentDisposition raw header body, {@code null} if the part
     *     has no Content-Disposition header
     * @return {@code true} if the header is absent or its disposition type
     *     is {@code inline}
     */
    private static boolean isInlineOrAbsent(
        final String contentDisposition)
    {
        // Disposition types are case-insensitive (RFC 2183, section 2),
        // so `Inline' and `INLINE' must be accepted as well
        return contentDisposition == null
            || "inline".equalsIgnoreCase(
                parse(contentDisposition).getValue());
    }

    /**
     * Reads the body of the part the token stream is currently positioned
     * at, undoes its transfer encoding and, if requested by the config,
     * converts it to the target charset.
     *
     * @param tokenStream stream positioned at a body token
     * @param contentType parsed content type of the current part
     * @param config limits and target charset
     * @return extracted data, or {@code null} if the body exceeds
     *     {@code config.maxInputLength()} while truncation is disabled, or
     *     if the declared charset is illegal or unsupported
     * @throws IOException if the body can't be read
     */
    private static StorageData extractData(
        final MimeTokenStream tokenStream,
        final ContentType contentType,
        final ImmutableDataExtractorConfig config)
        throws IOException
    {
        try (InputStream in = tokenStream.getInputStream();
            InputStream decoded = BodyDecoder.INSTANCE.apply(
                in,
                tokenStream.getBodyDescriptor().getTransferEncoding()))
        {
            DecodableByteArrayOutputStream out =
                new DecodableByteArrayOutputStream();
            OutputStream limiter;
            int maxInputLength = config.maxInputLength();
            if (maxInputLength < Integer.MAX_VALUE) {
                limiter = new LimitedOutputStream(out, maxInputLength);
            } else {
                limiter = out;
            }
            try {
                IOStreamUtils.copy(decoded, limiter);
            } catch (LimitedIOException e) {
                if (!config.truncateLongInput()) {
                    return null;
                }
                // Truncation allowed: go on with what has been copied so far
            }

            Charset targetCharset = config.charset();
            if (targetCharset == null) {
                // We don't care about input charset
                return new StorageData(contentType, out);
            }
            String charsetName = contentType.getParameter("charset");
            if (charsetName == null) {
                // Source charset is unknown, nothing to convert from
                return new StorageData(contentType, out);
            }
            Charset charset;
            try {
                charset = Charset.forName(charsetName);
            } catch (IllegalCharsetNameException
                | UnsupportedCharsetException e)
            {
                return null;
            }
            if (targetCharset.equals(charset)) {
                // Already in the requested charset
                return new StorageData(contentType, out);
            }

            // Recode: bytes -> chars (source charset) -> bytes (target
            // charset), replacing anything which can't be converted
            Decoder decoder = new Decoder(
                charset.newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE));
            out.processWith(decoder);
            Encoder encoder = new Encoder(
                targetCharset.newEncoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE));
            decoder.processWith(encoder);
            Processable<byte[]> data = encoder;
            return new StorageData(
                contentType.withCharset(targetCharset),
                data);
        }
    }

    /**
     * Hook invoked for every header field except Content-Type and
     * Content-Disposition. Does nothing by default.
     *
     * @param name field name, already lowercased
     * @param field the field itself
     * @throws IOException at subclass discretion
     * @throws MimeException at subclass discretion
     */
    protected void processField(final String name, final Field field)
        throws IOException, MimeException
    {
    }

    /**
     * @return {@code true} if extraction must stop at the first successfully
     *     extracted {@code text/html} part; {@code false} (the default) to
     *     keep scanning and return the last matching part
     */
    protected boolean findFirstHtml() {
        return false;
    }

    /**
     * @return {@code true} if extraction must stop as soon as the first
     *     header block has been read, returning an
     *     {@code application/octet-stream} placeholder instead of a body;
     *     {@code false} by default
     */
    protected boolean findHeadersOnly() {
        return false;
    }

    /**
     * Extracts message text from a raw eml stream.
     *
     * <p>Input won't contain lenulca `<message>...</message>' prefix.
     *
     * @param input raw MIME message
     * @param config limits and target charset
     * @return extracted text, or {@code null} if no suitable part was found
     * @throws IOException if the input can't be read
     * @throws MimeException if the message can't be parsed
     */
    public StorageData extractDataFromEml(
        final InputStream input,
        final ImmutableDataExtractorConfig config)
        throws IOException, MimeException
    {
        StorageData data = null;
        String contentTypeString = null;
        String contentDisposition = null;

        MimeTokenStream tokenStream = new MimeTokenStream(
            DefaultMimeConfig.INSTANCE,
            null,
            new Utf8FieldBuilder(),
            new OverwritingBodyDescriptorBuilder());
        tokenStream.setRecursionMode(RecursionMode.M_NO_RECURSE);
        tokenStream.parse(input);

        EntityState state = tokenStream.getState();
        while (state != EntityState.T_END_OF_STREAM) {
            switch (state) {
                case T_END_HEADER:
                    if (findHeadersOnly()) {
                        return new StorageData(
                            ContentType.APPLICATION_OCTET_STREAM,
                            new ByteArrayProcessable());
                    }
                    break;
                case T_START_HEADER:
                    // New entity, forget headers of the previous one
                    contentTypeString = null;
                    contentDisposition = null;
                    break;
                case T_FIELD:
                    Field field = tokenStream.getField();
                    String name = field.getName().toLowerCase(Locale.ROOT);
                    switch (name) {
                        case "content-type":
                            contentTypeString = field.getBody();
                            break;
                        case "content-disposition":
                            contentDisposition = field.getBody();
                            break;
                        default:
                            processField(name, field);
                            break;
                    }
                    break;
                case T_BODY:
                    if (contentTypeString != null) {
                        // Normalize the mimetype (trimmed, lowercased)
                        // leaving parameters as is, so that both the
                        // comparisons below and the resulting ContentType
                        // see the canonical form
                        int semicolon = contentTypeString.indexOf(';');
                        String mimetype;
                        if (semicolon == -1) {
                            contentTypeString = contentTypeString.trim()
                                .toLowerCase(Locale.ROOT);
                            mimetype = contentTypeString;
                        } else {
                            String rawMimetype =
                                contentTypeString.substring(0, semicolon);
                            mimetype =
                                rawMimetype.trim().toLowerCase(Locale.ROOT);
                            if (!rawMimetype.equals(mimetype)) {
                                contentTypeString = StringUtils.concat(
                                    mimetype,
                                    contentTypeString.substring(semicolon));
                            }
                        }
                        try {
                            ContentType contentType =
                                ContentType.parse(contentTypeString);
                            boolean isHtml = "text/html".equals(mimetype);
                            // Parts carrying a `name' parameter or a
                            // non-inline disposition are skipped
                            if ((isHtml || "text/plain".equals(mimetype))
                                && contentType.getParameter("name") == null
                                && isInlineOrAbsent(contentDisposition))
                            {
                                StorageData extracted = extractData(
                                    tokenStream,
                                    contentType,
                                    config);
                                if (extracted != null) {
                                    data = extracted;
                                    if (isHtml && findFirstHtml()) {
                                        return data;
                                    }
                                }
                            }
                        } catch (RuntimeException e) {
                            // Just ignore invalid content type
                        }
                    }
                    break;
                default:
                    break;
            }
            state = tokenStream.next();
        }
        return data;
    }

    /**
     * Extracts message text from a stream which contains either a bare eml
     * or an eml preceded by a `<message>...</message>' prefix. The prefix is
     * detected by the very first byte being {@code '<'} and skipped up to
     * the closing mark.
     *
     * @param in message stream, closed by this method
     * @param contentLength not used by this implementation
     * @param config limits and target charset
     * @return extracted text, or {@code null} if no suitable part was found
     * @throws HttpException {@link UnsupportedMediaTypeException} is thrown
     *     if the stream is empty, ends prematurely or is not a valid MIME
     *     message
     * @throws IOException if the stream can't be read
     */
    @Override
    public StorageData extractData(
        final InputStream in,
        final long contentLength,
        final ImmutableDataExtractorConfig config)
        throws HttpException, IOException
    {
        try (BufferedPushbackInputStream input =
                new BufferedPushbackInputStream(in))
        {
            int first = input.read();
            if (first == -1) {
                throw new EOFException("Empty stream");
            }
            if (first == '<') {
                IOStreamUtils.skipTo(input, SKIP_MARK);
            } else {
                // Not a prefix, give the byte back to the MIME parser
                input.unread(first);
            }
            return extractDataFromEml(input, config);
        } catch (EOFException | MimeException e) {
            throw new UnsupportedMediaTypeException(e);
        }
    }

    /**
     * Extracts message text from the entity of an HTTP response.
     *
     * @param response response whose entity holds the message
     * @param config limits and target charset
     * @return extracted text, or {@code null} if no suitable part was found
     * @throws HttpException {@link UnsupportedMediaTypeException} is thrown
     *     if the response has no entity or the entity is not a valid
     *     message, see {@link #extractData(InputStream, long,
     *     ImmutableDataExtractorConfig)}
     * @throws IOException if the entity content can't be read
     */
    @Override
    public StorageData extractData(
        final HttpResponse response,
        final ImmutableDataExtractorConfig config)
        throws HttpException, IOException
    {
        // A response without a body has no entity at all; report it the
        // same way as an empty stream instead of failing with NPE
        if (response.getEntity() == null) {
            throw new UnsupportedMediaTypeException(
                new EOFException("Response has no entity"));
        }
        return extractData(response.getEntity().getContent(), -1, config);
    }
}

