package ru.yandex.parser.mail.received;

import java.util.Arrays;
import java.util.function.Consumer;

import ru.yandex.function.NullConsumer;
import ru.yandex.parser.mail.errors.ErrorInfo;

public class ReceivedParser {
    // Ragel directives: name the state machine defined in this class and
    // emit the static data generated for it at this point of the class body
    %% machine ReceivedParser;
    %% write data;

    // Initial capacity of the Ragel call stack; the nested_comment action
    // doubles the stack whenever it is full
    private static final int INITIAL_STACK_LENGTH = 16;
    // Shared zero-length array used as the initial value of buf
    private static final char[] EMPTY_BUF = new char[0];

    // Call stack used by the state machine for nested comments; it is
    // replaced by a larger copy when it fills up
    // NOTE(review): stack and buf are mutated by parse calls without any
    // synchronization, so an instance looks unsafe for concurrent use -
    // confirm against callers
    private int[] stack = new int[INITIAL_STACK_LENGTH];
    // Scratch buffer the String overload copies its input into; it is
    // reallocated only when the input is longer than the current buffer
    private char[] buf = EMPTY_BUF;

    /**
     * Creates a parser whose call stack and scratch buffer start with the
     * values given by the field initializers.
     */
    public ReceivedParser() {
        super();
    }

    /**
     * Parses a header value given as a string, passing
     * {@code NullConsumer.INSTANCE} as the errors consumer.
     *
     * @param received header value to parse
     * @return whatever the two-argument string overload returns
     */
    public ReceivedInfo parse(final String received) {
        final ReceivedInfo parsed = parse(NullConsumer.INSTANCE, received);
        return parsed;
    }

    /**
     * Copies the string into the reusable scratch buffer and parses the
     * copied characters.
     *
     * @param errorsConsumer consumer forwarded to the array-based overload
     * @param received header value to parse
     * @return whatever the array-based overload returns for the copied text
     */
    public ReceivedInfo parse(
        final Consumer<? super ErrorInfo> errorsConsumer,
        final String received)
    {
        final int length = received.length();
        char[] chars = buf;
        if (chars.length < length) {
            // Grow to at least the input length, preferring a doubled buffer
            chars = new char[Math.max(length, chars.length << 1)];
            buf = chars;
        }
        received.getChars(0, length, chars, 0);
        return parse(errorsConsumer, chars, 0, length);
    }

    /**
     * Parses a whole character array, passing {@code NullConsumer.INSTANCE}
     * as the errors consumer.
     *
     * @param data characters of the header value
     * @return whatever the two-argument array overload returns
     */
    public ReceivedInfo parse(final char[] data) {
        final ReceivedInfo parsed = parse(NullConsumer.INSTANCE, data);
        return parsed;
    }

    /**
     * Parses a whole character array, from index zero up to its length.
     *
     * @param errorsConsumer consumer forwarded to the ranged overload
     * @param data characters of the header value
     * @return whatever the ranged overload returns for the full array
     */
    public ReceivedInfo parse(
        final Consumer<? super ErrorInfo> errorsConsumer,
        final char[] data)
    {
        final int length = data.length;
        return parse(errorsConsumer, data, 0, length);
    }

    /**
     * Parses the header value stored in {@code data[off .. off + len)}.
     *
     * <p>Most of the body is a Ragel machine specification that Ragel
     * expands into the initialization and execution code. The actions of
     * the machine pass the current position to a {@code ReceivedInfoBuilder}
     * as the start and end offsets of the recognized parts (domains,
     * addresses, protocol, id, recipient, timestamp). The {@code end}
     * action, attached as a leaving action of the main machine, asks the
     * builder for the result.
     *
     * <p>As the grammar comments explain, the grammar is deliberately more
     * permissive than the referenced RFC sections: the FROM clause is
     * optional, several WITH clauses are accepted, VIA may come before or
     * after WITH and ID, an ID may contain dots and angle brackets, and the
     * multi-word VIA and WITH values used by MS Exchange are accepted.
     *
     * <p>Nested comments are handled with machine calls; the call stack
     * field is doubled by the {@code nested_comment} action when it is full.
     *
     * <p>If no result was built, a {@code SYNTAX_ERROR} error with the
     * {@code RECEIVED} scope, carrying the input text in its message, is
     * passed to {@code errorsConsumer}.
     *
     * @param errorsConsumer receives the syntax error on failure; it is also
     *     handed to the builder
     * @param data array holding the characters to parse
     * @param off index of the first character to parse
     * @param len number of characters to parse
     * @return the result built by the {@code end} action, or {@code null}
     *     if that action never ran
     */
    // NOTE(review): the fallthrough suppression is presumably needed for the
    // switch statements in the Ragel-generated code - confirm
    @SuppressWarnings("fallthrough")
    public ReceivedInfo parse(
        final Consumer<? super ErrorInfo> errorsConsumer,
        final char[] data,
        final int off,
        final int len)
    {
        // Ragel position variables: p is the current offset, pe is the end
        // of the data, and eof equals pe because the whole input is given
        // in a single call
        int p = off;
        int eof = off + len;
        int pe = eof;
        // Current state and call stack depth; they are only declared here
        // and get their values from the code generated for write init
        int cs;
        int top;
        // Stays null unless the end action of the machine runs
        ReceivedInfo receivedInfo = null;
        // Collects the offsets reported by the machine actions
        ReceivedInfoBuilder builder =
            new ReceivedInfoBuilder(errorsConsumer, data);

        %%{
            write init;

            action nested_comment {
                if (top == stack.length) {
                    stack = Arrays.copyOf(stack, top << 1);
                }
                fcall nested_comment;
            }

            action comment_end {
                fret;
            }

            action extended_domain_domain_start {
                builder.extendedDomainDomainStart(p);
            }

            action extended_domain_domain_end {
                builder.extendedDomainDomainEnd(p);
            }

            action extended_domain_addr_start {
                builder.extendedDomainAddressStart(p);
            }

            action extended_domain_addr_end {
                builder.extendedDomainAddressEnd(p);
            }

            action tcp_info_domain_start {
                builder.tcpInfoDomainStart(p);
            }

            action tcp_info_domain_end {
                builder.tcpInfoDomainEnd(p);
            }

            action tcp_info_addr_start {
                builder.tcpInfoAddressStart(p);
            }

            action tcp_info_addr_end {
                builder.tcpInfoAddressEnd(p);
            }

            action commit_from {
                builder.commitFrom();
            }

            action start_by {
                builder.startBy();
            }

            action commit_by {
                builder.commitBy();
            }

            action by_comment_start {
                builder.byCommentStart(p);
            }

            action by_comment_end {
                builder.byCommentEnd(p);
            }

            action protocol_start {
                builder.protocolStart(p);
            }

            action protocol_end {
                builder.protocolEnd(p);
            }

            action id_start {
                builder.idStart(p);
            }

            action id_end {
                builder.idEnd(p);
            }

            action recipient_start {
                builder.recipientStart(p);
            }

            action recipient_end {
                builder.recipientEnd(p);
            }

            action timestamp_start {
                builder.timestampStart(p);
            }

            action timestamp_end {
                builder.timestampEnd(p);
            }

            action end {
                if (receivedInfo == null) {
                    receivedInfo = builder.build();
                }
            }

            atext = alnum | [!#$%&'*+/=?^_`{|}~] | "-";
            atom = atext+;
            id_atom = (atext | [@.<>])+;
            ctext = graph - [()\\];
            quoted_pair = "\\" any;
            quoted_text = " " | "!" | [#-\[] | [\]-~];
            comment_start = "(" @nested_comment;
            ccontent = ctext | quoted_pair | comment_start;
            comment = (space* ccontent)+ space*;
            nested_comment := comment ")" @{fret;};
            cfws = (space* "(" comment ")")+ space* | space+;

            dec_octet =
                digit
                | [1-9] digit
                | "1" digit digit
                | "2" ([0-4] digit | "5" [0-5]);
            ipv4_addr = dec_octet ("." dec_octet){3};

            ipv6_hex = xdigit{1,4};
            ipv6_full = ipv6_hex (":" ipv6_hex){7};
            ipv6_comp =
                "::"
                | ":" (":" ipv6_hex){1,6}
                | ipv6_hex "::" (ipv6_hex (":" ipv6_hex){,4})?
                | ipv6_hex ":" ipv6_hex "::" (ipv6_hex (":" ipv6_hex){,3})?
                | ipv6_hex (":" ipv6_hex){2} "::" (ipv6_hex (":" ipv6_hex){,2})?
                | ipv6_hex (":" ipv6_hex){3} "::" (ipv6_hex (":" ipv6_hex)?)?
                | ipv6_hex (":" ipv6_hex){4} "::" ipv6_hex?;
            ipv6v4_full = ipv6_hex (":" ipv6_hex){5} ":" ipv4_addr;
            ipv6v4_comp =
                "::" (ipv6_hex ":"){,4} ipv4_addr
                | ipv6_hex "::" (ipv6_hex ":"){,3} ipv4_addr
                | ipv6_hex ":" ipv6_hex "::" (ipv6_hex ":"){,2} ipv4_addr
                | ipv6_hex (":" ipv6_hex){2} "::" (ipv6_hex ":")? ipv4_addr
                | ipv6_hex (":" ipv6_hex){3} "::" ipv4_addr;
            ipv6_addr = ipv6_full | ipv6_comp | ipv6v4_full | ipv6v4_comp;

            dcontent = [!-Z^-~];
            ldh_str = (alnum | "-")* alnum;
            general_addr = ldh_str ":" dcontent+;

            addr_literal =
                "[" ("ipv6:"i? ipv6_addr | ipv4_addr | general_addr) "]"
                | ipv6_addr | ipv4_addr;

            subdomain = alnum ldh_str?;
            domain = subdomain ("." subdomain)*;

            dot_string = atom ("." atom)*;
            quoted_context = quoted_text | quoted_pair;
            quoted_string = ["] quoted_context* ["];
            local_part = dot_string | quoted_string;
            at_domain = "@" domain;
            a_d_l = at_domain ("," at_domain)*;
            mailbox = local_part "@" (domain | addr_literal);
            path = "<" (a_d_l ":")? mailbox ">";
            string = atom | quoted_string;
            dtext = [!-Z^-~] | quoted_pair;
            msg_id_left = dot_string | quoted_string;
            msg_id_right = dot_string | "[" dtext* "]";
            msg_id = "<" msg_id_left "@" msg_id_right ">";

            day_name =
                "mon"i | "tue"i | "wed"i | "thu"i | "fri"i | "sat"i | "sun"i;
            day = space* digit digit? space+;
            month =
                "jan"i | "feb"i | "mar"i | "apr"i | "may"i | "jun"i
                | "jul"i | "aug"i | "sep"i | "oct"i | "nov"i | "dec"i;
            year = space+ digit{4} space+;
            time_of_day = digit digit ":" digit digit (":" digit digit)?;
            timezone = space+ ("+" | "-") digit{2,4} ":"? digit digit;
            date = day month year;
            time = time_of_day timezone;
            timestamp =
                (space* day_name ",")?
                (date time) >timestamp_start %timestamp_end cfws?;

            tcp_info =
                (domain >tcp_info_domain_start %tcp_info_domain_end space+)?
                addr_literal >tcp_info_addr_start %tcp_info_addr_end;

            addr_domain =
                addr_literal
                >extended_domain_addr_start %extended_domain_addr_end
                space+ "(" tcp_info ")"
                | (ipv6_addr | ipv4_addr)
                >extended_domain_addr_start %extended_domain_addr_end;

            extended_domain =
                domain
                >extended_domain_domain_start %extended_domain_domain_end
                (space+ "(" tcp_info ")")?
                |
                addr_domain;

            extended_from_domain =
                (((alnum | [+_.\-])+ "@")? domain)
                >extended_domain_domain_start %extended_domain_domain_end
                (space+ "(" tcp_info ")")?
                |
                addr_domain;

            from_domain = "from"i space+ extended_from_domain %commit_from;
            by_domain = "by"i %start_by space+ extended_domain %commit_by;
            by_comment = cfws >by_comment_start %by_comment_end;
            # MS Exchange violates RFC and uses plain strings with spaces
            via_atom =
                atom | ("Mailbox Transport" | "Frontend Transport") cfws?;
            via = by_comment "via"i space+ via_atom;
            with_atom = atom | "Microsoft SMTP Server" cfws?;
            with = by_comment "with"i space+ with_atom >protocol_start %protocol_end;
            # NWSMTP violates https://tools.ietf.org/html/rfc5321#section-4.4
            # which declares ID to be either atom or msg-id and atom couldn't
            # contain dots, but still there is dots and angle brackets in ID
            id = by_comment "id"i space+ cfws? (id_atom | msg_id) >id_start %id_end;
            recipient = by_comment "for"i space+ (path | mailbox)
                >recipient_start %recipient_end;
            additional = by_comment atom space+ string;
            # https://tools.ietf.org/html/rfc822#section-4.1 allowed multiple
            # WITH clauses, so allow it here
            # Also, MS Exchange never heard about elements order in RFC
            opt_info = (via? with* id?|with* id? via?) recipient? additional*;

            # NWSMTP violates https://tools.ietf.org/html/rfc5321#section-4.4
            # which declares FROM clause as mandatory field
            # made it optional here
            main :=
                space* (from_domain cfws)? by_domain opt_info by_comment? ";" space+ timestamp %end;

            write exec;
        }%%;

        if (receivedInfo == null) {
            errorsConsumer.accept(
                new ErrorInfo(
                    ErrorInfo.Scope.RECEIVED,
                    ErrorInfo.Type.SYNTAX_ERROR,
                    "Failed to parse Received: <"
                    + new String(data, off, len) + '>'));
        }

        return receivedInfo;
    }
}

