package ru.yandex.parser.html.generated;

import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import ru.yandex.parser.html.HtmlProcessorBase;

/**
 * Ragel specification of a lenient HTML tokenizer.
 *
 * <p>This file is a Ragel source with Java as the host language: the
 * {@code %%} statements and the {@code %%{ ... }%%} section are expanded by
 * Ragel into the state tables and the execution loop of the machine named
 * {@code HtmlProcessor}.
 *
 * <p>The embedded actions call helpers such as {@code addChar},
 * {@code storeName}, {@code tagStart} or {@code barrier} that are not defined
 * in this file; presumably they are inherited from
 * {@link HtmlProcessorBase} -- confirm against that class.
 */
public class HtmlProcessor extends HtmlProcessorBase {
    %% machine HtmlProcessor;
    %% write data;

    /**
     * Current state of the Ragel machine. It is a field rather than a local
     * of {@link #process}, so the state reached by one call is the starting
     * state of the next one.
     */
    private int cs = 0;

    /**
     * Creates a processor and puts the state machine into its start state.
     *
     * @param handler passed through to the {@link HtmlProcessorBase}
     *     constructor
     */
    public HtmlProcessor(final ContentHandler handler) {
        super(handler);
        %% write init;
    }

    /**
     * Runs the state machine over {@code data[off .. off + len)}.
     *
     * <p>A {@code null} {@code data} is treated as the end-of-input marker:
     * {@code p}, {@code pe} and {@code eof} are all set to {@code off}, so no
     * characters are consumed and only the machine's end-of-input handling
     * runs. For a non-null {@code data} {@code eof} is {@code -1}, which is
     * never reached by {@code p} (assuming {@code off} is non-negative), so
     * the chunk is processed as a part of a longer stream.
     *
     * <p>NOTE(review): the {@code fallthrough} suppression is presumably
     * needed for the switch statements Ragel generates -- confirm.
     *
     * @param data characters to process, or {@code null} to signal the end
     *     of input
     * @param off index of the first character to process
     * @param len number of characters to process; not used when
     *     {@code data} is {@code null}
     * @throws SAXException declared for the embedded actions; presumably
     *     raised by the handler callbacks they invoke
     */
    @SuppressWarnings("fallthrough")
    @Override
    public void process(final char[] data, final int off, final int len)
        throws SAXException
    {
        int p = off;
        int eof = data == null ? off : -1;
        int pe = data == null ? off : (off + len);
        %%{
            # Entering action of the character that ends a character
            # reference without a terminating ";" (see char_ref below).
            action bad_char_ref {
                addCharEntity(fc);
            }

            # Error action attached to every state of body and main: clears
            # the buffer, keeps the offending character in the input (fhold)
            # and continues in the recover_token machine defined below.
            action recover_token {
                clearBuffer();
                fhold;
                fgoto recover_token;
            }

            # Error recovery: skip everything up to and including the first
            # of CR, LF, "<" or ">", then resume tokenizing in body.
            recover_char = [\r\n<>];
            recover_token := ^recover_char* (recover_char @{fgoto body;});

            name_char = alnum | [._:\-];
            name = (alpha | [_:]) name_char*;

            # i don't care about xml version, encoding, standalone flag etc.
            xmldecl = "<?xml" space+ [^>]* ">";

            # DOCTYPE declaration with optional external id and optional
            # internal subset in square brackets. No actions are attached,
            # so its content is only recognized, not reported.
            system_id = '"' [^"]* '"' | "'" [^']* "'";
            public_id_char = alnum | [ \t\-()+,./:?;!*\#@$_%];
            public_id = '"' (public_id_char | "'")* '"'
                | "'" (public_id_char | '"')* "'";
            external_id = ("SYSTEM" | "PUBLIC" space* public_id)
                (space+ system_id)?;
            doctype =
                "<!DOCTYPE" space+ name
                (space+ external_id)? space*
                ("[" [^\]]* "]" space*)? ">";

            # Comment body is any text that does not contain "-->"; no
            # actions are attached to it.
            comment = "<!--" (any* - (any* "-->" any*)) "-->";
            # CDATA body as an explicit state chart. States "one" and "two"
            # mean that one or two "]" have been seen but not yet emitted;
            # they are passed to addCharNoResize only once it is clear that
            # they are not part of the closing "]]>".
            cdata_body =
                start: (
                    "]" -> one |
                    [^\]] @{addCharNoResize(fc);} -> start),
                one: (
                    "]" -> two |
                    [^\]] @{addCharNoResize(']', fc);} -> start),
                two: (
                    ">" -> final |
                    "]" @{addCharNoResize(']');} -> two |
                    [^\]>] @{addCharNoResize(']', ']', fc);} -> start);
            cdata = "<![CDATA[" cdata_body;

            # Character reference: "&" followed by alphanumerics, "." or "#".
            # clearEntityBuffer() runs on the "&" and addEntChar() on every
            # character of the body. A ";" terminator calls charRef(); any
            # other terminating character triggers bad_char_ref instead.
            char_ref_body = "&" (alnum | [.#])*;
            char_ref = char_ref_body >{clearEntityBuffer();} ${addEntChar(fc);}
                (";" ${charRef(fc);} | (any - (alnum | [.#;])) >bad_char_ref);

            # Tag and attribute names and attribute values are accumulated
            # with addChar() and handed over by the store*() call when the
            # corresponding sub-machine is left.
            tag_name = name ${addChar(fc);} %{storeName();};
            tag_attr_name = name_char+ ${addChar(fc);} %{storeAttrName();};
            double_quoted = '"' ([^"] ${addChar(fc);})* '"';
            single_quoted = "'" ([^'] ${addChar(fc);})* "'";
            unquoted = ^(['"=<>`]|space)+ ${addChar(fc);};
            tag_attr_value =
                (double_quoted | single_quoted | unquoted) %{storeTagValue();};
            # NOTE(review): as written an attribute must have "=" and a
            # value; an attribute without a value does not match tag_attr
            # and ends up in the recover_token error action -- confirm this
            # is intended.
            tag_attr = tag_attr_name space* "=" space* tag_attr_value;
            tag_body = tag_name (space+ tag_attr)* space*;
            tag_end = ("/>" @{emptyTag();} | ">" @{tagStart();});
            tag = "<">{clearAttributes();} tag_body tag_end;
            # NOTE(review): no whitespace is accepted between the name and
            # ">" of an end tag -- confirm this is intended.
            end_tag = "</" tag_name ">" @{tagEnd();};
            # Text is a run of character references and of any characters
            # other than "<" and "&".
            text = (char_ref | ([^<&] ${addCharNoResize(fc);}))+;
            # barrier() runs on the first character of every markup token.
            tags = (comment | cdata | tag | end_tag) >{barrier();};
            token = tags | text;
            comments = space* (comment space*)*;
            # body is the entry point used after error recovery: tokens
            # only, without the document prologue and without
            # documentStart().
            body := token* %{barrier();} %{documentEnd();} $err(recover_token);
            # main is the start of the machine: optional DOCTYPE or XML
            # declaration, leading comments, then tokens. documentStart()
            # is the entering action; barrier() and documentEnd() are the
            # leaving actions.
            main :=
                ((doctype | xmldecl)? comments token*) >{documentStart();}
                %{barrier();} %{documentEnd();} $err(recover_token);

            write exec;
        }%%
    }
}

