package ru.yandex.url.processor;

import java.util.function.Consumer;

/**
 * URL scanner whose matching logic is a Ragel state machine embedded in this
 * source file; the Ragel directives below are expanded into Java code by the
 * Ragel compiler before this class is compiled.
 *
 * <p>The grammar recognizes URL-like tokens in a stream of chars: URLs with
 * an explicit scheme (http, https, ftp, ssh or a custom one), mailto and tel
 * links, bare e-mail-like addresses, hosts starting with "www." or with an
 * "xn--" label, plain dotted host names, and bare IPv4 addresses followed by
 * a path.
 *
 * <p>NOTE(review): {@code accum}, {@code add(boolean)} and {@code clear(int)}
 * used by the grammar actions are not declared in this file; presumably they
 * are inherited from {@link UrlProcessorBase} - confirm.
 */
public class UrlProcessor extends UrlProcessorBase {
    // Names the Ragel machine; the "write data" directive on the line after
    // it is where Ragel emits the generated static data for that machine.
    %% machine UrlProcessor;
    %% write data;

    // Current state of the Ragel machine. Assigned by the generated init code
    // in reset() and kept in a field, so it survives between
    // processInternal() calls.
    private int cs;

    /**
     * Creates a processor that uses
     * {@link ImmutableUrlProcessorConfig#DEFAULT_CONFIG}.
     *
     * @param collector passed on to the two-argument constructor; presumably
     *     receives each detected {@link UrlInfo} - confirm against
     *     {@link UrlProcessorBase}
     */
    public UrlProcessor(final Consumer<UrlInfo> collector) {
        this(collector, ImmutableUrlProcessorConfig.DEFAULT_CONFIG);
    }

    /**
     * Creates a processor with an explicit configuration and initializes the
     * state machine through {@link #reset()}.
     *
     * @param collector forwarded to the {@link UrlProcessorBase} constructor
     * @param config forwarded to the {@link UrlProcessorBase} constructor
     */
    public UrlProcessor(
        final Consumer<UrlInfo> collector,
        final ImmutableUrlProcessorConfig config)
    {
        super(collector, config);
        reset();
    }

    /**
     * Resets the base class state and then re-initializes the Ragel machine:
     * the "write init" directive expands to code that assigns {@code cs}.
     */
    @Override
    public void reset() {
        super.reset();
        %% write init;
    }

    /**
     * Runs the Ragel machine over {@code data[off .. off + len)}.
     *
     * <p>A {@code null} {@code data} array makes {@code eof} equal to
     * {@code off}; for any other input {@code eof} is -1. NOTE(review): this
     * looks like the end-of-input signal from the base class (with
     * {@code len == 0}), so that a pending token can be finished - confirm
     * against the caller.
     *
     * <p>NOTE(review): the fallthrough warning is presumably suppressed
     * because the generated code falls through switch cases - confirm.
     *
     * @param data chars to scan, or {@code null} (see above)
     * @param off index of the first char to scan
     * @param len number of chars to scan
     */
    @Override
    @SuppressWarnings("fallthrough")
    protected void processInternal(
        final char[] data,
        final int off,
        final int len)
    {
        // p, pe and eof are the variable names the Ragel-generated exec code
        // works with: current position, end of this buffer, and end-of-input
        // position.
        int p = off;
        int eof = data == null? off : -1;
        int pe = off + len;
        // Ragel grammar: actions first, then character classes and URL
        // building blocks, then the recovery machine and the "main" machine.
        // The "write exec" statement at the end is where the generated
        // matching loop is emitted.
        %%{
            action add_char {
                accum.append(fc);
            }

            action scheme {
                accum.scheme();
            }

            action www_prefix {
                accum.wwwPrefix();
            }

            action idn_prefix {
                accum.idnPrefix();
            }

            action userinfo {
                accum.userinfo();
            }

            action add {
                add(false);
            }

            action clear {
                clear(p);
            }

            action recover_from_err {
                if (accum.canBeUrl()) {
                    add(true);
                }
                fhold;
                fgoto recover_from_err;
            }

            # made with mematic^W junk/dpotapov/ragel_class_generator/Main.java
            unialpha =
                0x41..0x5a | 0x61..0x7a | 0xaa | 0xb5 | 0xba | 0xc0..0xd6
                | 0xd8..0xf6 | 0xf8..0x2c1 | 0x2c6..0x2d1 | 0x2e0..0x2e4
                | 0x2ec | 0x2ee | 0x345 | 0x370..0x374 | 0x376..0x377
                | 0x37a..0x37d | 0x37f | 0x386 | 0x388..0x38a | 0x38c
                | 0x38e..0x3a1 | 0x3a3..0x3f5 | 0x3f7..0x481 | 0x48a..0x52f
                | 0x531..0x556 | 0x559 | 0x561..0x587 | 0x5b0..0x5bd | 0x5bf
                | 0x5c1..0x5c2 | 0x5c4..0x5c5 | 0x5c7 | 0x5d0..0x5ea
                | 0x5f0..0x5f2 | 0x610..0x61a | 0x620..0x657 | 0x659..0x65f
                | 0x66e..0x6d3 | 0x6d5..0x6dc | 0x6e1..0x6e8 | 0x6ed..0x6ef
                | 0x6fa..0x6fc | 0x6ff | 0x710..0x73f | 0x74d..0x7b1
                | 0x7ca..0x7ea | 0x7f4..0x7f5 | 0x7fa | 0x800..0x817
                | 0x81a..0x82c | 0x840..0x858 | 0x860..0x86a | 0x8a0..0x8b4
                | 0x8b6..0x8bd | 0x8d4..0x8df | 0x8e3..0x8e9 | 0x8f0..0x93b
                | 0x93d..0x94c | 0x94e..0x950 | 0x955..0x963 | 0x971..0x983
                | 0x985..0x98c | 0x98f..0x990 | 0x993..0x9a8 | 0x9aa..0x9b0
                | 0x9b2 | 0x9b6..0x9b9 | 0x9bd..0x9c4 | 0x9c7..0x9c8
                | 0x9cb..0x9cc | 0x9ce | 0x9d7 | 0x9dc..0x9dd | 0x9df..0x9e3
                | 0x9f0..0x9f1 | 0x9fc | 0xa01..0xa03 | 0xa05..0xa0a
                | 0xa0f..0xa10 | 0xa13..0xa28 | 0xa2a..0xa30 | 0xa32..0xa33
                | 0xa35..0xa36 | 0xa38..0xa39 | 0xa3e..0xa42 | 0xa47..0xa48
                | 0xa4b..0xa4c | 0xa51 | 0xa59..0xa5c | 0xa5e | 0xa70..0xa75
                | 0xa81..0xa83 | 0xa85..0xa8d | 0xa8f..0xa91 | 0xa93..0xaa8
                | 0xaaa..0xab0 | 0xab2..0xab3 | 0xab5..0xab9 | 0xabd..0xac5
                | 0xac7..0xac9 | 0xacb..0xacc | 0xad0 | 0xae0..0xae3
                | 0xaf9..0xafc | 0xb01..0xb03 | 0xb05..0xb0c | 0xb0f..0xb10
                | 0xb13..0xb28 | 0xb2a..0xb30 | 0xb32..0xb33 | 0xb35..0xb39
                | 0xb3d..0xb44 | 0xb47..0xb48 | 0xb4b..0xb4c | 0xb56..0xb57
                | 0xb5c..0xb5d | 0xb5f..0xb63 | 0xb71 | 0xb82..0xb83
                | 0xb85..0xb8a | 0xb8e..0xb90 | 0xb92..0xb95 | 0xb99..0xb9a
                | 0xb9c | 0xb9e..0xb9f | 0xba3..0xba4 | 0xba8..0xbaa
                | 0xbae..0xbb9 | 0xbbe..0xbc2 | 0xbc6..0xbc8 | 0xbca..0xbcc
                | 0xbd0 | 0xbd7 | 0xc00..0xc03 | 0xc05..0xc0c | 0xc0e..0xc10
                | 0xc12..0xc28 | 0xc2a..0xc39 | 0xc3d..0xc44 | 0xc46..0xc48
                | 0xc4a..0xc4c | 0xc55..0xc56 | 0xc58..0xc5a | 0xc60..0xc63
                | 0xc80..0xc83 | 0xc85..0xc8c | 0xc8e..0xc90 | 0xc92..0xca8
                | 0xcaa..0xcb3 | 0xcb5..0xcb9 | 0xcbd..0xcc4 | 0xcc6..0xcc8
                | 0xcca..0xccc | 0xcd5..0xcd6 | 0xcde | 0xce0..0xce3
                | 0xcf1..0xcf2 | 0xd00..0xd03 | 0xd05..0xd0c | 0xd0e..0xd10
                | 0xd12..0xd3a | 0xd3d..0xd44 | 0xd46..0xd48 | 0xd4a..0xd4c
                | 0xd4e | 0xd54..0xd57 | 0xd5f..0xd63 | 0xd7a..0xd7f
                | 0xd82..0xd83 | 0xd85..0xd96 | 0xd9a..0xdb1 | 0xdb3..0xdbb
                | 0xdbd | 0xdc0..0xdc6 | 0xdcf..0xdd4 | 0xdd6 | 0xdd8..0xddf
                | 0xdf2..0xdf3 | 0xe01..0xe3a | 0xe40..0xe46 | 0xe4d
                | 0xe81..0xe82 | 0xe84 | 0xe87..0xe88 | 0xe8a | 0xe8d
                | 0xe94..0xe97 | 0xe99..0xe9f | 0xea1..0xea3 | 0xea5 | 0xea7
                | 0xeaa..0xeab | 0xead..0xeb9 | 0xebb..0xebd | 0xec0..0xec4
                | 0xec6 | 0xecd | 0xedc..0xedf | 0xf00 | 0xf40..0xf47
                | 0xf49..0xf6c | 0xf71..0xf81 | 0xf88..0xf97 | 0xf99..0xfbc
                | 0x1000..0x1036 | 0x1038 | 0x103b..0x103f | 0x1050..0x1062
                | 0x1065..0x1068 | 0x106e..0x1086 | 0x108e | 0x109c..0x109d
                | 0x10a0..0x10c5 | 0x10c7 | 0x10cd | 0x10d0..0x10fa
                | 0x10fc..0x1248 | 0x124a..0x124d | 0x1250..0x1256 | 0x1258
                | 0x125a..0x125d | 0x1260..0x1288 | 0x128a..0x128d
                | 0x1290..0x12b0 | 0x12b2..0x12b5 | 0x12b8..0x12be | 0x12c0
                | 0x12c2..0x12c5 | 0x12c8..0x12d6 | 0x12d8..0x1310
                | 0x1312..0x1315 | 0x1318..0x135a | 0x135f | 0x1380..0x138f
                | 0x13a0..0x13f5 | 0x13f8..0x13fd | 0x1401..0x166c
                | 0x166f..0x167f | 0x1681..0x169a | 0x16a0..0x16ea
                | 0x16ee..0x16f8 | 0x1700..0x170c | 0x170e..0x1713
                | 0x1720..0x1733 | 0x1740..0x1753 | 0x1760..0x176c
                | 0x176e..0x1770 | 0x1772..0x1773 | 0x1780..0x17b3
                | 0x17b6..0x17c8 | 0x17d7 | 0x17dc | 0x1820..0x1877
                | 0x1880..0x18aa | 0x18b0..0x18f5 | 0x1900..0x191e
                | 0x1920..0x192b | 0x1930..0x1938 | 0x1950..0x196d
                | 0x1970..0x1974 | 0x1980..0x19ab | 0x19b0..0x19c9
                | 0x1a00..0x1a1b | 0x1a20..0x1a5e | 0x1a61..0x1a74 | 0x1aa7
                | 0x1b00..0x1b33 | 0x1b35..0x1b43 | 0x1b45..0x1b4b
                | 0x1b80..0x1ba9 | 0x1bac..0x1baf | 0x1bba..0x1be5
                | 0x1be7..0x1bf1 | 0x1c00..0x1c35 | 0x1c4d..0x1c4f
                | 0x1c5a..0x1c7d | 0x1c80..0x1c88 | 0x1ce9..0x1cec
                | 0x1cee..0x1cf3 | 0x1cf5..0x1cf6 | 0x1d00..0x1dbf
                | 0x1de7..0x1df4 | 0x1e00..0x1f15 | 0x1f18..0x1f1d
                | 0x1f20..0x1f45 | 0x1f48..0x1f4d | 0x1f50..0x1f57
                | 0x1f59 | 0x1f5b | 0x1f5d | 0x1f5f..0x1f7d | 0x1f80..0x1fb4
                | 0x1fb6..0x1fbc | 0x1fbe | 0x1fc2..0x1fc4 | 0x1fc6..0x1fcc
                | 0x1fd0..0x1fd3 | 0x1fd6..0x1fdb | 0x1fe0..0x1fec
                | 0x1ff2..0x1ff4 | 0x1ff6..0x1ffc | 0x2071 | 0x207f
                | 0x2090..0x209c | 0x2102 | 0x2107 | 0x210a..0x2113 | 0x2115
                | 0x2119..0x211d | 0x2124 | 0x2126 | 0x2128 | 0x212a..0x212d
                | 0x212f..0x2139 | 0x213c..0x213f | 0x2145..0x2149 | 0x214e
                | 0x2160..0x2188 | 0x24b6..0x24e9 | 0x2c00..0x2c2e
                | 0x2c30..0x2c5e | 0x2c60..0x2ce4 | 0x2ceb..0x2cee
                | 0x2cf2..0x2cf3 | 0x2d00..0x2d25 | 0x2d27 | 0x2d2d
                | 0x2d30..0x2d67 | 0x2d6f | 0x2d80..0x2d96 | 0x2da0..0x2da6
                | 0x2da8..0x2dae | 0x2db0..0x2db6 | 0x2db8..0x2dbe
                | 0x2dc0..0x2dc6 | 0x2dc8..0x2dce | 0x2dd0..0x2dd6
                | 0x2dd8..0x2dde | 0x2de0..0x2dff | 0x2e2f | 0x3005..0x3007
                | 0x3021..0x3029 | 0x3031..0x3035 | 0x3038..0x303c
                | 0x3041..0x3096 | 0x309d..0x309f | 0x30a1..0x30fa
                | 0x30fc..0x30ff | 0x3105..0x312e | 0x3131..0x318e
                | 0x31a0..0x31ba | 0x31f0..0x31ff | 0x3400..0x4db5
                | 0x4e00..0x9fea | 0xa000..0xa48c | 0xa4d0..0xa4fd
                | 0xa500..0xa60c | 0xa610..0xa61f | 0xa62a..0xa62b
                | 0xa640..0xa66e | 0xa674..0xa67b | 0xa67f..0xa6ef
                | 0xa717..0xa71f | 0xa722..0xa788 | 0xa78b..0xa7ae
                | 0xa7b0..0xa7b7 | 0xa7f7..0xa801 | 0xa803..0xa805
                | 0xa807..0xa80a | 0xa80c..0xa827 | 0xa840..0xa873
                | 0xa880..0xa8c3 | 0xa8c5 | 0xa8f2..0xa8f7 | 0xa8fb | 0xa8fd
                | 0xa90a..0xa92a | 0xa930..0xa952 | 0xa960..0xa97c
                | 0xa980..0xa9b2 | 0xa9b4..0xa9bf | 0xa9cf | 0xa9e0..0xa9e4
                | 0xa9e6..0xa9ef | 0xa9fa..0xa9fe | 0xaa00..0xaa36
                | 0xaa40..0xaa4d | 0xaa60..0xaa76 | 0xaa7a | 0xaa7e..0xaabe
                | 0xaac0 | 0xaac2 | 0xaadb..0xaadd | 0xaae0..0xaaef
                | 0xaaf2..0xaaf5 | 0xab01..0xab06 | 0xab09..0xab0e
                | 0xab11..0xab16 | 0xab20..0xab26 | 0xab28..0xab2e
                | 0xab30..0xab5a | 0xab5c..0xab65 | 0xab70..0xabea
                | 0xac00..0xd7a3 | 0xd7b0..0xd7c6 | 0xd7cb..0xd7fb
                | 0xf900..0xfa6d | 0xfa70..0xfad9 | 0xfb00..0xfb06
                | 0xfb13..0xfb17 | 0xfb1d..0xfb28 | 0xfb2a..0xfb36
                | 0xfb38..0xfb3c | 0xfb3e | 0xfb40..0xfb41 | 0xfb43..0xfb44
                | 0xfb46..0xfbb1 | 0xfbd3..0xfd3d | 0xfd50..0xfd8f
                | 0xfd92..0xfdc7 | 0xfdf0..0xfdfb | 0xfe70..0xfe74
                | 0xfe76..0xfefc | 0xff21..0xff3a | 0xff41..0xff5a
                | 0xff66..0xffbe | 0xffc2..0xffc7 | 0xffca..0xffcf
                | 0xffd2..0xffd7 | 0xffda..0xffdc;
            unialnum =
                unialpha
                | 0x30..0x39 | 0x660..0x669 | 0x6f0..0x6f9 | 0x7c0..0x7c9
                | 0x966..0x96f | 0x9e6..0x9ef | 0xa66..0xa6f | 0xae6..0xaef
                | 0xb66..0xb6f | 0xbe6..0xbef | 0xc66..0xc6f | 0xce6..0xcef
                | 0xd66..0xd6f | 0xde6..0xdef | 0xe50..0xe59 | 0xed0..0xed9
                | 0xf20..0xf29 | 0x1040..0x1049 | 0x1090..0x1099
                | 0x17e0..0x17e9 | 0x1810..0x1819 | 0x1946..0x194f
                | 0x19d0..0x19d9 | 0x1a80..0x1a89 | 0x1a90..0x1a99
                | 0x1b50..0x1b59 | 0x1bb0..0x1bb9 | 0x1c40..0x1c49
                | 0x1c50..0x1c59 | 0xa620..0xa629 | 0xa8d0..0xa8d9
                | 0xa900..0xa909 | 0xa9d0..0xa9d9 | 0xa9f0..0xa9f9
                | 0xaa50..0xaa59 | 0xabf0..0xabf9 | 0xff10..0xff19;
            pct = "%" xdigit xdigit;
            subdelims = [!$&'()*+,;=];
            unreserved = unialnum | [._~] | "-";
            userinfochar = pct | subdelims | unreserved | ":";
            pchar = userinfochar | "@";
            querychar = pchar | [/?#];
            # delim = ^querychar, but pct express 3-char sequence, which is not
            # allowed for negation, so, recombine them
            delim = ^(subdelims | unreserved | [:@/?#%]);
            tokenstart = unialnum | "_";
            dec_octet =
                digit |
                [1-9] digit |
                "1" digit digit |
                "2" ([0-4] digit| "5" [0-5]);
            hex_octet = "0" [xX] xdigit xdigit?;
            oct_octet = "0"+ ([1-7] [0-7]? | [1-3] [0-7] [0-7]);
            ipv4_octet = dec_octet | hex_octet | oct_octet;
            ipv4 = ipv4_octet ("." ipv4_octet){3};
            scheme =
                ("http://"i | "https://"i | "ftp://"i | "ssh://"i) @scheme;
            path = [/?#] querychar*;
            port = ":" digit*;
            idn_prefix = "xn--"i @idn_prefix;
            idn_label = idn_prefix alnum ((alnum | "-")* alnum)?;
            topdomain = unialpha unialnum+ | idn_label;
            label_char = unialnum | "_";
            alnum_label = alnum ((alnum | "_" | "-")* alnum)?;
            unialnum_label = label_char ((label_char | "-")* label_char)?;
            label = alnum_label | unialnum_label;
            hostend = (label ".")* topdomain;
            host = (label ".")+ topdomain;
            numeric_host = [0][xX]xdigit+ | [0-9]+;
            userinfo = userinfochar* "@";
            uri_full = scheme userinfo? (host | ipv4 | numeric_host) port?;
            uri_mailto =
                "mailto:"i @scheme userinfo? (host | ipv4)
                | userinfochar+ "@" %userinfo host;
            uri_tel = "tel:"i @scheme "+"? digit+;
            uri_custom_scheme =
                alpha (alnum "+"?)* alnum "://" @scheme
                userinfo? (hostend | ipv4 | numeric_host) port?;
            uri_www = "www."i @www_prefix hostend port?;
            uri_idn = idn_label "." hostend port?;
            uri_maybe = host port?;
            uri_optend =
                (uri_full
                | uri_mailto
                | uri_tel
                | uri_custom_scheme
                | uri_www
                | uri_idn
                | uri_maybe) path?;
            uri_ipv4 = ipv4 port? path;
            uri = (uri_optend | uri_ipv4) >clear $add_char %add;
            token = uri $err(recover_from_err);
            recover_from_err := ^delim* delim @{fgoto main;};
            main := ^tokenstart* (token (delim ^tokenstart* token)*)? delim*;

            write exec;
        }%%
    }
}

