import deobfuscator from "common.dsl";

create binary_jniwapper extractor ocraas(binary image) -> string {
    # Declares extractor ocraas: binary image in, string out.
    # NOTE(review): library, ctor, dtor, main and free presumably name the shared
    # library and the JniWrapper entry points used for create, destroy, parse and
    # release - confirm against the binary_jniwapper implementation.
    library = $(LIBOCRAAS)
    ctor = JniWrapperCreateOcraas
    dtor = JniWrapperDestroyOcraas
    main = JniWrapperParseImage
    free = JniWrapperFree
    # The config value below spans several lines via trailing backslash escapes.
    # It holds an Ocraas section whose Workers, OcrConfig and MaxProcessTime
    # fields are filled from the OCRAAS_INSTANCES, OCRAAS_CONFIG and
    # OCRAAS_MAX_PROCESS_TIME variables.
    config =\\
    <Ocraas>\\n\\
        Workers : $(OCRAAS_INSTANCES)\\n\\
        OcrConfig : $(OCRAAS_CONFIG)\\n\\
        MaxProcessTime : $(OCRAAS_MAX_PROCESS_TIME)\\n\\
    </Ocraas>
}

create chain extractor trace_ocraas(binary image) -> string {
    trace ocraas(image) -> ocr_text;
    return ocr_text;
}

create narrow extractor narrow_ocraas(binary image) -> string {
    \{
        # limit uses the OCRAAS_INSTANCES variable, the same one that sets
        # Workers in the ocraas extractor config.
        # NOTE(review): presumably limit caps concurrent calls and queued caps
        # waiting calls - confirm against the narrow extractor implementation.
        limit = $(OCRAAS_INSTANCES)
        queued = $(QUEUE_OCR_WORKERS)
        # Wrapped extractor: the trace_ocraas chain declared in this file.
        extractor = trace_ocraas
        # stater config
        stats-prefix = ocr
        charts-category = main-extract-nodes
        charts-title = OCR
    \}
}

create narrow extractor html2png(string html) -> binary {
    \{
        # limit uses the SELENIUM_INSTANCES variable.
        limit = $(SELENIUM_INSTANCES)
        # NOTE(review): queued reuses QUEUE_OCR_WORKERS, the same variable that
        # narrow_ocraas uses - confirm that sharing it is intended.
        queued = $(QUEUE_OCR_WORKERS)
        # Wrapped extractor: the trace_render chain declared further down in
        # this same block.
        extractor = trace_render
        # stater config
        stats-prefix = html2png
        charts-category = main-extract-nodes
        # NOTE(review): this title is identical to the narrow_ocraas title, so
        # both charts in main-extract-nodes are named OCR - confirm this is
        # intended and not a copy-paste leftover.
        charts-title = OCR
    \}

    create chain extractor trace_render(string html) -> binary \{
        create html2png extractor render(string html) -> binary \{
            driver-path = /usr/bin/geckodriver
            binary-path = /usr/bin/firefox
            width = 1280
            height = 1080
        \}

        trace render(html) -> png;
        return png;
    \}
}

create eml2html extractor eml2html(raw_mail mail) -> html {
    # Declares extractor eml2html: raw_mail in, html out.
    # NOTE(review): 1572864 equals 1.5 * 1024 * 1024, so the unit is presumably
    # bytes - TODO confirm.
    max-input-length = 1572864
    # NOTE(review): presumably makes over-long input get cut to the limit above
    # instead of being rejected - confirm against the eml2html implementation.
    truncate-long-input = true
    # Same sanitizer config path that embed_html_images uses.
    sanitizing-config = $(SANITIZER_CONFIGS_ROOT)/configs/mail-secproxy.conf
}

create embed_html_images extractor embed_html_images(html short_html, raw_mail mail) -> string {
    # Declares extractor embed_html_images: takes an html value plus the
    # raw_mail it came from and returns a string.
    # Same sanitizer config path that eml2html uses.
    sanitizing-config = $(SANITIZER_CONFIGS_ROOT)/configs/mail-secproxy.conf
}

create chain extractor render_html(string html) -> binary {
    html2png(html) -> rendered;
    return rendered if not is_any_null(html);
}

create chain extractor png2text(binary png) -> string {
    narrow_ocraas(png) -> recognized;
    return recognized if not is_any_null(png);
}

create chain extractor ocr(raw_mail mail, smtp_envelope envelope) -> json_map {
    create compose_doc extractor compose_doc(
        json_map base, string html_text, string ocr_text, string plain_text,
        string deobfuscated_ocr_text, string deobfuscated_plain_text) -> json_map
    {
    }

    trace async eml2html(mail) -> html;
    trace embed_html_images(html, mail) -> full_html;
    render_html(full_html) -> rendered_png;
    png2text(rendered_png) -> raw_ocr_text;
    trace html_text(html) -> plain_text;
    trace deobfuscator(plain_text) -> deobfuscated_plain_text;
    trim_string(raw_ocr_text) -> ocr_trimmed;
    trace deobfuscator(ocr_trimmed) -> deobfuscated_ocr_text;

    create_json_map() -> base_map;
    compose_doc(base_map, full_html, ocr_trimmed, plain_text,
        deobfuscated_ocr_text, deobfuscated_plain_text) -> composed;

    return composed;
}

